In [1]:
import pandas as pd
%load_ext autoreload
%autoreload 2
import sys
import os
# Project root, relative to this notebook's working directory.
root_path = '../../../'

# Make the project's source folders importable. These are appended, so they
# come after whatever is already on sys.path.
for _subdir in ('code', 'code/core', 'code/datasets/'):
    sys.path.append('{}/{}'.format(root_path, _subdir))

# The ptranking folder is placed first on sys.path instead of being appended.
sys.path.insert(0, '{}/{}'.format(root_path, 'code/ptranking'))

from core.ranking_utils import *
from core.mallows import *
from core.ws_ranking import *
from core.ws_real_workflow import * 
from datasets.imdb_tmdb_dataset import * 
from datasets.basic_clmn_dataset import * 
from core.labelling.feature_lf import *
from ptranking_wrapper import PtrankingWrapper
import datasets_factory 
import numpy as np 
import yaml
import matplotlib.pyplot as plt
import pickle

In [2]:
# Sweep over training-set sizes, repeating each size once per seed. For every
# (seed, n_train) pair: load the experiment config, specialise its paths for
# this training-set size, build the dataset, and train the ranking model.
# `use_weak_labels` is forced to False below, so training uses the true labels.
import random  # cell-local imports: only needed here to seed the RNGs

import torch

# The config location does not depend on the loop variables; build it once.
config_file_path = '{}/configs/extended-imdb-tmdb_ranking_experiment.yaml'.format(root_path)

# Set to True to pickle every run's result under ./results/.
save_results = False

# Result of every run keyed by (n_train, seed), so earlier runs are not lost
# when `result` is rebound on the next iteration.
results_by_run = {}

for seed in range(5):
    for n_train in [50, 100, 250, 500, 1000]:
        # Actually use `seed`: seed the global RNGs so each repeat is reproducible.
        # NOTE(review): this only helps if the dataset/model code draws from these
        # global generators rather than from privately seeded ones - confirm.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

        # Reload the config on every run: the path entries below are extended
        # in place, so a reused dict would accumulate nested exp_name suffixes.
        with open(config_file_path, 'r') as conf_file:
            conf = yaml.full_load(conf_file)
        conf['project_root'] = root_path

        exp_name = f'n_train-{n_train}'
        # NOTE(review): exp_name does not include the seed, so every seed with the
        # same n_train shares these directories - confirm that is intended.

        # data_conf
        conf['data_conf']['n_train'] = n_train
        conf['data_conf']['n_test'] = 500
        conf['data_conf']['processed_data_path'] = os.path.join(
            conf['data_conf']['processed_data_path'], exp_name)

        # l2r_training_conf
        conf['l2r_training_conf']['use_weak_labels'] = False
        conf['l2r_training_conf']['model_checkpoint'] = os.path.join(
            conf['l2r_training_conf']['model_checkpoint'], exp_name)

        # weak_sup_conf
        conf['weak_sup_conf']['checkpoint_path'] = os.path.join(
            conf['weak_sup_conf']['checkpoint_path'], exp_name)

        # result_path
        conf['results_path'] = os.path.join(conf['results_path'], exp_name)

        data_conf = conf['data_conf']
        # Original note here was truncated: "For partial ranking experiments, we should give ..."
        weak_sup_conf = conf['weak_sup_conf']
        l2r_training_conf = conf['l2r_training_conf']
        data_conf['project_root'] = root_path

        dataset = datasets_factory.create_dataset(data_conf)
        dataset.create_samples()

        if l2r_training_conf['use_weak_labels']:
            # Weak-label path (not taken while use_weak_labels is False above):
            # get weak labels and report their mean KT distance to dataset.Y.
            Y_tilde, thetas = get_weak_labels(dataset, weak_sup_conf, root_path=root_path)
            r_utils = RankingUtils(data_conf['dimension'])
            kt = r_utils.mean_kt_distance(Y_tilde, dataset.Y)
            print('kt distance: ', kt)
            dataset.set_Y_tilde(Y_tilde)
        else:
            kt = None

        ptwrapper = PtrankingWrapper(data_conf=data_conf, weak_sup_conf=weak_sup_conf,
                                     l2r_training_conf=l2r_training_conf,
                                     result_path=conf['results_path'],
                                     wl_kt_distance=kt)
        X_train, X_test, Y_train, Y_test = dataset.get_train_test_torch(
            use_weak_labels=l2r_training_conf['use_weak_labels'])
        ptwrapper.set_data(X_train=X_train, X_test=X_test,
                           Y_train=Y_train, Y_test=Y_test)
        model = ptwrapper.get_model()
        result = ptwrapper.train_model(model, verbose=1)

        results_by_run[(n_train, seed)] = result

        if save_results:
            # Opt-in persistence; replaces the previously commented-out pickle dump.
            os.makedirs('results', exist_ok=True)
            result_path = os.path.join('results', f"smalltrue_{int(n_train)}_{seed}.pickle")
            with open(result_path, 'wb') as f:
                pickle.dump(result, f)

Generate samples...
use_weak_labels:False, we will use true labels
Training data shape, X_train.shape torch.Size([50, 5, 16]) Y_train.shape torch.Size([50, 5])
set_and_load_data in LTREvaluator
(50, 5, 16) (50, 5) (50,)
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/n_train-50', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_presort': True, 'train_batch_size': 64, 'validation_batch_size': 1, 'test_batch_size': 1, 'unknown_as_zero': False, 'binary_rele': False, 'num_features': 16, 'has_comment': False, 'label_type': <LABEL_TYPE.Permutation: 2>, 'max_rele_level': None, 'fold_num': 1}
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/n_train-50', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_pr

epoch 0, loss [234.2147], train tau 0.2935998737812042, test_tau 0.32499998807907104,train_ndcg@1 tensor([0.7040]), test_ndcg@1 tensor([0.7010])
epoch 1, loss [206.59752], train tau 0.26799988746643066, test_tau 0.3179999589920044,train_ndcg@1 tensor([0.7480]), test_ndcg@1 tensor([0.7065])
epoch 2, loss [198.29144], train tau 0.24519994854927063, test_tau 0.31679993867874146,train_ndcg@1 tensor([0.7730]), test_ndcg@1 tensor([0.7000])
epoch 3, loss [192.96562], train tau 0.23799997568130493, test_tau 0.3137999176979065,train_ndcg@1 tensor([0.7860]), test_ndcg@1 tensor([0.7175])
epoch 4, loss [188.52844], train tau 0.2255999743938446, test_tau 0.31579992175102234,train_ndcg@1 tensor([0.7910]), test_ndcg@1 tensor([0.7150])
epoch 5, loss [184.27512], train tau 0.21479997038841248, test_tau 0.3157999515533447,train_ndcg@1 tensor([0.7990]), test_ndcg@1 tensor([0.7195])
epoch 6, loss [180.42981], train tau 0.2071998119354248, test_tau 0.31800001859664917,train_ndcg@1 tensor([0.8090]), test_nd

Generate samples...
use_weak_labels:False, we will use true labels
Training data shape, X_train.shape torch.Size([50, 5, 16]) Y_train.shape torch.Size([50, 5])
set_and_load_data in LTREvaluator
(50, 5, 16) (50, 5) (50,)
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/n_train-50', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_presort': True, 'train_batch_size': 64, 'validation_batch_size': 1, 'test_batch_size': 1, 'unknown_as_zero': False, 'binary_rele': False, 'num_features': 16, 'has_comment': False, 'label_type': <LABEL_TYPE.Permutation: 2>, 'max_rele_level': None, 'fold_num': 1}
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/n_train-50', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_pr

epoch 0, loss [239.20232], train tau 0.30879974365234375, test_tau 0.34460002183914185,train_ndcg@1 tensor([0.7190]), test_ndcg@1 tensor([0.6835])
epoch 1, loss [215.94139], train tau 0.2879996597766876, test_tau 0.3296000063419342,train_ndcg@1 tensor([0.7450]), test_ndcg@1 tensor([0.6910])
epoch 2, loss [208.0192], train tau 0.2723998725414276, test_tau 0.3198000192642212,train_ndcg@1 tensor([0.7570]), test_ndcg@1 tensor([0.6925])
epoch 3, loss [202.81322], train tau 0.25639986991882324, test_tau 0.3181999921798706,train_ndcg@1 tensor([0.7840]), test_ndcg@1 tensor([0.6985])
epoch 4, loss [198.8264], train tau 0.24799972772598267, test_tau 0.31539997458457947,train_ndcg@1 tensor([0.7930]), test_ndcg@1 tensor([0.7005])
epoch 5, loss [195.6335], train tau 0.24159973859786987, test_tau 0.31519997119903564,train_ndcg@1 tensor([0.8000]), test_ndcg@1 tensor([0.6970])
epoch 6, loss [192.72961], train tau 0.2343997359275818, test_tau 0.3179999887943268,train_ndcg@1 tensor([0.8010]), test_ndcg@

Generate samples...
use_weak_labels:False, we will use true labels
Training data shape, X_train.shape torch.Size([50, 5, 16]) Y_train.shape torch.Size([50, 5])
set_and_load_data in LTREvaluator
(50, 5, 16) (50, 5) (50,)
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/n_train-50', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_presort': True, 'train_batch_size': 64, 'validation_batch_size': 1, 'test_batch_size': 1, 'unknown_as_zero': False, 'binary_rele': False, 'num_features': 16, 'has_comment': False, 'label_type': <LABEL_TYPE.Permutation: 2>, 'max_rele_level': None, 'fold_num': 1}
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/n_train-50', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_pr

epoch 0, loss [229.84734], train tau 0.3003997802734375, test_tau 0.32159996032714844,train_ndcg@1 tensor([0.7310]), test_ndcg@1 tensor([0.7030])
epoch 1, loss [208.65373], train tau 0.275199830532074, test_tau 0.3075999915599823,train_ndcg@1 tensor([0.7750]), test_ndcg@1 tensor([0.7205])
epoch 2, loss [202.07071], train tau 0.26439976692199707, test_tau 0.3051999807357788,train_ndcg@1 tensor([0.7850]), test_ndcg@1 tensor([0.7340])
epoch 3, loss [197.14877], train tau 0.2527996897697449, test_tau 0.29899996519088745,train_ndcg@1 tensor([0.7880]), test_ndcg@1 tensor([0.7380])
epoch 4, loss [192.47894], train tau 0.24599972367286682, test_tau 0.2953999638557434,train_ndcg@1 tensor([0.7870]), test_ndcg@1 tensor([0.7360])
epoch 5, loss [189.12373], train tau 0.23959973454475403, test_tau 0.29319998621940613,train_ndcg@1 tensor([0.7880]), test_ndcg@1 tensor([0.7425])
epoch 6, loss [185.15257], train tau 0.23199987411499023, test_tau 0.2979999780654907,train_ndcg@1 tensor([0.8040]), test_ndc

Generate samples...
use_weak_labels:False, we will use true labels
Training data shape, X_train.shape torch.Size([50, 5, 16]) Y_train.shape torch.Size([50, 5])
set_and_load_data in LTREvaluator
(50, 5, 16) (50, 5) (50,)
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/n_train-50', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_presort': True, 'train_batch_size': 64, 'validation_batch_size': 1, 'test_batch_size': 1, 'unknown_as_zero': False, 'binary_rele': False, 'num_features': 16, 'has_comment': False, 'label_type': <LABEL_TYPE.Permutation: 2>, 'max_rele_level': None, 'fold_num': 1}
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/n_train-50', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_pr

epoch 0, loss [232.90149], train tau 0.2911997437477112, test_tau 0.3253999948501587,train_ndcg@1 tensor([0.7520]), test_ndcg@1 tensor([0.6985])
epoch 1, loss [207.65271], train tau 0.2635996341705322, test_tau 0.3140000104904175,train_ndcg@1 tensor([0.7750]), test_ndcg@1 tensor([0.7070])
epoch 2, loss [200.29626], train tau 0.250399649143219, test_tau 0.30799999833106995,train_ndcg@1 tensor([0.7970]), test_ndcg@1 tensor([0.7105])
epoch 3, loss [195.0483], train tau 0.2427997887134552, test_tau 0.31139999628067017,train_ndcg@1 tensor([0.8010]), test_ndcg@1 tensor([0.7050])
epoch 4, loss [190.581], train tau 0.2355998158454895, test_tau 0.30559998750686646,train_ndcg@1 tensor([0.8140]), test_ndcg@1 tensor([0.7130])
epoch 5, loss [187.53851], train tau 0.22479978203773499, test_tau 0.3051999807357788,train_ndcg@1 tensor([0.8180]), test_ndcg@1 tensor([0.7220])
epoch 6, loss [183.85648], train tau 0.21399983763694763, test_tau 0.30379998683929443,train_ndcg@1 tensor([0.8310]), test_ndcg@1 

Generate samples...
use_weak_labels:False, we will use true labels
Training data shape, X_train.shape torch.Size([50, 5, 16]) Y_train.shape torch.Size([50, 5])
set_and_load_data in LTREvaluator
(50, 5, 16) (50, 5) (50,)
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/n_train-50', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_presort': True, 'train_batch_size': 64, 'validation_batch_size': 1, 'test_batch_size': 1, 'unknown_as_zero': False, 'binary_rele': False, 'num_features': 16, 'has_comment': False, 'label_type': <LABEL_TYPE.Permutation: 2>, 'max_rele_level': None, 'fold_num': 1}
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/n_train-50', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_pr

epoch 0, loss [236.62682], train tau 0.3096001148223877, test_tau 0.34199994802474976,train_ndcg@1 tensor([0.7140]), test_ndcg@1 tensor([0.6895])
epoch 1, loss [210.63301], train tau 0.29279983043670654, test_tau 0.3263999819755554,train_ndcg@1 tensor([0.7400]), test_ndcg@1 tensor([0.6960])
epoch 2, loss [203.87129], train tau 0.27439987659454346, test_tau 0.3157999515533447,train_ndcg@1 tensor([0.7590]), test_ndcg@1 tensor([0.7025])
epoch 3, loss [199.4181], train tau 0.2655998468399048, test_tau 0.31199994683265686,train_ndcg@1 tensor([0.7700]), test_ndcg@1 tensor([0.7085])
epoch 4, loss [195.81026], train tau 0.24759960174560547, test_tau 0.30699998140335083,train_ndcg@1 tensor([0.7950]), test_ndcg@1 tensor([0.7105])
epoch 5, loss [192.17244], train tau 0.24759963154792786, test_tau 0.30699998140335083,train_ndcg@1 tensor([0.7930]), test_ndcg@1 tensor([0.7175])
epoch 6, loss [189.20985], train tau 0.2315996289253235, test_tau 0.3019999861717224,train_ndcg@1 tensor([0.8060]), test_nd