In [1]:
import pandas as pd
%load_ext autoreload
%autoreload 2
import sys
import os
root_path = '../../../' # path to project root

# Register the project source directories on sys.path.
# The appends are guarded so that re-running this cell does not keep growing
# sys.path with duplicate entries.
for src_dir_template in ('{}/code', '{}/code/core', '{}/code/datasets/'):
    src_dir = src_dir_template.format(root_path)
    if src_dir not in sys.path:
        sys.path.append(src_dir)

# The ptranking directory must sit at the front of sys.path so it takes
# precedence over any other match; drop a stale entry from a previous run
# before re-inserting it at position 0.
ptranking_dir = '{}/code/ptranking'.format(root_path)
if ptranking_dir in sys.path:
    sys.path.remove(ptranking_dir)
sys.path.insert(0, ptranking_dir)

from core.ranking_utils import *
from core.mallows import *
from core.ws_ranking import *
from core.ws_real_workflow import * 
from datasets.imdb_tmdb_dataset import * 
from datasets.basic_clmn_dataset import * 
from core.labelling.feature_lf import *
from ptranking_wrapper import PtrankingWrapper
import datasets_factory 
import numpy as np 
import yaml
import matplotlib.pyplot as plt
import pickle

In [2]:
# Weak-label ranking experiment.
# For each labelling feature, and for five repetitions, this cell:
#   1. loads the experiment config and specialises its paths for the feature,
#   2. builds the dataset and attaches the cached weak labels `<feature>.pickle`,
#   3. trains a ptranking model through PtrankingWrapper,
#   4. pickles the training result under `results/`.

# Loop-invariant values, computed once.
config_file_path = '{}/configs/extended-imdb-tmdb_ranking_experiment_three.yaml'.format(root_path)
results_dir = 'results'
# Create the output directory up front: otherwise a fresh checkout only fails
# with FileNotFoundError *after* the first (expensive) training run finishes.
os.makedirs(results_dir, exist_ok=True)

for feature in ['imdb_rating', 'tomato_user_rating', 'mv_lens_avg_rating']:
    exp_name = feature
    for seed in range(5):
        # NOTE(review): `seed` is only used to tag the output file name below;
        # nothing in this cell seeds an RNG with it. Confirm whether the
        # dataset / trainer are expected to pick up a seed from elsewhere.

        # The config is re-read on every run because the path entries below
        # are extended in place. The file is closed as soon as it is parsed
        # instead of being held open for the whole training run.
        # NOTE(review): consider yaml.safe_load if the config only contains
        # plain YAML types.
        with open(config_file_path, 'r') as conf_file:
            conf = yaml.full_load(conf_file)
        conf['project_root'] = root_path

        # fix configuration
        conf['data_conf']['n_train'] = 1000
        conf['data_conf']['n_test'] = 500
        conf['data_conf']['processed_data_path'] = os.path.join(
            conf['data_conf']['processed_data_path'], exp_name)

        # l2r_training_conf
        conf['l2r_training_conf']['use_weak_labels'] = True
        conf['l2r_training_conf']['model_checkpoint'] = os.path.join(
            conf['l2r_training_conf']['model_checkpoint'], exp_name)

        # weak_sup_conf
        conf['weak_sup_conf']['checkpoint_path'] = os.path.join(
            conf['weak_sup_conf']['checkpoint_path'], exp_name)

        # result_path
        conf['results_path'] = os.path.join(conf['results_path'], exp_name)

        data_conf = conf['data_conf']
        # (The original carried an unfinished note here about partial ranking
        # experiments; its intent cannot be recovered from this cell.)
        weak_sup_conf = conf['weak_sup_conf']
        l2r_training_conf = conf['l2r_training_conf']
        data_conf['project_root'] = root_path

        dataset = datasets_factory.create_dataset(data_conf)
        dataset.create_samples()

        # The weak labels are read from a cache file. It was produced once
        # with the following (kept for reference, not executed):
        #     Y_tilde = []
        #     d = conf['data_conf']['dimension']
        #     lf = FeatureRankingLF(feature, d=d, highest_first=True)
        #     for row in dataset.lst_feature_map:
        #         Y_tilde.append(lf.apply(row))
        #     with open(feature+'.pickle', 'wb') as f:
        #         pickle.dump(Y_tilde, f)
        # SECURITY: pickle.load can execute arbitrary code; only load cache
        # files that were generated locally by the snippet above.
        with open(feature+'.pickle', 'rb') as f:
            Y_tilde = pickle.load(f)

        # Mean distance between the weak labels and the dataset's Y
        # (presumably Kendall-tau, judging by the helper's name).
        r_utils = RankingUtils(data_conf['dimension'])
        kt = r_utils.mean_kt_distance(Y_tilde, dataset.Y)
        print('kt distance: ', kt)
        dataset.set_Y_tilde(Y_tilde)

        ptwrapper = PtrankingWrapper(data_conf=data_conf, weak_sup_conf=weak_sup_conf,
                                     l2r_training_conf=l2r_training_conf,
                                     result_path=conf['results_path'],
                                     wl_kt_distance=kt)
        X_train, X_test, Y_train, Y_test = dataset.get_train_test_torch(
            use_weak_labels=l2r_training_conf['use_weak_labels'])
        ptwrapper.set_data(X_train=X_train, X_test=X_test,
                           Y_train=Y_train, Y_test=Y_test)
        model = ptwrapper.get_model()
        result = ptwrapper.train_model(model, verbose=1)

        # Earlier naming scheme, kept for reference: f"{exp_name}_{seed}.pickle"
        # NOTE(review): the '+' in the file name below looks accidental, but it
        # is preserved because downstream readers may rely on the exact name.
        result_path = os.path.join(results_dir, f"refactoring_test_{exp_name}+_{seed}.pickle")
        with open(result_path, 'wb') as f:
            pickle.dump(result, f)

Generate samples...
kt distance:  0.08959999999999999
use_weak_labels:True, we will use weak labels
Training data shape, X_train.shape torch.Size([1000, 5, 16]) Y_train.shape torch.Size([1000, 5])
set_and_load_data in LTREvaluator
(1000, 5, 16) (1000, 5) (1000,)
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/imdb_rating', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_presort': True, 'train_batch_size': 64, 'validation_batch_size': 1, 'test_batch_size': 1, 'unknown_as_zero': False, 'binary_rele': False, 'num_features': 16, 'has_comment': False, 'label_type': <LABEL_TYPE.Permutation: 2>, 'max_rele_level': None, 'fold_num': 1}
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/imdb_rating', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort

epoch 0, loss [855.22266], train tau 0.27340084314346313, test_tau 0.2947999835014343,train_ndcg@1 tensor([0.7462]), test_ndcg@1 tensor([0.7245])
epoch 1, loss [797.1118], train tau 0.2555009722709656, test_tau 0.28600001335144043,train_ndcg@1 tensor([0.7690]), test_ndcg@1 tensor([0.7390])
epoch 2, loss [779.70874], train tau 0.24730104207992554, test_tau 0.28299999237060547,train_ndcg@1 tensor([0.7883]), test_ndcg@1 tensor([0.7345])
epoch 3, loss [768.9298], train tau 0.24210086464881897, test_tau 0.27720004320144653,train_ndcg@1 tensor([0.7920]), test_ndcg@1 tensor([0.7505])
epoch 4, loss [759.99506], train tau 0.23640087246894836, test_tau 0.27740001678466797,train_ndcg@1 tensor([0.7952]), test_ndcg@1 tensor([0.7420])
epoch 5, loss [753.2139], train tau 0.22890067100524902, test_tau 0.2754000127315521,train_ndcg@1 tensor([0.8050]), test_ndcg@1 tensor([0.7420])
epoch 6, loss [746.15967], train tau 0.2272007167339325, test_tau 0.2742000222206116,train_ndcg@1 tensor([0.8050]), test_ndc

Generate samples...
kt distance:  0.17213333333333336
use_weak_labels:True, we will use weak labels
Training data shape, X_train.shape torch.Size([1000, 5, 16]) Y_train.shape torch.Size([1000, 5])
set_and_load_data in LTREvaluator
(1000, 5, 16) (1000, 5) (1000,)
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/tomato_user_rating', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_presort': True, 'train_batch_size': 64, 'validation_batch_size': 1, 'test_batch_size': 1, 'unknown_as_zero': False, 'binary_rele': False, 'num_features': 16, 'has_comment': False, 'label_type': <LABEL_TYPE.Permutation: 2>, 'max_rele_level': None, 'fold_num': 1}
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/tomato_user_rating', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 

epoch 0, loss [893.4982], train tau 0.308300256729126, test_tau 0.3214000165462494,train_ndcg@1 tensor([0.7253]), test_ndcg@1 tensor([0.7150])
epoch 1, loss [850.22], train tau 0.28980058431625366, test_tau 0.32199999690055847,train_ndcg@1 tensor([0.7445]), test_ndcg@1 tensor([0.7160])
epoch 2, loss [836.403], train tau 0.28610047698020935, test_tau 0.3141999840736389,train_ndcg@1 tensor([0.7500]), test_ndcg@1 tensor([0.7320])
epoch 3, loss [826.87415], train tau 0.28140053153038025, test_tau 0.30800002813339233,train_ndcg@1 tensor([0.7605]), test_ndcg@1 tensor([0.7415])
epoch 4, loss [819.2495], train tau 0.27700096368789673, test_tau 0.3078000247478485,train_ndcg@1 tensor([0.7650]), test_ndcg@1 tensor([0.7320])
epoch 5, loss [813.2149], train tau 0.27030113339424133, test_tau 0.3052000105381012,train_ndcg@1 tensor([0.7720]), test_ndcg@1 tensor([0.7325])
epoch 6, loss [807.6652], train tau 0.26240110397338867, test_tau 0.3128000497817993,train_ndcg@1 tensor([0.7788]), test_ndcg@1 tens

Generate samples...
kt distance:  0.12086666666666668
use_weak_labels:True, we will use weak labels
Training data shape, X_train.shape torch.Size([1000, 5, 16]) Y_train.shape torch.Size([1000, 5])
set_and_load_data in LTREvaluator
(1000, 5, 16) (1000, 5) (1000,)
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/mv_lens_avg_rating', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_presort': True, 'train_batch_size': 64, 'validation_batch_size': 1, 'test_batch_size': 1, 'unknown_as_zero': False, 'binary_rele': False, 'num_features': 16, 'has_comment': False, 'label_type': <LABEL_TYPE.Permutation: 2>, 'max_rele_level': None, 'fold_num': 1}
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/mv_lens_avg_rating', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 

epoch 0, loss [885.6145], train tau 0.28300023078918457, test_tau 0.29899996519088745,train_ndcg@1 tensor([0.7580]), test_ndcg@1 tensor([0.7260])
epoch 1, loss [819.1794], train tau 0.2684008479118347, test_tau 0.28939998149871826,train_ndcg@1 tensor([0.7653]), test_ndcg@1 tensor([0.7435])
epoch 2, loss [802.74585], train tau 0.2602007985115051, test_tau 0.2879999876022339,train_ndcg@1 tensor([0.7747]), test_ndcg@1 tensor([0.7480])
epoch 3, loss [795.24414], train tau 0.25560057163238525, test_tau 0.29360002279281616,train_ndcg@1 tensor([0.7728]), test_ndcg@1 tensor([0.7385])
epoch 4, loss [786.81067], train tau 0.2513008117675781, test_tau 0.29659998416900635,train_ndcg@1 tensor([0.7780]), test_ndcg@1 tensor([0.7415])
epoch 5, loss [781.9218], train tau 0.2499009668827057, test_tau 0.29580003023147583,train_ndcg@1 tensor([0.7840]), test_ndcg@1 tensor([0.7375])
epoch 6, loss [773.4536], train tau 0.24570104479789734, test_tau 0.29740002751350403,train_ndcg@1 tensor([0.7895]), test_ndcg