In [1]:
import pandas as pd
%load_ext autoreload
%autoreload 2
import sys
import os
# Location of the project root, relative to this notebook's working directory.
root_path = '../../../'

# Make the project's source directories importable. These three are appended,
# so anything already on sys.path keeps priority over them.
sys.path.extend(
    '{}/{}'.format(root_path, sub_dir)
    for sub_dir in ('code', 'code/core', 'code/datasets/')
)
# The ptranking checkout goes to the front of sys.path instead
# (presumably so it wins over any other installed copy -- confirm).
sys.path.insert(0, '{}/code/ptranking'.format(root_path))

from core.ranking_utils import *
from core.mallows import *
from core.ws_ranking import *
from core.ws_real_workflow import * 
from datasets.imdb_tmdb_dataset import * 
from datasets.basic_clmn_dataset import * 
from core.labelling.feature_lf import *
from ptranking_wrapper import PtrankingWrapper
import datasets_factory 
import numpy as np 
import yaml
import matplotlib.pyplot as plt
import pickle

In [3]:
# Run the extended IMDB-TMDB "three" ranking experiment once per seed and
# pickle each run's training result under ./results.

# Loop-invariant settings, hoisted out of the per-seed loop.
config_file_path = '{}/configs/extended-imdb-tmdb_ranking_experiment_three.yaml'.format(root_path)
exp_name = 'three'

# Create the output directory up front so a long training run cannot fail at
# the very end because ./results does not exist yet.
results_dir = 'results'
os.makedirs(results_dir, exist_ok=True)

for seed in range(5):
    # Seed NumPy's global RNG so each pass is reproducible for a given seed.
    # NOTE(review): torch and any project-internal RNGs are not seeded here;
    # confirm whether the config / project code carries its own seed.
    np.random.seed(seed)

    # Re-read the config on every pass: the path entries below are extended
    # with exp_name, so reusing a single dict would append it repeatedly.
    # Only the load needs the file handle, so the `with` block ends right away.
    # NOTE(review): yaml.full_load is kept as in the original; this is an
    # in-repo config file. Prefer yaml.safe_load if it only holds plain YAML.
    with open(config_file_path, 'r') as conf_file:
        conf = yaml.full_load(conf_file)

    conf['project_root'] = root_path

    # Fixed dataset sizes for this experiment; processed data goes into an
    # experiment-specific sub-directory.
    conf['data_conf']['n_train'] = 1000
    conf['data_conf']['n_test'] = 500
    conf['data_conf']['processed_data_path'] = os.path.join(
        conf['data_conf']['processed_data_path'], exp_name)

    # Learning-to-rank training: train on weak labels, checkpoint per experiment.
    conf['l2r_training_conf']['use_weak_labels'] = True
    conf['l2r_training_conf']['model_checkpoint'] = os.path.join(
        conf['l2r_training_conf']['model_checkpoint'], exp_name)

    # Weak-supervision checkpoints and the result directory, per experiment.
    conf['weak_sup_conf']['checkpoint_path'] = os.path.join(
        conf['weak_sup_conf']['checkpoint_path'], exp_name)
    conf['results_path'] = os.path.join(conf['results_path'], exp_name)

    data_conf = conf['data_conf']
    weak_sup_conf = conf['weak_sup_conf']
    l2r_training_conf = conf['l2r_training_conf']
    data_conf['project_root'] = root_path

    dataset = datasets_factory.create_dataset(data_conf)
    dataset.create_samples()

    # kt stays None when training does not use weak labels.
    kt = None
    if l2r_training_conf['use_weak_labels']:
        Y_tilde, thetas = get_weak_labels(dataset, weak_sup_conf, root_path=root_path)
        r_utils = RankingUtils(data_conf['dimension'])
        # Mean KT distance between the weak labels and dataset.Y, as computed
        # by RankingUtils.mean_kt_distance.
        kt = r_utils.mean_kt_distance(Y_tilde, dataset.Y)
        print('kt distance: ', kt)
        dataset.set_Y_tilde(Y_tilde)

    ptwrapper = PtrankingWrapper(data_conf=data_conf, weak_sup_conf=weak_sup_conf,
                                 l2r_training_conf=l2r_training_conf,
                                 result_path=conf['results_path'],
                                 wl_kt_distance=kt)
    X_train, X_test, Y_train, Y_test = dataset.get_train_test_torch(
        use_weak_labels=l2r_training_conf['use_weak_labels'])
    ptwrapper.set_data(X_train=X_train, X_test=X_test,
                       Y_train=Y_train, Y_test=Y_test)
    model = ptwrapper.get_model()
    result = ptwrapper.train_model(model, verbose=1)

    # One pickle per seed. The file name (including the '+') is kept exactly
    # as before so existing readers of these files keep working.
    result_path = os.path.join(results_dir, f"refactoring_test_{exp_name}+_{seed}.pickle")
    with open(result_path, 'wb') as f:
        pickle.dump(result, f)

Generate samples...
Weak labels generated and saved in ../../../data/imdb-tmdb/processed/extended-default/LFs/three/weak_labels.pkl
Use our weak supervision...train_method: triplet_opt,inference_rule: weighted_kemeny
kt distance:  0.09386666666666667
use_weak_labels:True, we will use weak labels
Training data shape, X_train.shape torch.Size([1000, 5, 16]) Y_train.shape torch.Size([1000, 5])
set_and_load_data in LTREvaluator
(1000, 5, 16) (1000, 5) (1000,)
data_dict {'data_id': 'imdb_tmdb', 'dir_data': 'data/imdb-tmdb/processed/extended-imdb-default/three', 'min_docs': 10, 'min_rele': 1, 'scale_data': False, 'scaler_id': None, 'scaler_level': None, 'train_presort': True, 'validation_presort': True, 'test_presort': True, 'train_batch_size': 64, 'validation_batch_size': 1, 'test_batch_size': 1, 'unknown_as_zero': False, 'binary_rele': False, 'num_features': 16, 'has_comment': False, 'label_type': <LABEL_TYPE.Permutation: 2>, 'max_rele_level': None, 'fold_num': 1}
data_dict {'data_id': 'im

epoch 0, loss [859.53656], train tau 0.2750004529953003, test_tau 0.30080002546310425,train_ndcg@1 tensor([0.7585]), test_ndcg@1 tensor([0.7220])
epoch 1, loss [802.9465], train tau 0.2633003294467926, test_tau 0.2946000099182129,train_ndcg@1 tensor([0.7720]), test_ndcg@1 tensor([0.7300])
epoch 2, loss [786.4833], train tau 0.25520065426826477, test_tau 0.2889999747276306,train_ndcg@1 tensor([0.7780]), test_ndcg@1 tensor([0.7440])
epoch 3, loss [775.85315], train tau 0.2497006058692932, test_tau 0.28939998149871826,train_ndcg@1 tensor([0.7857]), test_ndcg@1 tensor([0.7355])
epoch 4, loss [768.53766], train tau 0.2457006573677063, test_tau 0.28859999775886536,train_ndcg@1 tensor([0.7895]), test_ndcg@1 tensor([0.7255])
epoch 5, loss [762.0388], train tau 0.23930075764656067, test_tau 0.2897999882698059,train_ndcg@1 tensor([0.7962]), test_ndcg@1 tensor([0.7370])
epoch 6, loss [755.9141], train tau 0.23670059442520142, test_tau 0.2889999747276306,train_ndcg@1 tensor([0.7933]), test_ndcg@1 

epoch 8, loss [754.2547], train tau 0.2350013256072998, test_tau 0.27160000801086426,train_ndcg@1 tensor([0.7965]), test_ndcg@1 tensor([0.7545])
epoch 9, loss [748.3059], train tau 0.23290103673934937, test_tau 0.2746000289916992,train_ndcg@1 tensor([0.8010]), test_ndcg@1 tensor([0.7665])
The experiment result is saved in ../../../tmp/results/default/three/result_summary.pkl
