In [1]:
import sys
import torch
# Print how many CUDA devices torch reports (shows up in the cell output).
print(torch.cuda.device_count())
# Put the parent directory on the import path before the project-local imports
# below; `Engine` and `util.*` presumably live there -- TODO confirm.
sys.path.append("..")
import numpy as np
import os
import pickle, importlib, random, Engine, tqdm, copy, json, time, argparse
import util.Generator as Generator
import util.Datahelper as dh

def _str2bool(value):
    """Convert a command-line token such as 'true', '0' or 'no' into a bool.

    Without this, argparse would store the raw string, and any non-empty
    string (including 'False') is truthy.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got {!r}'.format(value))


# Inside a notebook kernel sys.argv carries launcher arguments that argparse
# would reject, so keep only the program name.  The original assigned the
# string ' ' here, which left sys.argv as a str instead of a list.
sys.argv = sys.argv[:1]

parser = argparse.ArgumentParser()
parser.add_argument('-test', action="store_true", default=False)
parser.add_argument('-ckptname', dest='ckptname', default=None, required=False)

# Model-size options: parsed as int so supplied values match the int defaults.
parser.add_argument('-nl', dest='num_layers', default=2, required=False, type=int)
parser.add_argument('-nhd', dest='num_hidden_dims', default=2**9, required=False, type=int)
parser.add_argument('-nh', dest='num_heads', default=8, required=False, type=int)

# Feature switches: parsed as real booleans (see _str2bool).
parser.add_argument('-i', dest='use_item_feat', default=True, required=False, type=_str2bool)
parser.add_argument('-u', dest='use_user_feat', default=True, required=False, type=_str2bool)

# Sampling / history settings stay strings; they are forwarded unchanged to
# the generator configuration in a later cell.
parser.add_argument(
    '-pt_sample_func', dest='pt_sample_func', default='(lambda x:x)', required=False)
parser.add_argument(
    '-pt_sample_param', dest='pt_sample_param', default='0', required=False)
parser.add_argument(
    '-pt_history_func', dest='pt_history_func', default='(lambda x:x)', required=False)
parser.add_argument(
    '-pt_history_param', dest='pt_history_param', default='25', required=False)

parser.add_argument(
    '-nonimprove_limit', dest='nonimprove_limit', default=10, required=False, type=int)
parser.add_argument('-seed', dest='seed', default=0, required=False, type=int)

# sys.argv[1:] is empty after the reset above, so every option takes its default.
args = parser.parse_args()
print(args)

# Seed first: the shuffle below uses NumPy's global RNG, which this call
# presumably seeds -- TODO confirm against Engine.set_random_seed.
Engine.set_random_seed(args.seed)

# Run-wide settings shared by the generators and the KNN baseline.
basic_config = dict(
    # GPU index chosen by Engine (presumably the one with most free memory).
    cuda_num=Engine.GPU_max_free_memory(),
    course_file='../datasets/Taobao_data/Taobao.pkl',
    num_times=25,
    num_items=10000,
    batch_size=32,
    feats=[5507, 69],
)

save_name = 'checkpoint/KNN'
print(basic_config)

# NOTE(review): torch was already imported and queried for devices at the top
# of this cell, so this assignment may come too late to change which GPUs are
# visible; later cells also index the GPU with this same number -- verify.
os.environ['CUDA_VISIBLE_DEVICES'] = str(basic_config['cuda_num'])

# NOTE: pickle.load can execute arbitrary code; only load a trusted dataset file.
with open(basic_config['course_file'], 'rb') as pkl_file:
    user_dict = pickle.load(pkl_file)
print('Total Number of Users : {}'.format(len(user_dict)))

# Sort before shuffling so the resulting order depends only on the RNG state.
all_keys = sorted(user_dict.keys())
np.random.shuffle(all_keys)

# Two successive partitions produce the train / test / valid key lists
# (0.7 and 0.5 are presumably split fractions -- TODO confirm in Datahelper).
train_keys, tv_keys = dh.list_partition(all_keys, 0.7, seed=0)
test_keys, valid_keys = dh.list_partition(tv_keys, 0.5, seed=0)

8
Namespace(ckptname=None, nonimprove_limit=10, num_heads=8, num_hidden_dims=512, num_layers=2, pt_history_func='(lambda x:x)', pt_history_param='25', pt_sample_func='(lambda x:x)', pt_sample_param='0', seed=0, test=False, use_item_feat=True, use_user_feat=True)
{'cuda_num': 3, 'course_file': '../datasets/Taobao_data/Taobao.pkl', 'num_times': 25, 'num_items': 10000, 'batch_size': 32, 'feats': [5507, 69]}
Total Number of Users : 9847


In [2]:
def generator2feature(generator):
    """Fetch one 'MAX'-sized batch from `generator` and return (first, last).

    The generator is asked for batch 0 with batch_size='MAX'; element 0 of
    what it returns is treated as a sequence whose first entry is returned as
    the course features and whose last entry is returned as the target.
    """
    whole_batch = generator.__getitem__(batch_id=0, batch_size='MAX')
    fields = whole_batch[0]
    return fields[0], fields[-1]

# Generator settings for the training users; the sampling / history options
# come straight from the parsed command-line defaults.
train_generator_config = dict(
    name=None,
    training=True,
    sample_func=args.pt_sample_func,
    sample_param=args.pt_sample_param,
    history_func=args.pt_history_func,
    history_param=args.pt_history_param,
    next_basket=True,
    batch_size=basic_config['batch_size'],
    shuffle=True,
    fixed_seed=False,
)

train_generator = Generator.TimeMultihotGenerator(
    user_dict, train_keys, basic_config, train_generator_config)

# Materialise the whole training split once; userKNN_GPU reads train_courses.
train_courses, train_target = generator2feature(train_generator)

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.neighbors import NearestNeighbors
import util.Metrics as Metrics

def userKNN_GPU(train_features, test_courses, cuda_num,
                train_courses_np=None, num_times=None):
    """Score items for test users as similarity-weighted sums over training users.

    Parameters
    ----------
    train_features : torch.FloatTensor, [num_train, num_courses]
        Per-user feature vectors of the training users.  All tensors created
        here are placed on this tensor's device.
    test_courses : np.ndarray, [num_test, num_sem, num_courses]
        Test users' records; summed over axis 1 to build their feature vectors.
    cuda_num : int or None
        Kept for backward compatibility.  The device now follows
        `train_features`, so the function also runs on CPU tensors.
    train_courses_np : np.ndarray, optional
        Training records indexed as `[:, step]`.  Defaults to the
        notebook-level `train_courses` array.
    num_times : int, optional
        Number of time steps to score.  Defaults to
        `basic_config['num_times']`.

    Returns
    -------
    np.ndarray, [num_test, num_times, num_courses]
        For each test user and time step, the training users' rows at that
        step weighted by cosine similarity and summed.
    """
    # Fall back to the notebook globals so existing three-argument calls work.
    if train_courses_np is None:
        train_courses_np = train_courses
    if num_times is None:
        num_times = basic_config['num_times']

    device = train_features.device
    test_features = torch.FloatTensor(test_courses.sum(1).astype(float)).to(device)

    # Cosine similarity of every (test, train) pair as one matmul of
    # L2-normalised rows.  This gives the same [num_test, num_train] matrix as
    # calling torch.cosine_similarity once per test user (up to float
    # rounding); eps matches that function's default, so all-zero rows give 0.
    normalize = torch.nn.functional.normalize
    sim = torch.matmul(
        normalize(test_features, dim=-1, eps=1e-8),
        normalize(train_features, dim=-1, eps=1e-8).T)

    # One time step at a time, so only a [num_train, num_courses] slice of the
    # training records is on the device at once.
    per_step = []
    for step in range(num_times):
        step_items = torch.FloatTensor(train_courses_np[:, step]).to(device)
        per_step.append(torch.matmul(sim, step_items))
    pred = torch.stack(per_step, dim=1)
    return pred.cpu().numpy()

In [3]:
# Base settings for the evaluation generators; the sample/history fields and
# the name are overwritten for every (h, r) combination in the sweep below.
test_generator_config = {
    'training' : False, 
    'max_sampling' : 25,
    'mask_rate' : None,
    'historical' : None,
    'batch_size' : 16,
    'shuffle' : False,
    'fixed_seed' : True}

# The training features do not depend on h or r, so build and upload them
# once instead of once per combination.
train_features = torch.FloatTensor(train_courses.sum(1).astype(float))
train_features_cuda = train_features.cuda(basic_config['cuda_num'])

# results_mat[h][r] = [recall, recall_per_sem]
results_mat = {}
for h in list(range(4)) + list(range(5, 21, 5)):
    results_mat[h] = {}
    for r in [0,1,2,3,5,7,10,15,20,25]:
        test_generator_config['sample_func'] = '(lambda x:x)'
        test_generator_config['sample_param'] = str(r)
        test_generator_config['history_func'] = '(lambda x:x)'
        test_generator_config['history_param'] = str(h)
        test_generator_config['name'] = 'H={}_R={}'.format(h, r)
        test_generator = Generator.TimeMultihotGenerator(
            user_dict, test_keys, basic_config, test_generator_config)

        print(test_generator.name + ' ' + str(test_generator.batch_size))
        test_courses, test_target = generator2feature(test_generator)
        user_pred = userKNN_GPU(train_features_cuda, test_courses, basic_config['cuda_num'])
        # Only time steps from index h onwards are scored.
        recall, recall_per_sem = Metrics.recall(test_target[:, h:], user_pred[:, h:], at_n=10)
        print('Recall: {:.4f}'.format(recall))
        results_mat[h][r] = [recall, recall_per_sem]

# Append the extension only once so re-running this cell does not produce
# 'checkpoint/KNN.npy.npy'.
if not save_name.endswith('.npy'):
    save_name = save_name + '.npy'
print(save_name)

# np.save does not create missing directories.
save_dir = os.path.dirname(save_name)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
# The dict is stored as a 0-d object array; load with allow_pickle=True.
np.save(save_name, np.array(results_mat))

H=0_R=0 16
Recall: 0.0009
H=0_R=1 16
Recall: 0.0977
H=0_R=2 16
Recall: 0.1277
H=0_R=3 16
Recall: 0.1340
H=0_R=5 16
Recall: 0.1445
H=0_R=7 16
Recall: 0.1556
H=0_R=10 16
Recall: 0.1582
H=0_R=15 16
Recall: 0.1619
H=0_R=20 16
Recall: 0.1630
H=0_R=25 16
Recall: 0.1639
H=1_R=0 16
Recall: 0.0744
H=1_R=1 16
Recall: 0.1118
H=1_R=2 16
Recall: 0.1301
H=1_R=3 16
Recall: 0.1391
H=1_R=5 16
Recall: 0.1494
H=1_R=7 16
Recall: 0.1541
H=1_R=10 16
Recall: 0.1543
H=1_R=15 16
Recall: 0.1559
H=1_R=20 16
Recall: 0.1564
H=1_R=25 16
Recall: 0.1586
H=2_R=0 16
Recall: 0.0858
H=2_R=1 16
Recall: 0.1108
H=2_R=2 16
Recall: 0.1240
H=2_R=3 16
Recall: 0.1304
H=2_R=5 16
Recall: 0.1415
H=2_R=7 16
Recall: 0.1470
H=2_R=10 16
Recall: 0.1479
H=2_R=15 16
Recall: 0.1497
H=2_R=20 16
Recall: 0.1523
H=2_R=25 16
Recall: 0.1543
H=3_R=0 16
Recall: 0.1023
H=3_R=1 16
Recall: 0.1160
H=3_R=2 16
Recall: 0.1261
H=3_R=3 16
Recall: 0.1307
H=3_R=5 16
Recall: 0.1353
H=3_R=7 16
Recall: 0.1418
H=3_R=10 16
Recall: 0.1413
H=3_R=15 16
Recall: 0.145