In [40]:
%load_ext autoreload
%autoreload 2

from NewsContent import *
from UserContent import *
from preprocessing import *
from PEGenerator import *
import PEGenerator
from models import *
from utils import *
from Encoders import *

import os
import numpy as np
import json
import random

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [41]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [42]:
data_root_path = "../data/Challenge/"
embedding_path = "../../"
KG_root_path = "../data/Challenge/entity"
popularity_path = "../data/Challenge/popularity"
config = {'title_length':30,
              'body_length':100,
              'max_clicked_news':50,
              'npratio':1,
              'news_encoder_name':"CNN",
              'user_encoder_name':"Att",
             'attrs':['title', 'entity','vert'],
             'word_filter':0,
             'data_root_path':data_root_path,
             'embedding_path':embedding_path,
             'KG_root_path':KG_root_path,
            'popularity_path':popularity_path,
            'batch_size': 32,
             'max_entity_num':5}

## ABLATION ##

model_config1 = { # normal model
        'news_encoder':1,
        'popularity_user_modeling':True,
        'rel':True,
        'ctr':True,
        'content':True,
        'rece_emb':True,
        'activity':True
    }

model_config2 = { # no popularity
        'news_encoder':1,
        'popularity_user_modeling':True,
        'rel':True,
        'ctr':False, #*
        'content':True,
        'rece_emb':True,
        'activity':True
    }


model_config3 = { # recency
        'news_encoder':1,
        'popularity_user_modeling':True,
        'rel':True,
        'ctr':True,
        'content':True,
        'rece_emb':False, #*
        'activity':True
    }

model_config4 = { # no relevancy
        'news_encoder':1,
        'popularity_user_modeling':True,
        'rel':False, #*
        'ctr':True,
        'content':True,
        'rece_emb':True,
        'activity':True
    }

model_config5 = { # no content
        'news_encoder':1,
        'popularity_user_modeling':True,
        'rel':True,
        'ctr':True,
        'content':False, #*
        'rece_emb':True,
        'activity':True
    }


model_config6 = { # no content
        'news_encoder':1,
        'popularity_user_modeling':False, #*
        'rel':True,
        'ctr':True,
        'content':True,
        'rece_emb':True,
        'activity':True
    }


## EXTENSION ##


In [43]:
model_config = model_config6

In [44]:
News = NewsContent(config)

TrainUsers = UserContent(News.news_index,config,'train.tsv',2)
ValidUsers = UserContent(News.news_index,config,'val.tsv',1)
TestUsers = UserContent(News.news_index,config,'test.tsv',2)

232887
24464
48930


In [45]:
train_sess,train_buckets, train_user_id, train_label = get_train_input(TrainUsers.session,News.news_index,config)
test_impressions, test_userids = get_test_input(TestUsers.session,News.news_index)
val_impressions, val_userids = get_test_input(ValidUsers.session,News.news_index)

232887
234277
48930
24464


In [46]:
title_word_embedding_matrix, have_word = load_matrix(embedding_path,News.word_dict)
entity_embedding_matrix, have_word2 = load_matrix(embedding_path, News.entity_dict)

In [47]:
train_loader = DataLoader(TrainDataset(News, TrainUsers, train_sess, train_user_id, train_buckets, train_label), config['batch_size'])
val_user_data = UserDataset(News,ValidUsers)
test_user_data = UserDataset(News,TestUsers)
news_data = NewsDataset(News)

In [48]:
from torch.optim import Adam

model,user_encoder,news_encoder,bias_news_encoder,bias_content_scorer,scaler,time_embedding_layer,activity_gater = \
create_pe_model(config, model_config, News, title_word_embedding_matrix, entity_embedding_matrix, device)

model.to(device)
for name, module in model.named_children():
    module.to(device)
    print(f'Moved {name} to {device}')



Moved news_encoder to cuda
Moved bias_news_encoder to cuda
Moved pop_aware_user_encoder to cuda
Moved bias_scorer to cuda
Moved activity_gater to cuda
Moved time_embedding_layer to cuda
Moved time_distributed1 to cuda
Moved time_distributed2 to cuda
Moved time_distributed3 to cuda
Moved scaler to cuda
Moved softmax to cuda


In [49]:
colds = [0,1,3,5]
topKs = 10

In [50]:
from tqdm import tqdm
val_metrics_epoch = []
num_epochs = 10
# Step 2: Create your Adam optimizer
optimizer = Adam(model.parameters(), lr=0.0001)

loss_fn = nn.CrossEntropyLoss()

# Step 3: Iterate over the data for the number of epochs
for epoch in range(num_epochs):

# Step 4: Iterate over each batch of data and compute the scores using the forward pass of the network
    model.train()
    for x, y in tqdm(train_loader):
        optimizer.zero_grad()
        x= [i.to(device) for i in x]
        y = y.to(device)
        out = model(x)
    
        # Step 5: Compute the lambda gradient values for the pairwise loss (spedup) with the compute_lambda_i method on the scores and the output labels
        loss = loss_fn(out, y)

        # Step 6: Bacward from the scores with the use of the lambda gradient values
        if loss is not None:
            # torch.autograd.backward(out, loss)
            loss.backward()
            
            # Step 7: Update the weights using the optimizer
            optimizer.step()

    model.eval()

    val_metrics = eval_model(model_config, News, user_encoder, val_impressions, val_user_data, val_userids,
               news_encoder, bias_news_encoder, activity_gater, time_embedding_layer, 
               bias_content_scorer, scaler, colds, topKs, ValidUsers, device)
    
    print("epoch: {}, metrics: {}".format(epoch, val_metrics))
    val_metrics_epoch.append(val_metrics)

    if epoch > 1:
        if (val_metrics_epoch[-1][0] < val_metrics_epoch[-2][0]):
            break



100%|██████████| 7322/7322 [21:12<00:00,  5.75it/s]


{0: 0, 1: 1, 3: 2, 5: 3}


100%|██████████| 24464/24464 [10:27<00:00, 38.98it/s]


epoch: 0, metrics: ([0.7331355262586816, 0.5148365988869711, 0.5734408795986045, 0.6127851933817108], [[nan, nan, nan, 0.72388498704716], [nan, nan, nan, 0.5104964051185683], [nan, nan, nan, 0.550610969100368], [nan, nan, nan, 0.5997508252858155]], [0.009470371497581412, 0.01865744128369672, 0.026043622294152535, 0.031021638081342763, 0.035655946674877946, 0.03875213034027551, 0.0410185440320044, 0.04219222255093544, 0.04344684441599964, 0.044883689485667136], [-6.532049241449569e-09, 0.28577904506514007, 0.37687651213792084, 0.41977830722616183, 0.444965803041869, 0.45856756743036353, 0.4663242288104376, 0.4711379461901651, 0.4741060449513366, 0.4759832600252234], [0.9999999934679508, 0.5715581010649452, 0.4256387094097873, 0.3582231318291844, 0.3195722929114918, 0.29894239643431514, 0.286108668716935, 0.2777424243853188, 0.27195392291386233, 0.2675670550537994])


100%|██████████| 7322/7322 [20:41<00:00,  5.90it/s]


{0: 0, 1: 1, 3: 2, 5: 3}


100%|██████████| 24464/24464 [10:33<00:00, 38.59it/s]


epoch: 1, metrics: ([0.7212129536234161, 0.5048916124454248, 0.5633855113826362, 0.6039440716677954], [[nan, nan, nan, 0.7111336117237997], [nan, nan, nan, 0.5096005509577385], [nan, nan, nan, 0.5472200356565731], [nan, nan, nan, 0.5964603985205157]], [0.0098346165551807, 0.017868344334701913, 0.02438428369842246, 0.03019206945928137, 0.0345227398290135, 0.038185526917343296, 0.04077571399360488, 0.04227316589706861, 0.04354812427446975, 0.04504557617793348], [-6.2348056728345575e-09, 0.2869977941450299, 0.37699875264672805, 0.4196215178415501, 0.44386514193671367, 0.4571305160076303, 0.4644192028218153, 0.46883476123291734, 0.4716584898794571, 0.4734270691864334], [0.9999999937651943, 0.5739955975984128, 0.42319872241986195, 0.3499252712871804, 0.30657185015601807, 0.28280285282207906, 0.2677389482161709, 0.2579350973009829, 0.2510057399835148, 0.24604510067139207])


100%|██████████| 7322/7322 [20:30<00:00,  5.95it/s]


{0: 0, 1: 1, 3: 2, 5: 3}


100%|██████████| 24464/24464 [10:45<00:00, 37.91it/s]


epoch: 2, metrics: ([0.7150974797478377, 0.49341703556426825, 0.5540988459797053, 0.5958825223599574], [[nan, nan, nan, 0.6925648004626794], [nan, nan, nan, 0.47782382484867914], [nan, nan, nan, 0.517394749602403], [nan, nan, nan, 0.572112534629809]], [0.008579994690116494, 0.016492307450437945, 0.0236153219101573, 0.029342164324883045, 0.034077551425281044, 0.03802364022507694, 0.040128167224539484, 0.04178750582026956, 0.043588595947536335, 0.04451944442806784], [-7.2142475956479576e-09, 0.2942090256954619, 0.38326574910488137, 0.4244468461787919, 0.4479280281677949, 0.46039268919714454, 0.4673860203852406, 0.4717234050656472, 0.4743700235463226, 0.4762482244979427], [0.9999999927857524, 0.5884180620880377, 0.4358853646380458, 0.35819342762397505, 0.31288463633539165, 0.2866162214040795, 0.27086577255417704, 0.26051330962934044, 0.25313795573809633, 0.24805653936298475])


In [51]:
colds = [5, 7, 10, 15]
topKs = 10
test_metrics, test_cold_metrics, test_topics, test_ILADs, test_ILMDs = eval_model(model_config, News, user_encoder, test_impressions, test_user_data, test_userids,
               news_encoder, bias_news_encoder, activity_gater, time_embedding_layer, 
               bias_content_scorer, scaler, colds, topKs, TestUsers, device)

{5: 0, 7: 1, 10: 2, 15: 3}


100%|██████████| 48930/48930 [21:53<00:00, 37.26it/s]


In [52]:
print("val metrics", val_metrics_epoch)
print("test metrics", test_metrics)
print("cold", test_cold_metrics)
print("diversity", [test_ILADs, test_ILMDs])

results = {"config": config,
           "model_config" : model_config,
            "val metrics": val_metrics_epoch, 
           "test metrics": test_metrics,
           "cold": test_cold_metrics,
           "diversity": [test_ILADs, test_ILMDs]}

import json
with open('results{}.json'.format(time.time()), 'w') as f:
    json.dump(results, f)

val metrics [([0.7331355262586816, 0.5148365988869711, 0.5734408795986045, 0.6127851933817108], [[nan, nan, nan, 0.72388498704716], [nan, nan, nan, 0.5104964051185683], [nan, nan, nan, 0.550610969100368], [nan, nan, nan, 0.5997508252858155]], [0.009470371497581412, 0.01865744128369672, 0.026043622294152535, 0.031021638081342763, 0.035655946674877946, 0.03875213034027551, 0.0410185440320044, 0.04219222255093544, 0.04344684441599964, 0.044883689485667136], [-6.532049241449569e-09, 0.28577904506514007, 0.37687651213792084, 0.41977830722616183, 0.444965803041869, 0.45856756743036353, 0.4663242288104376, 0.4711379461901651, 0.4741060449513366, 0.4759832600252234], [0.9999999934679508, 0.5715581010649452, 0.4256387094097873, 0.3582231318291844, 0.3195722929114918, 0.29894239643431514, 0.286108668716935, 0.2777424243853188, 0.27195392291386233, 0.2675670550537994]), ([0.7212129536234161, 0.5048916124454248, 0.5633855113826362, 0.6039440716677954], [[nan, nan, nan, 0.7111336117237997], [nan, n

## Finished
## ---