In [1]:
import os
import torch
import pickle

from MeLU import MeLU
from options import config
from model_training import training
from data_generation import generate
from evidence_candidate import selection
from tqdm import tqdm
import matplotlib.pyplot as plt
from torch.nn import functional as F
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
master_path= "./testuserdata"
if not os.path.exists("{}/".format(master_path)):
    os.mkdir("{}/".format(master_path))
    # preparing dataset. It needs about 22GB of your hard disk space.
    generate(master_path)

4348


4348it [00:57, 75.98it/s] 
484it [00:05, 81.88it/s] 


1087


1087it [00:14, 77.50it/s]
121it [00:01, 70.93it/s]


4292


4292it [00:43, 98.64it/s] 
477it [00:05, 86.64it/s] 


1077


1077it [00:10, 98.67it/s] 
120it [00:01, 99.06it/s]


In [3]:
training_losses = []

In [4]:
testing_losses = []

In [5]:
epochs = [20,40,50,60,80]

In [6]:
state = "warm_state"
for epoch in epochs:
    # training model.
    melu = MeLU(config)
    state = "warm_state"
    model_filename = "{}/models_{}_{}.pkl".format(master_path,state,epoch)
    if not os.path.exists(model_filename):
        training_set_size = int(len(os.listdir("{}/{}".format(master_path,state))) / 4)
        supp_xs_s = []
        supp_ys_s = []
        query_xs_s = []
        query_ys_s = []
        for idx in tqdm(range(training_set_size)):
            supp_xs_s.append(pickle.load(open("{}/{}/supp_x_{}.pkl".format(master_path,state, idx), "rb")))
            supp_ys_s.append(pickle.load(open("{}/{}/supp_y_{}.pkl".format(master_path, state, idx), "rb")))
            query_xs_s.append(pickle.load(open("{}/{}/query_x_{}.pkl".format(master_path, state, idx), "rb")))
            query_ys_s.append(pickle.load(open("{}/{}/query_y_{}.pkl".format(master_path, state, idx), "rb")))
        total_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
        del(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
        history = training(melu, total_dataset, batch_size=config['batch_size'], num_epoch=epoch, model_save=True, model_filename=model_filename)
        training_losses.append(history[-1])
    
    testing_set_size = int(len(os.listdir("{}/{}".format('testingtestuser',state))) / 4)
    supp_xs_s_testing = []
    supp_ys_s_testing = []
    query_xs_s_testing = []
    query_ys_s_testing = []
    for idx in tqdm(range(testing_set_size)):
        supp_xs_s_testing.append(pickle.load(open("{}/{}/supp_x_{}.pkl".format('testingtestuser',state, idx), "rb")))
        supp_ys_s_testing.append(pickle.load(open("{}/{}/supp_y_{}.pkl".format('testingtestuser', state, idx), "rb")))
        query_xs_s_testing.append(pickle.load(open("{}/{}/query_x_{}.pkl".format('testingtestuser', state, idx), "rb")))
        query_ys_s_testing.append(pickle.load(open("{}/{}/query_y_{}.pkl".format('testingtestuser', state, idx), "rb")))
    
    trained_state_dict = torch.load(model_filename)
    melu.load_state_dict(trained_state_dict)

    final_loss = []
    for i in range(len(supp_xs_s_testing)):
        prediction = melu.model(supp_xs_s_testing[i].cuda())
        temp_loss = F.mse_loss(prediction, supp_ys_s_testing[i].cuda().view(-1, 1))
        final_loss.append(temp_loss.item())

    for j in range(len(query_xs_s_testing)):
        prediction = melu.model(query_xs_s_testing[j].cuda())
        temp_loss = F.mse_loss(prediction, query_ys_s_testing[j].cuda().view(-1, 1))
        final_loss.append(temp_loss.item())
    testing_losses.append(np.mean(final_loss))


100%|██████████| 2393/2393 [00:34<00:00, 69.50it/s]
100%|██████████| 20/20 [07:50<00:00, 23.52s/it]
100%|██████████| 260/260 [00:03<00:00, 75.67it/s]
100%|██████████| 2393/2393 [00:08<00:00, 298.98it/s]
100%|██████████| 40/40 [15:33<00:00, 23.33s/it]
100%|██████████| 260/260 [00:00<00:00, 348.98it/s]
100%|██████████| 2393/2393 [00:07<00:00, 325.98it/s]
100%|██████████| 50/50 [19:29<00:00, 23.40s/it]
100%|██████████| 260/260 [00:00<00:00, 355.66it/s]
100%|██████████| 2393/2393 [00:07<00:00, 314.78it/s]
100%|██████████| 60/60 [23:22<00:00, 23.38s/it]
100%|██████████| 260/260 [00:00<00:00, 343.46it/s]
100%|██████████| 2393/2393 [00:07<00:00, 307.55it/s]
100%|██████████| 80/80 [31:10<00:00, 23.38s/it]
100%|██████████| 260/260 [00:00<00:00, 354.22it/s]


In [7]:
training_losses

[0.9072293263153742,
 0.8516971904959455,
 0.8358650119512673,
 0.8121487340271073,
 0.7694852420147633]

In [8]:
testing_losses

[1.035839932784438,
 1.0543584047745054,
 1.0526656252260391,
 1.0656850319069164,
 1.106159537543471]

In [9]:
testing_losses_1 = []
for epoch in tqdm(epochs):
    model_filename = "{}/models_{}_{}.pkl".format("testuserdata",state,epoch)
    trained_state_dict = torch.load(model_filename)
    melu.load_state_dict(trained_state_dict)

    final_loss = []
    for i in tqdm(range(testing_set_size)):
        prediction = melu.forward(supp_xs_s_testing[i].cuda(), supp_ys_s_testing[i].cuda(), query_xs_s_testing[i].cuda(), 5)
        temp_loss = F.mse_loss(prediction, query_ys_s_testing[i].cuda().view(-1, 1))
        final_loss.append(temp_loss.item())
    testing_losses_1.append(np.mean(final_loss))

100%|██████████| 260/260 [00:08<00:00, 32.09it/s]
100%|██████████| 260/260 [00:07<00:00, 32.88it/s]
100%|██████████| 260/260 [00:07<00:00, 33.02it/s]
100%|██████████| 260/260 [00:07<00:00, 32.68it/s]
100%|██████████| 260/260 [00:07<00:00, 32.91it/s]
100%|██████████| 5/5 [00:39<00:00,  7.96s/it]


In [10]:
testing_losses_1

[1.1159570348663972,
 1.1162564511769093,
 1.116311658718265,
 1.1161173694122297,
 1.116346623490636]

In [20]:
def getMovieInfo(id):
    movie_info = {}
    with open("./movielens/ml-1m/movies_extrainfos.dat", encoding="utf-8") as f:
        for line in f.readlines():
            tmp = line.strip().split("::")
            movie_info[tmp[0]] = "{} ({})".format(tmp[1], tmp[2])
    return movie_info[id]

In [22]:
getMovieInfo('100')

'City Hall (1996)'