In [1]:
import torch
import h5py
import numpy as np
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import pickle
from sklearn.preprocessing import normalize
from scipy.sparse import lil_matrix, csr_matrix, hstack
from tqdm import tqdm
import math
import os
import csv

In [2]:
# Configuration block: every input and output path used by this notebook.

# Pickled validation recommendations, one file per base model.
# The number and order of entries matter: Network is constructed with
# len(rec_file_list) inputs and then restores a saved checkpoint, so this list
# has to stay in step with the checkpoint at state_dict_path.
rec_file_list = [
    # --- Chanho_recs ---
    "./final_recs/Chanho_recs/valid_recs/CF_rec_cpl_dim_64.pickle",
    "./final_recs/Chanho_recs/valid_recs/Graph_rec_cpl_1_2_depth_5.pickle",
    "./final_recs/Chanho_recs/valid_recs/Graph_rec_cpl_1_4_depth_3.pickle",
    "./final_recs/Chanho_recs/valid_recs/Graph_rec_cpl_1_8_depth_3.pickle",
    "./final_recs/Chanho_recs/valid_recs/Graph_rec_cpl_1_8_depth_1.pickle",
    # --- Junwon_recs ---
    "./final_recs/Junwon_recs/inference_valid_cpl_completion_DNN_fc_layer_sizes_1024-1024-512-512_batch_16_seed_0.pkl",
    # --- Hanseul_recs ---
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncFC_PoolPMA_CplPooled_NumEnc5_NumDec0_Hid512_Emb512_Ind10.pickle",
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncFC_PoolPMA_CplPooled_NumEnc8_NumDec0_Hid512_Emb512_Ind10.pickle",
    # NOTE(review): identical to the previous entry -- looks like a copy-paste
    # duplicate. Kept as-is because dropping it changes len(rec_file_list) and
    # therefore the weight shape the checkpoint is loaded into; confirm intent.
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncFC_PoolPMA_CplPooled_NumEnc8_NumDec0_Hid512_Emb512_Ind10.pickle",
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncHYBRID_PoolPMA_CplPooled_NumEnc3_NumDec0_Hid512_Emb512_Ind16.pickle",
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncHYBRID_PoolPMA_CplPooled_NumEnc3_NumDec3_Hid512_Emb512_Ind6.pickle",
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncHYBRID_PoolPMA_CplPooled_NumEnc3_NumDec3_Hid512_Emb512_Ind7.pickle",
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncHYBRID_PoolPMA_CplPooled_NumEnc3_NumDec3_Hid512_Emb512_Ind10.pickle",
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncHYBRID_PoolPMA_CplPooled_NumEnc4_NumDec3_Hid512_Emb512_Ind6.pickle",
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncHYBRID_PoolPMA_CplPooled_NumEnc6_NumDec0_Hid512_Emb512_Ind29.pickle",
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncHYBRID_SA_PoolPMA_CplEncoded_NumEnc4_NumDec0_Hid512_Emb512_Ind10.pickle",
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncHYBRID_SA_PoolPMA_CplEncoded_NumEnc6_NumDec0_Hid512_Emb512_Ind10.pickle",
    "./final_recs/Hanseul_recs/rec_CCNet_valid_cpl_EncHYBRID_SA_PoolPMA_CplEncoded_NumEnc8_NumDec0_Hid512_Emb512_Ind10.pickle",
]

# Checkpoint holding the trained ensemble weights.
state_dict_path = "./ensemble_model/ensemble_model_best_cpl.pt"

# Intended CSV output location.
# NOTE(review): no cell visible in this notebook writes to this path.
save_path = './ensemble_model/valid_cpl.csv'

In [3]:
class RecDataset(Dataset):
    """Per-query stack of normalized recommendation scores from several models.

    Each dataset item is a dense ``(model_num, item_num)`` matrix for one query:
    row ``m`` holds the scores model ``m`` assigned to the items it recommended
    for that query, and zeros everywhere else.

    Args:
        recs_list: One mapping per model. Each maps a query index (used directly
            as a position in ``[0, query_num)``) to an iterable of
            ``(item_index, score)`` pairs.
        query_num: Number of queries, i.e. the dataset length.
        item_num: Size of the item axis; every item index must be below it.
        transform: Optional callable applied to the dense NumPy matrix; it must
            return a tensor. When omitted the matrix is converted to a float32
            tensor.
        target_transform: Stored for API symmetry; not used by this class.
    """

    def __init__(self, recs_list, query_num, item_num, transform=None, target_transform=None):
        # Size the model axis from the argument itself rather than from a
        # notebook-level global, so the class works with any list of models.
        model_num = len(recs_list)
        # rec_matrix = [query num, model_num, item_num]
        self.rec_matrix = [lil_matrix((model_num, item_num)) for _ in range(query_num)]
        for model_idx, recs in enumerate(recs_list):
            for query, rec in tqdm(recs.items()):
                if len(rec) == 0:
                    # Nothing recommended for this query: keep the all-zero row
                    # (normalize() rejects an array with zero samples).
                    continue
                rec_items, rec_scores = zip(*rec)
                # L2-normalize this model's scores for the query so that models
                # with different score scales become comparable.
                rec_scores = normalize(np.array(rec_scores)[:, np.newaxis], axis=0).ravel()
                row_block = self.rec_matrix[query]
                for item, score in zip(rec_items, rec_scores):
                    row_block[model_idx, item] = score
        self.transform = transform
        self.target_transform = target_transform
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def __len__(self):
        """Return the number of queries."""
        return len(self.rec_matrix)

    def __getitem__(self, idx):
        """Return the dense score matrix of query ``idx`` as a tensor on ``self.device``."""
        dense = self.rec_matrix[idx].toarray()
        if self.transform is not None:
            tensor = self.transform(dense)
        else:
            # Without a transform the NumPy array still has to become a tensor
            # before it can be moved to the target device.
            tensor = torch.as_tensor(dense, dtype=torch.float32)
        return tensor.to(self.device)

In [4]:
class Network(nn.Module):
    """Bias-free two-step linear mixer over the model axis.

    ``w1`` has shape ``(k, model_len)`` and ``w2`` has shape ``(1, k)``. The
    forward pass contracts the model axis of the input with ``w1`` and then the
    resulting ``k`` axis with ``w2``; there is no non-linearity in between.

    Args:
        model_len: Size of the model axis of the input.
        k: Number of intermediate rows produced by ``w1``.
    """

    def __init__(self, model_len, k=10):
        super().__init__()
        # w1 is drawn before w2 so that seeded initialisation is reproducible.
        self.w1 = nn.Parameter(torch.randn(k, model_len))
        self.w2 = nn.Parameter(torch.randn(1, k))

    def forward(self, x):
        """Map ``x`` of shape (batch, model_len, items) to scores of shape (batch, items)."""
        scores = x.float()
        # (k, model_len) x (batch, model_len, items) -> (batch, k, items)
        hidden = torch.einsum('km, bmi -> bki', self.w1, scores)
        # (1, k) x (batch, k, items) -> (batch, 1, items)
        mixed = torch.einsum('ok, bki -> boi', self.w2, hidden)
        # Drop the singleton axis introduced by w2's single row.
        return mixed.squeeze(1)

In [5]:
def _load_recs(path):
    """Unpickle one model's recommendations from ``path``.

    pickle can execute arbitrary code while loading, so this must only be
    pointed at trusted files.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# One recommendation mapping per base model, in rec_file_list order.
recs_list = [_load_recs(rec_file) for rec_file in rec_file_list]

# The number of queries is taken from the first model's output.
query_num = len(recs_list[0])
# Size of the item axis handed to RecDataset.
# NOTE(review): hard-coded; presumably the size of the item catalogue -- confirm.
item_num = 6714

test_data = RecDataset(
    recs_list,
    query_num,
    item_num,
    transform=torch.Tensor,
    target_transform=torch.tensor,
)
# Order is preserved (shuffle=False) so predictions line up with query indices.
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=False)

100%|███████████████████████████████████████████████████████████████████████████| 7848/7848 [00:00<00:00, 12223.72it/s]
100%|███████████████████████████████████████████████████████████████████████████| 7848/7848 [00:00<00:00, 12091.75it/s]
100%|███████████████████████████████████████████████████████████████████████████| 7848/7848 [00:00<00:00, 10809.48it/s]
100%|███████████████████████████████████████████████████████████████████████████| 7848/7848 [00:00<00:00, 12129.56it/s]
100%|███████████████████████████████████████████████████████████████████████████| 7848/7848 [00:00<00:00, 11854.92it/s]
100%|███████████████████████████████████████████████████████████████████████████| 7848/7848 [00:00<00:00, 12166.81it/s]
100%|███████████████████████████████████████████████████████████████████████████| 7848/7848 [00:00<00:00, 12262.62it/s]
100%|███████████████████████████████████████████████████████████████████████████| 7848/7848 [00:00<00:00, 11890.86it/s]
100%|███████████████████████████████████

In [7]:
# Use the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Rebuild the ensemble with one input per recommendation file and k=50
# intermediate rows, then restore the trained weights onto the chosen device.
model = Network(len(rec_file_list), k=50)
model = model.to(device)
model_state_dict = torch.load(state_dict_path, map_location=device)
model.load_state_dict(model_state_dict)


def inference(dataloader, model):
    """Collect the top-scoring item index for every row the dataloader yields.

    Args:
        dataloader: Sized iterable of input batches; each batch is passed to
            ``model`` unchanged.
        model: Callable returning a 2-D tensor of scores, one row per query.

    Returns:
        A flat list with one NumPy integer per query (the argmax along axis 1),
        in the order the batches were produced.
    """
    rec_lst = []
    # Gradients are not needed for inference.
    with torch.no_grad():
        for X in tqdm(dataloader, total=len(dataloader)):
            scores = model(X).cpu().numpy()
            rec_lst += list(np.argmax(scores, axis=1))
    return rec_lst
            

# Run the restored ensemble over every batch of test_dataloader; `infer` ends up
# with one predicted item index per query, in dataset order.
infer = inference(test_dataloader, model)

100%|████████████████████████████████████████████████████████████████████████████████| 123/123 [00:07<00:00, 16.45it/s]


In [8]:
# Key each prediction by its query position. Every value is a one-element list
# holding an (item, 1) pair; the 1 is a constant placeholder in the score slot.
infer_dict = {i: [(item, 1)] for i, item in enumerate(infer)}

In [9]:
from evaluation import get_metric

# Read the ground-truth labels. The context manager closes the HDF5 file even
# if the dataset lookup or the dtype conversion raises.
with h5py.File('./Container/valid_cpl', 'r') as h5f_valid:
    answer = h5f_valid['labels_id'][:].astype(np.int64)

# Key the answers by query position so they line up with infer_dict's keys.
answer_dict = dict(enumerate(answer))

# Score the single prediction per query (n=1).
metric = get_metric(infer_dict, answer_dict, n=1)

In [10]:
# Display the metrics returned by get_metric for the n=1 evaluation.
metric

{'macro': 0.024269577339615964,
 'micro': 0.1596585117227319,
 'accuracy': 0.1596585117227319,
 'map': 0.1596585117227319,
 'recall': 0.1596585117227319,
 'recall_rank': 1.0}