## Global Settings and Imports

In [1]:
# jupyter notebook에서 import 해서 쓰는 .py 모듈의 코드가 변경될 시, 변동 사항을 자동으로 반영해주는 기능 ON
%load_ext autoreload
%autoreload 2

In [2]:
import argparse
import yaml
from dotmap import DotMap
from os import path
import numpy as np
import torch
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import Trainer
from models.lstur import LSTUR
from models.nrms import NRMS
from models.naml import NAML
from models.naml_simple import NAML_Simple
from models.sentirec import SENTIREC
from models.robust_sentirec import ROBUST_SENTIREC
from data.dataset import BaseDataset
from tqdm import tqdm

## Prepare parameters

In [3]:
args = argparse.Namespace(
    config = "config/model/sentirec/vader_lambda0p4_mu10.yaml",
    ckpt = "logs/lightning_logs/checkpoints/sentirec/vader_lambda0p4_mu10/epoch=20-val_auc_epoch=0.6618.ckpt"
)

with open(args.config, 'r') as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.FullLoader)
    config = DotMap(config)

assert(config.name in ["lstur", "nrms", "naml", "naml_simple", "sentirec", "robust_sentirec"])

pl.seed_everything(1234)

logger = TensorBoardLogger(
    **config.logger
)

Seed set to 1234


## Load data

In [4]:
preprocess_path = f"{config.preprocess_data_path}/{config.dataset_size}/"

test_dataset = BaseDataset(
    path.join(preprocess_path + config.test_behavior),
    path.join(preprocess_path + config.test_news), 
    config)
test_loader = DataLoader(
    test_dataset,
    **config.test_dataloader)

100%|██████████| 18723/18723 [00:01<00:00, 14035.01it/s]
100%|██████████| 7538/7538 [00:05<00:00, 1391.57it/s]


In [5]:
# load embedding pre-trained embedding weights
embedding_weights=[]
with open(path.join(preprocess_path + config.embedding_weights), 'r') as file: 
    lines = file.readlines()
    for line in tqdm(lines):
        weights = [float(w) for w in line.split(" ")]
        embedding_weights.append(weights)
pretrained_word_embedding = torch.from_numpy(
    np.array(embedding_weights, dtype=np.float32)
)

100%|██████████| 42562/42562 [00:03<00:00, 13440.62it/s]


## Load model from checkpoint

In [6]:
import os

print(args.ckpt)  # 경로 출력
print(path)  # 파일 여부 확인

logs/lightning_logs/checkpoints/sentirec/vader_lambda0p4_mu10/epoch=20-val_auc_epoch=0.6618.ckpt
<module 'ntpath' from 'c:\\Users\\nclud\\anaconda3\\envs\\newsrec\\lib\\ntpath.py'>


In [7]:
if config.name == "lstur":
    model = LSTUR.load_from_checkpoint(
        args.ckpt, 
        config=config, 
        pretrained_word_embedding=pretrained_word_embedding
    )
elif config.name == "nrms":
    model = NRMS.load_from_checkpoint(
        args.ckpt, 
        config=config, 
        pretrained_word_embedding=pretrained_word_embedding
    )
elif config.name == "naml":
    model = NAML.load_from_checkpoint(
        args.ckpt, 
        config=config, 
        pretrained_word_embedding=pretrained_word_embedding
    )
elif config.name == "naml_simple":
    model = NAML_Simple.load_from_checkpoint(
        args.ckpt, 
        config=config, 
        pretrained_word_embedding=pretrained_word_embedding
    )
elif config.name == "sentirec":
    model = SENTIREC.load_from_checkpoint(
        args.ckpt, 
        config=config, 
        pretrained_word_embedding=pretrained_word_embedding
    )
elif config.name == "robust_sentirec":
    model = ROBUST_SENTIREC.load_from_checkpoint(
        args.ckpt, 
        config=config, 
        pretrained_word_embedding=pretrained_word_embedding
    )
# elif:
    # UPCOMING MODELS

## Test model

In [None]:
trainer = Trainer(
    **config.trainer,
    logger=logger
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [21]:
trainer.test(
    model=model, 
    dataloaders=test_loader
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\USER\anaconda3\envs\newsrec\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 7538/7538 [05:44<00:00, 21.89it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         Test metric                 DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_auc_epoch             0.5661918520927429
test_ils_senti@10_bert_epoch       0.480234831571579
test_ils_senti@10_vader_epoch     0.22790543735027313
 test_ils_senti@5_bert_epoch      0.46508610248565674
test_ils_senti@5_vader_epoch      0.22426888346672058
   test_ils_topic@10_epoch        0.08412498235702515
   test_ils_topic@5_epoch         0.09400947391986847
       test_mrr_epoch             0.24936088919639587
     test_ndcg@10_epoch           0.33303865790367126
      test_ndcg@5_epoch           0.26616060733795166
  test_senti@10_bert_epoch        0.46898961067199707
  test_senti@10_vader_epoch       0.048109

[{'test_auc_epoch': 0.5661918520927429,
  'test_mrr_epoch': 0.24936088919639587,
  'test_ndcg@10_epoch': 0.33303865790367126,
  'test_ndcg@5_epoch': 0.26616060733795166,
  'test_senti@10_vader_epoch': 0.04810909926891327,
  'test_senti@5_vader_epoch': 0.036427032202482224,
  'test_senti_mrr_vader_epoch': 0.024893416091799736,
  'test_senti@10_bert_epoch': 0.46898961067199707,
  'test_senti@5_bert_epoch': 0.3051859736442566,
  'test_senti_mrr_bert_epoch': 0.19709980487823486,
  'test_topic_div@10_epoch': 0.4452049136161804,
  'test_topic_div@5_epoch': 0.36226359009742737,
  'test_topic_mrr_epoch': 0.43979042768478394,
  'test_ils_senti@10_vader_epoch': 0.22790543735027313,
  'test_ils_senti@5_vader_epoch': 0.22426888346672058,
  'test_ils_senti@10_bert_epoch': 0.480234831571579,
  'test_ils_senti@5_bert_epoch': 0.46508610248565674,
  'test_ils_topic@10_epoch': 0.08412498235702515,
  'test_ils_topic@5_epoch': 0.09400947391986847}]

In [22]:
test_dataset[0]

{'user': tensor(629),
 'h_title': tensor([[    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0],
         [    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0],
         [    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0],
         [    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0],
         [    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0],
         [    0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0],
 

In [30]:
test_dataset[11]['c_title']

tensor([[ 2679,  4349,  4726,  2851,   550,    29,  5067,   164,  3585, 10645,
          2250,  1471,     0,     0,     0,     0,     0,     0,     0,     0],
        [  887,   128,  3392,  1401,  1764,   126,  4545,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0]])