## Global Settings and Imports

In [1]:
# Turn ON automatic reloading: when the code of a .py module imported into this notebook changes, the changes are picked up automatically
%load_ext autoreload
%autoreload 2

In [2]:
import argparse
import yaml
from dotmap import DotMap
from os import path
import numpy as np
import torch
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import Trainer
from models.lstur import LSTUR
from models.nrms import NRMS
from models.naml import NAML
from models.naml_simple import NAML_Simple
from models.sentirec import SENTIREC
from models.robust_sentirec import ROBUST_SENTIREC
from data.dataset import BaseDataset
from tqdm import tqdm

## Prepare parameters

In [None]:
# Notebook stand-in for command-line arguments: which experiment config to
# load and, optionally, a checkpoint path to resume from.
args = argparse.Namespace(config="config/model/nrms/exp1.yaml", resume=None)

# Parse the YAML experiment file and wrap it so settings are reachable as attributes.
with open(args.config, 'r') as ymlfile:
    config = DotMap(yaml.load(ymlfile, Loader=yaml.FullLoader))

# Only these model names are handled further down in the notebook.
assert config.name in (
    "lstur",
    "nrms",
    "naml",
    "naml_simple",
    "sentirec",
    "robust_sentirec",
)

# Fix the random seed before any model or data loader is created.
pl.seed_everything(1234)

# TensorBoard logger, configured by the `logger` section of the config.
logger = TensorBoardLogger(**config.logger)

Seed set to 1234


In [4]:
# Checkpointing callback, configured by the `checkpoint` section of the config.
checkpoint_callback = ModelCheckpoint(**config.checkpoint)

## Load data

In [5]:
# Preprocessed files are addressed as <preprocess_data_path>/<dataset_size>/<file name>.
preprocess_path = f"{config.preprocess_data_path}/{config.dataset_size}/"

# NOTE(review): the training and the validation dataset both read
# `config.train_news` -- confirm that no separate validation news file is intended.
train_behavior_file = path.join(preprocess_path + config.train_behavior)
news_file = path.join(preprocess_path + config.train_news)
train_dataset = BaseDataset(train_behavior_file, news_file, config)

val_behavior_file = path.join(preprocess_path + config.val_behavior)
val_dataset = BaseDataset(val_behavior_file, news_file, config)

# Loader options come from the `train_dataloader` / `val_dataloader` config sections.
train_loader = DataLoader(train_dataset, **config.train_dataloader)
val_loader = DataLoader(val_dataset, **config.val_dataloader)

100%|██████████| 26740/26740 [00:01<00:00, 14362.10it/s]
100%|██████████| 28994/28994 [00:15<00:00, 1898.30it/s]
100%|██████████| 26740/26740 [00:02<00:00, 12756.30it/s]
100%|██████████| 2204/2204 [00:01<00:00, 1198.58it/s]


In [6]:
# Load the pre-trained word-embedding weights: one embedding vector per line,
# stored as whitespace-separated floats.
embedding_weights = []
with open(path.join(preprocess_path+config.embedding_weights), 'r') as file:
    # readlines() keeps the total known, so tqdm can show a percentage.
    for line in tqdm(file.readlines()):
        # split() without an argument tolerates tabs, repeated spaces, trailing
        # blanks and the newline; split(" ") would produce tokens such as ""
        # or "\n" that float() rejects.
        values = line.split()
        if not values:
            # Skip blank lines (e.g. an empty final line) instead of crashing on them.
            continue
        embedding_weights.append([float(w) for w in values])

# Convert the parsed rows into a float32 tensor.
pretrained_word_embedding = torch.from_numpy(
    np.array(embedding_weights, dtype=np.float32)
)

100%|██████████| 42562/42562 [00:03<00:00, 13401.05it/s]


## Build model

In [10]:
# Maps every supported `config.name` to the model class that implements it.
MODEL_CLASSES = {
    "lstur": LSTUR,
    "nrms": NRMS,
    "naml": NAML,
    "naml_simple": NAML_Simple,
    "sentirec": SENTIREC,
    "robust_sentirec": ROBUST_SENTIREC,
}

# Fail loudly on an unknown name. Without this, `model` would stay undefined,
# or worse, silently keep a model left over from an earlier run of this cell.
# The isinstance guard avoids a dict lookup with a non-string value.
model_cls = MODEL_CLASSES.get(config.name) if isinstance(config.name, str) else None
if model_cls is None:
    raise ValueError(
        f"Unsupported model name {config.name!r}; "
        f"expected one of {sorted(MODEL_CLASSES)}"
    )

# Every model class takes the experiment config and the pre-trained embedding matrix.
model = model_cls(config, pretrained_word_embedding)

## Train model

In [11]:
# Early-stopping callback, configured by the `early_stop` section of the config.
early_stop_callback = EarlyStopping(**config.early_stop)

if args.resume is not None:
    # `load_from_checkpoint` is a classmethod, and Lightning 2.x refuses to run it
    # on an instance, so call it on the model's class.
    model = type(model).load_from_checkpoint(
        args.resume,
        config=config,
        pretrained_word_embedding=pretrained_word_embedding,
    )

# The Trainer is built the same way for fresh and resumed runs.
# `Trainer(resume_from_checkpoint=...)` was removed in Lightning 2.0; restoring the
# full training state is requested through `trainer.fit(ckpt_path=...)` instead.
trainer = Trainer(
    **config.trainer,
    callbacks=[early_stop_callback, checkpoint_callback],
    logger=logger,
    # strategy=DDPStrategy(process_group_backend="gloo"),  # disabled; DDPStrategy is not imported in this notebook
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [12]:
# Start training. `ckpt_path` is None for a fresh run (the default behaviour);
# when `args.resume` is set, Lightning restores the training state from that
# checkpoint before continuing.
trainer.fit(
    model=model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
    ckpt_path=args.resume,
)

You are using a CUDA device ('NVIDIA GeForce RTX 4060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
c:\Users\nclud\anaconda3\envs\newsrec\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory C:\Users\nclud\Desktop\projects\python\NewsRecommendation\newsrec\project\logs\lightning_logs\checkpoints\sentirec\vader_lambda0p4_mu10 exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                                   | Type             | Params | Mode 
-------------------------------------------------------------------------------------
0  | news_encoder                           | TimeDistributed  | 13.2 M | train
1  | user_encoder                           |

Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\nclud\anaconda3\envs\newsrec\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


                                                                           

c:\Users\nclud\anaconda3\envs\newsrec\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 453/453 [02:22<00:00,  3.18it/s, v_num=mu10]

Metric val_auc_epoch improved. New best score: 0.513
Epoch 0, global step 453: 'val_auc_epoch' reached 0.51280 (best 0.51280), saving model to 'C:\\Users\\nclud\\Desktop\\projects\\python\\NewsRecommendation\\newsrec\\project\\logs\\lightning_logs\\checkpoints\\sentirec\\vader_lambda0p4_mu10\\epoch=0-val_auc_epoch=0.5128.ckpt' as top 3


Epoch 1: 100%|██████████| 453/453 [02:37<00:00,  2.88it/s, v_num=mu10]

Epoch 1, global step 906: 'val_auc_epoch' reached 0.51044 (best 0.51280), saving model to 'C:\\Users\\nclud\\Desktop\\projects\\python\\NewsRecommendation\\newsrec\\project\\logs\\lightning_logs\\checkpoints\\sentirec\\vader_lambda0p4_mu10\\epoch=1-val_auc_epoch=0.5104.ckpt' as top 3


Epoch 2: 100%|██████████| 453/453 [02:47<00:00,  2.71it/s, v_num=mu10]

Metric val_auc_epoch improved by 0.012 >= min_delta = 0.0001. New best score: 0.525
Epoch 2, global step 1359: 'val_auc_epoch' reached 0.52512 (best 0.52512), saving model to 'C:\\Users\\nclud\\Desktop\\projects\\python\\NewsRecommendation\\newsrec\\project\\logs\\lightning_logs\\checkpoints\\sentirec\\vader_lambda0p4_mu10\\epoch=2-val_auc_epoch=0.5251.ckpt' as top 3


Epoch 3: 100%|██████████| 453/453 [02:28<00:00,  3.04it/s, v_num=mu10]

Metric val_auc_epoch improved by 0.040 >= min_delta = 0.0001. New best score: 0.565
Epoch 3, global step 1812: 'val_auc_epoch' reached 0.56502 (best 0.56502), saving model to 'C:\\Users\\nclud\\Desktop\\projects\\python\\NewsRecommendation\\newsrec\\project\\logs\\lightning_logs\\checkpoints\\sentirec\\vader_lambda0p4_mu10\\epoch=3-val_auc_epoch=0.5650.ckpt' as top 3


Epoch 4: 100%|██████████| 453/453 [02:46<00:00,  2.72it/s, v_num=mu10]

Metric val_auc_epoch improved by 0.039 >= min_delta = 0.0001. New best score: 0.604
Epoch 4, global step 2265: 'val_auc_epoch' reached 0.60449 (best 0.60449), saving model to 'C:\\Users\\nclud\\Desktop\\projects\\python\\NewsRecommendation\\newsrec\\project\\logs\\lightning_logs\\checkpoints\\sentirec\\vader_lambda0p4_mu10\\epoch=4-val_auc_epoch=0.6045.ckpt' as top 3


Epoch 5: 100%|██████████| 453/453 [02:46<00:00,  2.73it/s, v_num=mu10]

Metric val_auc_epoch improved by 0.015 >= min_delta = 0.0001. New best score: 0.619
Epoch 5, global step 2718: 'val_auc_epoch' reached 0.61920 (best 0.61920), saving model to 'C:\\Users\\nclud\\Desktop\\projects\\python\\NewsRecommendation\\newsrec\\project\\logs\\lightning_logs\\checkpoints\\sentirec\\vader_lambda0p4_mu10\\epoch=5-val_auc_epoch=0.6192.ckpt' as top 3


Epoch 6: 100%|██████████| 453/453 [02:46<00:00,  2.72it/s, v_num=mu10]

Metric val_auc_epoch improved by 0.010 >= min_delta = 0.0001. New best score: 0.629
Epoch 6, global step 3171: 'val_auc_epoch' reached 0.62888 (best 0.62888), saving model to 'C:\\Users\\nclud\\Desktop\\projects\\python\\NewsRecommendation\\newsrec\\project\\logs\\lightning_logs\\checkpoints\\sentirec\\vader_lambda0p4_mu10\\epoch=6-val_auc_epoch=0.6289.ckpt' as top 3


Epoch 7: 100%|██████████| 453/453 [00:53<00:00,  8.39it/s, v_num=mu10]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined