In [1]:
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
import numpy as np
from tqdm import tqdm
import os
from pathlib import Path
import importlib
import datetime
from evaluate import evaluate
from config import model_name
from dataset import BaseDataset

In [2]:
# Resolve the model class and its matching config class by name, so the
# notebook follows whatever `model_name` is set to in config.py.
Model = getattr(importlib.import_module(f"model.{model_name}"), model_name)
config = getattr(importlib.import_module('config'), f"{model_name}Config")

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Load the pretrained word-embedding matrix from disk and convert it to a
# float32 tensor.
pretrained_word_embedding = torch.from_numpy(
            np.load('../data/train/pretrained_word_embedding.npy')).float()

In [4]:
# Instantiate the selected model class and move it to `device`.
model = Model(config, pretrained_word_embedding).to(device)

## NAML

In [12]:
from model.NAML.news_encoder import NewsEncoder
from model.NAML.user_encoder import UserEncoder
from model.general.click_predictor.dot_product import DotProductClickPredictor

In [15]:
# Display the config class resolved earlier (recorded output: config.NAMLConfig).
config

config.NAMLConfig

In [None]:
class NAML(torch.nn.Module):
    """Partial sketch of the NAML model used for this walkthrough.

    Only construction is covered: the config is stored and a news encoder is
    built. No ``forward`` method is defined in this cell.

    Args:
        config: configuration object forwarded to ``NewsEncoder``.
        pretrained_word_embedding: optional pretrained embedding forwarded to
            ``NewsEncoder``; defaults to ``None``.
    """

    def __init__(self, config, pretrained_word_embedding=None):
        super().__init__()
        self.config = config
        # Both constructor arguments are handed to the news encoder unchanged.
        self.news_encoder = NewsEncoder(
            config,
            pretrained_word_embedding,
        )


### NewsEncoder

In [63]:
# Standalone news encoder instance; later cells call it on the candidate and
# clicked news of a minibatch.
news_encoder = NewsEncoder(config, pretrained_word_embedding)

In [None]:
class NewsEncoder(torch.nn.Module):
    """Empty scaffold for the news encoder; both constructor arguments are unused.

    NOTE(review): running this cell rebinds the name ``NewsEncoder``, which an
    earlier cell imports from ``model.NAML.news_encoder`` — confirm that the
    shadowing is intended before executing it.
    """

    def __init__(self, config, pretrained_word_embedding):
        # Only the base Module state is initialised; no sub-modules are created.
        super().__init__()
        

In [8]:
# Build the word-embedding table that the text encoders share.
# A randomly initialised table is only needed when no pretrained matrix is
# available; building it unconditionally and then overwriting it wastes a
# full (num_words x word_embedding_dim) allocation and initialisation.
if pretrained_word_embedding is None:
    word_embedding = nn.Embedding(config.num_words,
                                  config.word_embedding_dim,
                                  padding_idx=0)
else:
    # freeze=False keeps the pretrained vectors trainable.
    word_embedding = nn.Embedding.from_pretrained(pretrained_word_embedding,
                                                  freeze=False,
                                                  padding_idx=0)

In [18]:
# News attributes that are eligible for a text encoder.
text_encoders_candidates = ['title', 'abstract']

In [29]:
# Dropout probability handed to every TextEncoder (recorded output: 0.2).
config.dropout_probability

0.2

In [None]:
# One TextEncoder per text attribute that the dataset actually provides.
# NOTE(review): TextEncoder is defined in a later cell — run that cell first.
text_encoders = nn.ModuleDict({
    name:
    TextEncoder(word_embedding,  # the embedding table built above
                config.word_embedding_dim,  # 300
                config.num_filters,  # 300
                config.window_size,  # 3
                config.query_vector_dim,  # 200
                config.dropout_probability  # 0.2
                )
    # Walk the candidate list instead of a set intersection: the iteration
    # order of a set of strings can differ between interpreter runs, which
    # would make the encoder order (and anything stacked from it) unstable.
    for name in text_encoders_candidates
    if name in config.dataset_attributes['news']
})

#### TextEncoder

In [74]:
from model.general.attention.additive import AdditiveAttention

class TextEncoder(torch.nn.Module):
    """Encode a batch of word-id sequences: embedding -> CNN -> ReLU -> attention.

    Args:
        word_embedding: module that maps word ids to embedding vectors.
        word_embedding_dim: embedding size; also the width of the CNN kernel.
        num_filters: number of CNN output channels.
        window_size: number of consecutive words covered by one CNN kernel.
        query_vector_dim: first argument forwarded to ``AdditiveAttention``.
        dropout_probability: dropout rate applied after the embedding lookup
            and again after the ReLU.
    """

    def __init__(self, word_embedding, word_embedding_dim, num_filters,
                 window_size, query_vector_dim, dropout_probability):
        super().__init__()
        self.word_embedding = word_embedding
        self.dropout_probability = dropout_probability

        # The kernel spans the whole embedding width, so only the word axis
        # is padded.
        kernel_shape = (window_size, word_embedding_dim)
        word_axis_padding = int((window_size - 1) / 2)
        self.CNN = nn.Conv2d(in_channels=1,
                             out_channels=num_filters,
                             kernel_size=kernel_shape,
                             padding=(word_axis_padding, 0))
        self.additive_attention = AdditiveAttention(query_vector_dim,
                                                    num_filters)

    def _dropout(self, tensor):
        """Apply dropout with the configured rate; inactive in eval mode."""
        return F.dropout(tensor,
                         p=self.dropout_probability,
                         training=self.training)

    def forward(self, text):
        """Return one vector per sequence in ``text`` (a tensor of word ids)."""
        # batch_size, num_words_text, word_embedding_dim
        embedded = self._dropout(self.word_embedding(text))
        # Add a channel axis for Conv2d, then drop the collapsed width axis.
        # batch_size, num_filters, num_words_text
        convolved = self.CNN(embedded.unsqueeze(1)).squeeze(3)
        activated = self._dropout(F.relu(convolved))
        # Attention expects the word axis before the filter axis.
        # batch_size, num_filters
        return self.additive_attention(activated.transpose(1, 2))

In [75]:
# One TextEncoder per text attribute that the dataset actually provides; all
# of them share the same word-embedding module.
text_encoders = nn.ModuleDict({
    name:
    TextEncoder(word_embedding, config.word_embedding_dim,
                config.num_filters, config.window_size,
                config.query_vector_dim, config.dropout_probability)
    # Walk the candidate list instead of a set intersection: the iteration
    # order of a set of strings can differ between interpreter runs, which
    # would make the encoder order (and anything stacked from it) unstable.
    for name in text_encoders_candidates
    if name in config.dataset_attributes['news']
})

##### Forward

In [84]:
# Use the first entry of `candidate_news` as the sample for this walkthrough.
# NOTE(review): `candidate_news` is only assigned in a later cell (from
# minibatch["candidate_news"]), so this fails on a fresh top-to-bottom run.
news = candidate_news[0] 

In [99]:
# Encode each text attribute of `news` with its matching text encoder.
# NOTE(review): inputs are moved to `device` but the encoders built in this
# notebook are never moved there — confirm this also works when CUDA is used.
text_vectors = [
    encoder(news[name].to(device))
    for name, encoder in text_encoders.items()
]

# Encode each categorical attribute of `news` with its matching element encoder.
# NOTE(review): `element_encoders` is created in a later cell — run it first.
element_vectors = [
    encoder(news[name].to(device))
    for name, encoder in element_encoders.items()
]

In [104]:
# Collect every per-attribute view of the news item (text and categorical).
# Taking only element_vectors[0] would silently drop the remaining element
# vectors, and which one survives depends on the order of `element_encoders`.
all_vectors = text_vectors + element_vectors

In [108]:
# Attention layer used to pool the per-attribute vectors into one news vector.
final_attention = AdditiveAttention(config.query_vector_dim,
                                            config.num_filters)

In [109]:
# Stack the per-attribute vectors along a new dim 1 and pool them with attention.
final_news_vector = final_attention(
    torch.stack(all_vectors, dim=1))

In [110]:
# Recorded output: torch.Size([16, 300]).
final_news_vector.shape

torch.Size([16, 300])

#### ElementEncoder

In [97]:
class ElementEncoder(torch.nn.Module):
    """Encode categorical ids: embedding lookup -> linear projection -> ReLU.

    Args:
        embedding: module that maps ids to embedding vectors (may be shared
            with other encoders).
        linear_input_dim: input size of the projection layer.
        linear_output_dim: output size of the projection layer.
    """

    def __init__(self, embedding, linear_input_dim, linear_output_dim):
        super().__init__()
        self.embedding = embedding
        self.linear = nn.Linear(in_features=linear_input_dim,
                                out_features=linear_output_dim)

    def forward(self, element):
        """Return the non-negative projected embedding of ``element``."""
        embedded = self.embedding(element)
        projected = self.linear(embedded)
        return F.relu(projected)

In [98]:
# News attributes that are eligible for an element (categorical) encoder.
element_encoders_candidates = ['category', 'subcategory']

# Embedding table shared by every element encoder. It is defined here so the
# cell runs on a fresh kernel; no other visible cell creates it.
# NOTE(review): assumes config exposes `num_categories` — TODO confirm.
category_embedding = nn.Embedding(config.num_categories,
                                  config.category_embedding_dim,
                                  padding_idx=0)

element_encoders = nn.ModuleDict({
    name:
    ElementEncoder(category_embedding, config.category_embedding_dim,
                   config.num_filters)
    # Walk the candidate list instead of a set intersection so the encoder
    # order is the same on every run (set order of strings is not stable
    # across interpreter runs).
    for name in element_encoders_candidates
    if name in config.dataset_attributes['news']
})

### Continue

In [None]:
# Print the module tree of the model under inspection.
# NOTE(review): `models` is not defined in any visible cell, so the `else`
# branch would raise NameError when model_name == 'Exp1'.
if model_name != 'Exp1':
    print(model)
else:
    print(models[0])

NAML(
  (news_encoder): NewsEncoder(
    (text_encoders): ModuleDict(
      (abstract): TextEncoder(
        (word_embedding): Embedding(70975, 300, padding_idx=0)
        (CNN): Conv2d(1, 300, kernel_size=(3, 300), stride=(1, 1), padding=(1, 0))
        (additive_attention): AdditiveAttention(
          (linear): Linear(in_features=300, out_features=200, bias=True)
        )
      )
      (title): TextEncoder(
        (word_embedding): Embedding(70975, 300, padding_idx=0)
        (CNN): Conv2d(1, 300, kernel_size=(3, 300), stride=(1, 1), padding=(1, 0))
        (additive_attention): AdditiveAttention(
          (linear): Linear(in_features=300, out_features=200, bias=True)
        )
      )
    )
    (element_encoders): ModuleDict(
      (category): ElementEncoder(
        (embedding): Embedding(275, 100, padding_idx=0)
        (linear): Linear(in_features=100, out_features=300, bias=True)
      )
      (subcategory): ElementEncoder(
        (embedding): Embedding(275, 100, padding_id

In [40]:
# Training dataset built from the parsed behaviors file and the parsed news file.
dataset = BaseDataset('../data/train/behaviors_parsed.tsv',
                        '../data/train/news_parsed.tsv')

In [41]:
# Wrap the DataLoader in iter() so single minibatches can be pulled with next().
dataloader = iter(
    DataLoader(dataset,
               batch_size=config.batch_size,
               shuffle=True,
               num_workers=config.num_workers,
               drop_last=True,
               pin_memory=True))

In [42]:
# NLLLoss expects log-probabilities as input, whereas CrossEntropyLoss applies
# log-softmax itself and therefore accepts raw scores. The dot-product click
# predictor shown in this notebook emits raw (positive) scores, for which
# NLLLoss would just return the negated score of the target class.
# NOTE(review): assumes `model` returns raw click scores like that predictor —
# confirm for models whose forward already ends in log_softmax.
criterion = nn.CrossEntropyLoss()

# Kept as a list because the training cells iterate over `optimizers`.
optimizers = [torch.optim.Adam(model.parameters(), lr=config.learning_rate)]

In [43]:
# Bookkeeping for the manual training walkthrough.
start_time = time.time()
loss_full = []  # one loss value is appended per training step
exhaustion_count = 0
step = 0
#early_stopping = EarlyStopping()

In [44]:
# Pull a single minibatch to step through one training iteration by hand.
minibatch = next(dataloader)

In [60]:
# Forward pass on the minibatch's candidate and clicked news.
y_pred = model(minibatch["candidate_news"], minibatch["clicked_news"])

# Every sample is labelled with class index 0; build the int64 target
# directly on `device`.
y = torch.zeros(len(y_pred), dtype=torch.long, device=device)
loss = criterion(y_pred, y)
loss_full.append(loss.item())

# Clear stale gradients on every optimizer, then backpropagate.
for optimizer in optimizers:
    optimizer.zero_grad()
loss.backward()

#### Forward

In [61]:
# Unpack the two model inputs from the minibatch for the manual forward pass.
candidate_news = minibatch["candidate_news"]
clicked_news = minibatch["clicked_news"]

In [111]:
# Encode every entry of `candidate_news` and stack the results along dim 1
# (recorded shape further down: [16, 3, 300]).
candidate_news_vector = torch.stack(
    [news_encoder(x) for x in candidate_news], dim=1)

In [116]:
# Encode every entry of `clicked_news` and stack the results along dim 1.
clicked_news_vector = torch.stack(
    [news_encoder(x) for x in clicked_news], dim=1)

In [121]:
# Pool the clicked-news vectors into a single user vector.
# NOTE(review): this reuses `final_attention`, the layer created for pooling
# news attributes, rather than a separate user-level attention — confirm
# that this is intended.
user_vector = final_attention(clicked_news_vector)

In [123]:
# Recorded output: torch.Size([16, 3, 300]).
candidate_news_vector.shape

torch.Size([16, 3, 300])

In [124]:
# Recorded output: torch.Size([16, 300]).
user_vector.shape

torch.Size([16, 300])

In [126]:
# Instantiate the dot-product click predictor imported earlier.
click_predictor = DotProductClickPredictor()

In [127]:
# Score the candidates against the user vector; the recorded output is a
# 16 x 3 tensor of unnormalised scores.
click_predictor(candidate_news_vector, user_vector)

tensor([[ 9.9181, 10.3862, 12.7622],
        [10.1982, 11.1527, 11.3974],
        [ 9.6340,  9.9506,  9.5034],
        [12.4641, 13.4475, 13.3934],
        [ 9.8207,  8.7124,  9.1460],
        [11.2624, 11.1223, 10.4270],
        [ 7.8568, 11.3698,  9.9877],
        [ 9.5206, 12.4250, 11.4712],
        [12.4032, 11.8798, 12.1187],
        [ 9.5549, 12.2385, 11.8226],
        [10.1315,  8.4419, 10.5272],
        [ 9.0040, 10.6466,  8.2396],
        [ 8.0947,  9.7876,  9.1364],
        [12.4731, 11.5728, 12.8593],
        [12.3729, 14.5744, 13.6423],
        [11.8174, 13.3805, 10.9955]], grad_fn=<SqueezeBackward1>)

#### Continue

In [None]:
# Apply the gradients computed by loss.backward() with every optimizer.
for optimizer in optimizers:
    optimizer.step()

In [None]:
# Recorded output: 1000.
config.num_batches_validate

1000

In [None]:
# Inspect the first candidate-news entry; the recorded (truncated) output is a
# dict holding 'category' and 'subcategory' tensors.
minibatch["candidate_news"][0]

{'category': tensor([ 9,  9,  1, 13,  1,  1, 17, 40, 40, 11, 40, 81,  3,  1, 13, 33,  3, 40,
         59,  1,  3, 81, 40,  9,  1,  3,  5,  9,  5, 31,  3, 17,  5,  5,  9, 13,
          9,  5,  5, 33,  9,  1, 40, 40, 33, 31, 81, 59,  1, 40,  5,  5, 31,  3,
          5,  5,  3,  5, 40, 45, 13, 81,  5,  5, 59,  9, 17,  5,  9, 40, 81,  5,
          1, 31,  5,  9,  5,  1,  1,  5,  5, 33, 40,  5,  3,  3,  5,  5,  5, 40,
         40,  5,  5, 17,  5,  1, 81,  3, 22, 17,  1,  9,  5, 40, 59, 81, 17,  9,
          5,  9,  5,  5, 40,  5,  5, 13, 59, 31,  5, 22, 11,  9,  9, 40,  5,  1,
          3, 40]),
 'subcategory': tensor([200,  10,   2, 108,  36,   2,  27,  41,  41,  12,  64,  82,  16,   7,
          21,  43,  16,  94, 103,  36,  70, 235,  64,  10,   2,  20,   6,  10,
         101,  47,  20,  71,  68,   6,  10, 108, 200,  68, 101,  43,  75, 155,
         176,  41,  43,  32, 125,  89, 100,  94,  25,  68, 109,  20,  74,  68,
          70,  68,  64,  97, 108, 125,  68,  74,  89,  49,  27,  68,  3