In [1]:
import re
import random
from random import shuffle
from itertools import chain
from collections import Counter, defaultdict
from string import ascii_letters as alu # ascii_lowercase + ascii_uppercase
from string import ascii_lowercase as al # abcdefghijklmnopqrstuvwxyz
from string import ascii_uppercase as au # ABCDEFGHIJKLMNOPQRSTUVWXYZ
from string import digits # '0123456789'
from string import hexdigits # '0123456789abcdefABCDEF'
from string import octdigits # '01234567'
from string import punctuation # !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~.

import numpy as np
import pandas as pd
import polars as pl
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from gensim.models import KeyedVectors
from gensim.models.keyedvectors import KeyedVectors as Vectors
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel, BertForSequenceClassification

seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)


<torch._C.Generator at 0x7fa0d1913970>

## 80

In [2]:
with open('train.txt') as f:
  train = [l.rstrip() for l in f.readlines()]

with open('valid.txt') as f:
  valid = [l.rstrip() for l in f.readlines()]

with open('test.txt') as f:
  test = [l.rstrip() for l in f.readlines()]

for data in [train, valid, test]:
  for i in range(len(data)):
    data[i] = data[i].split('\t')

columns = ['id', 'title', 'url', 'publisher', 'category', 'story', 'hostname', 'timestamp']
num_cols = ['id', 'timestamp']

train_df = pl.DataFrame(train, orient='row', schema=columns)
train_df = train_df.with_columns([pl.col(col).cast(int) for col in num_cols])

valid_df = pl.DataFrame(valid, orient='row', schema=columns)
valid_df = valid_df.with_columns([pl.col(col).cast(int) for col in num_cols])

test_df = pl.DataFrame(test, orient='row', schema=columns)
test_df = test_df.with_columns([pl.col(col).cast(int) for col in num_cols])


In [3]:
pattern = re.compile(f'[{punctuation}\n]')
df = pl.concat([train_df, valid_df, test_df])
title = df['title'].to_list()
labels = df['category'].to_numpy()
title = [i.split() for i in title]
words = list(chain.from_iterable(title))
words = [pattern.sub('', t) for t in words]
words = [t for t in words if t]
words = list(dict(Counter(words)).items())
words.sort(key=lambda x: x[0])
words.sort(key=lambda x: x[1], reverse=True)
rank = defaultdict(int)

i = 1
for k, v in words:
  if v >= 2:
    rank[k] = i
    i += 1
  else:
    rank[k] = 0

rank


defaultdict(int,
            {'to': 1,
             'in': 2,
             'on': 3,
             'as': 4,
             'UPDATE': 5,
             'for': 6,
             'of': 7,
             'The': 8,
             'US': 9,
             'To': 10,
             'the': 11,
             'and': 12,
             'In': 13,
             'Of': 14,
             'at': 15,
             'a': 16,
             'With': 17,
             'Is': 18,
             'A': 19,
             'For': 20,
             'with': 21,
             'And': 22,
             'after': 23,
             'New': 24,
             'Kardashian': 25,
             'On': 26,
             'by': 27,
             'Kim': 28,
             'After': 29,
             'up': 30,
             'says': 31,
             '1': 32,
             'is': 33,
             'At': 34,
             'China': 35,
             'From': 36,
             'new': 37,
             'from': 38,
             '2': 39,
             'Says': 40,
             'her': 41,
          

## 81

In [4]:
category_map = {
  'b': 0,
  't': 1,
  'e': 2,
  'm': 3,
}

def convert(text: str):
  return torch.tensor([rank[i] for i in text])

X = title[:]
X_ = [convert(t) for t in X]
X_ = nn.utils.rnn.pad_sequence(X_, batch_first=True)

y = [category_map[i] for i in labels]
y = torch.tensor(y)

X_train, X_valtest = X_[:len(train)], X_[len(train):]
X_valid, X_test = X_[:len(valid)], X_[len(valid):]
y_train, y_valtest = y[:len(train)], y[len(train):]
y_valid, y_test = y[:len(valid)], y[len(valid):]


In [6]:
class RNN(nn.Module):
  def __init__(self, V, dw, dh, out_features, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.embed = nn.Embedding(V, dw, padding_idx=V-1)
    self.rnn = nn.RNN(dw, dh, batch_first=True, nonlinearity='relu')
    self.fc1 = nn.Linear(dh, out_features)
    nn.init.kaiming_normal_(self.rnn.weight_ih_l0)
    nn.init.kaiming_normal_(self.rnn.weight_hh_l0)
    nn.init.kaiming_normal_(self.fc1.weight)

  def forward(self, x):
    x = self.embed(x)
    x, _ = self.rnn(x)
    x = self.fc1(x[:, -1, :])
    return x


In [7]:
V = len(rank)+1
dw = 300
dh = 50
out_features = 4
model = RNN(V, dw, dh, out_features)
y_pred = model(X_train)
y_pred = torch.argmax(torch.softmax(y_pred, dim=1), axis=1)
print(f'accuracy: {sum(y_train==y_pred)/len(y_train)}')


accuracy: 0.13562336564064026


## 82

In [15]:
class NewsDataset(Dataset):
  def __init__(self, X, y) -> None:
    self.X = X
    self.y = y

  def __len__(self):
    return len(self.X)

  def __getitem__(self, idx):
    return (self.X[idx], self.y[idx])


In [77]:
dataloader_train = DataLoader(NewsDataset(X_train, y_train), shuffle=True)
dataloader_valid = DataLoader(NewsDataset(X_valid, y_valid))
dataloader_test = DataLoader(NewsDataset(X_test, y_test))


In [78]:
model = RNN(V, dw, dh, out_features)
optimizer = optim.SGD(params=model.parameters())
criterion = nn.CrossEntropyLoss()

for epoch in range(10):
  model.train()
  loss_train = 0
  correct = 0
  for X, y in dataloader_train:
    optimizer.zero_grad()
    y_pred = model(X)

    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()

    y_pred = torch.softmax(y_pred, dim=1)
    y_pred = y_pred.argmax(dim=1)
    correct += torch.sum(y_pred == y).item()

    loss_train += loss.item()

  loss_train /= len(dataloader_train.dataset)
  acc_train = correct / len(dataloader_train.dataset)

  model.eval()
  loss_valid = 0
  correct = 0
  with torch.no_grad():
    for X, y in dataloader_valid:
      y_pred = model(X)
      loss = criterion(y_pred, y)

      y_pred = torch.softmax(y_pred, dim=1)
      y_pred = y_pred.argmax(dim=1)
      correct += torch.sum(y_pred == y).item()

      loss_valid += loss.item()*X.size(0)

  loss_valid /= len(dataloader_valid.dataset)
  acc_valid = correct / len(dataloader_valid.dataset)

  print(f"train loss: {loss_train:>7f}, valid loss: {loss_valid:>7f}, train acc: {acc_train:>7f}, valid acc: {acc_valid:>7f}")


train loss: 1.177050, valid loss: 1.149136, train acc: 0.440565, valid acc: 0.493263
train loss: 1.145101, valid loss: 1.154696, train acc: 0.488487, valid acc: 0.492515
train loss: 1.125455, valid loss: 1.110760, train acc: 0.514695, valid acc: 0.538922
train loss: 1.098434, valid loss: 1.047414, train acc: 0.549607, valid acc: 0.596557
train loss: 1.057028, valid loss: 0.962059, train acc: 0.590603, valid acc: 0.649701
train loss: 0.975652, valid loss: 0.882369, train acc: 0.649101, valid acc: 0.692365
train loss: 0.908520, valid loss: 0.826739, train acc: 0.685324, valid acc: 0.711826
train loss: 0.850112, valid loss: 0.784706, train acc: 0.712654, valid acc: 0.742515
train loss: 0.805069, valid loss: 0.818617, train acc: 0.729783, valid acc: 0.735778
train loss: 0.776296, valid loss: 0.702587, train acc: 0.736241, valid acc: 0.762725


## 83

In [16]:
dataloader_train = DataLoader(NewsDataset(X_train.to('cuda'), y_train.to('cuda')), shuffle=True, batch_size=64)
dataloader_valid = DataLoader(NewsDataset(X_valid.to('cuda'), y_valid.to('cuda')), batch_size=64)
dataloader_test = DataLoader(NewsDataset(X_test.to('cuda'), y_test.to('cuda')), batch_size=64)


In [80]:
model = RNN(V, dw, dh, out_features).to('cuda')
optimizer = optim.SGD(params=model.parameters())
criterion = nn.CrossEntropyLoss().to('cuda')

for epoch in range(10):
  model.train()
  loss_train = 0
  correct = 0
  for X, y in dataloader_train:
    optimizer.zero_grad()
    y_pred = model(X)

    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()

    y_pred = torch.softmax(y_pred, dim=1)
    y_pred = y_pred.argmax(dim=1)
    correct += torch.sum(y_pred == y).item()

    loss_train += loss.item()*X.size(0)

  loss_train /= len(dataloader_train.dataset)
  acc_train = correct / len(dataloader_train.dataset)

  model.eval()
  loss_valid = 0
  correct = 0
  with torch.no_grad():
    for X, y in dataloader_valid:
      y_pred = model(X)
      loss = criterion(y_pred, y)

      y_pred = torch.softmax(y_pred, dim=1)
      y_pred = y_pred.argmax(dim=1)
      correct += torch.sum(y_pred == y).item()

      loss_valid += loss.item()*X.size(0)

  loss_valid /= len(dataloader_valid.dataset)
  acc_valid = correct / len(dataloader_valid.dataset)

  print(f"train loss: {loss_train:>7f}, valid loss: {loss_valid:>7f}, train acc: {acc_train:>7f}, valid acc: {acc_valid:>7f}")


train loss: 1.227665, valid loss: 1.159218, train acc: 0.426526, valid acc: 0.443862
train loss: 1.161343, valid loss: 1.155567, train acc: 0.451329, valid acc: 0.455838
train loss: 1.156402, valid loss: 1.149515, train acc: 0.460970, valid acc: 0.478293
train loss: 1.153096, valid loss: 1.145550, train acc: 0.471640, valid acc: 0.494012
train loss: 1.148491, valid loss: 1.143850, train acc: 0.481187, valid acc: 0.488024
train loss: 1.146056, valid loss: 1.145004, train acc: 0.483433, valid acc: 0.490269
train loss: 1.143856, valid loss: 1.139306, train acc: 0.490827, valid acc: 0.499251
train loss: 1.142033, valid loss: 1.143545, train acc: 0.489985, valid acc: 0.489521
train loss: 1.140609, valid loss: 1.137088, train acc: 0.492980, valid acc: 0.501497
train loss: 1.138735, valid loss: 1.136735, train acc: 0.494478, valid acc: 0.505240


## 84

In [17]:
gen: Vectors = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)


In [82]:
class RNN(nn.Module):
  def __init__(self, V, dw, dh, out_features, weight=None, *args, **kwargs):
    super().__init__(*args, **kwargs)
    if weight is not None:
      self.embed = nn.Embedding.from_pretrained(weight, freeze=False, padding_idx=V-1)
    else:
      self.embed = nn.Embedding(V, dw, padding_idx=V-1)
    self.rnn = nn.RNN(dw, dh, batch_first=True, nonlinearity='relu')
    self.fc1 = nn.Linear(dh, out_features)
    nn.init.kaiming_normal_(self.rnn.weight_ih_l0)
    nn.init.kaiming_normal_(self.rnn.weight_hh_l0)
    nn.init.kaiming_normal_(self.fc1.weight)

  def forward(self, x):
    x = self.embed(x)
    x, _ = self.rnn(x)
    x = self.fc1(x[:, -1, :])
    return x


In [18]:
weight = torch.zeros(V+1, dw).to('cuda')

for k, v in rank.items():
  if v == 0:
    continue

  try:
    weight[v] = torch.tensor(gen[k]).to('cuda')
  except:
    weight[v] = torch.randn(dw).to('cuda')


In [84]:
model = RNN(V, dw, dh, out_features, weight).to('cuda')
optimizer = optim.SGD(params=model.parameters())
criterion = nn.CrossEntropyLoss().to('cuda')

for epoch in range(100):
  model.train()
  loss_train = 0
  correct = 0
  for X, y in dataloader_train:
    optimizer.zero_grad()
    y_pred = model(X)

    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()

    y_pred = torch.softmax(y_pred, dim=1)
    y_pred = y_pred.argmax(dim=1)
    correct += torch.sum(y_pred == y).item()

    loss_train += loss.item()*X.size(0)

  loss_train /= len(dataloader_train.dataset)
  acc_train = correct / len(dataloader_train.dataset)

  model.eval()
  loss_valid = 0
  correct = 0
  with torch.no_grad():
    for X, y in dataloader_valid:
      y_pred = model(X)
      loss = criterion(y_pred, y)

      y_pred = torch.softmax(y_pred, dim=1)
      y_pred = y_pred.argmax(dim=1)
      correct += torch.sum(y_pred == y).item()

      loss_valid += loss.item()*X.size(0)

  loss_valid /= len(dataloader_valid.dataset)
  acc_valid = correct / len(dataloader_valid.dataset)

  print(f"train loss: {loss_train:>7f}, valid loss: {loss_valid:>7f}, train acc: {acc_train:>7f}, valid acc: {acc_valid:>7f}")


train loss: 1.418830, valid loss: 1.309550, train acc: 0.291838, valid acc: 0.385479
train loss: 1.284337, valid loss: 1.264725, train acc: 0.401722, valid acc: 0.417665
train loss: 1.246413, valid loss: 1.235283, train acc: 0.441595, valid acc: 0.431886
train loss: 1.220367, valid loss: 1.212662, train acc: 0.443748, valid acc: 0.437126
train loss: 1.199037, valid loss: 1.193046, train acc: 0.441969, valid acc: 0.428892
train loss: 1.181754, valid loss: 1.177773, train acc: 0.445994, valid acc: 0.436377
train loss: 1.169650, valid loss: 1.166854, train acc: 0.444684, valid acc: 0.431886
train loss: 1.162125, valid loss: 1.159914, train acc: 0.447304, valid acc: 0.438623
train loss: 1.158211, valid loss: 1.155756, train acc: 0.454137, valid acc: 0.447605
train loss: 1.155773, valid loss: 1.152606, train acc: 0.455541, valid acc: 0.453593
train loss: 1.153944, valid loss: 1.150444, train acc: 0.460876, valid acc: 0.455090
train loss: 1.152468, valid loss: 1.148561, train acc: 0.462561, 

## 85

In [10]:
class RNN(nn.Module):
  def __init__(self, V, dw, dh, out_features, weight=None, *args, **kwargs):
    super().__init__(*args, **kwargs)
    if weight is not None:
      self.embed = nn.Embedding.from_pretrained(weight, freeze=False, padding_idx=V-1)
    else:
      self.embed = nn.Embedding(V, dw, padding_idx=V-1)
    self.rnn = nn.RNN(dw, dh, batch_first=True, nonlinearity='relu', bidirectional=True)
    self.fc1 = nn.Linear(dh*2, out_features)
    nn.init.kaiming_normal_(self.rnn.weight_ih_l0)
    nn.init.kaiming_normal_(self.rnn.weight_hh_l0)
    nn.init.kaiming_normal_(self.rnn.weight_ih_l0_reverse)
    nn.init.kaiming_normal_(self.rnn.weight_hh_l0_reverse)
    nn.init.kaiming_normal_(self.fc1.weight)

  def forward(self, x):
    x = self.embed(x)
    _, x = self.rnn(x)
    rnn_out = torch.cat([x[-2,:,:], x[-1,:,:]], dim=1)
    x = self.fc1(rnn_out)
    return x


In [86]:
model = RNN(V, dw, dh, out_features, weight).to('cuda')
optimizer = optim.SGD(params=model.parameters())
criterion = nn.CrossEntropyLoss().to('cuda')

for epoch in range(100):
  model.train()
  loss_train = 0
  correct = 0
  for X, y in dataloader_train:
    optimizer.zero_grad()
    y_pred = model(X)

    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()

    y_pred = torch.softmax(y_pred, dim=1)
    y_pred = y_pred.argmax(dim=1)
    correct += torch.sum(y_pred == y).item()

    loss_train += loss.item()*X.size(0)

  loss_train /= len(dataloader_train.dataset)
  acc_train = correct / len(dataloader_train.dataset)

  model.eval()
  loss_valid = 0
  correct = 0
  with torch.no_grad():
    for X, y in dataloader_valid:
      y_pred = model(X)
      loss = criterion(y_pred, y)

      y_pred = torch.softmax(y_pred, dim=1)
      y_pred = y_pred.argmax(dim=1)
      correct += torch.sum(y_pred == y).item()

      loss_valid += loss.item()*X.size(0)

  loss_valid /= len(dataloader_valid.dataset)
  acc_valid = correct / len(dataloader_valid.dataset)

  print(f"train loss: {loss_train:>7f}, valid loss: {loss_valid:>7f}, train acc: {acc_train:>7f}, valid acc: {acc_valid:>7f}")


train loss: 1.294980, valid loss: 1.191153, train acc: 0.364283, valid acc: 0.402695
train loss: 1.176916, valid loss: 1.154146, train acc: 0.435230, valid acc: 0.454341
train loss: 1.151639, valid loss: 1.137687, train acc: 0.466492, valid acc: 0.475299
train loss: 1.135063, valid loss: 1.121828, train acc: 0.492980, valid acc: 0.498503
train loss: 1.120134, valid loss: 1.107513, train acc: 0.515724, valid acc: 0.526946
train loss: 1.105861, valid loss: 1.094146, train acc: 0.535848, valid acc: 0.550150
train loss: 1.091950, valid loss: 1.079175, train acc: 0.554661, valid acc: 0.572605
train loss: 1.077744, valid loss: 1.063068, train acc: 0.572070, valid acc: 0.592066
train loss: 1.061610, valid loss: 1.045625, train acc: 0.590884, valid acc: 0.613772
train loss: 1.041654, valid loss: 1.018753, train acc: 0.608574, valid acc: 0.633234
train loss: 1.015277, valid loss: 0.981156, train acc: 0.627668, valid acc: 0.648204
train loss: 0.974787, valid loss: 0.936469, train acc: 0.645638, 

In [56]:
class CNN(nn.Module):
  def __init__(self, V, dw, dh, out_features, weight=None, *args, **kwargs):
    super().__init__(*args, **kwargs)
    if weight is not None:
      self.embed = nn.Embedding.from_pretrained(weight, freeze=False, padding_idx=V-1)
    else:
      self.embed = nn.Embedding(V, dw, padding_idx=V-1)
    self.conv = nn.Conv2d(1, dh, (3, dw), padding=(1, 0))
    self.relu = nn.ReLU()
    # self.pool = nn.MaxPool1d(20)
    self.fc1 = nn.Linear(dh, out_features)

  def forward(self, x: torch.Tensor):
    x = self.embed(x)
    x = x.unsqueeze(1)
    x = self.conv(x).squeeze(3)
    x = self.relu(x)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)
    x = self.fc1(x)
    return x


In [64]:
model = CNN(V, dw, dh, out_features, weight).to('cuda')
for X, y in dataloader_train:
  y_pred = model(X)
  torch.softmax(y_pred, dim=1)
  display(y_pred)
  break


tensor([[ 0.1705,  0.2425,  0.0852,  0.5006],
        [-0.0130,  0.1008, -0.1564,  0.3319],
        [ 0.0647,  0.1928, -0.0469,  0.2969],
        [ 0.0374,  0.1055, -0.0766,  0.2168],
        [ 0.0760,  0.1008, -0.1489,  0.3028],
        [ 0.1728,  0.2770,  0.1563,  0.5495],
        [ 0.0044,  0.1348, -0.0107,  0.3237],
        [-0.1548,  0.4377,  0.1412,  0.7650],
        [-0.0468,  0.2362, -0.0852,  0.5560],
        [ 0.0389,  0.0888, -0.0913,  0.3029],
        [ 0.0856,  0.0453, -0.1358,  0.2373],
        [ 0.0625,  0.0419, -0.1638,  0.1898],
        [ 0.0566,  0.0712, -0.1270,  0.2444],
        [ 0.0778,  0.1704,  0.1620,  0.5226],
        [-0.2187,  0.5162,  0.0549,  0.8411],
        [-0.0759,  0.2525, -0.0608,  0.5597],
        [-0.0822,  0.5950,  0.1293,  0.9196],
        [ 0.0850,  0.1614, -0.0778,  0.3432],
        [-0.2561,  0.6025,  0.1409,  0.6821],
        [ 0.0056,  0.0151, -0.1740,  0.2810],
        [ 0.0099,  0.1329, -0.0802,  0.2953],
        [ 0.0504,  0.0244, -0.1391

## 87

In [60]:
model = CNN(V, dw, dh, out_features, weight).to('cuda')
optimizer = optim.SGD(params=model.parameters())
criterion = nn.CrossEntropyLoss().to('cuda')

for epoch in range(100):
  model.train()
  loss_train = 0
  correct = 0
  for X, y in dataloader_train:
    optimizer.zero_grad()
    y_pred = model(X)

    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()

    y_pred = torch.softmax(y_pred, dim=1)
    y_pred = y_pred.argmax(dim=1)
    correct += torch.sum(y_pred == y).item()

    loss_train += loss.item()*X.size(0)

  loss_train /= len(dataloader_train.dataset)
  acc_train = correct / len(dataloader_train.dataset)

  model.eval()
  loss_valid = 0
  correct = 0
  with torch.no_grad():
    for X, y in dataloader_valid:
      y_pred = model(X)
      loss = criterion(y_pred, y)

      y_pred = torch.softmax(y_pred, dim=1)
      y_pred = y_pred.argmax(dim=1)
      correct += torch.sum(y_pred == y).item()

      loss_valid += loss.item()*X.size(0)

  loss_valid /= len(dataloader_valid.dataset)
  acc_valid = correct / len(dataloader_valid.dataset)

  print(f"train loss: {loss_train:>7f}, valid loss: {loss_valid:>7f}, train acc: {acc_train:>7f}, valid acc: {acc_valid:>7f}")


train loss: 1.394992, valid loss: 1.333407, train acc: 0.172033, valid acc: 0.369012
train loss: 1.301437, valid loss: 1.268069, train acc: 0.520872, valid acc: 0.567365
train loss: 1.251935, valid loss: 1.229192, train acc: 0.601367, valid acc: 0.617515
train loss: 1.220411, valid loss: 1.202487, train acc: 0.635436, valid acc: 0.654940
train loss: 1.197438, valid loss: 1.181763, train acc: 0.654156, valid acc: 0.678144
train loss: 1.179151, valid loss: 1.164771, train acc: 0.663796, valid acc: 0.687874
train loss: 1.163755, valid loss: 1.150064, train acc: 0.669319, valid acc: 0.689371
train loss: 1.150191, valid loss: 1.136745, train acc: 0.672969, valid acc: 0.690868
train loss: 1.137763, valid loss: 1.124402, train acc: 0.675683, valid acc: 0.694611
train loss: 1.126097, valid loss: 1.112588, train acc: 0.676619, valid acc: 0.698353
train loss: 1.114948, valid loss: 1.101330, train acc: 0.681486, valid acc: 0.699102
train loss: 1.104127, valid loss: 1.090289, train acc: 0.682329, 

## 88

In [8]:
class EarlyStopping:
  def __init__(self, patience=10, verbose=False, path='model/checkpoint.pth'):
    self.patience = patience
    self.verbose = verbose
    self.counter = 0
    self.early_stop = False
    self.best = torch.inf
    self.path = path

  def __call__(self, val_loss, model):
    if val_loss < self.best:
      self.checkpoint(val_loss, model)
      self.counter = 0
    else:
      self.counter += 1
      if self.verbose:
        print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
      if self.counter >= self.patience:
        self.early_stop = True

  def checkpoint(self, val_loss, model):
    if self.verbose:
        print(f'Validation loss decreased ({self.best:.6f} --> {val_loss:.6f}).  Saving model ...')
    torch.save(model.state_dict(), self.path)
    self.best = val_loss


In [66]:
num_epochs = 300

model = CNN(V, dw, dh, out_features, weight).to('cuda')
optimizer = optim.AdamW(params=model.parameters())
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, num_epochs, 1e-5)
criterion = nn.CrossEntropyLoss().to('cuda')
early_stopping = EarlyStopping()

for epoch in range(num_epochs):
  model.train()
  loss_train = 0
  correct = 0
  for X, y in dataloader_train:
    optimizer.zero_grad()
    y_pred = model(X)

    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()

    y_pred = torch.softmax(y_pred, dim=1)
    y_pred = y_pred.argmax(dim=1)
    correct += torch.sum(y_pred == y).item()

    loss_train += loss.item()*X.size(0)

  loss_train /= len(dataloader_train.dataset)
  acc_train = correct / len(dataloader_train.dataset)

  model.eval()
  loss_valid = 0
  correct = 0
  with torch.no_grad():
    for X, y in dataloader_valid:
      y_pred = model(X)
      loss = criterion(y_pred, y)

      y_pred = torch.softmax(y_pred, dim=1)
      y_pred = y_pred.argmax(dim=1)
      correct += torch.sum(y_pred == y).item()

      loss_valid += loss.item()*X.size(0)

  loss_valid /= len(dataloader_valid.dataset)
  acc_valid = correct / len(dataloader_valid.dataset)

  print(f"train loss: {loss_train:>7f}, valid loss: {loss_valid:>7f}, train acc: {acc_train:>7f}, valid acc: {acc_valid:>7f}")

  early_stopping(loss_valid, model)
  if early_stopping.early_stop:
    print('-- Early Stopping --')
    break

  scheduler.step()


train loss: 0.271921, valid loss: 0.029629, train acc: 0.931018, valid acc: 0.997006
train loss: 0.020503, valid loss: 0.010149, train acc: 0.996162, valid acc: 0.998503
train loss: 0.009222, valid loss: 0.007931, train acc: 0.997847, valid acc: 0.998503
train loss: 0.006706, valid loss: 0.007182, train acc: 0.998222, valid acc: 0.997006
train loss: 0.006014, valid loss: 0.008315, train acc: 0.998222, valid acc: 0.997006
train loss: 0.005506, valid loss: 0.006582, train acc: 0.998222, valid acc: 0.997006
train loss: 0.006027, valid loss: 0.004798, train acc: 0.997941, valid acc: 0.997754
train loss: 0.005650, valid loss: 0.007021, train acc: 0.997847, valid acc: 0.997006
train loss: 0.004571, valid loss: 0.006354, train acc: 0.998409, valid acc: 0.997006
train loss: 0.005551, valid loss: 0.005732, train acc: 0.998222, valid acc: 0.997754
train loss: 0.005156, valid loss: 0.003844, train acc: 0.998502, valid acc: 0.998503
train loss: 0.004689, valid loss: 0.007049, train acc: 0.998409, 

## 89

In [25]:
class NewsDataset(Dataset):
  def __init__(self, X, y, tokenizer, max_len) -> None:
    self.X = X
    self.y = y
    self.tokenizer = tokenizer
    self.max_len = max_len

  def __len__(self):
    return len(self.X)

  def __getitem__(self, idx):
    text = self.X[idx]
    token = self.tokenizer.encode_plus(text, max_length=self.max_len, padding='max_length', truncation=True, return_tensors="pt")
    input_ids = token['input_ids']
    token_type_ids = token['token_type_ids']
    attention_mask = token['attention_mask']

    return {
      'input_ids': torch.LongTensor(input_ids).squeeze(0).to('cuda'),
      'token_type_ids': torch.LongTensor(token_type_ids).squeeze(0).to('cuda'),
      'attention_mask': torch.LongTensor(attention_mask).squeeze(0).to('cuda'),
      'labels': torch.Tensor(self.y[idx])
    }


In [28]:
X = title[:]
X_train, X_valtest = X[:len(train)], X[len(train):]
X_valid, X_test = X_valtest[:len(valid)], X_valtest[len(valid):]

y = [category_map[i] for i in labels]
y = torch.tensor(y)
y_train, y_valtest = y[:len(train)], y[len(train):]
y_valid, y_test = y[:len(valid)], y[len(valid):]


In [29]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_len = 20

dataset_train = NewsDataset(X_train, y_train.to('cuda'), tokenizer, max_len)
dataset_valid = NewsDataset(X_valid, y_valid.to('cuda'), tokenizer, max_len)
dataset_test = NewsDataset(X_test, y_test.to('cuda'), tokenizer, max_len)
dataloader_train = DataLoader(dataset_train, 256, shuffle=True)
dataloader_valid = DataLoader(dataset_valid, 64)
dataloader_test = DataLoader(dataset_test, 64)


In [159]:
tokenizer.encode_plus(X_train[0], max_length=max_len, padding='max_length', truncation=True)


{'input_ids': [101, 100, 100, 100, 100, 100, 100, 100, 100, 100, 3263, 102, 0, 0, 0, 0, 0, 0, 0, 0], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]}

In [30]:
class BERT(nn.Module):
  def __init__(self, p, out_features):
    super().__init__()
    self.bert = BertModel.from_pretrained('bert-base-uncased')
    self.drop = nn.Dropout(p)
    self.fc = nn.Linear(768, out_features)

  def forward(self, input_ids, token_type_ids, attention_mask):
    outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
    last_hidden_state = outputs.last_hidden_state

    expanded_mask = attention_mask.unsqueeze(-1)
    masked_hidden_state = last_hidden_state*expanded_mask

    max_pooled = torch.max(masked_hidden_state + (1-expanded_mask)*-1e9, dim=1)[0]
    max_pooled = self.drop(max_pooled)

    x = self.fc(self.drop(max_pooled))
    return x


In [43]:
num_epochs = 300

model = BERT(0.3, 4).to('cuda')
# model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4).to('cuda')
optimizer = optim.AdamW(model.parameters(), 1e-5)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, num_epochs, 1e-5)
criterion = nn.CrossEntropyLoss()
early_stopping = EarlyStopping()
s = set()

for epoch in range(num_epochs):
  model.train()
  loss_train = 0
  correct = 0
  for data in dataloader_train:

    optimizer.zero_grad()
    y_pred = model(input_ids=data['input_ids'], token_type_ids=data['token_type_ids'], attention_mask=data['attention_mask'])

    loss = criterion(y_pred, data['labels'])
    loss.backward()
    optimizer.step()

    y_pred = torch.softmax(y_pred, dim=1)
    y_pred = y_pred.argmax(dim=1)
    correct += torch.sum(y_pred == data['labels']).item()

    loss_train += loss.item()*data['input_ids'].size(0)

  loss_train /= len(dataloader_train.dataset)
  acc_train = correct / len(dataloader_train.dataset)

  model.eval()
  loss_valid = 0
  correct = 0
  with torch.no_grad():
    for data in dataloader_valid:

      y_pred = model(input_ids=data['input_ids'], token_type_ids=data['token_type_ids'], attention_mask=data['attention_mask'])

      y_pred = torch.softmax(y_pred, dim=1)
      y_pred = y_pred.argmax(dim=1)
      correct += torch.sum(y_pred == data['labels']).item()

      loss_valid += loss.item()*data['input_ids'].size(0)

  loss_valid /= len(dataloader_valid.dataset)
  acc_valid = correct / len(dataloader_valid.dataset)

  print(f"train loss: {loss_train:>7f}, valid loss: {loss_valid:>7f}, train acc: {acc_train:>7f}, valid acc: {acc_valid:>7f}")

  early_stopping(loss_valid, model)
  if early_stopping.early_stop:
    print('-- Early Stopping --')
    break

  # scheduler.step()


train loss: 1.068996, valid loss: 0.849781, train acc: 0.575534, valid acc: 0.420659
train loss: 0.811935, valid loss: 0.713185, train acc: 0.709940, valid acc: 0.413922
train loss: 0.721582, valid loss: 0.703723, train acc: 0.740079, valid acc: 0.403443
train loss: 0.667735, valid loss: 0.667069, train acc: 0.760670, valid acc: 0.401198
train loss: 0.634880, valid loss: 0.508331, train acc: 0.770592, valid acc: 0.395958
train loss: 0.608184, valid loss: 0.648385, train acc: 0.778922, valid acc: 0.396707
train loss: 0.585423, valid loss: 0.553524, train acc: 0.787346, valid acc: 0.407186
train loss: 0.567084, valid loss: 0.674349, train acc: 0.795021, valid acc: 0.403443
train loss: 0.548574, valid loss: 0.598040, train acc: 0.802040, valid acc: 0.392216
train loss: 0.525514, valid loss: 0.476576, train acc: 0.808218, valid acc: 0.390719
train loss: 0.507952, valid loss: 0.561907, train acc: 0.816735, valid acc: 0.399701
train loss: 0.496509, valid loss: 0.482738, train acc: 0.820105, 

KeyboardInterrupt: 