In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from transformers import AutoTokenizer
from torch.optim import Adam
import evaluate


from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


### Data

In [2]:
from src.next_token_dataset import AutoTextDataset, collate_fn

In [3]:
# Read the pre-split train / validation / test CSV files into DataFrames.
train, val, test = map(pd.read_csv, ('data/train.csv', 'data/val.csv', 'data/test.csv'))

In [4]:
# Wrap each split's DataFrame in the project's AutoTextDataset.
trainds, valds, testds = (AutoTextDataset(frame) for frame in (train, val, test))

In [5]:
# One DataLoader per split, all batching through the project's collate_fn.
# Only the training loader shuffles; validation and test keep dataset order.
# The training loader was previously bound to the misspelled name `train_laoder`.
train_loader = DataLoader(trainds, shuffle=True, batch_size=64, collate_fn=collate_fn)
val_loader = DataLoader(valds, shuffle=False, batch_size=32, collate_fn=collate_fn)
test_loader = DataLoader(testds, shuffle=False, batch_size=32, collate_fn=collate_fn)

### Models

In [5]:
from src.gru_model import GRUmodel
import torch

In [6]:
# Build the network, then restore its trained weights from the checkpoint file.
model1 = GRUmodel()
# Map the stored tensors to the GPU when one exists and to the CPU otherwise,
# so the checkpoint still loads on a machine without CUDA.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
model1.load_state_dict(
    torch.load(
        'models/gru_model_autotest.pth',
        map_location='cuda' if torch.cuda.is_available() else 'cpu',
    )
)
model1

GRUmodel(
  (emb): Embedding(50257, 128, padding_idx=50256)
  (gru): GRU(128, 64, batch_first=True)
  (fc): Linear(in_features=64, out_features=50257, bias=True)
)

In [10]:
# Total number of elements across all parameters of the GRU model.
sum(weight.numel() for weight in model1.parameters())

9736849

To train the model, see `gru_train_solution.ipynb`.

### Metrics 

In [8]:
from src.eval_gru import val_loop

# Evaluate on the GPU when one is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# 50256 matches the embedding's padding_idx shown in the model repr, so
# targets with that id are ignored by the loss.
criterion = nn.CrossEntropyLoss(ignore_index=50256)
# GPT-2 tokenizer, passed to val_loop below.
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")

In [9]:
# Evaluate the GRU model on the validation split. The model is moved to the
# same `device` that is handed to val_loop, instead of a separate 'cuda' literal.
val_loop(val_loader, model1.to(device), criterion, tokenizer, device, num_samples=5)

(4.736642544245048,
 {'rouge1': np.float64(0.11666666666666665),
  'rouge2': np.float64(0.01818181818181818),
  'rougeL': np.float64(0.09984126984126984),
  'rougeLsum': np.float64(0.09984126984126984)},
 [(" should send apple an angry email along the lines of &quot;hey d-bags, 'we fixed your computer' generally means you actually did something.&quot;.",
   " you're too. &lt;3. i'm so excited. i barely have a good day"),
  ('ad munchkin today...why is fort william so far away? stupid fort william. also, stupid jimmy chungs only doing breakfast in jul/aug',
   "ustaaaaaah. but i'm so tired. no sleep in my house. thx fer"),
  (" only just following me!! tut tut!! how's life?? we av neva met up since i've been bk to bpool and ur leaving soon!",
   ' &lt;3 miss you guys! hahahhaha! ;) night xoxo:'),
  ('!? nooo, you need to get it she has twitter? haha cool. i love you too denisse.&lt;3',
   ' fame. xxx â\x99¥ love you! â\xa0 itâ´s'),
  ('orders, why do you put out a coupon that is only go

In [11]:
# Evaluate the GRU model on the test split. The model is moved to the same
# `device` that is handed to val_loop, instead of a separate 'cuda' literal.
val_loop(test_loader, model1.to(device), criterion, tokenizer, device, num_samples=5)

(4.736921673189635,
 {'rouge1': np.float64(0.056582234042284504),
  'rouge2': np.float64(0.0),
  'rougeL': np.float64(0.055754577117066606),
  'rougeLsum': np.float64(0.0576369472921197)},
 [("iinq and waitinq to leave naenae's house ! i do not wanna stay here ; ! borinq as _____x :[ : i wish i was at home ! im nvr home tho",
   ' nyd sis tho. my bf got outta his apt. :/shii'),
  (" i'm going 2 go 2 the cinco de mayo fiesta at centennial olympic park today. cinco won't be the same w/ out the ls's though",
   ' :| lol. i like sundays. it was goodly too long fun. wait until again'),
  (' comes the sun! come to nectar for happy hour w/ dj sosa of goods crew for throwback cuts, food &amp; drink specials! party on the patio',
   ' nd it! wimma hunt! and ur not? xx xx xxx love you.'),
  ("'re such a pig dil... did u know i ate 3 slices of cake n a bunch of sushissss tday for lunch sad faceeee!!!",
   ' ur awesome song!! xxo!! i love u. aww i love ya.&lt;'),
  (" thought of going to uni but i

### GPT2

In [1]:
from src.eval_transformer_pipeline import run_gpt2

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
from transformers import AutoModelForCausalLM

# Load the pretrained GPT-2 checkpoint and count its parameter elements.
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
sum(weight.numel() for weight in model.parameters())

124439808

In [None]:
# Run the GPT-2 evaluation from src.eval_transformer_pipeline. Judging by the
# logged output, it makes two passes of 5252 iterations (val, then test) and
# prints loss and ROUGE scores for each.
# NOTE(review): the logged "Test" line labels two values "ROUGE-1"; the second
# (0.0351) is presumably ROUGE-2 — confirm and fix the print inside run_gpt2.
# NOTE(review): Val and Test report the same loss (5.3497) — verify that the
# test loss is not the validation value being re-printed.
run_gpt2() 

  0%|          | 0/5252 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
100%|██████████| 5252/5252 [08:22<00:00, 10.46it/s] 


Val — Loss: 5.3497, ROUGE-1: 0.1102, ROUGE-2: 0.0208,  ROUGE-L: 0.0747


100%|██████████| 5252/5252 [07:02<00:00, 12.42it/s] 


Test — Loss: 5.3497, ROUGE-1: 0.1171, ROUGE-1: 0.0351, ROUGE-L: 0.1004


### Результаты 

Относительно неглубокая GRU-модель (9736849 параметров), обученная в течение 16 эпох, показала на val лучший результат по ключевым метрикам (Loss и ROUGE-1), чем предобученный GPT-2 (124439808 параметров, что примерно в 13 раз больше, чем у первой модели).
По val: 
- Loss 4.7366 и 5.3497 
- ROUGE-1: 0.1167 и 0.1102

По test:
- Loss 4.7369 и 5.3497 
- ROUGE-1: 0.0566 и 0.1171
- ROUGE-2: 0.0 и 0.0351
- ROUGE-L: 0.05575 и 0.1004

Однако на тесте ROUGE-метрики GRU-модели заметно хуже, чем на val (ROUGE-1: 0.0566 против 0.1167), что может говорить о переобучении модели =(. Для улучшения стоит добавить регуляризацию: взять AdamW, добавить dropout. При этом по loss переобучения не наблюдается: значения на val и test практически совпадают (4.7366 и 4.7369).