# Imports and Configuration

In [1]:
import torch
import numpy as np
import pytorch_lightning as pl
from torch import nn
from torch import optim
from sklearn.model_selection import train_test_split

from models.baseline import Seq2Seq, Encoder, Decoder
from data_utils.dataset import TranslationDataset
from data_utils.lang import read_langs, PAD
from pl_utils.pl_model import ModelWrapper
from pl_utils.pl_dataset import PlTranslationDataset

# Prefer CUDA when PyTorch can see a GPU, otherwise run on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Fraction of the corpus lines held out as the validation split.
TEST_SHARE = 0.2

%load_ext autoreload
%autoreload 2

# Data Preparation

In [2]:
# Read the raw corpus, one example per line (trailing newlines are kept by
# readlines()). The encoding is passed explicitly: the file presumably holds
# Russian (Cyrillic) text, and the platform default encoding is not UTF-8
# everywhere (e.g. on Windows), which would garble or fail the read.
# NOTE(review): assumes data.txt is UTF-8 encoded — confirm.
with open("data.txt", 'r', encoding="utf-8") as flines:
    all_lines = np.array(flines.readlines())

# Nominal split sizes implied by TEST_SHARE. They are informational only:
# the split below is driven by TEST_SHARE directly, not by these counts.
test_size = int(TEST_SHARE * len(all_lines))
train_size = len(all_lines) - test_size

# Shuffled train/validation split; the fixed random_state makes the split
# reproducible across kernel restarts.
train_lines, val_lines = train_test_split(all_lines, test_size=TEST_SHARE, random_state=42)


In [3]:
# read_langs only receives the training lines, so the language objects are
# derived from the training split; the validation dataset reuses them.
# Each call gets its own freshly built list of lines.
EN_LANG, RU_LANG, _ = read_langs(
    "en",
    "ru",
    [*train_lines],
)

train_dataset = TranslationDataset([*train_lines], EN_LANG, RU_LANG)
val_dataset = TranslationDataset([*val_lines], EN_LANG, RU_LANG)


# Model Training

In [4]:
# The vocabulary sizes set the encoder input and decoder output dimensions.
INPUT_DIM, OUTPUT_DIM = len(EN_LANG.vocab), len(RU_LANG.vocab)

# Encoder and decoder share the same embedding size and dropout value.
ENC_EMB_DIM = DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = DEC_DROPOUT = 0.5

enc = Encoder(
    INPUT_DIM,
    ENC_EMB_DIM,
    HID_DIM,
    N_LAYERS,
    ENC_DROPOUT,
)
dec = Decoder(
    OUTPUT_DIM,
    DEC_EMB_DIM,
    HID_DIM,
    N_LAYERS,
    DEC_DROPOUT,
)

In [5]:
# Combine the encoder and decoder into one sequence-to-sequence model.
# DEVICE is handed to the Seq2Seq constructor.
# NOTE(review): presumably used for tensors the model creates internally — confirm.
model = Seq2Seq(enc, dec, DEVICE)

In [6]:
def optimizer_fn(model: nn.Module, lr: float = 5e-3) -> optim.Optimizer:
    """Create the Adam optimizer for ``model``.

    Args:
        model: Module whose parameters will be optimized.
        lr: Learning rate passed to Adam. Defaults to ``5e-3``, so calling
            ``optimizer_fn(model)`` behaves exactly as before.

    Returns:
        An ``optim.Adam`` instance over ``model.parameters()``.
    """
    return optim.Adam(model.parameters(), lr=lr)

# Look up the index of the PAD token in the Russian vocabulary and tell the
# loss to ignore targets with that index.
ru_pad_index = RU_LANG.vocab.get_stoi()[PAD]
criterion_fn = nn.CrossEntropyLoss(ignore_index=ru_pad_index)

In [None]:
# Load the TensorBoard notebook extension (requires the `tensorboard` package).
%load_ext tensorboard

In [7]:
# Lightning wrappers around the model (with its loss and optimizer factory)
# and around the train/validation datasets.
pl_model = ModelWrapper(model, criterion_fn, optimizer_fn)
# NOTE(review): the two 128s are presumably the train and validation batch
# sizes — confirm against PlTranslationDataset.
pl_dataset = PlTranslationDataset(train_dataset, val_dataset, 128, 128)
# Checkpointing monitors the "val_loss" metric.
checkpoint_callback = pl.callbacks.ModelCheckpoint(monitor="val_loss")
# NOTE: the `tensorboard` package may need to be installed first
# (`pip install tensorboard`).
trainer = pl.Trainer(
    max_epochs=30,
    # Follow DEVICE instead of hard-coding "gpu": the notebook already falls
    # back to CPU when CUDA is unavailable, and the Trainer must agree with the
    # device handed to the model or construction fails on CPU-only machines.
    accelerator="gpu" if DEVICE.type == "cuda" else "cpu",
    devices=1,
    callbacks=[
        # Stop once "val_loss" has not improved for 5 validation checks.
        pl.callbacks.early_stopping.EarlyStopping(monitor="val_loss", patience=5),
        pl.callbacks.LearningRateMonitor(logging_interval="step"),
        checkpoint_callback,
    ],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [7]:
# Trade some float32 matmul precision for speed before training starts.
torch.set_float32_matmul_precision("medium")

# Needs `trainer`, `pl_model` and `pl_dataset` to exist in the kernel already;
# running this cell on a fresh kernel without them raises NameError.
trainer.fit(pl_model, pl_dataset)

NameError: name 'trainer' is not defined