# Imports

In [48]:
import pandas as pd
import numpy as np

# PyTorch related imports
import torch
from torch.nn import *
from torch.nn import functional as F

# PyTorch Lightning related imports
import pytorch_lightning as pl
from pytorch_lightning.callbacks import Callback
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# metrics
from torchmetrics.functional import mean_absolute_error, mean_squared_error
from metrics.regression_metrics import t_kendalltau, t_pearson, t_spearman

# fnet architecture & RAdam
from fnet.model import FNet
from radam.radam import RAdam

# Custom datamodule
from datamodules.tm_datamodule import TextMiningDataModule

# Weights & Biases logging -- currently not working, so it is disabled
import wandb
from pytorch_lightning.loggers import WandbLogger

# ModelCheckpoint fails (the tutorial it was taken from is outdated).
# The import is left here for future improvements.
from pytorch_lightning.callbacks import ModelCheckpoint

import time

# Dataset & DataLoader

In [87]:
# Names of the three text fields (source, reference, translation); not
# referenced again in the cells shown here.
srt = ["source", "reference", "translation"]

# Language pairs for which a corpus directory is read.
language_pairs = ["cs-en", "de-en", "en-fi", "en-zh", "ru-en", "zh-en"]

# One scores table per language pair, keyed by the pair name.
scores = {}
for lang_pair in language_pairs:
    scores[lang_pair] = pd.read_csv(f"corpus/{lang_pair}/scores.csv")

In [3]:
pair = "de-en"


def _load_laser_embedding(lang_pair, kind):
    """Read ``corpus/<lang_pair>/laser.<kind>_embeds.npy`` into a float32 tensor."""
    array = np.load(f"corpus/{lang_pair}/laser.{kind}_embeds.npy")
    return torch.from_numpy(array).float()


# Same load order as before: reference, source, then translation (hypothesis).
embedding_ref = _load_laser_embedding(pair, "reference")
embedding_src = _load_laser_embedding(pair, "source")
embedding_hyp = _load_laser_embedding(pair, "translation")

# Regression target: the "z-score" column of the selected pair's scores table.
score = torch.tensor(scores[pair]["z-score"]).float()

In [4]:
# Sanity check: display the size of the score tensor built above.
score.shape

torch.Size([21704])

# Loggers

In [7]:
# Early stopping on the logged `val_loss` (lower is better), with a patience of 5.
# NOTE(review): the Trainer built in the training cell only receives the
# checkpoint callback, so this object is currently unused -- confirm intent.
early_stop_callback = EarlyStopping(
    mode="min",
    monitor="val_loss",
    patience=5,
    verbose=False,
)

In [8]:
class TranslationPredictionLogger(Callback):
    """Log a fixed set of validation samples together with model predictions.

    After every validation batch the stored samples are pushed through the
    module and the first ``num_samples`` (input, prediction, score) triples are
    sent to ``trainer.logger.experiment.log`` under the key ``"examples"``.

    Args:
        val_samples: A pair ``(inputs, scores)``; both elements must support
            ``.to(device=...)`` and slicing.
        num_samples: Upper bound on how many samples are logged per call.

    NOTE(review): every sample is wrapped in ``wandb.Image`` -- confirm that
    this is appropriate for the data actually passed in.
    NOTE(review): the same fixed samples are re-logged after each validation
    batch; confirm that a per-epoch hook was not intended.
    """

    def __init__(self, val_samples, num_samples=32):
        super().__init__()
        self.num_samples = num_samples
        self.val_imgs, self.val_score = val_samples

    def on_validation_batch_end(
        self,
        trainer,
        pl_module,
        outputs=None,
        batch=None,
        batch_idx=None,
        dataloader_idx=0,
    ):
        """Log predictions for the stored samples.

        Lightning invokes this hook with the step outputs, the batch and its
        indices in addition to ``trainer`` and ``pl_module``. They are accepted
        (and ignored) so the call no longer fails with a ``TypeError``; the
        defaults keep a bare ``(trainer, pl_module)`` call working as before.
        """
        # A Trainer created with ``logger=False`` has no experiment to log to.
        if trainer.logger is None:
            return

        val_imgs = self.val_imgs.to(device=pl_module.device)
        val_score = self.val_score.to(device=pl_module.device)

        predictions = pl_module(val_imgs)
        trainer.logger.experiment.log(
            {
                "examples": [
                    wandb.Image(x, caption=f"Prediction:{p}, Score: {y}")
                    for x, p, y in zip(
                        val_imgs[: self.num_samples],
                        predictions[: self.num_samples],
                        val_score[: self.num_samples],
                    )
                ]
            }
        )


# Modules

## Convolutional

In [9]:
# Channel widths: starting from 64, every following width doubles the previous one.
FEATURES_1 = 64
FEATURES_2, FEATURES_3, FEATURES_4, FEATURES_5 = (
    FEATURES_1 << doublings for doublings in range(1, 5)
)

class Model(pl.LightningModule):
    """Convolutional regressor producing one score in (-1, 1) per sample.

    Five ``Conv2d`` + ReLU + ``BatchNorm2d`` stages feed a three-layer MLP
    whose output is squashed with ``tanh``.

    Args:
        input_shape: Only recorded by ``save_hyperparameters``; the layer sizes
            below are hard-coded and do not depend on it.
        learning_rate: Learning rate handed to Adam in ``configure_optimizers``.

    NOTE(review): ``tanh`` bounds predictions to (-1, 1). If the targets are
    unbounded z-scores, values outside that range can never be matched --
    confirm how the datamodule scales the targets.
    """

    # Metric groups; each stage logs the two groups with different flags.
    _ERROR_METRICS = ("loss", "mse", "mae")
    _CORRELATION_METRICS = ("kendalltau", "pearson", "spearman")

    def __init__(self, input_shape, learning_rate=0.001):
        super().__init__()

        self.save_hyperparameters()
        self.learning_rate = learning_rate

        # Every convolution uses a (3, 1) kernel with (1, 0) padding; only the
        # stride (4th argument) and the channel widths change between stages.
        self.c1 = Conv2d(3, FEATURES_1, (3, 1), 1, (1, 0))
        self.c2 = Conv2d(FEATURES_1, FEATURES_1, (3, 1), 1, (1, 0))
        self.c3 = Conv2d(FEATURES_1, FEATURES_2, (3, 1), 4, (1, 0))
        self.c4 = Conv2d(FEATURES_2, FEATURES_3, (3, 1), 2, (1, 0))
        self.c5 = Conv2d(FEATURES_3, FEATURES_4, (3, 1), 2, (1, 0))
        # c6 / bn6 are created (and therefore stored in checkpoints) but are
        # not called in ``_forward_features``.
        self.c6 = Conv2d(FEATURES_4, FEATURES_5, (3, 1), 2, (1, 0))
        # ``bn`` is one BatchNorm instance applied after both c1 and c2.
        self.bn = BatchNorm2d(FEATURES_1)
        self.bn3 = BatchNorm2d(FEATURES_2)
        self.bn4 = BatchNorm2d(FEATURES_3)
        self.bn5 = BatchNorm2d(FEATURES_4)
        self.bn6 = BatchNorm2d(FEATURES_5)
        # The flattened c5 output must contain exactly 32 * FEATURES_5 values.
        self.fc1 = Linear(32 * FEATURES_5, 256)
        self.fc2 = Linear(256, 64)
        self.fc3 = Linear(64, 1)

    def _forward_features(self, x):
        """Apply the conv -> ReLU -> batch-norm stages c1 through c5."""
        x = self.bn(F.relu(self.c1(x)))
        x = self.bn(F.relu(self.c2(x)))
        x = self.bn3(F.relu(self.c3(x)))
        x = self.bn4(F.relu(self.c4(x)))
        x = self.bn5(F.relu(self.c5(x)))
        return x

    def forward(self, x):
        """Return one ``tanh``-bounded prediction per sample."""
        x = self._forward_features(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x

    def _compute_metrics(self, batch):
        """Run the model on ``batch`` and return the loss and all metrics.

        Args:
            batch: An ``(inputs, targets)`` pair.

        Returns:
            Dict with the keys ``loss``, ``mse``, ``mae``, ``kendalltau``,
            ``pearson`` and ``spearman``.
        """
        x, y = batch
        logits = self(x)
        return {
            "loss": F.mse_loss(logits, y),
            "mse": mean_squared_error(logits, y),
            "mae": mean_absolute_error(logits, y),
            "kendalltau": t_kendalltau(logits, y),
            "pearson": t_pearson(logits, y),
            "spearman": t_spearman(logits, y),
        }

    def training_step(self, batch, batch_idx):
        """Log the ``train_*`` metrics and return the MSE loss."""
        metrics = self._compute_metrics(batch)

        # Error metrics are logged per step and per epoch, correlations per
        # epoch only.
        for name in self._ERROR_METRICS:
            self.log(f"train_{name}", metrics[name], on_step=True, on_epoch=True, logger=True)
        for name in self._CORRELATION_METRICS:
            self.log(f"train_{name}", metrics[name], on_step=False, on_epoch=True, logger=True)

        return metrics["loss"]

    def validation_step(self, batch, batch_idx):
        """Log the ``val_*`` metrics on the progress bar and return the loss."""
        metrics = self._compute_metrics(batch)

        for name in self._ERROR_METRICS:
            self.log(f"val_{name}", metrics[name], prog_bar=True)
        for name in self._CORRELATION_METRICS:
            self.log(f"val_{name}", metrics[name], on_step=False, prog_bar=True)

        return metrics["loss"]

    def test_step(self, batch, batch_idx):
        """Log the ``test_*`` metrics on the progress bar and return the loss."""
        metrics = self._compute_metrics(batch)

        for name in self._ERROR_METRICS:
            self.log(f"test_{name}", metrics[name], on_step=True, prog_bar=True)
        for name in self._CORRELATION_METRICS:
            self.log(f"test_{name}", metrics[name], prog_bar=True)

        return metrics["loss"]

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)


## FourierTransformerDecoder

In [50]:
class Model(pl.LightningModule):
    """FNet stack followed by an MLP head producing one score in (-1, 1).

    Args:
        input_shape: Width of the input features; also sizes the MLP head.
        learning_rate: Learning rate handed to RAdam in ``configure_optimizers``.
        num_layers: Forwarded to ``FNet`` as its first argument.
        dropout: Dropout probability for ``FNet`` and for the MLP head.
        dim_ff: Forwarded to ``FNet`` as its third argument.

    NOTE(review): this class reuses the name ``Model`` and therefore replaces
    any earlier ``Model`` definition once the cell has run.
    """

    def __init__(
        self,
        input_shape,
        learning_rate=0.001,
        num_layers: int = 6,
        dropout: float = 0.1,
        dim_ff: int = 2048,
    ):
        super().__init__()

        self.save_hyperparameters()
        self.learning_rate = learning_rate
        self.decoder = FNet(num_layers, input_shape, dim_ff, dropout)

        # Head widths: input -> input // 2 -> input // 8 -> 1
        # (e.g. 1024 -> 512 -> 128 -> 1, or 3072 -> 1536 -> 384 -> 1).
        hidden_1 = input_shape // 2
        hidden_2 = input_shape // 8
        self.fc_bloc = Sequential(
            Linear(input_shape, hidden_1),
            GELU(),
            Dropout(dropout),
            Linear(hidden_1, hidden_2),
            GELU(),
            Dropout(dropout),
            Linear(hidden_2, 1),
        )

    def forward(self, x):
        """Return one ``tanh``-bounded prediction per sample."""
        x = self.decoder(x)
        x = self.fc_bloc(x)
        x = torch.tanh(x)
        return x

    def _compute_metrics(self, batch, with_correlations=True):
        """Run the model on ``batch`` and return the loss and metrics.

        Args:
            batch: An ``(inputs, targets)`` pair.
            with_correlations: When False, the Kendall/Pearson/Spearman
                metrics are skipped entirely.

        Returns:
            Dict with the keys ``loss``, ``mse``, ``mae`` and, if requested,
            ``kendalltau``, ``pearson`` and ``spearman``.
        """
        x, y = batch
        logits = self(x)
        metrics = {
            "loss": F.mse_loss(logits, y),
            "mse": mean_squared_error(logits, y),
            "mae": mean_absolute_error(logits, y),
        }
        if with_correlations:
            metrics["kendalltau"] = t_kendalltau(logits, y)
            metrics["pearson"] = t_pearson(logits, y)
            metrics["spearman"] = t_spearman(logits, y)
        return metrics

    def training_step(self, batch, batch_idx):
        """Log ``train_loss`` / ``train_mse`` / ``train_mae`` and return the loss."""
        # Correlation metrics are not logged during training, so they are not
        # computed either (they used to be evaluated and thrown away).
        metrics = self._compute_metrics(batch, with_correlations=False)

        for name in ("loss", "mse", "mae"):
            self.log(f"train_{name}", metrics[name], on_step=True, on_epoch=True, logger=True)

        return metrics["loss"]

    def validation_step(self, batch, batch_idx):
        """Log the ``val_*`` metrics on the progress bar and return the loss."""
        metrics = self._compute_metrics(batch)

        for name in ("loss", "mse", "mae"):
            self.log(f"val_{name}", metrics[name], prog_bar=True)
        for name in ("kendalltau", "pearson", "spearman"):
            self.log(f"val_{name}", metrics[name], on_step=False, prog_bar=True)

        return metrics["loss"]

    def test_step(self, batch, batch_idx):
        """Log the ``test_*`` metrics on the progress bar and return the loss."""
        metrics = self._compute_metrics(batch)

        for name in ("loss", "mse", "mae"):
            self.log(f"test_{name}", metrics[name], on_step=True, prog_bar=True)
        for name in ("kendalltau", "pearson", "spearman"):
            self.log(f"test_{name}", metrics[name], prog_bar=True)

        return metrics["loss"]

    def configure_optimizers(self):
        """Return a RAdam optimizer over all parameters."""
        return RAdam(self.parameters(), lr=self.learning_rate)


# Training + Predictions

In [63]:
# Experiment configuration. The trailing notes record the values that gave
# the best results so far, which may differ from the values set here.
pair = "de-en"
num_layers = 4  # 2 best results
dim_ff = 256  # 256 best results
lr = 0.00005  # 0.0001 best results

# NOTE(review): the positional arguments are presumably (batch size, language
# pair, feature width) -- confirm against TextMiningDataModule.
dm = TextMiningDataModule(16, pair, 1024 * 3)
dm.setup()

# Pass the `num_layers` variable itself (it used to be a hard-coded 4), so
# that editing the configuration above actually changes the model.
model = Model(dm.dims, learning_rate=lr, num_layers=num_layers, dim_ff=dim_ff)

# wandb_logger = WandbLogger(project="wandb-lightning", job_type="train")

# Checkpoint on the highest validation Kendall tau. The placeholders in the
# file name are filled in by Lightning, not by Python string formatting.
checkpoint_callback = ModelCheckpoint(
    monitor="val_kendalltau",
    filename=pair + "_{epoch:02d}_{val_kendalltau:.3f}",
    mode="max",
)

# NOTE(review): `progress_bar_refresh_rate` and `gpus` are accepted by the
# Lightning version this notebook ran on; newer releases may reject them.
trainer = pl.Trainer(
    max_epochs=10,
    progress_bar_refresh_rate=1,
    gpus=1,
    logger=False,
    # logger=wandb_logger,
    callbacks=[checkpoint_callback],
)

# Display the module structure as the cell output.
model


GPU available: True, used: True
TPU available: False, using: 0 TPU cores


Model(
  (decoder): FNet(
    (layers): ModuleList(
      (0): DecoderLayer(
        (ff): Sequential(
          (0): Linear(in_features=3072, out_features=256, bias=True)
          (1): GELU()
          (2): Dropout(p=0.1, inplace=False)
          (3): Linear(in_features=256, out_features=3072, bias=True)
          (4): Dropout(p=0.1, inplace=False)
        )
        (norm1): LayerNorm((3072,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (norm2): LayerNorm((3072,), eps=1e-05, elementwise_affine=True)
      )
      (1): DecoderLayer(
        (ff): Sequential(
          (0): Linear(in_features=3072, out_features=256, bias=True)
          (1): GELU()
          (2): Dropout(p=0.1, inplace=False)
          (3): Linear(in_features=256, out_features=3072, bias=True)
          (4): Dropout(p=0.1, inplace=False)
        )
        (norm1): LayerNorm((3072,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
  

In [64]:
# Train on the datamodule, then write a final checkpoint whose file name
# carries the current Unix timestamp so earlier runs are not overwritten.
trainer.fit(model, dm)
final_checkpoint_path = f"{pair}_{time.time()}.ckpt"
trainer.save_checkpoint(final_checkpoint_path)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | decoder | FNet       | 6.4 M 
1 | fc_bloc | Sequential | 5.3 M 
---------------------------------------
11.7 M    Trainable params
0         Non-trainable params
11.7 M    Total params
46.659    Total estimated model params size (MB)


Epoch 9: 100%|██████████| 1357/1357 [00:29<00:00, 45.68it/s, loss=0.6, val_loss=0.696, val_mse=0.696, val_mae=0.647, val_kendalltau=0.231, val_pearson=0.325, val_spearman=0.322]


# Test set

In [76]:
# Predict a score for every test-set sample and write them to a CSV file,
# one value per line.
model.eval()
results = []

# Inference only: no_grad avoids building an autograd graph for each batch.
with torch.no_grad():
    # The targets yielded by the test dataloader are not needed here.
    for test_emb, _ in dm.test_dataloader():
        # Put the batch on the model's device; a no-op when they already match.
        batch_predictions = model(test_emb.to(model.device))
        results.extend(batch_predictions.cpu().numpy().tolist())

np.savetxt(f"scores_{pair}.csv", np.array(results).flatten(), delimiter=",")

In [88]:
# One test-set table per language pair, keyed by the pair name.
test_scores = {}
for lang_pair in language_pairs:
    test_scores[lang_pair] = pd.read_csv(f"testset/{lang_pair}/scores.csv")

In [90]:
# Display the de-en test-set table.
test_scores["de-en"]

Unnamed: 0.1,Unnamed: 0,source,reference,translation,metric
0,0,Das Publikum ist fast gleichmäßig zwischen Sch...,The audience is almost evenly split between bl...,The audience is almost evenly split between bl...,2.764301
1,1,Du kannst ihre Energie durch den Bildschirm sp...,"You can feel their energy through the screen. """"","You can feel her energy through the screen.""",1.287924
2,2,"Da die Adresse unbekannt ist, wird die Mithilf...","As the address is unknown, the help of the pop...","As the address is unknown, the assistance of t...",1.499944
3,3,"Arsenal-Manager Arsene Wenger, dessen Verein i...","Arsenal manager Arsene Wenger, whose club is o...","Arsenal manager Arsene Wenger, whose club is o...",2.764301
4,4,Landwirtschaftsminister im Interview - Wie sch...,Agriculture Minister in the interview - How do...,Minister of Agriculture in interview – How do ...,0.204494
...,...,...,...,...,...
28399,28399,Insgesamt gebe es jedoch seit rund 30 Jahren e...,"Total, however, there were approximately 30 ye...","Nevertheless, altogether, there have been a se...",-0.811066
28400,28400,"Wir können froh sein, dass wir diese Zeit läng...",We can be glad we have overcome these time alr...,We can be glad that we have long overcome this...,-0.629707
28401,28401,"Der unheilbare Zustand, der in den USA rund 1,...","The condition of unheilbare, which affects the...","The incurable condition, which affects around ...",-0.945399
28402,28402,"""Komfort Check-in"" nennt die Bahn die Fahrkart...","""Check-in amenities,"" the rail operator descri...",The railway is calling this conductorless tick...,-1.990388


In [73]:
# Inspect the collected test-set predictions as a NumPy object array.
# NOTE(review): this cell's execution count is lower than the prediction
# cell's, so the output shown may come from an earlier version of `results`.
np.array(results, dtype="object")

array([array([[-9.1613717e-03],
              [-1.2315828e-01],
              [ 1.5249759e-01],
              [ 2.9827762e-01],
              [ 1.8509631e-01],
              [ 4.9634852e-02],
              [-4.4969472e-01],
              [-6.8793494e-01],
              [ 5.8158837e-02],
              [-6.0465139e-01],
              [ 1.1201333e-01],
              [-4.3616255e-04],
              [ 1.6160364e-01],
              [ 4.0735242e-01],
              [ 2.5897461e-01],
              [-7.5041703e-03]], dtype=float32),
       array([[ 0.28417528],
              [ 0.11135941],
              [-0.2411364 ],
              [-0.00213673],
              [ 0.08225336],
              [ 0.15491392],
              [-0.12357191],
              [-0.01498444],
              [ 0.07161871],
              [ 0.15485623],
              [ 0.17278703],
              [-0.04378385],
              [ 0.22395141],
              [ 0.07993133],
              [ 0.39238593],
              [ 0.3087611 ]], dtype=

In [47]:
# Run the validation loop. No model or dataloaders are passed, so the trainer
# presumably falls back on the ones it already holds -- confirm against the
# installed Lightning version.
# NOTE(review): this cell's execution count is lower than the training
# cell's, so the reported metrics may belong to an earlier run.
trainer.validate()

[autoreload of radam.radam failed: Traceback (most recent call last):
  File "/home/fsx/miniconda3/envs/pl/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/home/fsx/miniconda3/envs/pl/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 410, in superreload
    update_generic(old_obj, new_obj)
  File "/home/fsx/miniconda3/envs/pl/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 347, in update_generic
    update(a, b)
  File "/home/fsx/miniconda3/envs/pl/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 302, in update_class
    if update_generic(old_obj, new_obj): continue
  File "/home/fsx/miniconda3/envs/pl/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 347, in update_generic
    update(a, b)
  File "/home/fsx/miniconda3/envs/pl/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 266, in update_function
    setat

--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'val_kendalltau': 0.09496324509382248,
 'val_loss': 0.7278435826301575,
 'val_mae': 0.6904030442237854,
 'val_mse': 0.7278435826301575,
 'val_pearson': 0.14763259887695312,
 'val_spearman': 0.14055135846138}
--------------------------------------------------------------------------------


[{'val_loss': 0.7278435826301575,
  'val_mse': 0.7278435826301575,
  'val_mae': 0.6904030442237854,
  'val_kendalltau': 0.09496324509382248,
  'val_pearson': 0.14763259887695312,
  'val_spearman': 0.14055135846138}]