# Imports

In [1]:
import pandas as pd
import numpy as np

# PyTorch related imports
import torch
from torch.nn import *
from torch.nn import functional as F

# PyTorch Lightning related imports
import pytorch_lightning as pl
from pytorch_lightning.callbacks import Callback
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# metrics
from torchmetrics.functional import mean_absolute_error, mean_squared_error
from metrics.regression_metrics import t_kendalltau, t_pearson, t_spearman

# fnet architecture & RAdam
from fnet.model import FNet
from radam.radam import RAdam

# Custom datamodule
from datamodules.tm_datamodule import TextMiningDataModule

# Weights & Biases logging -- currently not working, so it is disabled (see the commented-out WandbLogger in the training cell)
import wandb
from pytorch_lightning.loggers import WandbLogger

# ModelCheckpoint: an earlier note here said it failed because the tutorial was outdated.
# It is kept (and used in the training cell below); left here for future improvements.
from pytorch_lightning.callbacks import ModelCheckpoint

import time

# Dataset & DataLoader

In [2]:
# Names of the three text fields; not referenced again in the visible cells.
srt = ["source", "reference", "translation"]

# Language-pair codes; each one has a directory under corpus/.
language_pairs = ["cs-en", "de-en", "en-fi", "en-zh", "ru-en", "zh-en"]

# One scores table per language pair, keyed by the pair code.
scores = dict(
    (pair, pd.read_csv(f"corpus/{pair}/scores.csv")) for pair in language_pairs
)

In [3]:
def _load_embeddings(npy_path):
    """Read a .npy file and return its contents as a float torch tensor."""
    return torch.from_numpy(np.load(npy_path)).float()


# Language pair inspected in this section.
pair = "de-en"

# LASER embeddings of the reference, source and translation sentences.
embedding_ref = _load_embeddings(f"corpus/{pair}/laser.reference_embeds.npy")
embedding_src = _load_embeddings(f"corpus/{pair}/laser.source_embeds.npy")
embedding_hyp = _load_embeddings(f"corpus/{pair}/laser.translation_embeds.npy")

# Target values: the "z-score" column of this pair's scores table.
score = torch.tensor(scores[pair]["z-score"]).float()

In [4]:
# Show the shape of the z-score tensor loaded in the previous cell.
score.shape

torch.Size([21704])

# Loggers

In [5]:
# Early-stopping callback that watches `val_loss` (lower is better) with a
# patience of 5. Note: the Trainer built in the training cell only receives the
# checkpoint callback, so this object is not active unless it is added there.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=5,
    verbose=False,
)

In [6]:
class TranslationPredictionLogger(Callback):
    """Callback that logs a fixed set of validation samples with the model's predictions.

    Every logged entry is a ``wandb.Image`` built from one sample and captioned
    with the predicted and the reference score. Entries are sent through
    ``trainer.logger.experiment.log`` under the key ``"examples"``.
    """

    def __init__(self, val_samples, num_samples=32):
        """
        Args:
            val_samples: Pair ``(inputs, scores)``; the inputs are fed to the
                model and the scores are used in the captions.
            num_samples: Maximum number of leading samples to log.
        """
        super().__init__()
        self.num_samples = num_samples
        self.val_imgs, self.val_score = val_samples

    def on_validation_batch_end(
        self,
        trainer,
        pl_module,
        outputs=None,
        batch=None,
        batch_idx=None,
        dataloader_idx=0,
    ):
        """Run the model on the stored samples and log the captioned examples.

        Lightning invokes this hook with the step outputs, the batch, the batch
        index and the dataloader index in addition to ``trainer`` and
        ``pl_module``. They are accepted (with defaults, so the previous
        two-argument call form keeps working) but not used: the stored samples
        are logged instead of the current batch.

        Args:
            trainer: Trainer whose ``logger.experiment`` receives the log call.
            pl_module: Module used to compute the predictions.
            outputs: Unused; present to match the hook signature.
            batch: Unused; present to match the hook signature.
            batch_idx: Unused; present to match the hook signature.
            dataloader_idx: Unused; present to match the hook signature.
        """
        # Move the stored samples to wherever the module currently lives.
        val_imgs = self.val_imgs.to(device=pl_module.device)
        val_score = self.val_score.to(device=pl_module.device)

        predictions = pl_module(val_imgs)
        trainer.logger.experiment.log(
            {
                "examples": [
                    wandb.Image(x, caption=f"Prediction:{p}, Score: {y}")
                    for x, p, y in zip(
                        val_imgs[: self.num_samples],
                        predictions[: self.num_samples],
                        val_score[: self.num_samples],
                    )
                ]
            }
        )


# Modules

## Convolutional

In [7]:
# Channel widths used by the convolutional model: start at 64, double four times.
FEATURES_1, FEATURES_2, FEATURES_3, FEATURES_4, FEATURES_5 = (
    64 * 2 ** step for step in range(5)
)

class Model(pl.LightningModule):
    """Convolutional regressor that outputs one tanh-squashed value per sample.

    Five (3, 1)-kernel convolutions, each followed by ReLU and batch
    normalisation, feed a three-layer fully connected head.

    NOTE(review): a later cell defines another class that is also named
    ``Model``; once that cell has run, this definition is shadowed.
    """

    def __init__(self, input_shape, learning_rate=0.001):
        """
        Args:
            input_shape: Recorded by ``save_hyperparameters``; not otherwise
                read by this class.
            learning_rate: Learning rate handed to Adam in ``configure_optimizers``.
        """
        super().__init__()

        self.save_hyperparameters()
        self.learning_rate = learning_rate

        def conv(channels_in, channels_out, stride):
            # Every convolution shares the (3, 1) kernel and (1, 0) padding.
            return Conv2d(
                channels_in,
                channels_out,
                kernel_size=(3, 1),
                stride=stride,
                padding=(1, 0),
            )

        # Layers are created in the same order as before so that parameter
        # initialisation and state-dict layout are unchanged.
        self.c1 = conv(3, FEATURES_1, 1)
        self.c2 = conv(FEATURES_1, FEATURES_1, 1)
        self.c3 = conv(FEATURES_1, FEATURES_2, 4)
        self.c4 = conv(FEATURES_2, FEATURES_3, 2)
        self.c5 = conv(FEATURES_3, FEATURES_4, 2)
        # c6 / bn6 are constructed but are not applied in _forward_features.
        self.c6 = conv(FEATURES_4, FEATURES_5, 2)
        self.bn = BatchNorm2d(FEATURES_1)
        self.bn3 = BatchNorm2d(FEATURES_2)
        self.bn4 = BatchNorm2d(FEATURES_3)
        self.bn5 = BatchNorm2d(FEATURES_4)
        self.bn6 = BatchNorm2d(FEATURES_5)
        self.fc1 = Linear(32 * FEATURES_5, 256)
        self.fc2 = Linear(256, 64)
        self.fc3 = Linear(64, 1)

    def _forward_features(self, x):
        """Apply the conv -> ReLU -> batch-norm stages c1..c5 in order."""
        # `self.bn` is shared by the first two stages.
        stages = (
            (self.c1, self.bn),
            (self.c2, self.bn),
            (self.c3, self.bn3),
            (self.c4, self.bn4),
            (self.c5, self.bn5),
        )
        for conv, norm in stages:
            x = norm(F.relu(conv(x)))
        return x

    def forward(self, x):
        """Return the tanh of the head output; the last dimension has size 1."""
        flat = torch.flatten(self._forward_features(x), 1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return torch.tanh(self.fc3(hidden))

    def _score_batch(self, batch):
        """Run the model on one ``(x, y)`` batch and compute all six metrics.

        Returns:
            ``(loss, mse, mae, kendall, pearson, spearman)`` in that order.
            The last three come from the project helpers ``t_kendalltau``,
            ``t_pearson`` and ``t_spearman``.
        """
        # NOTE(review): predictions are (batch, 1); confirm that the targets
        # have a matching shape so that mse_loss does not broadcast.
        inputs, target = batch
        preds = self(inputs)
        loss = F.mse_loss(preds, target)
        mse = mean_squared_error(preds, target)
        mae = mean_absolute_error(preds, target)
        kendall = t_kendalltau(preds, target)
        pearson = t_pearson(preds, target)
        spearman = t_spearman(preds, target)
        return loss, mse, mae, kendall, pearson, spearman

    def training_step(self, batch, batch_idx):
        """Log the training metrics and return the MSE loss."""
        loss, mse, mae, kendall, pearson, spearman = self._score_batch(batch)

        per_step_and_epoch = {"train_loss": loss, "train_mse": mse, "train_mae": mae}
        per_epoch_only = {
            "train_kendalltau": kendall,
            "train_pearson": pearson,
            "train_spearman": spearman,
        }
        for name, value in per_step_and_epoch.items():
            self.log(name, value, on_step=True, on_epoch=True, logger=True)
        for name, value in per_epoch_only.items():
            self.log(name, value, on_step=False, on_epoch=True, logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation metrics on the progress bar and return the MSE loss."""
        loss, mse, mae, kendall, pearson, spearman = self._score_batch(batch)

        error_metrics = {"val_loss": loss, "val_mse": mse, "val_mae": mae}
        correlation_metrics = {
            "val_kendalltau": kendall,
            "val_pearson": pearson,
            "val_spearman": spearman,
        }
        for name, value in error_metrics.items():
            self.log(name, value, prog_bar=True)
        for name, value in correlation_metrics.items():
            self.log(name, value, on_step=False, prog_bar=True)

        return loss

    def test_step(self, batch, batch_idx):
        """Log the test metrics on the progress bar and return the MSE loss."""
        loss, mse, mae, kendall, pearson, spearman = self._score_batch(batch)

        error_metrics = {"test_loss": loss, "test_mse": mse, "test_mae": mae}
        correlation_metrics = {
            "test_kendalltau": kendall,
            "test_pearson": pearson,
            "test_spearman": spearman,
        }
        for name, value in error_metrics.items():
            self.log(name, value, on_step=True, prog_bar=True)
        for name, value in correlation_metrics.items():
            self.log(name, value, prog_bar=True)

        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters at ``self.learning_rate``."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)


## FourierTransformerDecoder

In [8]:
class Model(pl.LightningModule):
    """FNet encoder stack followed by a fully connected regression head.

    The head narrows ``input_shape`` to ``input_shape // 2``, then to
    ``input_shape // 8``, then to a single value that is squashed with tanh.

    NOTE(review): this class reuses the name ``Model`` of the convolutional
    model defined in the previous cell and therefore replaces it.
    """

    def __init__(
        self,
        input_shape,
        learning_rate=0.001,
        num_layers: int = 6,
        dropout: float = 0.1,
        dim_ff:int = 2048
    ):
        """
        Args:
            input_shape: Feature width; passed to ``FNet`` and used as the
                input size of the head, so it must support ``//``.
            learning_rate: Learning rate handed to RAdam.
            num_layers: Forwarded to ``FNet`` as its first argument.
            dropout: Forwarded to ``FNet`` and used by the head's Dropout layers.
            dim_ff: Forwarded to ``FNet`` as its third argument.
        """
        super().__init__()

        self.save_hyperparameters()
        self.learning_rate = learning_rate
        self.decoder = FNet(num_layers, input_shape, dim_ff, dropout)

        # Widths of the two hidden layers of the head.
        wide = input_shape // 2
        narrow = input_shape // 8
        self.fc_bloc = Sequential(
            Linear(input_shape, wide),
            GELU(),
            Dropout(dropout),
            Linear(wide, narrow),
            GELU(),
            Dropout(dropout),
            Linear(narrow, 1),
        )

    def forward(self, x):
        """Return ``tanh(fc_bloc(decoder(x)))``."""
        return torch.tanh(self.fc_bloc(self.decoder(x)))

    def _score_batch(self, batch):
        """Run the model on one ``(x, y)`` batch and compute all six metrics.

        Returns:
            ``(loss, mse, mae, kendall, pearson, spearman)`` in that order.
            The last three come from the project helpers ``t_kendalltau``,
            ``t_pearson`` and ``t_spearman``.
        """
        # NOTE(review): the head ends in a size-1 dimension; confirm that the
        # targets have a matching shape so that mse_loss does not broadcast.
        inputs, target = batch
        preds = self(inputs)
        loss = F.mse_loss(preds, target)
        mse = mean_squared_error(preds, target)
        mae = mean_absolute_error(preds, target)
        kendall = t_kendalltau(preds, target)
        pearson = t_pearson(preds, target)
        spearman = t_spearman(preds, target)
        return loss, mse, mae, kendall, pearson, spearman

    def training_step(self, batch, batch_idx):
        """Log the training metrics and return the MSE loss."""
        loss, mse, mae, kendall, pearson, spearman = self._score_batch(batch)

        per_step_and_epoch = {"train_loss": loss, "train_mse": mse, "train_mae": mae}
        per_epoch_only = {
            "train_kendalltau": kendall,
            "train_pearson": pearson,
            "train_spearman": spearman,
        }
        for name, value in per_step_and_epoch.items():
            self.log(name, value, on_step=True, on_epoch=True, logger=True)
        for name, value in per_epoch_only.items():
            self.log(name, value, on_step=False, on_epoch=True, logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation metrics on the progress bar and return the MSE loss."""
        loss, mse, mae, kendall, pearson, spearman = self._score_batch(batch)

        error_metrics = {"val_loss": loss, "val_mse": mse, "val_mae": mae}
        correlation_metrics = {
            "val_kendalltau": kendall,
            "val_pearson": pearson,
            "val_spearman": spearman,
        }
        for name, value in error_metrics.items():
            self.log(name, value, prog_bar=True)
        for name, value in correlation_metrics.items():
            self.log(name, value, on_step=False, prog_bar=True)

        return loss

    def test_step(self, batch, batch_idx):
        """Log the test metrics on the progress bar and return the MSE loss."""
        loss, mse, mae, kendall, pearson, spearman = self._score_batch(batch)

        error_metrics = {"test_loss": loss, "test_mse": mse, "test_mae": mae}
        correlation_metrics = {
            "test_kendalltau": kendall,
            "test_pearson": pearson,
            "test_spearman": spearman,
        }
        for name, value in error_metrics.items():
            self.log(name, value, on_step=True, prog_bar=True)
        for name, value in correlation_metrics.items():
            self.log(name, value, prog_bar=True)

        return loss

    def configure_optimizers(self):
        """Return an RAdam optimizer (used here in place of torch.optim.Adam)."""
        return RAdam(self.parameters(), lr=self.learning_rate)


# Training + Predictions

In [84]:
# Run configuration. The trailing notes keep the values the author marked as
# "best results" in earlier experiments.
pair = "en-zh"
num_layers, dim_ff = 1, 1024  # earlier note: best results with 2 layers, dim_ff 256
lr, bs = 0.01, 1024  # earlier note: best results with lr 0.0001, batch size 16 (256 also noted)

# Data for the chosen pair, then the model sized from the datamodule's `dims`.
dm = TextMiningDataModule(bs, pair, 1024 * 3)
dm.setup()
model = Model(dm.dims, learning_rate=lr, num_layers=num_layers, dim_ff=dim_ff)

# Keep only the single checkpoint with the highest val_kendalltau.
checkpoint_callback = ModelCheckpoint(
    filename=pair + "_{epoch:02d}_{val_kendalltau:.3f}",
    monitor="val_kendalltau",
    mode="max",
    save_top_k=1,
)

# W&B logging is disabled; the alternative that was tried is kept for reference:
# wandb_logger = WandbLogger(project="wandb-lightning", job_type="train")
trainer = pl.Trainer(
    gpus=1,
    max_epochs=75,
    auto_lr_find=True,
    progress_bar_refresh_rate=1,
    logger=False,  # would be logger=wandb_logger with W&B enabled
    callbacks=[checkpoint_callback],
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [85]:
# Train `model` on the `dm` datamodule with the trainer configured in the previous cell.
trainer.fit(model, dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | decoder | FNet       | 6.3 M 
1 | fc_bloc | Sequential | 2.5 M 
---------------------------------------
8.8 M     Trainable params
0         Non-trainable params
8.8 M     Total params
35.263    Total estimated model params size (MB)


Epoch 74: 100%|██████████| 10/10 [00:01<00:00,  5.68it/s, loss=0.384, val_loss=0.776, val_mae=0.678, val_kendalltau=0.267, val_pearson=0.382]


# Test set

In [86]:
# en-zh
# Restore the saved checkpoint and validate *that* module. A bare
# trainer.validate() does not receive `model`, so the restored weights would be
# ignored in favour of whatever module the trainer still holds from fit().
model = Model.load_from_checkpoint("checkpoints/en-zh_epoch=27_val_kendalltau=0.277.ckpt")
model.eval()
# `dm` is the en-zh datamodule built in the training cell above.
val_scores = trainer.validate(model, datamodule=dm)

# Print every reported metric rounded to three decimals.
for k, v in val_scores[0].items():
    print(k, round(v, 3))

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'val_kendalltau': 0.27738261222839355,
 'val_loss': 0.7160733342170715,
 'val_mae': 0.6557644605636597,
 'val_pearson': 0.4065486192703247}
--------------------------------------------------------------------------------
val_loss 0.716
val_mae 0.656
val_kendalltau 0.277
val_pearson 0.407


In [87]:
# Display the z-score tensor loaded in the "Dataset & DataLoader" section
# (that cell used pair "de-en").
score

# Validated corpus

In [37]:
# ZH-EN
# NOTE(review): called without a model or datamodule, so this presumably
# evaluates whatever the trainer was last fitted with -- confirm that the
# preceding training run used the zh-en pair.
val_scores = trainer.validate()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'val_kendalltau': 0.27426302433013916,
 'val_loss': 0.6592675447463989,
 'val_mae': 0.6607044339179993,
 'val_mse': 0.6592675447463989,
 'val_pearson': 0.39609044790267944,
 'val_spearman': 0.3779524266719818}
--------------------------------------------------------------------------------


In [27]:
# DE-EN
# Validate the restored checkpoint on its own language pair. A bare
# trainer.validate() does not receive `model`, so it would evaluate the module
# and data left on the trainer by the last fit() call instead.
# The datamodule is built with the same arguments as in the training cell.
# NOTE(review): assumes setup() reproduces the validation split used in training -- confirm.
dm_de_en = TextMiningDataModule(bs, "de-en", 1024 * 3)
dm_de_en.setup()
model = Model.load_from_checkpoint("checkpoints/de-en_epoch=41_val_kendalltau=0.255.ckpt")
val_scores = trainer.validate(model, datamodule=dm_de_en)

# Print every reported metric rounded to three decimals.
for k, v in val_scores[0].items():
    print(k, round(v, 3))

In [54]:
# cs-en
# Validate the restored checkpoint on its own language pair. A bare
# trainer.validate() does not receive `model`, so it would evaluate the module
# and data left on the trainer by the last fit() call instead.
# The datamodule is built with the same arguments as in the training cell.
# NOTE(review): assumes setup() reproduces the validation split used in training -- confirm.
dm_cs_en = TextMiningDataModule(bs, "cs-en", 1024 * 3)
dm_cs_en.setup()
model = Model.load_from_checkpoint("checkpoints/cs-en_epoch=38_val_kendalltau=0.363.ckpt")
val_scores = trainer.validate(model, datamodule=dm_cs_en)

# Print every reported metric rounded to three decimals.
for k, v in val_scores[0].items():
    print(k, round(v, 3))

In [78]:
# en-fi
# Validate the restored checkpoint on its own language pair. A bare
# trainer.validate() does not receive `model`, so it would evaluate the module
# and data left on the trainer by the last fit() call instead.
# The datamodule is built with the same arguments as in the training cell.
# NOTE(review): assumes setup() reproduces the validation split used in training -- confirm.
dm_en_fi = TextMiningDataModule(bs, "en-fi", 1024 * 3)
dm_en_fi.setup()
model = Model.load_from_checkpoint("checkpoints/en-fi_epoch=22_val_kendalltau=0.333.ckpt")
model.eval()
val_scores = trainer.validate(model, datamodule=dm_en_fi)

# Print every reported metric rounded to three decimals.
for k, v in val_scores[0].items():
    print(k, round(v, 3))

In [82]:
# ru-en
# Validate the restored checkpoint on its own language pair. A bare
# trainer.validate() does not receive `model`, so it would evaluate the module
# and data left on the trainer by the last fit() call instead.
# The datamodule is built with the same arguments as in the training cell.
# NOTE(review): assumes setup() reproduces the validation split used in training -- confirm.
dm_ru_en = TextMiningDataModule(bs, "ru-en", 1024 * 3)
dm_ru_en.setup()
model = Model.load_from_checkpoint("checkpoints/ru-en_epoch=15_val_kendalltau=0.231.ckpt")
model.eval()
val_scores = trainer.validate(model, datamodule=dm_ru_en)

# Print every reported metric rounded to three decimals.
for k, v in val_scores[0].items():
    print(k, round(v, 3))

# Fill Test Scores

In [124]:
# Language pair whose test-set file is filled in below.
path = "de-en"
# Keep only the "metric" column of the predicted-scores file.
scored = pd.read_csv("scores_de-en_0.255.csv").loc[:, "metric"]
scored.shape

(28404,)

In [126]:
# Load the test-set table for `path` and discard its existing "metric" column,
# which is replaced by the predicted scores in the next step.
to_fill = pd.read_csv(f"testset/{path}/scores.csv").drop(columns="metric")
print(to_fill.shape)
to_fill.head()

(28404, 3)


Unnamed: 0,source,reference,translation
0,Das Publikum ist fast gleichmäßig zwischen Sch...,The audience is almost evenly split between bl...,The audience is almost evenly split between bl...
1,Du kannst ihre Energie durch den Bildschirm sp...,"You can feel their energy through the screen. """"","You can feel her energy through the screen."""
2,"Da die Adresse unbekannt ist, wird die Mithilf...","As the address is unknown, the help of the pop...","As the address is unknown, the assistance of t..."
3,"Arsenal-Manager Arsene Wenger, dessen Verein i...","Arsenal manager Arsene Wenger, whose club is o...","Arsenal manager Arsene Wenger, whose club is o..."
4,Landwirtschaftsminister im Interview - Wie sch...,Agriculture Minister in the interview - How do...,Minister of Agriculture in interview – How do ...


In [128]:
# Attach the predicted "metric" column by row index (left join, the default)
# and write the combined table without the index column.
to_fill.join(scored, how="left").to_csv(f"filled_scores_{path}.csv", index=False)