In [1]:
import pandas as pd
import numpy as np
import os

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import MNIST
from torchvision import transforms
import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy

In [2]:
srt = ["source", "reference", "translation"]
language_pairs = [
    "cs-en",
    "de-en",
    "en-fi",
    "en-zh",
    "ru-en",
    "zh-en",
]
scores = {pair: pd.read_csv(f"corpus/{pair}/scores.csv") for pair in language_pairs}

In [67]:
pair = "cs-en"
embedding_ref = torch.from_numpy(np.load(f"corpus/{pair}/laser.reference_embeds.npy"))
embedding_src = torch.from_numpy(np.load(f"corpus/{pair}/laser.source_embeds.npy"))
embedding_trn = torch.from_numpy(np.load(f"corpus/{pair}/laser.translation_embeds.npy"))
score = torch.tensor(scores[pair]["z-score"]).float()

In [139]:
# Exposing how to stack each embedding
TEST_VALUE = 1024
a = torch.tensor([[0 for i in range(TEST_VALUE)] for _ in range(11585)])
b = torch.tensor([[1 for i in range(TEST_VALUE)] for _ in range(11585)])
c = torch.tensor([[2 for i in range(TEST_VALUE)] for _ in range(11585)])
# each row is an embedding
tmp = torch.stack((a, b, c), 1).float()

In [140]:
embedding = torch.stack((embedding_ref, embedding_src, embedding_trn), 1)
embedding.shape

torch.Size([11585, 3, 1024])

In [232]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        ## Define layers of a CNN
        self.conv1 = nn.Conv2d(1, 512, (3,4), stride=(3,1), padding=0)
        self.pool1 = nn.MaxPool2d((3, 4), stride=(3, 1), padding=0)
        self.linear1 = nn.Linear(10, 1, bias=False)
        self.dropout2 = nn.Dropout(0.25)
        self.flatten = nn.Flatten()
    
    def forward(self, x):
        ## Define forward behavior
        x = self.pool1(F.relu(self.conv1(x)))
        
        # Flatten layer
        x = self.flatten(x) 
        x = self.dropout1(x)
        x = F.relu(self.linear1(x))
        return x

In [233]:
class Model(pl.LightningModule):

    def __init__(self):
        super(Model, self).__init__()
        self.flatten = nn.Flatten()
        self.l1 = torch.nn.Linear(TEST_VALUE * 3, 1)

    def forward(self, x):
        x = self.flatten(x)
        x = self.l1(x)
        return torch.relu(x)

    def training_step(self, batch, batch_nb):
        x, y = batch
        loss = F.mse_loss(self(x), y.unsqueeze(1))
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.3)
        # return Ranger21(self.parameters(), lr=0.02)

In [234]:
class Model(pl.LightningModule):

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.ConvTranspose2d(1, 512, (3,4), stride=(3,1), padding=0)
        self.pool1 = nn.MaxPool2d((3, 4), stride=(3, 1), padding=0)
        self.linear1 = nn.Linear(10, 1, bias=False)
        self.dropout2 = nn.Dropout(0.25)
        self.flatten = nn.Flatten()
    
    def forward(self, x):
        ## Define forward behavior
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool1(x)
        
        # Flatten layer
        # x = self.flatten(x) 
        # x = self.dropout2(x)
        x = F.relu(self.linear1(x))
        return x

    def training_step(self, batch, batch_nb):
        x, y = batch
        loss = F.mse_loss(self(x), y.unsqueeze(1))
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.3)
        # return Ranger21(self.parameters(), lr=0.02)

In [235]:
from torch.utils.data import Dataset
class WordsDataset(Dataset):
    def __init__(self, pair, transform=None):
        embedding_ref = torch.from_numpy(np.load(f"corpus/{pair}/laser.reference_embeds.npy"))
        embedding_src = torch.from_numpy(np.load(f"corpus/{pair}/laser.source_embeds.npy"))
        embedding_trn = torch.from_numpy(np.load(f"corpus/{pair}/laser.translation_embeds.npy"))
        self.embedding = torch.stack((embedding_src, embedding_ref, embedding_trn), 1).float()
        self.score = torch.tensor(scores[pair]["z-score"]).float()
        self.transform = transform

    def __len__(self):
        return len(self.score)

    def __getitem__(self, idx):
        emb = self.embedding[idx]
        classification = self.score[idx]
        if self.transform:
            embedding = self.transform(emb)
        sample = [emb, classification]
        return sample

In [236]:
mnist_model = Model()

train_ds = WordsDataset(pair)
train_loader = DataLoader(train_ds, batch_size=64, num_workers=12)

# Initialize a trainer
trainer = pl.Trainer(gpus=1, max_epochs=30, progress_bar_refresh_rate=20)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [237]:

# Train the model ⚡
trainer.fit(mnist_model, train_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type            | Params
---------------------------------------------
0 | conv1    | ConvTranspose2d | 6.7 K 
1 | pool1    | MaxPool2d       | 0     
2 | linear1  | Linear          | 10    
3 | dropout2 | Dropout         | 0     
4 | flatten  | Flatten         | 0     
---------------------------------------------
6.7 K     Trainable params
0         Non-trainable params
6.7 K     Total params
0.027     Total estimated model params size (MB)


Epoch 0:   0%|          | 0/182 [00:00<?, ?it/s] 

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [1, 512, 3, 4], but got 3-dimensional input of size [64, 3, 1024] instead

In [214]:
for i in range(10):
    print(mnist_model(train_ds.embedding[i].flatten().unsqueeze(1).transpose(0, 1)), score[i])

tensor([[0.]], grad_fn=<ReluBackward0>) tensor(-0.6754)
tensor([[0.]], grad_fn=<ReluBackward0>) tensor(-0.8294)
tensor([[0.]], grad_fn=<ReluBackward0>) tensor(0.8032)
tensor([[0.]], grad_fn=<ReluBackward0>) tensor(0.5631)
tensor([[0.]], grad_fn=<ReluBackward0>) tensor(0.0215)
tensor([[0.]], grad_fn=<ReluBackward0>) tensor(-0.8177)
tensor([[0.]], grad_fn=<ReluBackward0>) tensor(0.2873)
tensor([[0.]], grad_fn=<ReluBackward0>) tensor(-0.8359)
tensor([[0.]], grad_fn=<ReluBackward0>) tensor(1.0278)
tensor([[0.]], grad_fn=<ReluBackward0>) tensor(0.1739)


In [None]:
from torch import Tensor
from torch.nn import (TransformerEncoder, TransformerDecoder,
                      TransformerEncoderLayer, TransformerDecoderLayer)


class Seq2SeqTransformer(nn.Module):
    def __init__(self, num_encoder_layers: int, num_decoder_layers: int,
                 emb_size: int, src_vocab_size: int, tgt_vocab_size: int,
                 dim_feedforward:int = 512, dropout:float = 0.1):
        super(Seq2SeqTransformer, self).__init__()
        encoder_layer = TransformerEncoderLayer(d_model=emb_size, nhead=NHEAD,
                                                dim_feedforward=dim_feedforward)
        self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        decoder_layer = TransformerDecoderLayer(d_model=emb_size, nhead=NHEAD,
                                                dim_feedforward=dim_feedforward)
        self.transformer_decoder = TransformerDecoder(decoder_layer, num_layers=num_decoder_layers)

        self.generator = nn.Linear(emb_size, tgt_vocab_size)
        self.src_tok_emb = TokenEmbedding(src_vocab_size, emb_size)
        self.tgt_tok_emb = TokenEmbedding(tgt_vocab_size, emb_size)
        self.positional_encoding = PositionalEncoding(emb_size, dropout=dropout)

    def forward(self, src: Tensor, trg: Tensor, src_mask: Tensor,
                tgt_mask: Tensor, src_padding_mask: Tensor,
                tgt_padding_mask: Tensor, memory_key_padding_mask: Tensor):
        src_emb = self.positional_encoding(self.src_tok_emb(src))
        tgt_emb = self.positional_encoding(self.tgt_tok_emb(trg))
        memory = self.transformer_encoder(src_emb, src_mask, src_padding_mask)
        outs = self.transformer_decoder(tgt_emb, memory, tgt_mask, None,
                                        tgt_padding_mask, memory_key_padding_mask)
        return self.generator(outs)

    def encode(self, src: Tensor, src_mask: Tensor):
        return self.transformer_encoder(self.positional_encoding(
                            self.src_tok_emb(src)), src_mask)

    def decode(self, tgt: Tensor, memory: Tensor, tgt_mask: Tensor):
        return self.transformer_decoder(self.positional_encoding(
                          self.tgt_tok_emb(tgt)), memory,
                          tgt_mask)