In [1]:
import pandas as pd
import numpy as np
import os

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import MNIST
from torchvision import transforms
import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy

In [2]:
srt = ["source", "reference", "translation"]
language_pairs = [
    "cs-en",
    "de-en",
    "en-fi",
    "en-zh",
    "ru-en",
    "zh-en",
]
scores = {pair: pd.read_csv(f"corpus/{pair}/scores.csv") for pair in language_pairs}

In [3]:
pair = "cs-en"
embedding_ref = torch.from_numpy(np.load(f"corpus/{pair}/laser.reference_embeds.npy"))
embedding_src = torch.from_numpy(np.load(f"corpus/{pair}/laser.source_embeds.npy"))
embedding_trn = torch.from_numpy(np.load(f"corpus/{pair}/laser.translation_embeds.npy"))
score = torch.tensor(scores[pair]["z-score"]).float()

In [4]:
# Exposing how to stack each embedding
TEST_VALUE = 1024
a = torch.tensor([[0 for i in range(TEST_VALUE)] for _ in range(11585)])
b = torch.tensor([[1 for i in range(TEST_VALUE)] for _ in range(11585)])
c = torch.tensor([[2 for i in range(TEST_VALUE)] for _ in range(11585)])
# each row is an embedding
tmp = torch.stack((a, b, c), 1).float()

In [5]:
embedding = torch.stack((embedding_ref, embedding_src, embedding_trn), 1)
embedding.shape

torch.Size([11585, 3, 1024])

In [6]:
class Model(pl.LightningModule):

    def __init__(self):
        super(Model, self).__init__()
        self.flatten = nn.Flatten()
        self.l1 = torch.nn.Linear(TEST_VALUE * 3, 1)

    def forward(self, x):
        x = self.flatten(x)
        x = self.l1(x)
        return torch.relu(x)

    def training_step(self, batch, batch_nb):
        x, y = batch
        loss = F.mse_loss(self(x), y.unsqueeze(1))
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.3)
        # return Ranger21(self.parameters(), lr=0.02)

In [152]:
class Model(pl.LightningModule):

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, (3,4), stride=(3,8), padding=(0, 0))
        self.pool1 = nn.MaxPool2d((3, 3), stride=(3, 1), padding=(1,1))
        self.linear1 = nn.Linear(8192, 1, bias=False)
        self.dropout2 = nn.Dropout(0.2)
        self.flatten = nn.Flatten()
    
    def forward(self, x):
        ## Define forward behavior
        x = self.conv1(x)
        # print(x.shape)
        x = F.relu(x)
        x = self.pool1(x)
        # print(x.shape)
        
        # Flatten layer
        x = self.flatten(x) 
        # print(x.shape)
        x = self.dropout2(x)
        x = self.linear1(x)
        x = torch.log_softmax(x, 1)
        return x

    def training_step(self, batch, batch_nb):
        x, y = batch
        # logits = self(x)
        # print(x.shape, y.shape)
        loss = F.mse_loss(self(x), y.unsqueeze(1))
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.03)
        # return Ranger21(self.parameters(), lr=0.02)

In [224]:
class Model(pl.LightningModule):
    def __init__(self):
        super().__init__()

        # mnist images are (1, 28, 28) (channels, width, height)
        self.layer_1 = nn.Linear(3 * 1024, 1024)
        self.layer_2 = nn.Linear(1024, 256)
        self.layer_3 = nn.Linear(256, 1)
        self.flatten = nn.Flatten()

    def forward(self, x):
        batch_size, channels, width, height = x.size()
        # (b, 1, 28, 28) -> (b, 1*28*28)
        # x = x.view(batch_size, -1)
        x = self.flatten(x)
        x = self.layer_1(x)
        x = F.relu(x)
        x = self.layer_2(x)
        x = F.relu(x)
        x = self.layer_3(x)

        x = torch.sigmoid(x)
        return x
    def training_step(self, batch, batch_nb):
        x, y = batch
        loss = F.mse_loss(self(x), y.unsqueeze(1))
        return loss
    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.mse_loss(logits, y.unsqueeze(1))
        return loss
        
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.03)


In [225]:
model = Model()
x = torch.randn(1,1,3,1024)
model(x)

tensor([[0.4517]], grad_fn=<SigmoidBackward>)

In [226]:
from torch.utils.data import Dataset
class WordsDataset(Dataset):
    def __init__(self, pair, transform=None):
        embedding_ref = torch.from_numpy(np.load(f"corpus/{pair}/laser.reference_embeds.npy"))
        embedding_src = torch.from_numpy(np.load(f"corpus/{pair}/laser.source_embeds.npy"))
        embedding_trn = torch.from_numpy(np.load(f"corpus/{pair}/laser.translation_embeds.npy"))
        self.embedding = torch.stack((embedding_src, embedding_ref, embedding_trn), 1).float()
        self.embedding = self.embedding.unsqueeze(1)
        self.score = torch.tensor(scores[pair]["z-score"]).float()
        self.transform = transform

    def __len__(self):
        return len(self.score)

    def __getitem__(self, idx):
        emb = self.embedding[idx]
        classification = self.score[idx]
        if self.transform:
            embedding = self.transform(emb)
        sample = [emb, classification]
        return sample

In [227]:
model = Model()

train_ds = WordsDataset(pair)
train_loader = DataLoader(train_ds, batch_size=32, num_workers=12)

# Initialize a trainer
trainer = pl.Trainer(gpus=1, max_epochs=1, progress_bar_refresh_rate=10)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [228]:
# Train the model ⚡
trainer.fit(model, train_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type    | Params
------------------------------------
0 | layer_1 | Linear  | 3.1 M 
1 | layer_2 | Linear  | 262 K 
2 | layer_3 | Linear  | 257   
3 | flatten | Flatten | 0     
------------------------------------
3.4 M     Trainable params
0         Non-trainable params
3.4 M     Total params
13.638    Total estimated model params size (MB)


Epoch 0: 100%|██████████| 363/363 [00:02<00:00, 129.45it/s, loss=0.93, v_num=83]


In [229]:
trainer.test(test_dataloaders=train_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 100%|██████████| 363/363 [00:01<00:00, 279.96it/s]
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------


[{}]

In [230]:
for i in range(10):
    print(model(train_ds.embedding[i].unsqueeze(1).transpose(0, 1)), score[i])

tensor([[0.]], grad_fn=<SigmoidBackward>) tensor(-0.6754)
tensor([[0.]], grad_fn=<SigmoidBackward>) tensor(-0.8294)
tensor([[0.]], grad_fn=<SigmoidBackward>) tensor(0.8032)
tensor([[0.]], grad_fn=<SigmoidBackward>) tensor(0.5631)
tensor([[0.]], grad_fn=<SigmoidBackward>) tensor(0.0215)
tensor([[0.]], grad_fn=<SigmoidBackward>) tensor(-0.8177)
tensor([[0.]], grad_fn=<SigmoidBackward>) tensor(0.2873)
tensor([[0.]], grad_fn=<SigmoidBackward>) tensor(-0.8359)
tensor([[0.]], grad_fn=<SigmoidBackward>) tensor(1.0278)
tensor([[0.]], grad_fn=<SigmoidBackward>) tensor(0.1739)
