In [768]:
# PyTorch imports
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from torch import nn

# NLTK imports
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.gleu_score import sentence_gleu
from nltk.translate.nist_score import sentence_nist

# others
import pandas as pd
import datasets
from datasets import load_metric
import warnings
warnings.filterwarnings("ignore")

# sklearn imports
import sklearn
from sklearn.svm import SVC
from sklearn.metrics import f1_score, accuracy_score

### Build Custom Dataset

In [769]:
class MTCheckDataset(Dataset):
    
    def __init__(self, data_path):
        self.data_path = data_path
        self.references, self.candidates, self.scores, self.labels = self.import_data(self.data_path)
        self.samples = self.generate_features(self.references, self.candidates)

    def __getitem__(self, idx):
        sample = torch.tensor(self.samples.loc[idx]).float()
        label = self.labels[idx]
        return (sample, label)
        
    def __len__(self):
        return len(self.samples)
    
    def import_data(self, file):
        # English references
        references = []
        # Translated English sentences
        candidates = []
        # Bleu scores for translations
        scores = []
        # Labels indicating human (H-->0) or machine translation (M-->1)
        labels = []
        with open(file) as f:
            lines = f.readlines()
            for i, line in enumerate(lines):
                line = line.strip('\n')
                if i % 6 == 1:
                    references.append(line)
                elif i % 6 == 2:
                    candidates.append(line)
                elif i % 6 == 3:
                    scores.append(float(line))
                elif i % 6 == 4:
                    if line == 'H':
                        labels.append(0)
                    else:
                        labels.append(1)
        return references, candidates, scores, labels
    
    def generate_features(self, references, candidates):
        # define empty arrays for metrics
        blue_1_ind, blue_2_ind, blue_3_ind, blue_4_ind, blue_2_cumu, blue_3_cumu, blue_4_cumu = [], [], [], [], [], [], []
        gleu_default, nist_defualt = [], []

        # compute 9 different metrics
        for ref, cand in zip(references, candidates):
            ref_split = ref.split(' ')
            ref_input = [ref_split]
            cand_input = cand.split(' ')
            # individual bleu scores
            blue_1_ind.append(sentence_bleu(ref_input, cand_input, weights=(1, 0, 0, 0)))
            blue_2_ind.append(sentence_bleu(ref_input, cand_input, weights=(0, 1, 0, 0)))
            blue_3_ind.append(sentence_bleu(ref_input, cand_input, weights=(0, 0, 1, 0)))
            blue_4_ind.append(sentence_bleu(ref_input, cand_input, weights=(0, 0, 0, 1)))
            # cumulative bleu scores
            blue_2_cumu.append(sentence_bleu(ref_input, cand_input, weights=(0.5, 0.5, 0, 0)))
            blue_3_cumu.append(sentence_bleu(ref_input, cand_input, weights=(0.33, 0.33, 0.33, 0)))
            blue_4_cumu.append(sentence_bleu(ref_input, cand_input, weights=(0.25, 0.25, 0.25, 0.25)))
            # other scores
            gleu_default.append(sentence_gleu(ref_input, cand_input))
            nist_defualt.append(sentence_nist(ref_input, cand_input))
            #ribes_defualt.append(sentence_ribes(ref_input, cand_input))

        # pass metrics to dataframe and return
        feature_dict = {"blue_1_ind": blue_1_ind, "blue_2_ind": blue_2_ind, "blue_3_ind": blue_3_ind,
                        "blue_4_ind": blue_4_ind, "blue_2_cumu": blue_2_cumu, "blue_3_cumu": blue_3_cumu,
                        "blue_4_cumu": blue_4_cumu, "gleu_default": gleu_default, "nist_defualt": nist_defualt}
        features = pd.DataFrame(feature_dict)
        return features

In [770]:
# define train and test data
train_data = MTCheckDataset(data_path="train.txt")
test_data = MTCheckDataset(data_path="test.txt")

In [771]:
# define train and test dataloaders
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
test_loader = DataLoader(test_data, batch_size=8, shuffle=True)

### Feed-Forward Neural Network

In [772]:
# TODO: add optuna hyperparm search
class FFNN(LightningModule):
    def __init__(self, input_dim, hidden_dim, out_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.input_dim = input_dim
        self.out_dim = out_dim
        self.hidden = nn.Linear(self.input_dim, self.hidden_dim)
        self.out = nn.Linear(self.hidden_dim, self.out_dim)
        self.activation = F.tanh
        self.droput = nn.Dropout(0.1)
    
    def forward(self, x):
        x = self.hidden(x)
        x = self.activation(x)
        x = self.droput(x)
        x = F.sigmoid(self.out(x))
        return x

In [773]:
ffnn = FFNN(input_dim=9, hidden_dim=256, out_dim=2)

### The Training Loop

In [774]:
# define the optimizer
optimizer = torch.optim.Adam(ffnn.parameters(), lr=0.01)

In [775]:
# define the loss function
loss = nn.CrossEntropyLoss()

In [777]:
# training loop
num_epochs = 5
total = len(test_data)
for epoch in range(num_epochs):
    losses = list()
    for batch in train_loader:
        X, y = batch
        # 1. forward
        predicted_vector = ffnn(X)
        # 2. compute objective function
        J = loss(predicted_vector, y)
        # 3. clear the gradients
        optimizer.zero_grad()
        # 4. accumulate partial derivatives of J w.r.t to params
        J.backward()
        # 5. step in opp direction of gradient
        optimizer.step()     
        losses.append(J.item())

    train_loss = torch.tensor(losses).mean()

    all_labels, all_preds = list(), list()
    for batch in test_loader:
        X, y = batch
        # 1. forward
        predicted_vector = ffnn(X)
        # for accuracy
        predicted_labels = torch.argmax(predicted_vector, dim=1)
        # for f1 score
        all_labels.append(y.tolist())
        all_preds.append(predicted_labels.tolist())

    # flatten the lists
    all_labels = [item for sublist in all_labels for item in sublist]
    all_preds = [item for sublist in all_preds for item in sublist]
    f1 = f1_score(all_labels, all_preds)
    acc = accuracy_score(all_labels, all_preds)
    
    print(f'epoch {epoch+1}, train_loss: {train_loss:.2f}, f1_score: {f1:.2f}, accuracy: {acc:.2f}')

epoch 1, train_loss: 0.58, f1_score: 0.73, accuracy: 0.71
epoch 2, train_loss: 0.59, f1_score: 0.71, accuracy: 0.66
epoch 3, train_loss: 0.58, f1_score: 0.74, accuracy: 0.75
epoch 4, train_loss: 0.58, f1_score: 0.73, accuracy: 0.74
epoch 5, train_loss: 0.57, f1_score: 0.73, accuracy: 0.72


### SVM Classifier

In [748]:
clf = SVC()
clf.fit(train_data.samples, train_data.labels)

SVC()

In [749]:
predictions = clf.predict(test_data.samples)

In [764]:
f1_score = sklearn.metrics.f1_score(test_data.labels, predictions)
print(f1_score)

0.7500000000000001


### Feature Importance Review