In [1]:
# Importing the libraries
import torch as th
import torch.nn as nn
from torchtext import vocab
import pickle as pl
from tqdm import tqdm

# Import model
from Models.RNN import GRUNet
from Models.Transformer import Transformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pickled encodings, raw sentences, paddings and vocabularies.
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
with open('data/English_encodings.pkl', 'rb') as english_file:
    english_encodings, english_sentences, Paddings_en, Vocab_en = pl.load(english_file)
with open('data/French_encodings.pkl', 'rb') as french_file:
    french_encodings, french_sentences, Paddings_fr, Vocab_fr = pl.load(french_file)

# Vocabulary sizes and padding indices (source = French, target = English)
src_vocab_size = len(Vocab_fr)
tgt_vocab_size = len(Vocab_en)
src_padding_idx = Vocab_fr['<Pad>']
tgt_padding_idx = Vocab_en['<Pad>']


# Load the train, validation and test splits
with open('data/Train_data.pkl', 'rb') as train_file:
    X_train, Y_train, src_padding_mask, tgt_padding_mask = pl.load(train_file)

with open('data/Validation_data.pkl', 'rb') as validation_file:
    X_vali, Y_vali, src_padding_mask_vali = pl.load(validation_file)

with open('data/Test_data.pkl', 'rb') as test_file:
    X_test, Y_test, src_padding_mask_test = pl.load(test_file)

# No-look-ahead mask: -inf strictly above the diagonal, 0 on and below it
tgt_mask = th.full((27, 27), float('-inf')).triu(diagonal=1)

In [3]:
# Hyperparameters of the recurrent baseline
d_model = 512
dim_input = dim_output = dim_recurrent = d_model
num_sequence = 27
output = tgt_vocab_size
# NOTE(review): this rebinds src_padding_idx (first set from Vocab_fr in the data-loading
# cell) to the English '<Pad>' index, and every later cell sees the new value.
# Confirm this matches how the checkpoints were trained before changing it.
src_padding_idx = Vocab_en['<Pad>']

# Rebuild the network with the architecture expected by the checkpoint
RNN = GRUNet(
    dim_input,
    dim_recurrent,
    num_sequence,
    src_padding_idx,
    tgt_padding_idx,
    src_vocab_size,
    tgt_vocab_size,
)

# Restore the trained weights on CPU and switch to evaluation mode
rnn_state = th.load('Models/RNN.pt', map_location=th.device('cpu'))
RNN.load_state_dict(rnn_state)
RNN.eval()

GRUNet(
  (Embedding_src): Embedding(343, 512, padding_idx=205)
  (Embedding_tgt): Embedding(207, 512, padding_idx=205)
  (Encoder): GRU(512, 512, num_layers=27, batch_first=True, dropout=0.1)
  (Decoder): GRUCell(512, 512)
  (Forward): Linear(in_features=512, out_features=207, bias=True)
)

In [4]:
# Hyperparameters of the Transformer
T = 27
d_model = 512  # Dimension of the model (embedding size)
d_ff = 2048  # Dimension of the feedforward network inside the transformer
nhead = 8  # Number of attention heads
dk = d_model // nhead
dv = d_model // nhead
num_layers = 6


# Build the model from a single keyword-argument dictionary.
# NOTE(review): src_padding_idx is read from the notebook namespace, so its value
# depends on which earlier cell assigned it last — confirm it is the intended index.
transformer_kwargs = dict(
    T=T,
    d_model=d_model,
    nhead=nhead,
    d_ff=d_ff,
    dk=dk,
    dv=dv,
    num_layers=num_layers,
    src_vocab_size=src_vocab_size,
    tgt_vocab_size=tgt_vocab_size,
    src_padding_idx=src_padding_idx,
    tgt_padding_idx=tgt_padding_idx,
    dropout=0.1,
)
TransformerModel = Transformer(**transformer_kwargs)

# Restore the trained weights on CPU and switch to evaluation mode
transformer_state = th.load('Models/Transformer.pt', map_location=th.device('cpu'))
TransformerModel.load_state_dict(transformer_state)
TransformerModel.eval()

Transformer(
  (Embedding_src): Embedding(343, 512, padding_idx=205)
  (Embedding_tgt): Embedding(207, 512, padding_idx=205)
  (PositionalEncoding): PositionalEncoding()
  (Linear_out): Linear(in_features=512, out_features=207, bias=True)
  (encoder): Encoder(
    (encoders): ModuleList(
      (0): EncoderLayer(
        (MHA): MHA(
          (Softmax): Softmax(dim=1)
          (Qs): Linear(in_features=512, out_features=512, bias=True)
          (Ks): Linear(in_features=512, out_features=512, bias=True)
          (Vs): Linear(in_features=512, out_features=512, bias=True)
          (Attention): Attention(
            (Softmax): Softmax(dim=1)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (out): Linear(in_features=512, out_features=512, bias=True)
        )
        (LayerNorm1): LayerNorm(
          (LayerNorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        )
        (Feedforward): Feedforward(
          (Linear1): Linear(in_features=512, out_f

In [5]:
# Cross-entropy over target tokens; positions whose target is '<Pad>' are ignored
loss_fn = nn.CrossEntropyLoss(ignore_index=Vocab_en['<Pad>'])

# Define function to predict the loss of a sentence
def Predict_loss_Transformer(X,Y,print_sentence = True):
    """Greedily decode one source sentence with the Transformer and score it.

    The target sequence starts as '<Start>' followed by '<Pad>' and is filled in
    one position per iteration with the arg-max token of the model output. The
    output rows collected during decoding are then scored against ``Y`` with
    ``loss_fn``.

    Relies on notebook-level names: ``TransformerModel``, ``Vocab_en``,
    ``Vocab_fr``, ``tgt_mask``, ``d_model``, ``tgt_vocab_size`` and ``loss_fn``.

    Args:
        X: Source token indices of one sentence; compared against the French
            '<Pad>' index to build the source padding mask.
        Y: Target token indices of the same sentence (a tensor; it is passed
            to ``loss_fn`` and converted with ``.tolist()`` for printing).
        print_sentence: When truthy, print the predicted and the true sentence.
            Inside this function the name shadows the notebook-level
            ``print_sentence`` helper.

    Returns:
        Tuple ``(loss, Prediction)``: the loss returned by ``loss_fn`` and the
        1-D int32 tensor of predicted token indices.
    """
    # Sequence length follows the look-ahead mask so the two cannot disagree
    seq_len = tgt_mask.shape[0]
    pad_idx = Vocab_en.__getitem__('<Pad>')
    start_idx = Vocab_en.__getitem__('<Start>')

    # Prediction starts as '<Start>' followed by padding
    Prediction = th.full((seq_len,), pad_idx, dtype=th.int32)
    Prediction[0] = start_idx

    # Target padding mask: only the '<Start>' position is active at first
    Prediction_mask = th.zeros((seq_len,d_model))
    Prediction_mask[0] = 1

    # Per-position output rows used for the loss; position 0 is a one-hot on
    # '<Start>' (looked up instead of the previous hard-coded index 204)
    Prediction_vectors = th.zeros((seq_len,tgt_vocab_size))
    Prediction_vectors[0][start_idx] = 1

    # Source padding mask: zero the trailing padded positions.
    # A slice starting at -0 selects the whole tensor, so an unpadded sentence
    # must be skipped explicitly or it would be masked out completely.
    src_mask = th.ones((seq_len,d_model))
    num_src_pad = int((X == Vocab_fr.__getitem__('<Pad>')).sum())
    if num_src_pad > 0:
        src_mask[-num_src_pad:] = 0

    # Greedy decoding; no gradients are needed for evaluation
    with th.no_grad():
        for i in range(1,seq_len):

            # Run the model on the sentence decoded so far
            # (out is indexed per target position — presumably (seq_len, tgt_vocab_size); confirm)
            out = TransformerModel(X,Prediction,src_padding_mask = src_mask,tgt_mask=tgt_mask,tgt_padding_mask = Prediction_mask)

            # Commit the arg-max token for position i and unmask it
            Prediction[i] = out.argmax(1)[i]
            Prediction_vectors[i] = out[i]
            Prediction_mask[i] = 1

    # Print the predicted and true sentence
    if print_sentence:
        Senctence_pred = "".join(" " + word for word in Vocab_en.lookup_tokens(Prediction.tolist()))
        print("Transformer: Predicted Senctence:")
        print(Senctence_pred)
        print("")

        Senctence_true = "".join(" " + word for word in Vocab_en.lookup_tokens(Y.tolist()))
        print("True Senctence:")
        print(Senctence_true)

    # Calculate the loss
    loss = loss_fn(Prediction_vectors,Y)

    return loss,Prediction

# Define function print the predicted sentence
def print_sentence(Prediction,print_sentence = True):
    """Print the tokens of a predicted sentence under an "RNN" header.

    Args:
        Prediction: Tensor of English token indices; converted with ``.tolist()``
            and mapped to tokens via ``Vocab_en.lookup_tokens``.
        print_sentence: Print flag; nothing is printed when it is falsy.
            Note that this parameter shadows the function's own name.
    """
    # Nothing to do when printing is switched off
    if not print_sentence:
        return

    # Every token is prefixed with a single space, including the first one
    tokens = Vocab_en.lookup_tokens(Prediction.tolist())
    Senctence_pred = "".join(" " + token for token in tokens)

    print("RNN: Predicted Senctence:")
    print(Senctence_pred)
    print("")


In [6]:
# Get test losses for RNN and Transformer
test_losses_rnn = th.zeros(100)
test_losses_Transformer = th.zeros(100)

# get predictions for Transformer
Transformer_Predictions = th.zeros((100,27),dtype=th.int32)

# Set predict token
predict_token = th.tensor([Vocab_en.__getitem__('<Start>')],dtype = th.int32)

# Evaluation only: run without autograd so no computation graph is kept alive
# by `out` or by the stored per-sentence losses
with th.no_grad():

    # Predict the first 100 rnn sentences in one batch
    out = RNN(X_test[0:100],predict_token)

    # Predict the first 100 transformer sentences one at a time
    for idx,sample in enumerate(X_test[0:100]):

        # Get prediction and set loss
        test_losses_Transformer[idx],Transformer_Predictions[idx] = Predict_loss_Transformer(sample,Y_test[idx],print_sentence = False)
        test_losses_rnn[idx] = loss_fn(out[idx],Y_test[idx])


In [14]:
# Token-level accuracy of the Transformer over the first 100 test sentences.
# Each sentence is scored up to (not including) its first '<Pad>' target token.
# NOTE(review): position 0 is included; the decoder is seeded with '<Start>' there,
# so that position likely always counts as correct — confirm this is intended.
pad_idx = Vocab_en.__getitem__('<Pad>')
element_counter = 0
correct_preds = 0
for idx in range(100):
    target_row = Y_test[idx]
    predicted_row = Transformer_Predictions[idx]
    for j in range(27):
        if target_row[j] == pad_idx:
            break
        element_counter += 1
        correct_preds += int(predicted_row[j] == target_row[j])

print(f"Accuracy: {correct_preds/element_counter}")


Accuracy: 0.7488888888888889


In [10]:
# Summary statistics of the per-sentence test losses
rnn_mean, rnn_std = test_losses_rnn.mean().item(), test_losses_rnn.std().item()
trf_mean, trf_std = test_losses_Transformer.mean().item(), test_losses_Transformer.std().item()
print(f"RNN: Mean: {rnn_mean} ,  std: {rnn_std}, N = {len(test_losses_rnn)}")
print(f"Transformer Mean: {trf_mean} ,  std: {trf_std}, N = {len(test_losses_Transformer)}")

print("")

# Best and worst sentence (loss value and index) for each model
rnn_min, rnn_argmin = test_losses_rnn.min().item(), test_losses_rnn.argmin()
rnn_max, rnn_argmax = test_losses_rnn.max().item(), test_losses_rnn.argmax()
trf_min, trf_argmin = test_losses_Transformer.min().item(), test_losses_Transformer.argmin()
trf_max, trf_argmax = test_losses_Transformer.max().item(), test_losses_Transformer.argmax()
print(f"RNN: Minimum loss: {rnn_min} idx = {rnn_argmin}, Maximum loss: {rnn_max}, idx = {rnn_argmax}")
print(f"Transformer: minimum: {trf_min} idx = {trf_argmin}, Maximum loss: {trf_max} idx = {trf_argmax}")

RNN: Mean: 3.7049672603607178 ,  std: 0.9800503849983215, N = 100
Transformer Mean: 2.9030420780181885 ,  std: 1.9188352823257446, N = 100

RNN: Minimum loss: 2.5856926441192627 idx = 87, Maximum loss: 8.312722206115723, idx = 22
Transformer: minimum: 0.40354448556900024 idx = 3, Maximum loss: 6.520341396331787 idx = 21


In [11]:
# Print predictions of test samples
for idx in [3,12,63,87,21,22]:

    print(f"idx = {idx}")
    print(f"Loss RNN: {test_losses_rnn[idx].item()}")
    # print_sentence's second parameter is a boolean print flag, not the target
    # sentence. Passing Y_test[idx] there makes `if print_sentence:` evaluate a
    # multi-element tensor, which PyTorch rejects as ambiguous.
    print_sentence(out[idx].argmax(1))
    print(f"Loss Transformer: {test_losses_Transformer[idx].item()}")
    # Re-decode with printing enabled; this also prints the true sentence
    _ = Predict_loss_Transformer(X_test[idx],Y_test[idx])
    print("")


idx = 3
Loss RNN: 3.5277204513549805
RNN: Predicted Senctence:
 is she is is , during , , , it is <Pad> <Pad> in <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad>

Loss Transformer: 0.40354448556900024
Transformer: Predicted Senctence:
 <Start> the peach is their favorite favorite fruit , but the apple is our least favorite . <End> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad>

True Senctence:
 <Start> the peach is their least favorite fruit , but the apple is our least favorite . <End> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad>

idx = 12
Loss RNN: 4.243879318237305
RNN: Predicted Senctence:
 is she is is , during , , , it is <Pad> <Pad> in <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad>

Loss Transformer: 5.349869251251221
Transformer: Predicted Senctence:
 <Start> the united states is wet spring spring spring and it is nice mild january spring <End> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad>

In [13]:
# Print predictions of test sample 1 and 2
for idx in [1,2]:

    print(f"idx = {idx}")
    print(f"Loss RNN: {test_losses_rnn[idx].item()}")
    # print_sentence's second parameter is a boolean print flag, not the target
    # sentence. Passing Y_test[idx] there makes `if print_sentence:` evaluate a
    # multi-element tensor, which PyTorch rejects as ambiguous.
    print_sentence(out[idx].argmax(1))
    print(f"Loss Transformer: {test_losses_Transformer[idx].item()}")
    # Re-decode with printing enabled; this also prints the true sentence
    _ = Predict_loss_Transformer(X_test[idx],Y_test[idx])
    print("")

idx = 1
Loss RNN: 3.5585129261016846
RNN: Predicted Senctence:
 is she is is , during , , , it is <Pad> <Pad> in <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad>

Loss Transformer: 3.016312599182129
Transformer: Predicted Senctence:
 <Start> the united states is relaxing may may may , it it is mild dry . . <End> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad>

True Senctence:
 <Start> the united states is nice during may , and it is quiet in august . <End> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad>

idx = 2
Loss RNN: 2.6494274139404297
RNN: Predicted Senctence:
 is she is is , during , , , it is <Pad> <Pad> in <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad>

Loss Transformer: 1.0401921272277832
Transformer: Predicted Senctence:
 <Start> california is sometimes warm june june but but it is cold snowy autumn fall . <End> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad> <Pad>

True Senctence:
 <Start> c