In [1]:
# Importing the libraries
import torch as th
import torch.nn as nn
from torchtext import vocab
import pickle as pl
from tqdm import tqdm
import numpy as np

# Import model
from Models.RNN import GRUNet

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load Data
# NOTE(review): pickle.load can execute arbitrary code stored in the file; only
# load these .pkl files when they were produced by this project's own
# preprocessing step.
with open('data/English_encodings.pkl', 'rb') as f:
    english_encodings, english_sentences, Paddings_en, Vocab_en = pl.load(f)
with open('data/French_encodings.pkl', 'rb') as f:
    french_encodings, french_sentences, Paddings_fr, Vocab_fr = pl.load(f)

# Vocabulary sizes and padding indices.
# French is the source language, English is the target language.
src_vocab_size = len(Vocab_fr)
tgt_vocab_size = len(Vocab_en)
src_padding_idx = Vocab_fr['<Pad>']
tgt_padding_idx = Vocab_en['<Pad>']


# Load Train, Validation and Test data
with open('data/Train_data.pkl', 'rb') as f:
    X_train, Y_train, src_padding_mask, tgt_padding_mask = pl.load(f)

with open('data/Validation_data.pkl', 'rb') as f:
    X_vali, Y_vali, src_padding_mask_vali = pl.load(f)

with open('data/Test_data.pkl', 'rb') as f:
    X_test, Y_test, src_padding_mask_test = pl.load(f)

# 27x27 additive mask: -inf strictly above the main diagonal, 0 on and below it.
# 27 is the same value the model-configuration cell assigns to `num_sequence`.
tgt_mask = th.triu(th.full((27, 27), float('-inf')), diagonal=1)

In [3]:
# Notebook-level dataset constants (kept for other cells; `train` itself no
# longer reads them and derives sizes from the tensors it is given).
n_train = 10000
batch_size = 50


# Define Training Function
def train(model, optimizer, loss_fn, n_epochs, n_batches, X_train, Y_train):
    """Train `model` on pre-batched data and return the loss of every batch.

    Args:
        model: module invoked as ``model(X_batch, predict_token, device)``; its
            output is iterated per sample along the first dimension.
        optimizer: optimizer stepping the model's parameters.
        loss_fn: callable ``loss_fn(prediction, target)`` applied to one sample
            at a time; the per-sample values of a batch are summed.
        n_epochs: number of passes over the first ``n_batches`` batches.
        n_batches: number of batches per epoch; batch ``i`` is ``X_train[i]``
            paired with ``Y_train[i]``.
        X_train: tensor of source batches, indexed by batch along dim 0.
        Y_train: tensor of target batches, indexed by batch along dim 0.

    Returns:
        list[float]: the summed loss of each processed batch, in processing
        order across all epochs.

    Side effects:
        Moves `model` to the selected device, puts it in training mode and
        prints one line per epoch with the average loss per sample.
    """
    # Use CUDA when it is available, otherwise the CPU. MPS is not selected.
    device = th.device("cuda") if th.cuda.is_available() else "cpu"

    # Move the model once; it does not need to be moved again per batch.
    model.to(device)
    model.train()

    # Start token handed to the model together with every batch.
    # NOTE(review): taken from the global French vocabulary, while the inference
    # cell uses Vocab_en['<Start>'] -- confirm which vocabulary the decoder expects.
    predict_token = th.tensor([Vocab_fr['<Start>']], dtype=th.int32).to(device)

    # Move the data to the device
    X_train = X_train.to(device)
    Y_train = Y_train.to(device)

    loss_batches = []
    for epoch in range(n_epochs):
        loss_epoch = 0.0
        samples_seen = 0
        for batch in range(n_batches):
            X_batch = X_train[batch]
            Y_batch = Y_train[batch]

            optimizer.zero_grad()

            # Forward pass
            y_pred = model(X_batch, predict_token, device)

            # Sum the per-sample losses over the samples actually present in
            # this batch instead of relying on a global batch size.
            loss = sum(loss_fn(pred, target) for pred, target in zip(y_pred, Y_batch))

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            loss_batches.append(batch_loss)
            loss_epoch += batch_loss
            samples_seen += len(Y_batch)

        # Average loss per sample for this epoch; max() avoids a division by
        # zero when no sample was processed.
        print('Epoch: %d, Batch: %d, Loss: %f' % (epoch, n_batches - 1, loss_epoch / max(samples_seen, 1)))
    return loss_batches

In [5]:
# Model dimensions
d_model = 512
dim_input = d_model
dim_output = d_model
dim_recurrent = d_model
num_sequence = 27
output = tgt_vocab_size
# The source side uses the French vocabulary (src_vocab_size is its length), so
# the source padding index has to be looked up in Vocab_fr as well.
src_padding_idx = Vocab_fr['<Pad>']

Model = GRUNet(dim_input, dim_recurrent, num_sequence, src_padding_idx, tgt_padding_idx, src_vocab_size, tgt_vocab_size)

# Define the loss function
# NOTE(review): no ignore_index is set, so if Y_train contains padded positions
# they count toward the loss -- confirm this is intended (the alternative would
# be ignore_index=tgt_padding_idx).
loss_fn = nn.CrossEntropyLoss()

# Define the optimizer (Adam over all parameters of the GRU model)
optimizer = th.optim.Adam(Model.parameters(), lr=0.0001)

# Define the number of epochs and the dataset/batch sizes
n_epochs = 200
batch_size = 50
n_train = 10000

# Number of batches per epoch, derived from the sizes above (10000 // 50 == 200)
n_batches = n_train // batch_size

loss_batches = train(Model, optimizer, loss_fn, n_epochs, n_batches, X_train, Y_train)

# Persist the trained weights and the per-batch loss history
th.save(Model.state_dict(), "Models/RNN.pt")

with open('Models/RnnLoss.pkl', 'wb') as f:
    pl.dump(loss_batches, f)

Epoch: 0, Batch: 199, Loss: 3.177315
Epoch: 1, Batch: 199, Loss: 2.119747
Epoch: 2, Batch: 199, Loss: 2.011071
Epoch: 3, Batch: 199, Loss: 1.975379
Epoch: 4, Batch: 199, Loss: 1.926619
Epoch: 5, Batch: 199, Loss: 1.896645
Epoch: 6, Batch: 199, Loss: 1.882116
Epoch: 7, Batch: 199, Loss: 1.871537
Epoch: 8, Batch: 199, Loss: 1.863516
Epoch: 9, Batch: 199, Loss: 1.856687
Epoch: 10, Batch: 199, Loss: 1.852032
Epoch: 11, Batch: 199, Loss: 1.847987
Epoch: 12, Batch: 199, Loss: 1.845449
Epoch: 13, Batch: 199, Loss: 1.842615
Epoch: 14, Batch: 199, Loss: 1.840647
Epoch: 15, Batch: 199, Loss: 1.838642
Epoch: 16, Batch: 199, Loss: 1.837027
Epoch: 17, Batch: 199, Loss: 1.835567
Epoch: 18, Batch: 199, Loss: 1.834379
Epoch: 19, Batch: 199, Loss: 1.833399
Epoch: 20, Batch: 199, Loss: 1.832366
Epoch: 21, Batch: 199, Loss: 1.831525
Epoch: 22, Batch: 199, Loss: 1.830869
Epoch: 23, Batch: 199, Loss: 1.830263
Epoch: 24, Batch: 199, Loss: 1.829708
Epoch: 25, Batch: 199, Loss: 1.829092
Epoch: 26, Batch: 199,

In [None]:
# Run the first training batch through the model on the CPU and display the
# predicted token ids for its first sample.
Model.to("cpu")
Model.eval()  # switch mode-dependent layers (e.g. dropout) to inference behaviour
predict_token = th.tensor([Vocab_en['<Start>']], dtype=th.int32)

# Column of French '<Pad>' ids with one row per batch.
# NOTE(review): not used by the forward pass below -- confirm whether a later
# cell needs it.
predict_token_batch = th.full((n_batches, 1), Vocab_fr['<Pad>'], dtype=th.int32)

# No gradients are needed for inference. The device is passed explicitly, the
# same way the training loop calls the model.
with th.no_grad():
    out = Model(X_train[0], predict_token, "cpu")
out[0].argmax(dim=1)

tensor([  0,   6,   0,   0,   1,   5,   1,   1,   1,   4,   0, 205, 205,   3,
        205, 205, 205, 205, 205, 205, 205, 205, 205, 205, 205, 205, 205])