In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from plotnine import *
from datetime import datetime

In [None]:
# Record the wall-clock start so elapsed run time can be reported further down
start_time = datetime.now()

In [None]:
%run Kaggle_CommonLit_Challenge_Func.ipynb

### Single-Batch BiLSTM-based Predictive Model

In [None]:
# Hyperparameters, in order:
#   words_per_essay      - number of embedding rows taken per essay
#   vec_len_per_word     - length of each embedding vector (model input size)
#   vec_len_hidden_layer - hidden layer size passed to the BiLSTM
words_per_essay, vec_len_per_word, vec_len_hidden_layer = 77, 50, 100

### Step 1: Read the excerpt embeddings

In [None]:
# Load the pre-computed excerpt embeddings from disk
essay_embed = np.load('Embeddings_Valid_Excerpt.npy')
# Show the array dimensions as a quick sanity check
np.shape(essay_embed)

In [None]:
# Read the score table that accompanies the excerpt embeddings
lit_score_frame = pd.read_csv('Scores_Valid_Excerpt.csv')
# Hand torch a plain NumPy array rather than the pandas Series itself, so the
# conversion does not depend on how the Series is indexed.
# dtype has to be float32 and not float64
lit_score = torch.tensor(lit_score_frame['target'].to_numpy(), dtype=torch.float)

In [None]:
# Summary statistics showing the spread of the 'target' values
lit_score_frame['target'].describe()

In [None]:
# One essay per row of the score table
num_essays = len(lit_score_frame)
# Destination tensor laid out as (word position, essay, embedding component)
torch_essay = torch.zeros((words_per_essay, num_essays, vec_len_per_word))

# Each essay occupies `words_per_essay` consecutive rows of `essay_embed`;
# copy essay k's rows into slot k along the second axis.
for essay_pos, row_start in enumerate(range(0, num_essays * words_per_essay, words_per_essay)):
    essay_rows = essay_embed[row_start:row_start + words_per_essay, :]
    torch_essay[:, essay_pos, :] = torch.tensor(essay_rows)

torch_essay.size()

### Step 2: Train the BiLSTM model

In [None]:
# Seed torch's RNG before the model is built so the run is repeatable
torch.manual_seed(10)

# BiLSTM is not defined in this notebook (presumably it comes from the %run'd
# helper notebook); output_size looks like the widths of the layers that follow
# the LSTM, ending in a single regression output — TODO confirm.
lit_model = BiLSTM(
    in_size=vec_len_per_word,
    hidden_layer_size=vec_len_hidden_layer,
    output_size=[30, 10, 1],
)
# Mean-squared-error objective optimised with Adam
lit_loss_function = nn.MSELoss()
lit_optimizer = torch.optim.Adam(lit_model.parameters(), lr=0.001)

In [None]:
# Total number of scalar weights in the model that receive gradients
sum(weights.numel() for weights in lit_model.parameters() if weights.requires_grad)

In [None]:
num_epochs = 100
loss_per_epoch = []

for epoch in range(num_epochs):
    # Progress marker every tenth epoch
    if not epoch % 10:
        print(epoch)

    # Forward pass over every essay at once (the whole data set is one batch)
    current_fit = lit_model(torch_essay)
    # Add a trailing axis to the targets, presumably to match the model's
    # one-column output — TODO confirm against BiLSTM.forward
    current_loss = lit_loss_function(current_fit, lit_score.unsqueeze(1))
    loss_per_epoch.append(current_loss.item())

    # Clear stale gradients, back-propagate, then apply the optimizer update
    lit_optimizer.zero_grad()
    current_loss.backward()
    lit_optimizer.step()

In [None]:
# Summary statistics of the training loss recorded at each epoch
pd.Series(loss_per_epoch).describe()

In [None]:
# Tabulate the loss per epoch. The iteration index is sized from the recorded
# losses, so the frame stays valid when num_epochs is changed (a hard-coded
# length would raise a mismatch error).
per_itr_loss_frame = pd.DataFrame({'Itr':range(len(loss_per_epoch)), 'MSE_Loss':loss_per_epoch})
per_itr_loss_frame.head()

In [None]:
# Preview of tick positions at every 5th iteration (same range as the loss plot's x-axis breaks)
list(range(0, 100, 5))

In [None]:
# Code is split across multiple lines for readability and ease of modification
loss_plot = ggplot(per_itr_loss_frame, aes(x='Itr', y='MSE_Loss'))
loss_plot = loss_plot + geom_point() + geom_line()
# Tick every 5th iteration; the upper bound comes from the frame itself so the
# axis follows the number of epochs actually recorded.
loss_plot = loss_plot + scale_x_continuous(breaks=range(0, len(per_itr_loss_frame), 5))
loss_plot = loss_plot + labs(title='MSE Loss Across Iterations', x='Iteration', y='MSE Loss')
loss_plot = loss_plot + theme(plot_title=element_text(face='bold')
                             , axis_title_x=element_text(face='plain', size=12)
                             , axis_title_y=element_text(face='plain', size=12)
                             , figure_size=(15, 5))

loss_plot

In [None]:
# Wall-clock time at the end of the single-batch section
end_time = datetime.now()

In [None]:
# Elapsed time since start_time was recorded near the top of the notebook
time_diff = end_time - start_time
time_diff

In [None]:
# Execution time in minutes (total seconds divided by 60)
time_diff.total_seconds()/60

### Multi-Batch BiLSTM Model

In [None]:
batch_size = 32
# Round up so there are enough batches to hold every essay
num_batches = int(np.ceil(len(lit_score_frame) / batch_size))
print(num_batches)

# Give every essay a batch label drawn at random (seeded for repeatability).
# Labels are sampled independently, so batch sizes vary rather than all
# being exactly batch_size.
rng = np.random.default_rng(100)
elements_per_batch = rng.choice(num_batches, size=len(lit_score_frame), replace=True)

# Batch labels alongside the number of essays that landed in each
np.unique(elements_per_batch, return_counts=True)

In [None]:
# 70:30 split between training and test set: number of essays that make up
# the 70% training share, computed from the loaded score table rather than a
# hard-coded row count
0.7 * lit_score_frame.shape[0]

In [None]:
# Essays per batch label (np.unique returns the labels sorted; position equals
# label only if every label was drawn at least once — assumed here)
idx_count_per_batch = np.unique(elements_per_batch, return_counts=True)[1]
# Essay count for a 70% training share, derived from the data instead of a
# literal threshold
trng_essay_target = int(0.7 * lit_score_frame.shape[0])
# Position of the first batch at which the running essay total reaches the target
np.min(np.where(idx_count_per_batch.cumsum() >= trng_essay_target)[0])

In [None]:
# Batches [0, num_trng_batches) train the model; all remaining batches are held
# out for testing. The cut-off of 48 was presumably picked from the cumulative
# batch-size check so training holds roughly 70% of the essays — TODO confirm.
num_trng_batches = 48
trng_set_batches = list(range(num_trng_batches))
# The upper bound follows num_batches so the test set always covers exactly the
# batches that exist, instead of relying on a literal batch count
test_set_batches = list(range(num_trng_batches, num_batches))

In [None]:
# Display the batch labels held out for testing
test_set_batches

In [None]:
# Re-seed and rebuild the model, loss and optimizer so the multi-batch run
# starts from freshly constructed parameters rather than the ones trained above
torch.manual_seed(10)

# Alternative output_size settings kept for reference:
#   output_size=[50, 1]
#   output_size=[100, 50, 25, 1]
lit_model = BiLSTM(
    in_size=vec_len_per_word,
    hidden_layer_size=vec_len_hidden_layer,
    output_size=[30, 10, 1],
)
lit_loss_function = nn.MSELoss()
lit_optimizer = torch.optim.Adam(lit_model.parameters(), lr=0.001)

In [None]:
num_epochs = 20
test_loss_per_epoch = []

for epoch in range(num_epochs):
    # Progress marker every tenth epoch
    if epoch % 10 == 0:
        print(epoch)

    # ---- Training pass: one optimizer step per training batch ----
    lit_model.train()
    for trng_batch_idx in trng_set_batches:
        # Positions of the essays assigned to this batch label
        current_batch_idx = np.where(elements_per_batch == trng_batch_idx)[0]
        if current_batch_idx.size == 0:
            # A label that was never drawn has no essays; its loss would be NaN
            continue
        current_fit = lit_model(torch_essay[:, current_batch_idx, :])
        current_loss = lit_loss_function(current_fit, torch.unsqueeze(lit_score[current_batch_idx], 1))
        lit_optimizer.zero_grad()     # clear gradients left from the previous step
        current_loss.backward()       # back-propagate
        lit_optimizer.step()          # apply the parameter update

    # ---- Evaluation pass: no gradients needed, model in inference mode ----
    lit_model.eval()
    squared_error_sum = 0.0
    num_test_essays = 0
    with torch.no_grad():
        for test_batch_idx in test_set_batches:
            current_batch_idx = np.where(elements_per_batch == test_batch_idx)[0]
            if current_batch_idx.size == 0:
                continue
            current_fit = lit_model(torch_essay[:, current_batch_idx, :])
            current_loss = lit_loss_function(current_fit, torch.unsqueeze(lit_score[current_batch_idx], 1))
            # MSELoss averages within the batch; scale back by the batch size so
            # unequal batches contribute in proportion to their essay count
            squared_error_sum += current_loss.item() * current_batch_idx.size
            num_test_essays += current_batch_idx.size

    # Per-essay mean squared error over the whole test set for this epoch
    test_loss_per_epoch.append(squared_error_sum / num_test_essays if num_test_essays else float('nan'))

In [None]:
# Lowest test-set MSE reached across the epochs
np.min(test_loss_per_epoch)

In [None]:
# Same minimum expressed as a root-mean-square error (square root of each epoch's MSE)
np.min(np.sqrt(test_loss_per_epoch))

In [None]:
# Tabulate the test loss per epoch; the index length is taken from the recorded
# losses so it stays in step with num_epochs
num_test_points = len(test_loss_per_epoch)
per_itr_test_loss_frame = pd.DataFrame({'Itr':range(num_test_points), 'MSE_Loss':test_loss_per_epoch})
# Code is split across multiple lines for readability and ease of modification
loss_plot = ggplot(per_itr_test_loss_frame, aes(x='Itr', y='MSE_Loss'))
# One tick per epoch
loss_plot = loss_plot + geom_point() + geom_line() + scale_x_continuous(breaks=range(0, num_test_points, 1))
loss_plot = loss_plot + labs(title='MSE Loss Across Iterations (Test Set)', x='Iteration', y='MSE Loss')
loss_plot = loss_plot + theme(plot_title=element_text(face='bold')
                             , axis_title_x=element_text(face='plain', size=12)
                             , axis_title_y=element_text(face='plain', size=12)
                             , figure_size=(15, 5))

loss_plot

In [None]:
# Elapsed time measured from start_time (recorded near the top of the notebook),
# so it includes the single-batch section as well as the multi-batch one
end_time_2 = datetime.now()
time_diff_2 = end_time_2 - start_time
time_diff_2

In [None]:
# Elapsed time in minutes (total seconds divided by 60)
time_diff_2.total_seconds()/60