In [9]:
# importing the libraries
import json
import os
import pickle
import random
from pprint import pprint

import nltk
import numpy as np
import torch
import torch.nn.functional as F
from torch import cuda
from torch import nn
from torch.utils.data import DataLoader, Dataset

nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords

import config_hp as hp
from  data_maker import *
# wandb reads WANDB_SILENT from the process environment; a plain Python
# variable of that name is never seen by the library. Export it before wandb
# is imported so the setting is in place for login() and init().
os.environ["WANDB_SILENT"] = "true"
import wandb
import torch.optim as optim
from torch.optim import lr_scheduler
wandb.login()

# NOTE(review): a later cell calls wandb.init() again with a run name and a
# config, so this unnamed run looks redundant -- confirm it is still wanted.
wandb.init(project="LSTM-anlp")

[nltk_data] Downloading package punkt to /home2/jainit/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home2/jainit/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!




0,1
Learning Rate,██████████▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁
Perplexity,█▄▃▃▃▂▂▂▂▂▂▂▂▂▁▂▂▁▂▂▁▂▂▁▂▂▁▂▂▁▂▂▁▂▂▁▂▂▁▂
Train Loss,█▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Val Loss,█▆▄▃▃▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▂▂▂▁▁▁▁▂▂▁

0,1
Learning Rate,0.00011
Perplexity,3.18553
Train Loss,0.979
Val Loss,1.15862


In [10]:
class LSTM_LM(nn.Module):
    """LSTM language model on top of pre-trained word embeddings.

    Token ids are looked up in an embedding table built from
    ``glove_embeddings``, fed through an LSTM, passed through dropout and
    projected to one logit per vocabulary entry.

    Args:
        glove_embeddings: 2-D embedding matrix of shape
            ``(vocab_size, embedding_dim)``. A float tensor is used as-is;
            anything else ``torch.as_tensor`` accepts (e.g. a numpy array) is
            converted to a float tensor.
        hidden_layer: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability, applied between stacked LSTM layers and
            on the LSTM output before the projection.
    """

    def __init__(self, glove_embeddings,hidden_layer, num_layers, dropout):
        super().__init__()
        # nn.Embedding.from_pretrained needs a float tensor; as_tensor leaves
        # an existing float tensor untouched and converts array-like input.
        embedding_matrix = torch.as_tensor(glove_embeddings, dtype=torch.float)
        self.vocab_size = embedding_matrix.shape[0]
        self.embedding_dim = embedding_matrix.shape[1]
        self.hidden_layer = hidden_layer
        self.num_layers = num_layers
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix)
        # nn.LSTM only applies dropout *between* layers and warns when a
        # non-zero value is given for a single layer, so pass 0 in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim,
            hidden_size=hidden_layer,
            num_layers=self.num_layers,
            dropout=inter_layer_dropout,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_layer, self.vocab_size)
        self.dropout = nn.Dropout(dropout)
        # Not applied in forward(), which returns raw logits.
        # NOTE(review): dim=1 would be the sequence axis of a
        # (batch, seq, vocab) output -- confirm the axis before using this.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Compute vocabulary logits for every position of ``x``.

        Args:
            x: Integer tensor of token ids. The LSTM is built with
                ``batch_first=True``, so this presumably has shape
                ``(batch, seq_len)`` -- TODO confirm against the data loader.

        Returns:
            A tuple ``(out, hidden)`` where ``out`` holds unnormalised logits
            with a trailing dimension of ``vocab_size`` and ``hidden`` is the
            state tuple returned by ``nn.LSTM``.
        """
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds)
        lstm_out = self.dropout(lstm_out)
        out = self.fc(lstm_out)
        return out, hidden
        

In [11]:
# Read the pickled embedding matrix that the model's embedding layer is built from.
# pickle can execute arbitrary code on load -- only open files produced by this project.
with open('../data/embeddings.pkl', 'rb') as embeddings_file:
    glove_embeddings = pickle.load(embeddings_file)

In [12]:
# Read the JSON vocabulary file into word2idx.
# presumably maps each word to its integer id (going by the file name) -- verify.
with open('../data/word_to_id.json', 'r') as vocab_file:
    word2idx = json.load(vocab_file)


In [13]:
# Read the pickled data loaders; later cells index the result with 'train' and 'val'.
# pickle can execute arbitrary code on load -- only open files produced by this project.
with open('../data/lm_dataloader.pkl','rb') as loaders_file:
    loaders = pickle.load(loaders_file)

In [14]:
# Start the tracked run. Every config value is taken from the same source the
# training code uses (hp / the embedding matrix), so the logged
# hyper-parameters cannot drift from what is actually run.
wandb.init(project="LSTM-anlp", name="Cross_entropy_with_ii", config={
    "epochs": hp.EPOCHS,
    "optimizer": "Adam",
    "batch_size": hp.BATCH_SIZE,
    "embeddings_dim": int(glove_embeddings.shape[1]),
    "drop_out": hp.DROPOUT,
    "loss_fn": "CEntropy",
})

# Two stacked LSTM layers on top of the pre-trained embeddings.
model = LSTM_LM(glove_embeddings, hp.HIDDEN_LAYER, 2, hp.DROPOUT)
wandb.watch(model)
# Store the architecture summary as text; a module object is not a config value.
wandb.config.update({"model": str(model)})

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
# Targets equal to 0 are excluded from the loss.
# presumably id 0 is the padding token -- verify against the vocabulary.
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = optim.Adam(model.parameters(), lr=hp.LEARNING_RATE)
# Multiply the learning rate by 0.8 once the monitored value has stopped
# decreasing for more than 2 consecutive scheduler steps.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.8, verbose=True)





In [15]:
# Create the checkpoint directory up front so the first save cannot fail
# after several epochs of training.
os.makedirs('../data/models', exist_ok=True)

for epoch in range(hp.EPOCHS):
    # ---- training pass: one optimizer step per batch ----
    model.train()
    train_loss_sum = 0.0
    for x, y in loaders['train']:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        out, _ = model(x)
        # Flatten to (positions, vocab) vs (positions,) for the token-level loss.
        loss = criterion(out.view(-1, model.vocab_size), y.view(-1))
        loss.backward()
        optimizer.step()
        train_loss_sum += loss.item()
    # Average of the per-batch losses; perplexity is exp of that average.
    train_loss = train_loss_sum / len(loaders['train'])
    train_perplexity = np.exp(train_loss)
    print(f'Epoch: {epoch+1}, Train Loss: {train_loss} ; Perplexity: {train_perplexity}')

    # ---- validation pass: no gradients, dropout disabled ----
    model.eval()
    val_loss_sum = 0.0
    with torch.no_grad():
        for x, y in loaders['val']:
            x = x.to(device)
            y = y.to(device)
            out, _ = model(x)
            loss = criterion(out.view(-1, model.vocab_size), y.view(-1))
            val_loss_sum += loss.item()
    val_loss = val_loss_sum / len(loaders['val'])
    val_perplexity = np.exp(val_loss)
    print(f'Epoch: {epoch+1}, Val Loss: {val_loss} ; Perplexity: {val_perplexity}')

    # The plateau scheduler watches the mean validation loss.
    scheduler.step(val_loss)

    # One log call per epoch keeps all metrics on the same wandb step, and
    # separate keys keep train and validation perplexity from sharing a chart.
    wandb.log({
        "epoch": epoch + 1,
        "Train Loss": train_loss,
        "Train Perplexity": train_perplexity,
        "Val Loss": val_loss,
        "Val Perplexity": val_perplexity,
        "Learning Rate": optimizer.param_groups[0]['lr'],
    })

    # Checkpoint every fifth epoch.
    if (epoch+1)%5==0:
        torch.save(model.state_dict(), f'../data/models/lstm_{epoch+1}.pt')
        print(f'Model Saved at epoch {epoch+1}')
           
    


Epoch: 1, Train Loss: 5.923040959372449 ; Perplexity: 373.54592644102246
Epoch: 1, Val Loss: 5.416933751264155 ; Perplexity: 225.18758168809623
Epoch: 2, Train Loss: 5.239487299278601 ; Perplexity: 188.5733959009218
Epoch: 2, Val Loss: 4.934912829999103 ; Perplexity: 139.06102005448506
Epoch: 3, Train Loss: 4.901405631352081 ; Perplexity: 134.4786743365455
Epoch: 3, Val Loss: 4.705513660481434 ; Perplexity: 110.5550581295374
Epoch: 4, Train Loss: 4.719531171103276 ; Perplexity: 112.11567727505823
Epoch: 4, Val Loss: 4.570527209351394 ; Perplexity: 96.59502214990032
Epoch: 5, Train Loss: 4.597009121990407 ; Perplexity: 99.18721470777889
Epoch: 5, Val Loss: 4.486106499930881 ; Perplexity: 88.77512615839568
Model Saved at epoch 5
Epoch: 6, Train Loss: 4.50332702400842 ; Perplexity: 90.31711921430741
Epoch: 6, Val Loss: 4.410634738720016 ; Perplexity: 82.32169969457523
Epoch: 7, Train Loss: 4.431610133856345 ; Perplexity: 84.06666658206494
Epoch: 7, Val Loss: 4.3645172592819925 ; Perplexit