In [1]:
# importing the libraries 
import torch 
from torch import nn
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
import nltk 
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
import random
from torch import cuda
import config_hp as hp
from pprint import pprint
import pickle 
from  data_maker import *
WANDB_SILENT = "true"
import wandb
import torch.optim as optim
from torch.optim import lr_scheduler
wandb.login()

wandb.init(project="LSTM-anlp")

[nltk_data] Downloading package punkt to /home2/jainit/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home2/jainit/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home2/jainit/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjain_it[0m. Use [1m`wandb login --relogin`[0m to force relogin


Problem at: /tmp/ipykernel_38244/1585703692.py 22 <module>


KeyboardInterrupt: 

In [None]:
class LSTM_LM(nn.Module):
    def __init__(self, glove_embeddings,hidden_layer, num_layers, dropout):
        super(LSTM_LM, self).__init__()
        self.vocab_size = glove_embeddings.shape[0]
        self.embedding_dim = glove_embeddings.shape[1]
        self.hidden_layer = hidden_layer
        self.num_layers = num_layers
        self.dropout = dropout
        self.embedding = nn.Embedding.from_pretrained(glove_embeddings)
        self.lstm = nn.LSTM(input_size=self.embedding_dim, hidden_size=hidden_layer , num_layers=self.num_layers, dropout=dropout, batch_first = True)
        self.fc = nn.Linear(hidden_layer,self.vocab_size )
        self.dropout = nn.Dropout(dropout)
       
    def forward(self, x):
        # print(x.shape)
       
        embeds = self.embedding(x) # batch_size, seq_len, embedding_dim
        # print(embeds.shape)
        
        lstm_out, hidden = self.lstm(embeds) # lstm output shape: batch_size, seq_len, hidden_layer
        lstm_out = self.dropout(lstm_out) # batch_size, seq_len, hidden_layer
        out = self.fc(lstm_out) # batch_size, seq_len, vocab_size
       
        return out, hidden
        

In [None]:
with open('../data/embeddings.pkl', 'rb') as f:
    glove_embeddings = pickle.load(f)

In [None]:
with open('../data/word_to_id.json', 'r') as f:
    word2idx = json.load(f)


In [None]:
with open('../data/lm_dataloader.pkl','rb') as f:
    loaders = pickle.load(f)

In [None]:
wandb.init(project="LSTM-anlp", name="Cross_entropy_with__300_dim",config = {
    "epochs": 50 ,
    "optimizer": "Adam",
    "batch_size": hp.BATCH_SIZE,
    "embeddings_dim":300,
    "drop_out":hp.DROPOUT,
    "loss_fn":"CEntropy"

})

model = LSTM_LM(glove_embeddings, hp.HIDDEN_LAYER, 2, hp.DROPOUT)
wandb.watch(model)
wandb.config.update({"model":model})

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = optim.Adam(model.parameters(), lr=hp.LEARNING_RATE)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.8, verbose=True)





In [None]:
for epoch in range(hp.EPOCHS):
    train_loss = 0.0
    model.train()
    for i, (x, y) in enumerate(loaders['train']):
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        out, hidden = model(x)
        loss = criterion(out.view(-1, model.vocab_size), y.view(-1))
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    print(f'Epoch: {epoch+1}, Train Loss: {train_loss/len(loaders["train"])} ; Perplexity: {np.exp(train_loss/len(loaders["train"]))}')
    wandb.log({"Train Loss": train_loss/len(loaders["train"])})
    wandb.log({"Perplexity": np.exp(train_loss/len(loaders["train"]))})
    
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for i, (x, y) in enumerate(loaders['val']):
            x = x.to(device)
            y = y.to(device)
            out, hidden = model(x)
            loss = criterion(out.view(-1, model.vocab_size), y.view(-1))
            val_loss += loss.item()
        print(f'Epoch: {epoch+1}, Val Loss: {val_loss/len(loaders["val"])} ; Perplexity: {np.exp(val_loss/len(loaders["val"]))}')
        wandb.log({"Val Loss": val_loss/len(loaders["val"])})
        wandb.log({"Perplexity": np.exp(val_loss/len(loaders["val"]))})
        scheduler.step(val_loss/len(loaders["val"]))
        wandb.log({"Learning Rate": optimizer.param_groups[0]['lr']})
        if (epoch+1)%5==0:
            torch.save(model.state_dict(), f'../data/models/lstm_{epoch+1}.pt')
            print(f'Model Saved at epoch {epoch+1}')
           
    


Epoch: 1, Train Loss: 5.714415735273219 ; Perplexity: 303.20699839308315
Epoch: 1, Val Loss: 5.064549768207878 ; Perplexity: 158.309150213545
Epoch: 2, Train Loss: 4.922901514496631 ; Perplexity: 137.40070551074294
Epoch: 2, Val Loss: 4.6711088963691765 ; Perplexity: 106.81612479638952
Epoch: 3, Train Loss: 4.661872010749541 ; Perplexity: 105.83401925037508
Epoch: 3, Val Loss: 4.49945464986839 ; Perplexity: 89.9680538295389
Epoch: 4, Train Loss: 4.515827855575822 ; Perplexity: 91.4532447711398
Epoch: 4, Val Loss: 4.385991582807327 ; Perplexity: 80.31782551514425
Epoch: 5, Train Loss: 4.418798117241117 ; Perplexity: 82.99647335882008
Epoch: 5, Val Loss: 4.324961087561601 ; Perplexity: 75.56257247857141
Model Saved at epoch 5
Epoch: 6, Train Loss: 4.341956690684564 ; Perplexity: 76.85777920449351
Epoch: 6, Val Loss: 4.266074499547087 ; Perplexity: 71.24142774409636
Epoch: 7, Train Loss: 4.277046339344114 ; Perplexity: 72.02738106870983
Epoch: 7, Val Loss: 4.228770181832724 ; Perplexity: 