In [1]:
import torch
import gensim
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import math
import pickle
import time
from sklearn.metrics import f1_score
from allennlp.common.file_utils import cached_path
from allennlp.modules.elmo import Elmo, batch_to_ids
%load_ext tensorboard
from torch.utils.tensorboard import SummaryWriter

# Importing from my files
from entity import argmax, log_sum_exp, BiLSTM_CRF
from dataloader import get_data
from intent import IntentBiLSTM, get_predicted_intent, get_ICA
from shared import SharedBiLSTM

In [2]:
# TensorBoard events for this notebook are written under runs/word2vecmodel.
writer = SummaryWriter(log_dir='runs/word2vecmodel')
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Unpack the six values returned by dataloader.get_data(). The three *_loader objects are
# iterated as (sentence, tags, intent) triples further down; entity_dict becomes the
# tag -> index map and n_intents sizes the intent head.
# NOTE(review): int_dict is not referenced again in the visible cells — confirm it is still needed.
train_loader, val_loader, test_loader, int_dict, entity_dict, n_intents = get_data()

In [4]:
START_TAG = "<START>"
STOP_TAG = "<STOP>"

# Register the two boundary tags after the existing entity tags.
# - max(...) + 1 is used instead of "last inserted value + 1" so the new index can never
#   collide with an existing one, whatever order get_data() filled the dict in.
# - setdefault() makes the cell idempotent: re-running it used to reassign both tags and
#   leave <START> and <STOP> sharing the same index.
entity_dict.setdefault(START_TAG, max(entity_dict.values(), default=-1) + 1)
entity_dict.setdefault(STOP_TAG, max(entity_dict.values(), default=-1) + 1)

# tag_to_ix is an alias of (not a copy of) entity_dict.
tag_to_ix = entity_dict

In [5]:
# --- Model sizes ---
EMBEDDING_DIM = 256
HIDDEN_DIM = 256
N_INTENTS = n_intents

# --- Training schedule ---
EPOCHS = 10
batch_size = 1

# --- Dataset sizes (len() of the loaders) ---
n_samples = len(train_loader)
val_samples = len(val_loader)
n_iterations = math.ceil(n_samples / batch_size)

In [6]:
class MultiTaskWord2vec(nn.Module):
    """Multi-task wrapper: one shared encoder feeding an intent head and an entity head.

    Args:
        embedding_size: accepted for signature compatibility; not used by this class.
        SharedModel: module applied first to the input.
        IntentModel: module applied to the shared output to produce the intent prediction.
        EntityModel: module applied to the shared output; must expose
            ``neg_log_likelihood(features, entities)`` for training and be callable for decoding.
    """

    def __init__(self, embedding_size, SharedModel, IntentModel, EntityModel):
        super().__init__()
        # Sub-modules are registered in the order shared -> intent -> entity.
        self.SharedModel, self.IntentModel, self.EntityModel = SharedModel, IntentModel, EntityModel

    def forward(self, x, entities=None, Train=False):
        """Return ``(intent_output, entity_output)`` for one input.

        When ``Train`` is truthy, ``entity_output`` is the entity loss computed against
        ``entities``; otherwise it is whatever the entity model returns when called
        (a score and a tag sequence, per the callers in this notebook).
        """
        shared_features = self.SharedModel(x)
        intent_output = self.IntentModel(shared_features)
        if not Train:
            return intent_output, self.EntityModel(shared_features)
        return intent_output, self.EntityModel.neg_log_likelihood(shared_features, entities)

In [7]:
# Functions for the embedding model

def get_embedding_model():
    """Download (or reuse the cached copy of) the pre-trained ELMo files and build the embedder.

    Returns:
        An ``Elmo`` module configured with a single output representation and no dropout.
    """
    # Weights are fetched before the options file, matching the original download order.
    weight_file = cached_path("https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5")
    options_file = cached_path("https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json")
    # Pass requires_grad=True here if the ELMo weights should be fine-tuned.
    return Elmo(options_file, weight_file, num_output_representations=1, dropout=0)

def get_elmo_embedding(sentence):
    """Embed one tokenized sentence with the global ``EmbeddingModel``.

    The sentence is wrapped in a one-element batch, converted to character ids and
    run through ELMo; the first (only) output representation of the first (only)
    batch element is returned.
    """
    char_ids = batch_to_ids([sentence])  # batch size = 1
    elmo_output = EmbeddingModel(char_ids)
    first_representation = elmo_output['elmo_representations'][0]
    return first_representation[0]

In [8]:
# Implementing checkpoints
def save_checkpoint_best(epoch, model, checkpoint_dir="/workspace/data/Dhruv/pytorch/BestModel"):
    """Save only the model weights as ``best_model_<epoch>.pt``.

    Args:
        epoch: epoch number embedded in the file name.
        model: module whose ``state_dict()`` is written.
        checkpoint_dir: target directory. Defaults to the location that used to be
            hard-coded, so existing calls write to the same file as before.

    The directory is created if it does not exist; ``torch.save`` does not create
    missing parent directories and would otherwise raise at save time.
    """
    import os  # local import so this cell does not depend on the import cell being edited

    print("Saving best model")
    os.makedirs(checkpoint_dir, exist_ok=True)
    # Plain "/" concatenation keeps the default path byte-identical to the original one.
    PATH = checkpoint_dir + "/best_model_" + str(epoch) + ".pt"
    torch.save(model.state_dict(), PATH)

def save_checkpoint(epoch, model, optimizer,
                    checkpoint_dir="C:/Users/dhruv/Desktop/ASP sem 1/Capstone/Models/FinalWord2vecModels"):
    """Save a resumable checkpoint (epoch, model weights, optimizer state) as ``model_<epoch>.pt``.

    Args:
        epoch: epoch number stored in the checkpoint and embedded in the file name.
        model: module whose ``state_dict()`` is stored under ``'model_state_dict'``.
        optimizer: optimizer whose ``state_dict()`` is stored under ``'optimizer_state_dict'``.
        checkpoint_dir: target directory. Defaults to the location that used to be
            hard-coded, so existing calls write to the same file as before.

    The directory is created if it does not exist, so a missing folder no longer
    throws away a finished epoch when the save is attempted.
    """
    import os  # local import so this cell does not depend on the import cell being edited

    os.makedirs(checkpoint_dir, exist_ok=True)
    # Plain "/" concatenation keeps the default path byte-identical to the original one.
    PATH = checkpoint_dir + "/model_" + str(epoch) + ".pt"
    print("Saving model")
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
            }, PATH)

In [9]:
EntityModel = BiLSTM_CRF(tag_to_ix, EMBEDDING_DIM, HIDDEN_DIM).to(device)
IntentModel = IntentBiLSTM(N_INTENTS, EMBEDDING_DIM, HIDDEN_DIM, batch_size).to(device)

# ELMo is a fixed feature extractor here: it is not part of `model`, so the optimizer never
# updates it. Freeze it explicitly so autograd does not track (and accumulate gradients in)
# parameters that are never stepped or zeroed, and put it in eval mode.
# NOTE(review): EmbeddingModel stays on the CPU; moving it to `device` would also require
# moving the character ids inside get_elmo_embedding().
EmbeddingModel = get_embedding_model()
for frozen_param in EmbeddingModel.parameters():
    frozen_param.requires_grad = False
EmbeddingModel.eval()

SharedModel = SharedBiLSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_size).to(device)
model = MultiTaskWord2vec(EMBEDDING_DIM, SharedModel, IntentModel, EntityModel).to(device)

In [10]:
# Adam over the parameters of the multi-task model (shared encoder + both heads).
optimizer = optim.Adam(model.parameters(), lr=5e-4)
# train_model() appends one summed training loss per epoch to this list.
tr_loss_log = list()

In [11]:
# Disabled TensorBoard graph-export snippet, kept as a bare string so it never executes
# (the cell only echoes the string back as its output).
# NOTE(review): the snippet indexes `EmbeddingModel.wv` and calls `example.next()`, whereas
# EmbeddingModel in this notebook is the Elmo module returned by get_embedding_model() —
# it looks like a leftover from an earlier variant; confirm before re-enabling.
'''
example = iter(val_loader)
example_q, example_s, example_i = example.next()
example_q = torch.from_numpy(EmbeddingModel.wv[example_q]) # Here assuming sentence is already tokenized
example_q = torch.unsqueeze(example_q, 1).to(device)
example_s = torch.tensor([tag_to_ix[t] for t in example_s], dtype=torch.long)
writer.add_graph(model, (example_q.to(device), example_i.to(device), example_s.to(device)))
writer.close()
'''

'\nexample = iter(val_loader)\nexample_q, example_s, example_i = example.next()\nexample_q = torch.from_numpy(EmbeddingModel.wv[example_q]) # Here assuming sentence is already tokenized\nexample_q = torch.unsqueeze(example_q, 1).to(device)\nexample_s = torch.tensor([tag_to_ix[t] for t in example_s], dtype=torch.long)\nwriter.add_graph(model, (example_q.to(device), example_i.to(device), example_s.to(device)))\nwriter.close()\n'

In [14]:
# Training Loop
def _run_validation():
    """Score the global ``model`` on ``val_loader`` and return summed per-sample metrics.

    Returns:
        ``(intent_ICA, entity_score, entity_f1)`` — sums over the validation loader.
        The caller divides by ``val_samples`` to obtain averages.

    The caller is responsible for switching the model to eval mode beforehand.
    """
    intent_ICA = 0.0
    entity_score = 0.0
    entity_f1 = 0.0
    with torch.no_grad():
        for sentence, tags, intent in val_loader:
            sentence_in = get_elmo_embedding(sentence)  # Here assuming sentence is already tokenized
            sentence_in = torch.unsqueeze(sentence_in, 1).to(device)
            targets = [tag_to_ix[t] for t in tags]
            intent = intent.float().to(device)
            # Without Train=True the entity head returns (score, predicted tag sequence).
            pred_i, (entity_score_1, pred_entities) = model(sentence_in)
            intent_ICA += get_ICA(pred_i[0], intent)  # intent classification accuracy
            entity_f1 += f1_score(targets, pred_entities, average='micro')
            entity_score += entity_score_1
    return intent_ICA, entity_score, entity_f1


def train_model():
    """Train the global ``model`` for ``EPOCHS`` epochs, validating and checkpointing each epoch.

    Per epoch this:
      * runs one optimisation step per item of ``train_loader`` on the combined loss
        (intent cross-entropy + entity loss / 10) and appends the summed loss to ``tr_loss_log``;
      * evaluates on ``val_loader`` in eval mode without gradients;
      * saves a resumable checkpoint via ``save_checkpoint``;
      * prints a summary line and logs three scalars to the TensorBoard ``writer``.

    The writer is flushed after every epoch and closed once when training ends (or fails),
    instead of being closed and implicitly reopened on every epoch.
    """
    try:
        for epoch in range(EPOCHS):

            beg_time = time.time()  # To calculate time taken for each epoch
            train_loss = 0.0

            for sentence, tags, intent in train_loader:
                optimizer.zero_grad()
                sentence_in = get_elmo_embedding(sentence)  # Here assuming sentence is already tokenized
                sentence_in = torch.unsqueeze(sentence_in, 1).to(device)
                targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long).to(device)
                intent = intent.to(device)
                # Forward pass: with Train=True the entity head returns its loss directly.
                pred_i, loss_e = model(sentence_in, entities=targets, Train=True)
                # Cross entropy between the intent target and the predicted distribution.
                # NOTE(review): if pred_i can contain exact zeros, 0 * log(0) yields NaN here —
                # confirm IntentBiLSTM's output range or switch to a log-softmax based loss.
                CrossEntropyIntentLoss = torch.sum(-intent * torch.log(pred_i[0]))
                # The entity loss is scaled down by a factor of 10 before being added.
                loss = CrossEntropyIntentLoss + loss_e/10
                loss.backward()
                # Update parameters
                optimizer.step()
                # Accumulate the (summed, not averaged) training loss
                train_loss += loss.item()
            tr_loss_log.append(train_loss)

            model.eval()
            try:
                intent_ICA, entity_score, entity_f1 = _run_validation()
            finally:
                # Always return to training mode, even if validation raised.
                model.train()

            # Saving checkpoints
            save_checkpoint(epoch+1, model, optimizer)
            # TODO: no validation loss is computed yet, so "best model" tracking via
            # save_checkpoint_best() is not wired up.

            end_time = time.time()
            print('Epoch: {:.0f}/{:.0f}, Time: {:.0f}m {:.0f}s, Train_Loss: {:.4f}, Val_ICA: {:.4f}, Val_entity_score: {:.4f}, Val_entity_F1_score: {:.4f}'.format(
                epoch+1, EPOCHS, (end_time-beg_time)//60, (end_time-beg_time)%60, train_loss, intent_ICA/val_samples, entity_score/val_samples, entity_f1/val_samples))
            writer.add_scalar('Training_loss', train_loss, (epoch+1))
            writer.add_scalar('Val_ICA', intent_ICA/val_samples, (epoch+1))
            writer.add_scalar('Val_entity_F1_score', entity_f1/val_samples, (epoch+1))
            # Make this epoch's scalars visible to TensorBoard without closing the writer.
            writer.flush()
    finally:
        writer.close()

In [None]:
# Start training; train_model() reads the globals built in the cells above
# (model, optimizer, train_loader, val_loader, writer, EPOCHS).
train_model()

In [None]:
# Launch TensorBoard inline, pointed at the runs/ directory that `writer` logs into.
%tensorboard --logdir=runs/

# Testing the model

In [None]:
# Restore the weights saved at epoch 8 and switch to inference mode.
# map_location=device remaps the stored tensors onto whatever device this session uses, so a
# checkpoint written on a GPU machine also loads on a CPU-only one.
# NOTE(review): save_checkpoint() in this notebook writes to .../FinalWord2vecModels/, while this
# path points at .../FinalElmoNoFTModels/ — confirm the file was produced by this notebook.
checkpoint = torch.load("C:/Users/dhruv/Desktop/ASP sem 1/Capstone/Models/FinalElmoNoFTModels/model_8.pt", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

In [11]:
# Evaluate the restored model on the held-out test loader and report averaged metrics.
test_samples = len(test_loader)
entity_score, entity_f1, intent_ICA = 0.0, 0.0, 0.0

with torch.no_grad():
    for sentence, tags, intent in test_loader:
        # Here assuming sentence is already tokenized
        model_input = get_elmo_embedding(sentence).unsqueeze(1).to(device)
        gold_tag_ids = [tag_to_ix[tag] for tag in tags]
        intent = intent.float().to(device)
        # In inference mode the model returns the intent output plus (score, tag sequence).
        pred_i, (sample_score, pred_entities) = model(model_input)
        entity_score += sample_score
        entity_f1 += f1_score(gold_tag_ids, pred_entities, average='micro')
        intent_ICA += get_ICA(pred_i[0], intent)  # intent classification accuracy

print('Test_ICA: {:.4f}, Test_entity_score: {:.4f}, Test_entity_F1_score: {:.4f}'.format(
    intent_ICA/test_samples, entity_score/test_samples, entity_f1/test_samples))

Test_ICA: 0.9586, Test_entity_score: 111.2955, Test_entity_F1_score: 0.9725
