In [1]:
# ! pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

import os
from collections import defaultdict

import numpy as np

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [4]:
def create_vocab(path, threshold=3): 
    with open(path,'r') as f:
        lines = f.readlines()

        word_count = defaultdict(int)
        tag_count = defaultdict(int)
        word_count['<pad>']=0
        word_count['<unk>']=0

        sentences = [] #list of sentences
        tagged_sentences = [] #tag of words sentences
        sentence = [] #list of words in sentence
        tagged_sentence = [] #list of tag for words in sentence

        orig_sentences = [] #to retain original sentences
        orig_sentence = [] #to retain original words

        for line in lines:
            data = line.split()          
            if data: #row is not empty
                word_count[data[1].lower()]+=1      #edit to test boolean mask
                tag_count[data[2]]+=1
        
        for line in lines:
            data = line.split()
            if not data: #row is empty
                sentences.append(sentence)
                orig_sentences.append(orig_sentence)
                tagged_sentences.append(tagged_sentence)
                sentence = []
                orig_sentence = []
                tagged_sentence = []
            else: 
                if word_count[data[1].lower()]<threshold:           #edit to test boolean mask
                    word_count['<unk>'] += word_count[data[1]]
                    del word_count[data[1]]
                sentence.append(data[1].lower())
                orig_sentence.append(data[1])
                tagged_sentence.append(data[2])
        sentences.append(sentence)
        tagged_sentences.append(tagged_sentence)
        orig_sentences.append(orig_sentence)
    return orig_sentences, sentences, tagged_sentences, word_count, tag_count

train_sentences, train_model_sentences, tagged_sentences, vocab, tag_count = create_vocab('data/train')

word_list = list(vocab.keys()) 
tag_list = list(tag_count.keys())
vocab_size = len(word_list)
char_list = ['<pad>']
char_list.extend([*"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}"])

In [5]:
def embed_input(sentences, vocab, word_list):
    embed_inp = [[word_list.index(word) if word in vocab else word_list.index('<unk>') for word in sentence] for sentence in sentences]
    return embed_inp
    
def boolean_mask(sentences): #0 -> capital, 1 -> lower
    return [[1 if word==word.lower() else 0 for word in sentence] for sentence in sentences]

def encode_tags(tags):
    return [[tag_list.index(tag) for tag in sentence] for sentence in tags]  

def encode_char_sequence(sentences):
    maxsentlen = -np.Inf
    maxwordlen = -np.Inf
    for sentence in sentences:
        maxsentlen = max(maxsentlen, len(sentence))
        for word in sentence:
            maxwordlen = max(maxwordlen, len(word))
    encoded_chars = torch.zeros(len(sentences),maxsentlen, maxwordlen)
    for i, sentence in enumerate(sentences):
        for j, word in enumerate(sentence):
            for k, char in enumerate(word):
                encoded_chars[i][j][k] = char_list.index(char)
    return encoded_chars

In [6]:
def process_dev(path): #returns original sentences, sentences to be modelled, tags of sentences
    with open(path,'r') as f:
        lines = f.readlines()

        model_sentences = [] #list of sentences
        tagged_sentences = [] #tag of words sentences

        model_sentence = [] #list of words in sentence
        tagged_sentence = [] #list of tag for words in sentence

        orig_sentences = [] #to retain original sentences
        orig_sentence = []

        for line in lines:
            data = line.split()
            if data: #row not empty
                model_sentence.append(data[1].lower())
                tagged_sentence.append(data[2])
                orig_sentence.append(data[1])
            else:
                model_sentences.append(model_sentence)
                tagged_sentences.append(tagged_sentence)
                orig_sentences.append(orig_sentence)
                model_sentence=[]
                tagged_sentence=[]
                orig_sentence=[]
        model_sentences.append(model_sentence)
        tagged_sentences.append(tagged_sentence)
        orig_sentences.append(orig_sentence)
    return orig_sentences, model_sentences, tagged_sentences

In [7]:
def process_test(path):
    with open(path,'r') as f:
        lines = f.readlines()

        model_sentences = [] #list of sentences
        model_sentence = [] #list of words in sentence

        orig_sentences = [] #to retain original sentences
        orig_sentence = []

        for line in lines:
            data = line.split()
            if data: #row not empty
                model_sentence.append(data[1].lower())
                orig_sentence.append(data[1])
            else:
                model_sentences.append(model_sentence)
                orig_sentences.append(orig_sentence)
                model_sentence=[]
                orig_sentence=[]
        model_sentences.append(model_sentence)
        orig_sentences.append(orig_sentence)
    return orig_sentences, model_sentences

In [86]:
target_weight = [1 if tag!='O' else 0.7 for tag in tag_list]
loss_fn = nn.CrossEntropyLoss(weight=torch.tensor(target_weight, device=device),ignore_index=-1)

In [87]:
class LSTMDataset(Dataset):
    def __init__(self, sentences, tags, masks):
        self.sentences = pad_sequence([torch.tensor(sentence) for sentence in sentences], batch_first=True, padding_value=0)
        self.lengths = torch.tensor([len(sentence) for sentence in sentences])
        self.tags = pad_sequence([torch.tensor(tag) for tag in tags], batch_first=True, padding_value=-1)
        self.masks = pad_sequence([torch.tensor(mask) for mask in masks], batch_first=True, padding_value=2)
        
    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        sentence = self.sentences[idx]
        tag = self.tags[idx]
        length = self.lengths[idx]
        mask = self.masks[idx]
        return sentence, length, tag, mask

In [88]:
class LSTMTestDataset(Dataset):
    def __init__(self, sentences, masks):
        self.sentences = pad_sequence([torch.tensor(sentence) for sentence in sentences], batch_first=True, padding_value=0)
        self.lengths = torch.tensor([len(sentence) for sentence in sentences])
        self.masks = pad_sequence([torch.tensor(mask) for mask in masks], batch_first=True, padding_value=2)
        
    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        sentence = self.sentences[idx]
        length = self.lengths[idx]
        mask = self.masks[idx]
        return sentence, length, mask

In [89]:
def evaluate_LSTM(model, dataloader):
    predicted_labels = []
    valid_loss=0
    model.eval()
    with torch.no_grad():
        for batch, lens, labels, masks in dataloader:
            packed_labels = pack_padded_sequence(labels, lens, batch_first=True, enforce_sorted=False)
            labels, len_labels = pad_packed_sequence(packed_labels, batch_first=True, padding_value=-1)
            batch, labels, masks = batch.to(device), labels.to(device), masks.to(device)
            y_pred = model(batch, lens, masks)
            y_pred = torch.permute(y_pred, dims=(0,2,1))
            y_pred_class = torch.argmax(torch.log_softmax(y_pred, dim=1), dim=1)
            loss = loss_fn(y_pred, labels)
            valid_loss+= loss.item()
            predicted_labels.extend(y_pred_class.cpu().numpy().tolist())
        
        valid_loss/=len(dataloader)
    print('Validation Loss : ', valid_loss)
    return predicted_labels, valid_loss

In [90]:
def trainLSTM(model_name, model, optimizer, train_dataloader, valid_dataloader, scheduler=None, epochs=10):
    valid_loss_min = np.Inf
    for epoch in range(epochs):
        train_loss = 0

        model.train()
        for batch, lens, labels, masks in train_dataloader:
            packed_labels = pack_padded_sequence(labels, lens, batch_first=True, enforce_sorted=False)
            labels,_ = pad_packed_sequence(packed_labels, batch_first=True, padding_value=-1)
            batch, labels, masks = batch.to(device), labels.to(device), masks.to(device)
            y_pred = model(batch, lens, masks)
            y_pred = torch.permute(y_pred, (0,2,1))
            loss = loss_fn(y_pred, labels)
            train_loss += loss.item()
            optimizer.zero_grad()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            loss.backward()
            optimizer.step()
        
        train_loss/=len(train_dataloader)
        if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(train_loss)
        elif scheduler is not None:
            scheduler.step()

        print('Epoch: {} \tTraining Loss: {:.4f} \t'.format(
        epoch+1, 
        train_loss,
        ))
        _, valid_loss = evaluate_LSTM(model, valid_dataloader)
        if valid_loss_min>valid_loss:
            print(f'Validation Loss Reduced ({valid_loss_min})--->({valid_loss}). Saving model')
            torch.save(model.state_dict(), f'{model_name}.pt')
            valid_loss_min = valid_loss

In [91]:
def get_test_preds(model, dataloader):
    predicted_labels = []
    model.eval()
    with torch.no_grad():
        for batch, lens, masks in dataloader:
            batch, masks = batch.to(device), masks.to(device)
            y_pred = model(batch, lens, masks)
            y_pred = torch.permute(y_pred, dims=(0,2,1))
            y_pred_class = torch.argmax(torch.log_softmax(y_pred, dim=1), dim=1)
            predicted_labels.extend(y_pred_class.cpu().numpy().tolist())
    return predicted_labels

In [138]:
embedded_train = embed_input(train_model_sentences, vocab, word_list)
encoded_train = encode_tags(tagged_sentences)  
train_masks = boolean_mask(train_sentences)

train_data = LSTMDataset(embedded_train, encoded_train, train_masks)
train_loader = DataLoader(train_data, batch_size=9, shuffle=True)

orig_dev_sentences, dev_model_sentences, dev_tags = process_dev('data/dev')

embedded_dev = embed_input(dev_model_sentences, vocab, word_list)
encoded_dev = encode_tags(dev_tags)
dev_masks = boolean_mask(orig_dev_sentences)

valid_data = LSTMDataset(embedded_dev, encoded_dev, dev_masks)
valid_loader = DataLoader(valid_data, batch_size=128)

orig_test_sentences, test_model_sentences = process_test('data/test')

embedded_test = embed_input(test_model_sentences, vocab, word_list)
test_masks = boolean_mask(orig_test_sentences)

test_data = LSTMTestDataset(embedded_test, test_masks)
test_loader = DataLoader(test_data, batch_size=128)

In [93]:
class BLSTM1(nn.Module):
    def __init__(self, vocab_size, cap_embed_size=10):
        super(BLSTM1, self).__init__()
        self.embedding = nn.Embedding(vocab_size, 100, padding_idx=0)
        self.cap_embedding = nn.Embedding(3, cap_embed_size, padding_idx=0)
        nn.init.uniform_(self.cap_embedding.weight, -np.sqrt(3/cap_embed_size), np.sqrt(3/cap_embed_size))
        self.lstm = nn.LSTM(input_size=100+cap_embed_size, hidden_size=256, batch_first=True, bidirectional=True)
        for name, param in self.lstm.named_parameters():
            if 'bias' in name:
                nn.init.zeros_(param)
            elif 'weight' in name:
                nn.init.uniform_(param, -np.sqrt(6/(len(param)+len(param[0]))), np.sqrt(6/(len(param)+len(param[0]))))
        self.dropout = nn.Dropout(0.33)
        self.linear = nn.Linear(512, 128)
        self.elu = nn.ELU()
        self.out = nn.Linear(128,9)

    def forward(self, x, lengths, masks):
        # print('x shape ', x.shape)
        output = self.embedding(x)
        # print('after embedding ', output.shape)
        cap_embeds = self.cap_embedding(masks)
        output = torch.cat((output, cap_embeds), 2)
        output = pack_padded_sequence(output, lengths, batch_first=True, enforce_sorted=False)
        output, _ = self.lstm(output)
        output, _ = pad_packed_sequence(output, batch_first=True,padding_value=0)
        # print('after lstm ', output.shape)
        output = self.dropout(output)
        output = self.linear(output)
        # print('after linear ', output.shape)
        output = self.elu(output)
        output = self.out(output)
        # print('after classifier ', output.shape)
        return output

blstm1 = BLSTM1(vocab_size).to(device)
optimizer = torch.optim.SGD(blstm1.parameters(), lr=0.5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,'min',0.5)
# scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, 0.01, 0.1)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer,10,0.5)

trainLSTM('blstm1', blstm1, optimizer, train_loader, valid_loader, scheduler)

Epoch: 1 	Training Loss: 0.3622 	
Validation Loss :  0.22366638348570891
Validation Loss Reduced (inf)--->(0.22366638348570891). Saving model
Epoch: 2 	Training Loss: 0.1892 	
Validation Loss :  0.16978095219071423
Validation Loss Reduced (0.22366638348570891)--->(0.16978095219071423). Saving model
Epoch: 3 	Training Loss: 0.1318 	
Validation Loss :  0.15521524513938598
Validation Loss Reduced (0.16978095219071423)--->(0.15521524513938598). Saving model
Epoch: 4 	Training Loss: 0.0945 	
Validation Loss :  0.1422420060262084
Validation Loss Reduced (0.15521524513938598)--->(0.1422420060262084). Saving model
Epoch: 5 	Training Loss: 0.0693 	
Validation Loss :  0.14214840145515545
Validation Loss Reduced (0.1422420060262084)--->(0.14214840145515545). Saving model
Epoch: 6 	Training Loss: 0.0526 	
Validation Loss :  0.15663559189332382
Epoch: 7 	Training Loss: 0.0409 	
Validation Loss :  0.15785922283040627
Epoch: 8 	Training Loss: 0.0323 	
Validation Loss :  0.16699592542967626
Epoch: 9 	

In [139]:
blstmt = BLSTM1(vocab_size).to(device)
blstmt.load_state_dict(torch.load('blstm1_final.pt'))
predicted_labels, _ = evaluate_LSTM(blstm1, valid_loader)
predicted_labels1, _ = evaluate_LSTM(blstmt, valid_loader)
with open('predictions1.txt','w') as f:
    for i, sentence in enumerate(dev_model_sentences):
        for j, word in enumerate(sentence):
            f.write(f'{j+1} {word} {dev_tags[i][j]} {tag_list[predicted_labels[i][j]]}\n')
        f.write('\n')
with open('predictions2.txt', 'w') as f:
    for i, sentence in enumerate(dev_model_sentences):
        for j, word in enumerate(sentence):
            f.write(f'{j+1} {word} {dev_tags[i][j]} {tag_list[predicted_labels1[i][j]]}\n')
        f.write('\n')

Validation Loss :  0.17801679657506092
Validation Loss :  0.17583868971892766


In [95]:
blstmt = BLSTM1(vocab_size).to(device)
blstmt.load_state_dict(torch.load('blstm1.pt'))

valid_data = LSTMTestDataset(embedded_dev, dev_masks)
valid_loader = DataLoader(valid_data, batch_size=128)

predicted_labels = get_test_preds(blstmt, valid_loader)
with open('dev1.out', 'w') as f:
    for i, sentence in enumerate(orig_dev_sentences):
        for j, word in enumerate(sentence):
            f.write(f'{j+1} {word} {tag_list[predicted_labels[i][j]]}\n')
        f.write('\n')

orig_test_sentences, test_model_sentences = process_test('data/test')

embedded_test = embed_input(test_model_sentences, vocab, word_list)
test_masks = boolean_mask(orig_test_sentences)

test_data = LSTMTestDataset(embedded_test, test_masks)
test_loader = DataLoader(test_data, batch_size=128)

predicted_labels = get_test_preds(blstmt, test_loader)
with open('test1.out','w') as f:
    for i, sentence in enumerate(orig_test_sentences):
        for j, word in enumerate(sentence):
            f.write(f'{j+1} {word} {tag_list[predicted_labels[i][j]]}\n')
        f.write('\n')

In [121]:
def create_glove_vocab(vocab):
    with open('glove.6B.100d/glove.6B.100d.txt','r',encoding='utf-8') as f:
        lines=f.readlines()
        glove_vocab = defaultdict(list)
        glove_vocab['<pad>']=np.zeros(100)
        glove_vocab['<unk>']=np.random.uniform(-1,1,100)
        for line in lines:
            word, *embedding = line.split()
            if word in vocab:
                glove_vocab[word] = np.array(embedding).astype(np.double)
        for w in list(set(vocab.keys())-set(glove_vocab.keys())):
            glove_vocab[w] = np.random.uniform(-1,1,100)
        return glove_vocab

In [124]:
glove_vocab = create_glove_vocab(vocab)
glove_embeddings = list(glove_vocab.values())
glove_list = list(glove_vocab.keys())


In [146]:
embedded_train = embed_input(train_model_sentences, glove_vocab, glove_list)
train_masks = boolean_mask(train_model_sentences)
encoded_train_tags = encode_tags(tagged_sentences)

train_data = LSTMDataset(embedded_train, encoded_train_tags, train_masks)
train_loader = DataLoader(train_data, batch_size=9, shuffle=True)

orig_dev_sentences, dev_model_sentences, dev_tags = process_dev('data/dev')
embedded_dev = embed_input(dev_model_sentences, glove_vocab, glove_list)
dev_masks = boolean_mask(dev_model_sentences)
encoded_dev = encode_tags(dev_tags)

valid_data = LSTMDataset(embedded_dev, encoded_dev, dev_masks)
valid_loader = DataLoader(valid_data, batch_size=128)

orig_test_sentences, test_model_sentences = process_test('data/test')
embedded_test = embed_input(test_model_sentences, glove_vocab, glove_list)
test_masks = boolean_mask(orig_test_sentences)

test_data = LSTMTestDataset(embedded_test, test_masks)
test_loader = DataLoader(test_data, batch_size=128)

In [125]:
class BLSTM2(nn.Module):
    def __init__(self, vocab_size, embeddings, cap_embed_size=10):
        super(BLSTM2, self).__init__()
        self.embedding = nn.Embedding(vocab_size, 100, padding_idx=0)
        self.embedding.weight = nn.Parameter(torch.tensor(embeddings, dtype=torch.float))
        self.cap_embedding = nn.Embedding(3, cap_embed_size, padding_idx=2)
        nn.init.uniform_(self.cap_embedding.weight, -np.sqrt(3/cap_embed_size), np.sqrt(3/cap_embed_size))
        self.lstm = nn.LSTM(100+cap_embed_size, hidden_size=256, batch_first=True, bidirectional=True)
        for name, param in self.lstm.named_parameters():
            if 'bias' in name:
                nn.init.zeros_(param)
            elif 'weight' in name:
                nn.init.uniform_(param, -np.sqrt(6/(len(param)+len(param[0]))), np.sqrt(6/(len(param)+len(param[0]))))
        self.dropout = nn.Dropout(0.33)
        self.linear = nn.Linear(512, 128)
        self.elu = nn.ELU()
        self.out = nn.Linear(128,9)

    def forward(self, x, lengths, masks):
        # print('x shape ', x.shape)
        output = self.embedding(x)
        # print('after embedding ', output.shape)
        cap_embeds = self.cap_embedding(masks)
        # print('mask shape ', cap_embeds.shape)
        output = torch.cat((output, cap_embeds), 2)
        # print('concatenated shape ', output.shape)
        output = pack_padded_sequence(output, lengths, batch_first=True, enforce_sorted=False)
        output, _ = self.lstm(output)
        output, _ = pad_packed_sequence(output, batch_first=True,padding_value=0)
        # print('after lstm ', output.shape)
        output = self.dropout(output)
        output = self.linear(output)
        # print('after linear ', output.shape)
        output = self.elu(output)
        output = self.out(output)
        # print('after classifier ', output.shape)
        return output
    
blstm2 = BLSTM2(len(glove_list), glove_embeddings).to(device)
optimizer2 = torch.optim.SGD(blstm2.parameters(), lr=0.5)
scheduler2 = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer2,'min',0.5)
# scheduler2 = torch.optim.lr_scheduler.CyclicLR(optimizer2, 0.01, 0.1)
# scheduler2 = torch.optim.lr_scheduler.StepLR(optimizer,5,0.5)
        
trainLSTM('blstm2', blstm2, optimizer2, train_loader, valid_loader, scheduler=scheduler2)

In [147]:
blstmt = BLSTM2(len(vocab), glove_embeddings).to(device)
blstmt.load_state_dict(torch.load('blstm2.pt'))

pred_labels1, _ = evaluate_LSTM(blstmt, valid_loader)
with open('predictions3.txt','w') as f:
    for i, sentence in enumerate(dev_model_sentences):
        for j, word in enumerate(sentence):
            f.write(f'{j+1} {word} {dev_tags[i][j]} {tag_list[pred_labels1[i][j]]}\n')
        f.write('\n')

Validation Loss :  2.0651948153972626


In [113]:
blstmt = BLSTM2(len(glove_list), glove_embeddings).to(device)
blstmt.load_state_dict(torch.load('blstm2.pt'))

valid_data = LSTMTestDataset(embedded_dev, dev_masks)
valid_loader = DataLoader(valid_data, batch_size=128)

predicted_labels = get_test_preds(blstmt, valid_loader)
with open('dev2.out', 'w') as f:
    for i, sentence in enumerate(orig_dev_sentences):
        for j, word in enumerate(sentence):
            f.write(f'{j+1} {word} {tag_list[predicted_labels[i][j]]}\n')
        f.write('\n')

predicted_labels = get_test_preds(blstmt, test_loader)
with open('test2.out','w') as f:
    for i, sentence in enumerate(orig_test_sentences):
        for j, word in enumerate(sentence):
            f.write(f'{j+1} {word} {tag_list[predicted_labels[i][j]]}\n')
        f.write('\n')

In [114]:
# class LSTMCNNDataset(Dataset):
#     def __init__(self, sentences, tags, masks, char_seq):
#         self.sentences = pad_sequence([torch.tensor(sentence) for sentence in sentences], batch_first=True, padding_value=0)
#         self.lengths = torch.tensor([len(sentence) for sentence in sentences])
#         self.tags = pad_sequence([torch.tensor(tag) for tag in tags], batch_first=True, padding_value=-1)
#         self.masks = pad_sequence([torch.tensor(mask) for mask in masks], batch_first=True, padding_value=2)
#         self.char_seqs = char_seq
#         # print(self.sentences.shape)
#         # print(self.tags.shape)
#         # print(self.masks.shape)
#         # print(self.lengths.shape)
        
#     def __len__(self):
#         return len(self.sentences)

#     def __getitem__(self, idx):
#         sentence = self.sentences[idx]
#         tag = self.tags[idx]
#         length = self.lengths[idx]
#         mask = self.masks[idx]
#         char_seq = self.char_seqs[idx]
#         return sentence, length, tag, mask, char_seq

In [115]:
# def evaluate_LSTMCNN(model, dataloader):
#     predicted_labels = []
#     valid_loss=0
#     model.eval()
#     with torch.no_grad():
#         for batch, lens, labels, masks, char_seq in dataloader:
#             packed_labels = pack_padded_sequence(labels, lens, batch_first=True, enforce_sorted=False)
#             labels, len_labels = pad_packed_sequence(packed_labels, batch_first=True, padding_value=-1)
#             batch, labels, masks = batch.to(device), labels.to(device), masks.to(device)
#             y_pred = model(batch, lens, masks, char_seq)
#             y_pred = torch.permute(y_pred, dims=(0,2,1))
#             y_pred_class = torch.argmax(torch.log_softmax(y_pred, dim=1), dim=1)
#             loss = loss_fn(y_pred, labels)
#             valid_loss+= loss.item()
#             predicted_labels.extend(y_pred_class.cpu().numpy().tolist())
        
#         valid_loss/=len(dataloader)
#     print('Validation Loss : ', valid_loss)
#     return predicted_labels, valid_loss

In [116]:
# def trainLSTMCNN(model, optimizer, train_dataloader, valid_dataloader, scheduler=None, epochs=10):
#     valid_loss_min = np.Inf
#     for epoch in range(epochs):
#         train_loss = 0

#         model.train()
#         for batch, lens, labels, masks, char_seq in train_dataloader:
#             packed_labels = pack_padded_sequence(labels, lens, batch_first=True, enforce_sorted=False)
#             labels,_ = pad_packed_sequence(packed_labels, batch_first=True, padding_value=-1)
#             batch, labels, masks = batch.to(device), labels.to(device), masks.to(device)
#             y_pred = model(batch, lens, masks, char_seq)
#             y_pred = torch.permute(y_pred, (0,2,1))
#             loss = loss_fn(y_pred, labels)
#             train_loss += loss.item()
#             optimizer.zero_grad()
#             torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
#             loss.backward()
#             optimizer.step()
        
#         train_loss/=len(train_dataloader)
#         if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
#             scheduler.step(train_loss)
#         elif scheduler is not None:
#             scheduler.step()

#         print('Epoch: {} \tTraining Loss: {:.4f} \t'.format(
#         epoch+1, 
#         train_loss,
#         ))
#         _, valid_loss = evaluate_LSTMCNN(model, valid_dataloader)
#         if valid_loss_min>valid_loss:
#             print(f'Validation Loss Reduced ({valid_loss_min})--->({valid_loss}). Saving model')
#             torch.save(model.state_dict(), 'blstmcnn.pt')
#             valid_loss_min = valid_loss

In [117]:
# embedded_train = embed_input(train_model_sentences, glove_vocab, glove_list)
# train_masks = boolean_mask(train_model_sentences)
# encoded_train_tags = encode_tags(tagged_sentences)
# train_encoded_chars = encode_char_sequence(train_sentences)

# # print(len(embedded_train[0]), len(train_masks[0]), len(encoded_train_tags[0]))

# train_data = LSTMCNNDataset(embedded_train, encoded_train_tags, train_masks, train_encoded_chars)
# train_loader = DataLoader(train_data, batch_size=9, shuffle=True)

# orig_dev_sentences, dev_model_sentences, dev_tags = process_dev('data/dev')
# embedded_dev = embed_input(dev_model_sentences, glove_vocab, glove_list)
# dev_masks = boolean_mask(dev_model_sentences)
# encoded_dev = encode_tags(dev_tags)
# dev_encoded_chars = encode_char_sequence(orig_dev_sentences)

# valid_data = LSTMCNNDataset(embedded_dev, encoded_dev, dev_masks, dev_encoded_chars)
# valid_loader = DataLoader(valid_data, batch_size=128)

In [118]:
# class BLSTMCNN(nn.Module):
#     def __init__(self, vocab_size, embeddings, cap_embed_dims=10, cnn_out=53, char_embed_dims=30):
#         super(BLSTMCNN, self).__init__()
#         self.char_embeds = nn.Embedding(len(char_list), char_embed_dims)
#         nn.init.uniform_(self.char_embeds.weight, -np.sqrt(3/char_embed_dims),np.sqrt(3/char_embed_dims))
#         self.cnn = nn.Sequential(
#             nn.Conv1d(1, cnn_out, kernel_size=3, padding=1),
#             nn.ReLU(),
#             nn.Dropout(0.2),
#             nn.MaxPool1d(3)
#         )
#         self.embedding = nn.Embedding(vocab_size, 100, padding_idx=0)
#         self.embedding.weight = nn.Parameter(torch.tensor(embeddings, dtype=torch.float))
#         self.cap_embedding = nn.Embedding(3, cap_embed_dims, padding_idx=2)
#         nn.init.uniform_(self.cap_embedding.weight, -np.sqrt(3/cap_embed_dims),np.sqrt(3/cap_embed_dims))
#         self.lstm = nn.LSTM(100+cap_embed_dims+cnn_out, 256, batch_first=True, bidirectional=True)
#         for name, param in self.lstm.named_parameters():
#             if 'bias' in name:
#                 nn.init.zeros_(param)
#             elif 'weight' in name:
#                 nn.init.uniform_(param, -np.sqrt(6/(len(param)+len(param[0]))), np.sqrt(6/(len(param)+len(param[0]))))
#         self.dropout = nn.Dropout(0.33)
#         self.linear = nn.Linear(512, 128)
#         self.elu = nn.ELU()
#         self.out = nn.Linear(128,9)

#     def forward(self, x, lengths, masks, char_seq):
#         # print('x shape ', x.shape)
#         output = self.embedding(x)
#         # print('after embedding ', output.shape)
#         # print('masks shape ', masks.shape)
#         # print(masks)
#         cap_embeds = self.cap_embedding(masks)
#         # print('cap embed shape ', cap_embeds.shape)
#         char_embeds = self.char_embeds(char_seq)
#         char_cnn = self.cnn(char_embeds)
#         # print('char embed shape ', char_embed.shape)
#         output = torch.cat((output, cap_embeds, char_cnn), 2)
#         print('concatenated shape ', output.shape)
#         output = pack_padded_sequence(output, lengths, batch_first=True, enforce_sorted=False)
#         output, _ = self.lstm(output)
#         output, _ = pad_packed_sequence(output, batch_first=True,padding_value=0)
#         # print('after lstm ', output.shape)
#         output = self.dropout(output)
#         output = self.linear(output)
#         # print('after linear ', output.shape)
#         output = self.elu(output)
#         output = self.out(output)
#         # print('after classifier ', output.shape)
#         return output
    

# lstmcnn = BLSTMCNN(len(glove_list), glove_embeddings).to(device)
# optimizer3 = torch.optim.SGD(lstmcnn.parameters(), lr=0.5)
# scheduler3 = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer3,'min',0.5)
# # scheduler2 = torch.optim.lr_scheduler.CyclicLR(optimizer2, 0.01, 0.1)
# # scheduler2 = torch.optim.lr_scheduler.StepLR(optimizer,5,0.5)
        
# trainLSTMCNN(lstmcnn, optimizer3, train_loader, valid_loader, scheduler3)

In [119]:
# lstmcnn.load_state_dict(torch.load('blstmcnn.pt'))
# pred_labels, _ = evaluate_LSTMCNN(lstmcnn, valid_loader)
# with open('predictions3.txt','w') as f:
#     for i, sentence in enumerate(dev_model_sentences):
#         for j, word in enumerate(sentence):
#             f.write(f'{j+1} {word} {dev_tags[i][j]} {tag_list[pred_labels[i][j]]}\n')
#         f.write('\n')