In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from torchtext.legacy import data
import spacy
import numpy as np

import time
import random
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
from tqdm import tqdm

In [2]:
BOS_WORD = '<sos>'
EOS_WORD = '<eos>'
BLANK_WORD = "<blank>"
MAX_LEN = 85
TEXT = data.Field(lower = True, pad_token= BLANK_WORD,init_token= BOS_WORD, eos_token= EOS_WORD,fix_length=MAX_LEN)
POS = data.Field(unk_token = None, pad_token= BLANK_WORD,init_token= BOS_WORD, eos_token= EOS_WORD,fix_length=MAX_LEN)
NEG_SCOPE = data.Field(unk_token = None, pad_token= BLANK_WORD,init_token= BOS_WORD, eos_token= EOS_WORD,fix_length=MAX_LEN)

fields = (("Sentence", TEXT), ("POS", POS), ("Neg_Scope", NEG_SCOPE))

In [3]:
train, val, test = data.TabularDataset.splits(path='./', train='data/train.csv', validation='data/val.csv',test= 'data/test_cardboard.csv', format='csv', fields=fields, skip_header=True)

In [4]:
TEXT.build_vocab(train, val)
POS.build_vocab(train, val)
NEG_SCOPE.build_vocab(train, val)

In [5]:
len(val.examples)

99

In [6]:
class MyModel(nn.Module):
    def __init__(self, 
                 input_dim, 
                 embedding_dim, 
                 hidden_dim, 
                 output_dim1,
                 output_dim2,
                 n_layers, 
                 bidirectional, 
                 dropout, 
                 pad_idx):
        
        super().__init__()
        
        self.embedding = nn.Embedding(input_dim, embedding_dim, padding_idx = pad_idx)
        
        
        self.lstm = nn.LSTM(embedding_dim, 
                            hidden_dim, 
                            num_layers = n_layers, 
                            bidirectional = bidirectional,
                            dropout = dropout if n_layers > 1 else 0)
        self.T1 = torch.nn.Transformer(d_model=hidden_dim)
        self.T2 = torch.nn.Transformer(d_model=hidden_dim)
        
        self.fc1 = nn.Linear(hidden_dim, output_dim1)
        self.fc2 = nn.Linear(hidden_dim, output_dim2)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text, y1, y2):

        #text = [sent len, batch size]
        
        #pass text through embedding layer
        embedded_X = self.dropout(self.embedding(text))
        embedded_y1 = self.dropout(self.embedding(y1))
        embedded_y2 = self.dropout(self.embedding(y2))
        
        #embedded = [sent len, batch size, emb dim]
        #pass embeddings into LSTM
        shared_output, (hidden, cell) = self.lstm(embedded_X)
#         print(shared_output.shape)
        #outputs holds the backward and forward hidden states in the final layer
        #hidden and cell are the backward and forward hidden and cell states at the final time-step
        
        #output = [sent len, batch size, hid dim * n directions]
        #hidden/cell = [n layers * n directions, batch size, hid dim]
        
        #we use our outputs to make a prediction of what the tag should be
        out1 = self.T1(shared_output, embedded_y1)
        out2 = self.T2(shared_output, embedded_y2)
        predictions_1 = self.fc1(self.dropout(out1))
        predictions_2 = self.fc2(self.dropout(out2))
        
        #predictions = [sent len, batch size, output dim]
        
        return predictions_1, predictions_2

In [7]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 128
HIDDEN_DIM = 128
OUTPUT_DIM1 = len(POS.vocab)
OUTPUT_DIM2 = len(NEG_SCOPE.vocab)
N_LAYERS = 2
BIDIRECTIONAL = False
DROPOUT = 0.25
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model = MyModel(INPUT_DIM, 
                        EMBEDDING_DIM, 
                        HIDDEN_DIM, 
                        OUTPUT_DIM1, 
                        OUTPUT_DIM2,
                        N_LAYERS, 
                        BIDIRECTIONAL, 
                        DROPOUT, 
                        PAD_IDX)

In [8]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 15,627,056 trainable parameters


In [9]:
optimizer = optim.Adam(model.parameters())
TAG_PAD_IDX = NEG_SCOPE.vocab.stoi[NEG_SCOPE.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index = TAG_PAD_IDX)

In [10]:
def categorical_accuracy(preds, y, tag_pad_idx):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
    non_pad_elements = (y != tag_pad_idx).nonzero()
    correct = max_preds[non_pad_elements].squeeze(1).eq(y[non_pad_elements])
    return correct.sum() / torch.FloatTensor([y[non_pad_elements].shape[0]])

In [11]:
def train_model(model, iterator, optimizer, criterion, tag_pad_idx):
    epoch_loss = 0
    epoch_acc_pos = 0
    epoch_acc_neg = 0
    model.train()
    for batch in tqdm(iterator):
        text = batch.Sentence
        pos = batch.POS
        neg_scope = batch.Neg_Scope
        optimizer.zero_grad()
        #text = [sent len, batch size]
        predictions1, predictions2 = model(text, pos, neg_scope)
        
        #predictions = [sent len, batch size, output dim]
        #tags = [sent len, batch size]
        
        predictions1 = predictions1.view(-1, predictions1.shape[-1])
        predictions2 = predictions2.view(-1, predictions2.shape[-1])
        pos = pos.view(-1)
        neg_scope = neg_scope.view(-1)
        
        #predictions = [sent len * batch size, output dim]
        #tags = [sent len * batch size]
        
        loss1 = criterion(predictions1, pos) 
        loss2 = criterion(predictions2, neg_scope)
        
        loss = loss1+loss2
                
        acc_pos = categorical_accuracy(predictions1, pos, tag_pad_idx)
        acc_neg = categorical_accuracy(predictions2, neg_scope, tag_pad_idx)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc_pos += acc_pos.item()
        epoch_acc_neg += acc_neg.item()
        
    return epoch_loss / len(iterator), epoch_acc_pos / len(iterator), epoch_acc_neg / len(iterator)

In [12]:
def evaluate(model, iterator, criterion, tag_pad_idx):
    
    epoch_loss = 0
    epoch_acc_pos = 0
    epoch_acc_neg = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            text = batch.Sentence
            pos = batch.POS
            neg_scope = batch.Neg_Scope
            
            predictions1, predictions2 = model(text, pos, neg_scope)
            
            predictions1 = predictions1.view(-1, predictions1.shape[-1])
            predictions2 = predictions2.view(-1, predictions2.shape[-1])
            pos = pos.view(-1)
            neg_scope = neg_scope.view(-1)
            
            loss1 = criterion(predictions1, pos) 
            loss2 = criterion(predictions2, neg_scope)

            loss = loss1+loss2

            acc_pos = categorical_accuracy(predictions1, pos, tag_pad_idx)
            acc_neg = categorical_accuracy(predictions2, neg_scope, tag_pad_idx)

            epoch_loss += loss.item()
            epoch_acc_pos += acc_pos.item()
            epoch_acc_neg += acc_neg.item()
        
    return epoch_loss / len(iterator), epoch_acc_pos / len(iterator), epoch_acc_neg / len(iterator)

In [13]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [14]:
BATCH_SIZE = 100

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train, val, test), 
    batch_size = BATCH_SIZE,
    device = device,sort=False)

In [15]:
bat = next(iter(train_iterator))

In [17]:
from torchviz import make_dot
out1, out2 = model(bat.Sentence,bat.POS,bat.Neg_Scope)

In [18]:
make_dot(out1) 

CalledProcessError: Command '['dot', '-Kdot', '-Tsvg']' returned non-zero exit status 3221225725. [stderr: b'']

<graphviz.dot.Digraph at 0x1995be2d040>

In [155]:
N_EPOCHS = 10

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc_pos, train_acc_neg = train_model(model, train_iterator, optimizer, criterion, TAG_PAD_IDX)
    valid_loss, valid_acc_pos, valid_acc_neg = evaluate(model, valid_iterator, criterion, TAG_PAD_IDX)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'try-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc POS: {train_acc_pos*100:.2f}% | Train Acc NEG: {train_acc_neg*100:.2f}%' )
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc POS: {valid_acc_pos*100:.2f}% | Val. Acc NEG: {valid_acc_neg*100:.2f}%')

  0%|                                                                                            | 0/9 [00:31<?, ?it/s]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Harsh\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3418, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-155-0372cf5836eb>", line 9, in <module>
    train_loss, train_acc_pos, train_acc_neg = train_model(model, train_iterator, optimizer, criterion, TAG_PAD_IDX)
  File "<ipython-input-149-eb02b277978b>", line 33, in train_model
    loss.backward()
  File "C:\Users\Harsh\anaconda3\lib\site-packages\torch\tensor.py", line 245, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "C:\Users\Harsh\anaconda3\lib\site-packages\torch\autograd\__init__.py", line 145, in backward
    Variable._execution_engine.run_backward(
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\Harsh\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", l

TypeError: object of type 'NoneType' has no len()