In [0]:
import csv
import itertools
import os
import random
import re

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim

In [2]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
CUDA = torch.cuda.is_available()
device = torch.device("cuda") if CUDA else torch.device("cpu")
print(device)

cuda


In [3]:
%cd 'drive/My Drive/Colab/Maths'

/content/drive/My Drive/Colab/Maths


In [0]:
# Read both CSV files fully into memory; every row is a list of strings.
with open('train.csv', newline='') as train_file:
    train_rows = list(csv.reader(train_file))

with open('test.csv', newline='') as test_file:
    test_rows = list(csv.reader(test_file))

# Skip the first row of each file; for the test set only the first column is kept.
train_data = train_rows[1:]
test_data = [row[0] for row in test_rows[1:]]

In [25]:
# Peek at the first training row to confirm the column layout.
train_data[0]

['add 45 and 71', '45', '71', 'x+y', '116']

In [0]:
# Build [question, answer] pairs: the question is the input statement (column 0) and
# the answer is the string "<n1> <n2> <equation>" assembled from columns 1, 2 and 3.
qa_pairs = [
    [row[0], str(row[1]) + " " + str(row[2]) + " " + str(row[3])]
    for row in train_data
]

In [37]:
# Show the first [question, answer] pair.
qa_pairs[0]

['add 45 and 71', '45 71 x+y']

In [0]:
# Word Processing and Vocabulary

# Indices reserved for the special tokens; ordinary words are numbered from 3 upwards.
PAD_TOKEN = 0
SOS_TOKEN = 1
EOS_TOKEN = 2


class Vocabulary():
    """Word <-> index lookup tables together with per-word occurrence counts.

    Attributes:
        word2index: maps a word to its integer index.
        word2count: maps a word to the number of times it has been added.
        index2word: maps an index back to its word (pre-filled with PAD/SOS/EOS).
        numwords: number of known entries, special tokens included.
    """

    def __init__(self):
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_TOKEN: "PAD", SOS_TOKEN: "SOS", EOS_TOKEN: "EOS"}
        self.numwords = 3  # the three special tokens above

    def addSentence(self, sentence):
        """Add every single-space-separated token of `sentence` to the vocabulary."""
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        """Register `word`: bump its count if known, otherwise give it the next free index."""
        if word in self.word2index:
            self.word2count[word] += 1
            return

        new_index = self.numwords
        self.word2index[word] = new_index
        self.word2count[word] = 1
        self.index2word[new_index] = word
        self.numwords = new_index + 1


In [39]:
# Register the words of every question and every answer in one shared vocabulary.

vocab = Vocabulary()

for question, answer in qa_pairs:
    vocab.addSentence(question)
    vocab.addSentence(answer)

# numwords also counts the three special tokens (PAD, SOS, EOS).
print("Unique word count = {}".format(vocab.numwords))

Unique word count = 120


In [0]:
# Encode a sentence into a list of integers

def indexesFromSentence(vocab, sentence):
    """Encode `sentence` as a list of vocabulary indices terminated by EOS_TOKEN.

    The sentence is split on single spaces; a word missing from
    `vocab.word2index` raises KeyError.
    """
    token_ids = []
    for word in sentence.split(' '):
        token_ids.append(vocab.word2index[word])
    token_ids.append(EOS_TOKEN)
    return token_ids

In [44]:
# Sanity check: print one question, then display its encoded form (EOS_TOKEN is appended at the end).
print(qa_pairs[1][0])
indexesFromSentence(vocab, qa_pairs[1][0])

add 53 and 34


[3, 8, 5, 9, 2]

In [0]:
# Padding the sentences for equal length
def zeroPadding(l, fillvalue = 0):
    """Transpose a list of index sequences, padding the short ones with `fillvalue`.

    Returns a list of tuples: tuple `t` holds the t-th token of every sequence,
    so the result has one entry per time step and one column per sequence.
    """
    padded_steps = itertools.zip_longest(*l, fillvalue=fillvalue)
    return list(padded_steps)

In [50]:
# Small sample (the first 10 pairs) used to try out the helper functions.
sample_pairs = qa_pairs[:10]
inp = [pair[0] for pair in sample_pairs]
out = [pair[1] for pair in sample_pairs]
indexes = [indexesFromSentence(vocab, sentence) for sentence in inp]

print(inp)
print(indexes)

['add 45 and 71', 'add 53 and 34', 'add 22 and 35', 'add 38 and 97', 'add 87 and 71', 'add 31 and 81', 'add 40 and 97', 'add 8 and 40', 'add 28 and 49', 'add 27 and 26']
[[3, 4, 5, 6, 2], [3, 8, 5, 9, 2], [3, 10, 5, 11, 2], [3, 12, 5, 13, 2], [3, 14, 5, 6, 2], [3, 15, 5, 16, 2], [3, 17, 5, 13, 2], [3, 18, 5, 17, 2], [3, 19, 5, 20, 2], [3, 21, 5, 22, 2]]


In [52]:
# Pad and transpose the sample: each row is one time step and each column one sentence,
# so a sentence is read downwards.
test_result = zeroPadding(indexes)
test_result

[(3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
 (4, 8, 10, 12, 14, 15, 17, 18, 19, 21),
 (5, 5, 5, 5, 5, 5, 5, 5, 5, 5),
 (6, 9, 11, 13, 6, 16, 13, 17, 20, 22),
 (2, 2, 2, 2, 2, 2, 2, 2, 2, 2)]

In [0]:
# To create a mask later
def binaryMatrix(l, value=0):
    """Build a 0/1 mask with the same layout as `l`.

    Args:
        l: iterable of sequences of token indices (e.g. the output of zeroPadding).
        value: the index that marks padding. Defaults to 0, the padding index
            used throughout this notebook.

    Returns:
        A list of lists holding 0 where the token equals `value` and 1 elsewhere.
    """
    # Compare against `value` (previously ignored) so the parameter actually
    # controls which index is treated as padding.
    return [[0 if token == value else 1 for token in seq] for seq in l]

In [54]:
# Mask for the padded sample; entries are 1 for real tokens and 0 for padding.
binaryResult = binaryMatrix(test_result)
binaryResult

[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]

In [0]:
# Functions to make entire data suitable for the net 
# now that the mini functions are tested

def inputVar(l, vocab):
    """Turn a list of input sentences into a padded index tensor plus their lengths.

    Returns:
        padVar: LongTensor laid out as (time step, sentence), padded by zeroPadding.
        lengths: tensor with the encoded length (EOS included) of each sentence.
    """
    encoded = []
    sentence_lengths = []
    for sentence in l:
        token_ids = indexesFromSentence(vocab, sentence)
        encoded.append(token_ids)
        sentence_lengths.append(len(token_ids))

    lengths = torch.tensor(sentence_lengths)
    padVar = torch.LongTensor(zeroPadding(encoded))
    return padVar, lengths

def outputVar(l, vocab):
    """Turn a list of target sentences into a padded index tensor, its mask and the max length.

    Returns:
        padVar: LongTensor laid out as (time step, sentence), padded by zeroPadding.
        mask: BoolTensor of the same layout, False at padded positions.
        max_target_len: length (EOS included) of the longest encoded target.
    """
    encoded = [indexesFromSentence(vocab, sentence) for sentence in l]
    max_target_len = max(map(len, encoded))
    padded = zeroPadding(encoded)
    mask = torch.BoolTensor(binaryMatrix(padded))
    padVar = torch.LongTensor(padded)
    return padVar, mask, max_target_len


In [0]:
def batch2TrainData(vocab, pair_batch):
    """Convert a batch of [question, answer] pairs into the tensors used for training.

    Args:
        vocab: vocabulary used to encode the sentences.
        pair_batch: list of [question, answer] string pairs. It is not modified.

    Returns:
        (inp, lengths, out, mask, max_target_len) as produced by inputVar and
        outputVar, with the pairs ordered by decreasing question length.
    """
    # Longest question first: the encoder packs the batch with
    # pack_padded_sequence, which by default expects decreasing lengths.
    # sorted() is used instead of list.sort() so the caller's list keeps its order.
    ordered = sorted(pair_batch, key=lambda pair: len(pair[0].split(' ')), reverse=True)
    input_batch = [pair[0] for pair in ordered]
    output_batch = [pair[1] for pair in ordered]
    inp, lengths = inputVar(input_batch, vocab)
    out, mask, max_target_len = outputVar(output_batch, vocab)
    return inp, lengths, out, mask, max_target_len

In [58]:
# Example: draw a small random batch (sampling with replacement) and inspect its tensors.
small_batch_size = 5
sampled_pairs = [random.choice(qa_pairs) for _ in range(small_batch_size)]
batches = batch2TrainData(vocab, sampled_pairs)
input_variable, lengths, target_variable, mask, max_target_len = batches

for label, value in (
    ("Input Variable:", input_variable),
    ("Lengths of each sentence:", lengths),
    ("Target Variable:", target_variable),
    ("Mask:", mask),
):
    print(label)
    print(value)
print("Max target length : ", max_target_len)

Input Variable:
tensor([[105, 110, 105, 112,  42],
        [ 84,  29,  54,  77, 116],
        [106, 106, 106, 113, 117],
        [ 19,  77,  95,  55,   2],
        [  2,   2,   2,   2,   0]])
Lengths of each sentence:
tensor([5, 5, 5, 5, 4])
Target Variable:
tensor([[ 84,  29,  54,  77,  42],
        [ 19,  77,  95,  55,  92],
        [  7, 111,   7, 114, 118],
        [  2,   2,   2,   2,   2]])
Mask:
tensor([[True, True, True, True, True],
        [True, True, True, True, True],
        [True, True, True, True, True],
        [True, True, True, True, True]])
Max target length :  4


In [0]:
######################################################### DEFINING THE MODEL ##################################################################

In [0]:
class Encoder(nn.Module):
    """Bidirectional GRU encoder over a padded batch of index sequences.

    Args:
        hidden_size: GRU hidden size. The GRU input size is also `hidden_size`,
            so `embedding` must produce vectors of that dimension.
        embedding: embedding module applied to the input indices.
        n_layers: number of stacked GRU layers.
        dropout: dropout between GRU layers (forced to 0 for a single layer).
    """

    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(Encoder, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        """Encode a batch.

        Args:
            input_seq: index tensor laid out as (max length, batch size).
            input_lengths: true length of each sequence (tensor or list), in
                decreasing order as required by pack_padded_sequence's default.
            hidden: optional initial hidden state for the GRU.

        Returns:
            outputs: (max length, batch size, hidden_size), the sum of the forward
                and backward direction outputs.
            hidden: final GRU hidden state, (n_layers * 2, batch size, hidden_size).
        """
        embedded = self.embedding(input_seq)

        # pack_padded_sequence only accepts a lengths tensor that lives on the CPU;
        # callers in this notebook move lengths to `device`, so bring it back here.
        if isinstance(input_lengths, torch.Tensor):
            input_lengths = input_lengths.cpu()

        # Packing lets the GRU skip the padded positions.
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)

        outputs, hidden = self.gru(packed, hidden)

        # Back to a regular padded tensor.
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)

        # The last dimension is [forward | backward]; add the two halves together.
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]

        return outputs, hidden

In [0]:
class Attention(nn.Module):
    """Dot-product attention over the encoder outputs.

    `method` and `hidden_size` are stored on the module; the score that is
    computed is always the dot product.
    """

    def __init__(self, method, hidden_size):
        super(Attention, self).__init__()

        self.method = method
        self.hidden_size = hidden_size

    def dot_score(self, hidden, encoder_output):
        """Dot product of the decoder state with every encoder output (sum over the last axis)."""
        return (hidden * encoder_output).sum(dim=2)

    def forward(self, hidden, encoder_output):
        """Return attention weights of shape (batch size, 1, max length)."""
        # Scores come out as (max length, batch size); transpose so that the
        # softmax normalises over the source positions of each batch element.
        energies = self.dot_score(hidden, encoder_output).t()
        weights = F.softmax(energies, dim=1)
        return weights.unsqueeze(1)

In [0]:
class Decoder(nn.Module):
    """Single-step GRU decoder with dot-product attention over the encoder outputs.

    Args:
        attention_model: attention method name handed to Attention.
        embedding: embedding module applied to the input indices.
        hidden_size: GRU hidden size (also its input size).
        output_size: number of output classes (the vocabulary size).
        n_layers: number of stacked GRU layers.
        dropout: dropout applied to the embedding and between GRU layers
            (the latter is forced to 0 for a single layer).
    """

    def __init__(self, attention_model, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(Decoder, self).__init__()
        self.attention_model = attention_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        inter_layer_dropout = 0 if n_layers == 1 else dropout
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=inter_layer_dropout)
        self.concat = nn.Linear(hidden_size*2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        self.attention = Attention(attention_model, hidden_size)

    def forward(self, input_step, last_hidden, encoder_output):
        """Decode one time step for the whole batch.

        Args:
            input_step: (1, batch size) indices of the current input tokens.
            last_hidden: GRU hidden state from the previous step.
            encoder_output: (max length, batch size, hidden_size) encoder outputs.

        Returns:
            output: (batch size, output_size) softmax probabilities over the vocabulary.
            hidden: updated GRU hidden state.
        """
        step_embedding = self.embedding_dropout(self.embedding(input_step))

        # One GRU step: rnn_output is (1, batch size, hidden_size).
        rnn_output, hidden = self.gru(step_embedding, last_hidden)

        # Attention weights, (batch size, 1, max length).
        attention_weights = self.attention(rnn_output, encoder_output)

        # Weighted sum of encoder outputs:
        # (batch, 1, max length) x (batch, max length, hidden) -> (batch, 1, hidden)
        context = torch.bmm(attention_weights, encoder_output.transpose(0, 1))

        # Drop the singleton axes and join GRU output with context: (batch, hidden * 2).
        merged = torch.cat((rnn_output.squeeze(0), context.squeeze(1)), 1)
        projected = torch.tanh(self.concat(merged))

        # Probabilities over the vocabulary for every batch element.
        output = F.softmax(self.out(projected), dim=1)
        return output, hidden
    

In [0]:
def maskNLLLoss(decoder_out, target, mask):
    """Negative log-likelihood of the target tokens, averaged over unmasked positions only.

    Args:
        decoder_out: (batch size, vocabulary size) probabilities from the decoder.
        target: (batch size,) indices of the correct tokens for this time step.
        mask: (batch size,) boolean tensor, False where the target is padding.

    Returns:
        loss: scalar tensor, mean of -log p(target) over positions where mask is True.
        nTotal: number of positions that contributed (mask.sum() as a Python int).
    """
    nTotal = mask.sum()

    # Probability given to the correct token of each sequence. squeeze(1) turns
    # the (batch, 1) gather result into (batch,), matching the mask. Without it
    # masked_select broadcasts the pair to (batch, batch) and the mean silently
    # includes the padded positions.
    target_probs = torch.gather(decoder_out, 1, target.view(-1, 1)).squeeze(1)

    crossEntropy = -torch.log(target_probs)

    loss = crossEntropy.masked_select(mask).mean()
    # The loss is derived from decoder_out, so this keeps it on that tensor's device.
    loss = loss.to(decoder_out.device)

    return loss, nTotal.item()

In [0]:
################################################################# TRAINING ########################################################################

In [67]:
#<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< This is only for visualization >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# Walks one small random batch through a freshly built encoder and decoder, one decoder
# time step at a time, printing the shape of every intermediate tensor.
# Requires `random` to be imported.
small_batch_size = 5
batches = batch2TrainData(vocab, [random.choice(qa_pairs) for _ in range(small_batch_size)])
input_variable, lengths, target_variable, mask, max_target_len = batches

### One time step is one row of the tensors below: the t-th token of every sentence in the batch ###

print("Input Variable:")
print(input_variable)
print("Lengths of each sentence:")
print(lengths)
print("Target Variable:")
print(target_variable)
print("Mask:")
print(mask)

print("Input Variable Shape:")
print(input_variable.shape)
print("Lengths Shape:")
print(lengths.shape)
print("Target Variable Shape:")
print(target_variable.shape)
print("Mask Shape:")
print(mask.shape)
print("Max target length : ", max_target_len)

# Defining the parameters (these module-level names are reassigned by the later configuration cell)
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
attention_model = 'dot'
# The same embedding module is handed to both the encoder and the decoder
embedding = nn.Embedding(vocab.numwords, hidden_size)

# Defining the encoder and decoder
encoder = Encoder(hidden_size, embedding, encoder_n_layers, dropout)
decoder = Decoder(attention_model, embedding, hidden_size, vocab.numwords, decoder_n_layers,dropout)
encoder = encoder.to(device)
decoder = decoder.to(device)

# Train mode, so the dropout layers are active
encoder.train()
decoder.train()

# parameters() hands each optimizer the weights of its model
encoder_optimizer = optim.Adam(encoder.parameters(), lr=0.0001)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=0.0001)
encoder_optimizer.zero_grad()
decoder_optimizer.zero_grad()

# Move the batch to the selected device
# NOTE(review): pack_padded_sequence (used inside Encoder.forward) documents that a lengths
# tensor must be on the CPU; moving `lengths` to a CUDA device may fail on recent PyTorch — confirm.
input_variable = input_variable.to(device)
lengths = lengths.to(device)
target_variable = target_variable.to(device)
mask = mask.to(device)

loss = 0
print_losses = []
n_totals = 0

encoder_output, encoder_hidden = encoder(input_variable, lengths)
print("Encoder Output Shape = ",encoder_output.shape)
print("Last Encoder Hidden State Shape = ",encoder_hidden.shape)

# Every sequence in the batch starts decoding from SOS_TOKEN
decoder_input = torch.LongTensor([[SOS_TOKEN for _ in range(small_batch_size)]])
decoder_input = decoder_input.to(device)
print("Initial Decoder Input Shape = ",decoder_input.shape)
print(decoder_input)

# The first decoder.n_layers slices of the encoder's final hidden state become the decoder's
# initial hidden state (the encoder is bidirectional, so it returns twice as many slices).
decoder_hidden = encoder_hidden[:decoder.n_layers]
print("Initial decoder hidden state shape = ",decoder_hidden.shape)
print("\n")
print("----------------------------------------------------------")
print("THIS IS WHAT HAPPENS AT EVERY TIME STEP OF THE GRU!")
print("----------------------------------------------------------")
print("\n")

for t in range(max_target_len):
    decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_output)
    print("Decoder Output Shape = ", decoder_output.shape)    
    print("Decoder Hidden State Shape = ", decoder_hidden.shape)

    # Teacher forcing: the next decoder input is the ground-truth token of this step
    decoder_input = target_variable[t].view(1,-1)
    print("Target Variable now = ", target_variable[t])
    print("Target Variable Shape now = ", target_variable[t].shape)
    print("Decoder input shape after reshaping = ", decoder_input.shape)

    # Loss for this time step
    print("Mask for current timestep", mask[t])
    print("Mask shape for current timestep", mask[t].shape)
    mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
    print("Mask Loss = ", mask_loss)
    print("Total = ", nTotal)

    loss += mask_loss
    # Weight the step loss by the number of tokens it covers so the running average is per token
    print_losses.append(mask_loss.item()*nTotal)
    print(print_losses)
    n_totals += nTotal
    print(nTotal)

    # NOTE(review): loss.backward() is never called in this cell, so the parameters have no
    # gradients when step() runs — presumably acceptable for a shape walkthrough; confirm.
    encoder_optimizer.step()
    decoder_optimizer.step()

    # Running average loss per token over the steps seen so far
    returned_loss = sum(print_losses)/n_totals
    print("Returned Loss = ", returned_loss)
    print("\n")
    print("----------------------------------------DONE ONE STEP-----------------------------------")
    print("\n")
 

Input Variable:
tensor([[105, 107,  15,  66,  70],
        [ 69,  70, 116, 116, 116],
        [106, 108, 117, 119, 119],
        [ 80,  12,   2,   2,   2],
        [  2,   2,   0,   0,   0]])
Lengths of each sentence:
tensor([5, 5, 4, 4, 4])
Target Variable:
tensor([[ 69,  70,  15,  66,  70],
        [ 80,  12,  92,  71,  71],
        [  7, 109, 118, 118, 118],
        [  2,   2,   2,   2,   2]])
Mask:
tensor([[True, True, True, True, True],
        [True, True, True, True, True],
        [True, True, True, True, True],
        [True, True, True, True, True]])
Input Variable Shape:
torch.Size([5, 5])
Lengths Shape:
torch.Size([5])
Target Variable Shape:
torch.Size([4, 5])
Mask Shape:
torch.Size([4, 5])
Max target length :  4
Encoder Output Shape =  torch.Size([5, 5, 500])
Last Encoder Hidden State Shape =  torch.Size([4, 5, 500])
Initial Decoder Input Shape =  torch.Size([1, 5])
tensor([[1, 1, 1, 1, 1]], device='cuda:0')
Initial decoder hidden state shape =  torch.Size([2, 5, 500])


-

In [0]:
def train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder,
          embedding, encoder_optimizer, decoder_optimizer, batch_size, clip):
    """Run one optimisation step on a single batch and return its average loss per token.

    Args:
        input_variable: (max input length, batch size) padded input indices.
        lengths: tensor with the true length of every input sequence.
        target_variable: (max_target_len, batch size) padded target indices.
        mask: boolean tensor shaped like target_variable, False at padding.
        max_target_len: number of decoder steps to run.
        encoder, decoder: the models being trained.
        embedding: not used inside this function; kept for interface compatibility.
        encoder_optimizer, decoder_optimizer: optimizers stepped once each.
        batch_size: number of sequences in the batch.
        clip: maximum gradient norm, applied to encoder and decoder separately.

    Returns:
        Sum of the per-step losses weighted by their token counts, divided by the
        total token count.

    Reads the module-level names `device`, `SOS_TOKEN` and `teacher_forcing_ratio`.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_variable = input_variable.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    # pack_padded_sequence (inside the encoder) requires the lengths tensor on
    # the CPU, so it is deliberately NOT moved to `device`.
    lengths = lengths.to("cpu")

    loss = 0
    print_losses = []
    n_totals = 0

    encoder_output, encoder_hidden = encoder(input_variable, lengths)

    # Every sequence starts decoding from SOS_TOKEN.
    decoder_input = torch.LongTensor([[SOS_TOKEN for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)

    # The first decoder.n_layers slices of the encoder's final hidden state seed the decoder.
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Decided once per batch, not once per time step.
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for t in range(max_target_len):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_output)

        if use_teacher_forcing:
            # Teacher forcing: next input is the current target.
            decoder_input = target_variable[t].view(1, -1)
        else:
            # No teacher forcing: next input is the decoder's own most likely token.
            # The (batch, 1) index tensor is reshaped on its own device rather
            # than being rebuilt element by element on the CPU.
            _, topi = decoder_output.topk(1)
            decoder_input = topi.reshape(1, -1)

        # Calculate and accumulate loss.
        mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
        loss += mask_loss
        print_losses.append(mask_loss.item() * nTotal)
        n_totals += nTotal

    loss.backward()

    # Clip gradients in place to keep their norm bounded.
    _ = nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(print_losses)/n_totals


In [0]:
def trainIters(model_name, vocab, qa_pairs, encoder, decoder, encoder_optimizer, decoder_optimizer,
               embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size, 
               print_every, save_every, clip, corpus_name, loadFilename):
    """Train for `n_iteration` batches, printing progress and saving checkpoints.

    Each iteration uses a batch of `batch_size` pairs drawn at random (with
    replacement) from `qa_pairs`. Every `print_every` iterations the average
    loss since the last report is printed; every `save_every` iterations a
    checkpoint is written to
    `<save_dir>/<model_name>/<corpus_name>/<enc layers>-<dec layers>_<hidden size>/<iteration>_checkpoint.tar`.

    When `loadFilename` is truthy, training resumes after the iteration stored
    in the module-level `checkpoint` dict, which must have been loaded beforehand.
    """
    # All batches are sampled up front, one per iteration.
    training_batches = [batch2TrainData(vocab, [random.choice(qa_pairs) for _ in range(batch_size)])
                        for _ in range(n_iteration)]

    # Initializations
    print('Initializing...')
    start_iteration = 1
    print_loss = 0

    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration-1]
        input_variable, lengths, target_variable, mask, max_target_len = training_batch

        # Run a training iteration with batch
        loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder,
                     decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip)
        print_loss += loss

        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}".format(iteration, iteration / n_iteration * 100, print_loss_avg))
            print_loss = 0

        # Save checkpoint
        if (iteration % save_every == 0):
            # Take the hidden size from the encoder being trained rather than
            # from a notebook-level global that other cells reassign.
            directory = os.path.join(save_dir, model_name, corpus_name,
                                     '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, encoder.hidden_size))
            os.makedirs(directory, exist_ok=True)
            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': vocab.__dict__,
                'embedding': embedding.state_dict()
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))


In [0]:
# Testing the trained net
# For reading in user input and responding

class GreedySearchDecoder(nn.Module): 
    """Greedy decoding: at every step feed back the single most probable token.

    Reads the module-level names `device` and `SOS_TOKEN`.
    """

    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length = 5):
        """Decode one input sequence.

        Args:
            input_seq: (sequence length, 1) tensor of input indices.
            input_length: tensor holding the length of the input sequence.
            max_length: number of tokens to generate.

        Returns:
            all_tokens: 1-D tensor with the `max_length` chosen token indices.
            all_scores: 1-D tensor with the probability of each chosen token.
        """
        # Encode the input sequence through the encoder model
        encoder_output, encoder_hidden = self.encoder(input_seq, input_length)

        # Seed the decoder with the first n_layers slices of the encoder's final
        # hidden state. Use the decoder owned by this module, not whatever the
        # global name `decoder` happens to be bound to.
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]

        # Decoder input starts with SOS_TOKEN
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_TOKEN

        # Empty tensors that the chosen tokens and their scores are appended to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)

        # Decode one word at a time
        for _ in range(max_length):

            # Forward pass through decoder
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_output)

            # Most likely word and its probability
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)

            # Store the word and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)

            # The chosen word becomes the next input; restore the leading step axis
            decoder_input = torch.unsqueeze(decoder_input, 0)

        return all_tokens, all_scores


In [0]:
# Make input sentence fit for answering, gives it to searcher, gets back the answer, and makes it fit for reading
def evaluate(encoder, decoder, searcher, vocab, sentence, max_length = 5):
    """Encode `sentence`, run it through `searcher` and return the decoded words.

    Args:
        encoder, decoder: not used directly (the searcher holds its own
            references); kept for interface compatibility.
        searcher: callable taking (input batch, lengths, max_length) and
            returning (tokens, scores).
        vocab: vocabulary used for encoding and decoding.
        sentence: input string; words missing from `vocab` raise KeyError.
        max_length: number of tokens to generate.

    Returns:
        List of decoded words, special tokens included.
    """
    # A batch holding just this one sentence.
    indexes_batch = [indexesFromSentence(vocab, sentence)]

    # pack_padded_sequence (inside the encoder) requires the lengths tensor on
    # the CPU, so it is deliberately NOT moved to `device`.
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])

    # Transpose to the (sequence length, batch size) layout the encoder expects.
    input_batch = torch.LongTensor(indexes_batch).transpose(0,1)
    input_batch = input_batch.to(device)

    tokens, scores = searcher(input_batch, lengths, max_length)

    decoded_words = [vocab.index2word[token.item()] for token in tokens]

    return decoded_words


In [0]:
# testing
def test(encoder, decoder, searcher, vocab, input_sentence):
    """Answer `input_sentence` and return the decoded words joined by spaces.

    'EOS' and 'PAD' entries are removed from the result. If a KeyError is
    raised while evaluating (e.g. a word that is not in the vocabulary), a
    message is printed and None is returned.
    """
    try:
        decoded = evaluate(encoder, decoder, searcher, vocab, input_sentence)
    except KeyError:
        print("Error: Huh. Haven't seen that before.")
        return None

    kept_words = [word for word in decoded if word != 'EOS' and word != 'PAD']
    return ' '.join(kept_words)

In [76]:
# Run configuration. model_name and corpus_name only name the checkpoint directory.
model_name = 'Summer'
corpus_name = 'Cornell'
attention_model = 'dot'
hidden_size = 400
encoder_n_layers = 3
decoder_n_layers = 3
dropout = 0.1
batch_size = 64

save_dir = os.getcwd()
# None trains from scratch; set to a checkpoint path to resume.
loadFilename = None
checkpoint_iter = 5000

# To resume from the checkpoint written at iteration `checkpoint_iter`, uncomment:
#loadFilename = os.path.join(save_dir, model_name, corpus_name,
#                          '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
#                          '{}_checkpoint.tar'.format(checkpoint_iter))

# Load model if a loadFilename is provided
if loadFilename:
    # map_location=device restores the tensors onto whichever device this
    # session uses, so a checkpoint saved on a GPU also loads on a CPU-only machine.
    checkpoint = torch.load(loadFilename, map_location=device)
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    # Restore the vocabulary tables saved with the checkpoint.
    vocab.__dict__ = checkpoint['voc_dict']

print('Building the Encoder and Decoder...')

# One embedding module shared by the encoder and the decoder.
embedding = nn.Embedding(vocab.numwords, hidden_size)
if loadFilename:
    embedding.load_state_dict(embedding_sd)

encoder = Encoder(hidden_size, embedding, encoder_n_layers, dropout)
decoder = Decoder(attention_model, embedding, hidden_size, vocab.numwords, decoder_n_layers, dropout)

if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

encoder = encoder.to(device)
decoder = decoder.to(device)

print("We're ready to go!")


Building the Encoder and Decoder...
We're ready to go!


In [78]:
# Training hyper-parameters
clip = 50.0
teacher_forcing_ratio = 1.0  # read as a global by train(); 1.0 means teacher forcing on every batch
learning_rate = 0.0001
decoder_learning_ratio = 5.0  # the decoder's learning rate is learning_rate times this factor
n_iteration = 2000
print_every = 1
save_every = 500

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Move any restored optimizer state onto the device the models live on.
# .to(device) replaces the unconditional .cuda(), which raised on machines
# without CUDA.
for optimizer in (encoder_optimizer, decoder_optimizer):
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.to(device)
print("Starting Training!")

trainIters(model_name, vocab, qa_pairs, encoder, decoder, encoder_optimizer, decoder_optimizer,
           embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size,
           print_every, save_every, clip, corpus_name,loadFilename)

print("Trained.")



Building optimizers ...
Starting Training!
Initializing...
Training...
Iteration: 1; Percent complete: 0.1%; Average loss: 4.7791
Iteration: 2; Percent complete: 0.1%; Average loss: 4.6582
Iteration: 3; Percent complete: 0.1%; Average loss: 4.5177
Iteration: 4; Percent complete: 0.2%; Average loss: 4.3055
Iteration: 5; Percent complete: 0.2%; Average loss: 4.0346
Iteration: 6; Percent complete: 0.3%; Average loss: 3.7363
Iteration: 7; Percent complete: 0.4%; Average loss: 3.6154
Iteration: 8; Percent complete: 0.4%; Average loss: 3.6971
Iteration: 9; Percent complete: 0.4%; Average loss: 3.5167
Iteration: 10; Percent complete: 0.5%; Average loss: 3.3091
Iteration: 11; Percent complete: 0.5%; Average loss: 3.2981
Iteration: 12; Percent complete: 0.6%; Average loss: 3.2704
Iteration: 13; Percent complete: 0.7%; Average loss: 3.3932
Iteration: 14; Percent complete: 0.7%; Average loss: 3.2266
Iteration: 15; Percent complete: 0.8%; Average loss: 3.2286
Iteration: 16; Percent complete: 0.8%;

In [0]:
# Switch both models to evaluation mode (dropout off), then wrap them in the greedy decoder.
for module in (encoder, decoder):
    module.eval()

searcher = GreedySearchDecoder(encoder, decoder)

In [0]:
def extractExpression(input):
    """Split a decoded answer "<x> <y> <expression>" into (int x, int y, expression string).

    The string is split on single spaces; only the first three fields are used.
    """
    fields = input.split(' ')
    return int(fields[0]), int(fields[1]), fields[2]

In [0]:
import operator

# Maps the operator character of a predicted expression to the function that applies it.
ops = {
    "+": operator.add,
    "-": operator.sub,
    "*": operator.mul,
    "^": operator.pow,
}

def displayResult(userInput, print_result = False):
    """Answer a word problem: let the network predict "<x> <y> <expression>", then evaluate it.

    The expression is read as three characters: first operand name, operator,
    second operand name. When it starts with 'x' the operator is applied to
    (x, y), otherwise to (y, x).

    Args:
        userInput: the question text.
        print_result: when exactly True, print the decoded x, y and expression.

    Returns:
        The numeric result of applying the operator.
    """
    prediction = test(encoder, decoder, searcher, vocab, userInput)
    x, y, expression = extractExpression(prediction)

    if print_result is True:
        print("X :", x)
        print("Y :", y)
        print("Expression :", expression)

    # Operand order follows the expression: "x?y" -> (x, y), anything else -> (y, x).
    left, right = (x, y) if expression[0] == 'x' else (y, x)
    return ops[expression[1]](left, right)

In [122]:
# Smoke test on a hand-written question, printing the decoded operands and expression.
displayResult('add 2 and 3', print_result = True)


X : 2
Y : 3
Expression : x+y


5

In [0]:
# One answer per test question, preceded by the "Answers" label for the header row.
ans_list = ["Answers"] + [displayResult(exp) for exp in test_data]


In [130]:
# Show the header label followed by every computed answer.
print(ans_list)

['Answers', 7744, 54, 3135, 480, 59, -1, 5162, 79, 1189, 421875, 140, 117649, 125, 65, 2601, 24389, 3744, 130, 3080, 664, 19, 3225, 975, 116, 190, 84, 28, 314432, 49, 1610, 95, 380, 493039, -38, 118, -17, -54, 1369, 62, 4941, 12167, -52, 140, 78, 88, -16, -22, 97336, 28, 784, 750, 49, 126, 43, 6724, 88, 1050, 143, 29791, -6, 816, 782, 62, 529, 225, 46656, 120, 2, 676, 1610, 140608, 9801, 2, 132, -2, 111, -51, 80, 7938, -9]


In [0]:
# Copy test.csv to testResults.csv, appending the matching entry of ans_list to every row
# (ans_list[0] is the "Answers" label, which lines up with the first row of the file).
# Both handles use newline='' as the csv module requires, matching how the files were read earlier.
with open('test.csv', 'r', newline='') as read_obj, \
        open('testResults.csv', 'w', newline='') as write_obj:

    csv_reader = csv.reader(read_obj)
    csv_writer = csv.writer(write_obj)

    for i, row in enumerate(csv_reader):
        row.append(ans_list[i])
        csv_writer.writerow(row)