In [1]:
import pandas as pd
import numpy as np
import torch

import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import tensorflow as tf

from helper_methods import *

from main import create_model

# define path to glove and snli files
path = "../Project/"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
snli_train = pd.read_csv(path+'snli_1.0/snli_1.0_train.txt', sep="\\t")
snli_test = pd.read_csv(path+'snli_1.0/snli_1.0_test.txt', sep="\\t")

  """Entry point for launching an IPython kernel.
  


In [3]:
print(snli_train['sentence1'][0],snli_train['sentence2'][0], snli_train['gold_label'][0])
print(snli_train['sentence1'][1],snli_train['sentence2'][1], snli_train['gold_label'][1])
print(snli_train['sentence1'][2],snli_train['sentence2'][2], snli_train['gold_label'][2])

A person on a horse jumps over a broken down airplane. A person is training his horse for a competition. neutral
A person on a horse jumps over a broken down airplane. A person is at a diner, ordering an omelette. contradiction
A person on a horse jumps over a broken down airplane. A person is outdoors, on a horse. entailment


In [4]:
# Filter and rename the important columns
train_df = snli_train.filter(['sentence1','sentence2','gold_label'], axis=1)
test_df = snli_test.filter(['sentence1','sentence2','gold_label'], axis=1)
train_df = train_df.rename(columns={"sentence1": "premise", "sentence2": "hypothesis"})
test_df = test_df.rename(columns={"sentence1": "premise", "sentence2": "hypothesis"})

# exclude sentences which could not be categorised
train_df = train_df[train_df['gold_label'] != "-"]
test_df = test_df[test_df['gold_label'] != "-"]

train_df['gold_label'].value_counts()

entailment       183416
contradiction    183187
neutral          182764
Name: gold_label, dtype: int64

# Glove

In [5]:
embeddings_dict = get_glove_embedding(path)

# Word embeddings

In [6]:
pad_seq_prem, embedding_matrix_prem, vocab_size_prem = get_embedding_matrix(train_df['premise'], embeddings_dict)

A person on a horse jumps over a broken down airplane.
[1, 56, 5, 1, 197, 191, 68, 1, 1762, 36, 877]
the max sentence length is: 78
18490
[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0  59 132   4 666  16  94]


In [7]:
pad_seq_hyp, embedding_matrix_hyp, vocab_size_hyp = get_embedding_matrix(train_df['hypothesis'], embeddings_dict)

A person is training his horse for a competition.
[1, 27, 3, 821, 16, 162, 20, 1, 275]
the max sentence length is: 56
30904
[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0  22   4  40 705]


In [8]:
enc_gold_label = encode_labels(train_df['gold_label'])

[array(['contradiction', 'entailment', 'neutral'], dtype=object)]
[0. 1. 0.]


# Sentence Embeddings

## Baseline Embeddings(Use sum of word embeddings to get sentence embedding)

In [9]:
sentence_embs = baseline_sum_sentence_embeddings(pad_seq_prem, embedding_matrix_prem)

In [10]:
sentence_embs_hyp = baseline_sum_sentence_embeddings(pad_seq_hyp, embedding_matrix_hyp)

# Model

In [11]:
model = create_model()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 300)]        0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 300)]        0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 100)          30100       input_1[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 100)          30100       input_2[0][0]                    
_______________________________________________________________________________________

# Train

In [12]:
model.compile(optimizer='Adadelta',loss='categorical_crossentropy',metrics = ['accuracy'])

In [13]:
history = model.fit([np.array(sentence_embs), np.array(sentence_embs_hyp)],enc_gold_label,epochs = 100,batch_size=256,validation_split=0.2, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


# Test

In [14]:
pad_seq_prem_test, embedding_matrix_prem_test, vocab_size_prem_test = get_embedding_matrix(test_df['premise'], embeddings_dict)
pad_seq_hyp_test, embedding_matrix_hyp_test, vocab_size_hyp_test = get_embedding_matrix(test_df['hypothesis'], embeddings_dict)

sentence_embs_prem_test = baseline_sum_sentence_embeddings(pad_seq_prem_test, embedding_matrix_prem_test)
sentence_embs_hyp_test = baseline_sum_sentence_embeddings(pad_seq_hyp_test, embedding_matrix_hyp_test)

enc_gold_label_test = encode_labels(test_df['gold_label'])

This church choir sings to the masses as they sing joyous songs from the book at a church.
[281, 668, 779, 600, 12, 3, 2084, 42, 184, 1455, 2085, 2086, 64, 3, 272, 17, 1, 668]
the max sentence length is: 54
4093
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    1   10    9    1   43 1400   22   16    4    1  218
  282 2087]
The church has cracks in the ceiling.
[2, 655, 36, 2754, 6, 2, 2755]
the max sentence length is: 29
5166
[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0

In [15]:
# Evaluate the model on the test data using evaluate
print("Evaluate on test data")
results = model.evaluate([sentence_embs_prem_test, sentence_embs_hyp_test], enc_gold_label_test, batch_size=128)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [0.8876674771308899, 0.5857084393501282]


In [16]:
print("Generate predictions for 3 samples")
predictions = model.predict([sentence_embs_prem_test, sentence_embs_hyp_test])
print("predictions shape:", predictions.shape)

Generate predictions for 3 samples
predictions shape: (9824, 3)


In [17]:
print(predictions[2])
# order: contradiction, entailment, neutral

[0.669341   0.09758869 0.23307024]


In [18]:
print(test_df['premise'][2], test_df['hypothesis'][2], test_df['gold_label'][2])

This church choir sings to the masses as they sing joyous songs from the book at a church. A choir singing at a baseball game. contradiction
