#### installs

In [4]:
# import sys
# !conda install --yes --prefix {sys.prefix} tqdm
# !conda install --yes --prefix {sys.prefix} tensorflow

Collecting package metadata: ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: F:\Anaconda3

  added / updated specs:
    - tqdm


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.1.23  |                0         158 KB
    certifi-2019.3.9           |           py36_0         156 KB
    cryptography-2.6.1         |   py36h7a1dbc1_0         563 KB
    kiwisolver-1.0.1           |   py36h6538335_0          61 KB
    krb5-1.16.1                |       hc04afaa_7         819 KB
    libcurl-7.64.0             |       h2a8f88b_2         283 KB
    libpng-1.6.36              |       h2a8f88b_0         550 KB
    openssl-1.1.1b             |       he774522_1         5.7 MB
    pycurl-7.43.0.2            |   py36h7a1dbc1_0         182 KB
    pyqt-5.9.2                 |   py36h6538335_2         4.2 MB
    qt-5.9.7         

Collecting package metadata: ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [None]:
# import sys
# !conda install --yes --prefix {sys.prefix} ipdb

#### imports

In [1]:
from tqdm import tqdm

In [34]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import urllib
import sys
import os
import zipfile
from scipy import stats

#### Download Glove word embeddings

In [3]:
glove_zip_file = "data/glove.6B.zip"
glove_vectors_file = "data/glove.6B.50d.txt"
import zipfile, urllib.request, shutil, os
    
# #large file - 862 MB
# if (not os.path.isfile(glove_zip_file) and
#     not os.path.isfile(glove_vectors_file)):
#     with urllib.request.urlopen("http://nlp.stanford.edu/data/glove.6B.zip") as response, open(glove_zip_file, 'wb') as out_file:
#         shutil.copyfileobj(response, out_file)

Unzip word embeddings

In [4]:
def unzip_single_file(zip_file_name, output_file_name):
    """
        If the outFile is already created, don't recreate
        If the outFile does not exist, create it from the zipFile
    """
    if not os.path.isfile(output_file_name):
        with open(output_file_name, 'wb') as out_file:
            with zipfile.ZipFile(zip_file_name) as zipped:
                for info in zipped.infolist():
                    if output_file_name in info.filename:
                        with zipped.open(info) as requested_file:
                            out_file.write(requested_file.read())
                            return

# unzip_single_file(glove_zip_file, glove_vectors_file)

In [7]:
glove_wordmap = {}
with open(glove_vectors_file, "r", encoding="utf8") as glove:
    for line in glove:
        name, vector = tuple(line.split(" ", 1))
        glove_wordmap[name] = np.fromstring(vector, sep=" ")

In [9]:
# print(glove_wordmap)

In [10]:
def sentence2sequence(sentence):
    """
     
    - Turns an input sentence into an (n,d) matrix, 
        where n is the number of tokens in the sentence
        and d is the number of dimensions each word vector has.
    
      Tensorflow doesn't need to be used here, as simply
      turning the sentence into a sequence based off our 
      mapping does not need the computational power that
      Tensorflow provides. Normal Python suffices for this task.
    """
    tokens = sentence.lower().split(" ")
    rows = []
    words = []
    #Greedy search for tokens
    for token in tokens:
        i = len(token)
        while len(token) > 0 and i > 0:
            word = token[:i]
            if word in glove_wordmap:
                rows.append(glove_wordmap[word])
                words.append(word)
                token = token[i:]
                i = len(token)
            else:
                i = i-1
    return rows, words

#### params

In [11]:



display_step = 10

def score_setup(row):
    convert_dict = {
      'ENTAILMENT': 0,
      'NEUTRAL': 1,
      'CONTRADICTION': 2
    }
    score = np.zeros((3,))
    tag = row["entailment_judgment"]
    score[convert_dict[tag]] += 1
    return score

def fit_to_size(matrix, shape):
    res = np.zeros(shape)
    slices = [slice(0,min(dim,shape[e])) for e, dim in enumerate(matrix.shape)]
    res[slices] = matrix[slices]
    return res

In [54]:
#Constants setup
max_hypothesis_length, max_evidence_length = 30, 30
batch_size, vector_size, hidden_size = 512, 100, 128

training_iterations_count = 100000

lstm_size = hidden_size

weight_decay = 0.0005

learning_rate = 0.01

input_p, output_p = 0.6, 0.3

[Training data](http://www.site.uottawa.ca/~diana/csi5386/A2_2019/SICK_train.txt)

In [38]:
import numpy as np

def split_data_into_scores():
    import csv
    with open("data/training.txt","r") as data:
        train = csv.DictReader(data , delimiter='\t')
        evi_sentences = []
        hyp_sentences = []
        labels = []
        scores = []
        count = 1
        for row in train:
            hyp_sentences.append(np.vstack(
                    sentence2sequence(row["sentence_A"].lower())[0]))
            evi_sentences.append(np.vstack(
                    sentence2sequence(row["sentence_B"].lower())[0]))
            labels.append(row["entailment_judgment"])
            scores.append(float(row["relatedness_score"]))
        
        hyp_sentences = np.stack([fit_to_size(x, (max_hypothesis_length, vector_size))
                          for x in hyp_sentences])
        evi_sentences = np.stack([fit_to_size(x, (max_evidence_length, vector_size))
                          for x in evi_sentences])
                                 
        return (hyp_sentences, evi_sentences), labels, np.reshape(np.array(scores), (len(scores),1))
data_feature_list, correct_values, correct_scores = split_data_into_scores()

l_h, l_e = max_hypothesis_length, max_evidence_length
N, D, H = batch_size, vector_size, hidden_size
l_seq = l_h + l_e



### Cell used for development

In [None]:
def create_model():
    tf.reset_default_graph() 
    data_feature_list, correct_values, correct_scores = split_data_into_scores()

    l_h, l_e = max_hypothesis_length, max_evidence_length
    N, D, H = batch_size, vector_size, hidden_size
    l_seq = l_h + l_e
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    lstm_drop =  tf.contrib.rnn.DropoutWrapper(lstm, input_p, output_p)
    hyp = tf.placeholder(tf.float32, [N, l_h, D], 'hypothesis')
    evi = tf.placeholder(tf.float32, [N, l_e, D], 'evidence')
    y = tf.placeholder(tf.float32, [N, 3], 'label')
    lstm_back = tf.contrib.rnn.BasicLSTMCell(lstm_size)

    lstm_drop_back = tf.contrib.rnn.DropoutWrapper(lstm_back, input_p, output_p)

    fc_initializer = tf.random_normal_initializer(stddev=0.1) 
    fc_weight = tf.get_variable('fc_weight', [2*hidden_size, 3], 
                            initializer = fc_initializer)
    fc_bias = tf.get_variable('bias', [3])
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, 
                     tf.nn.l2_loss(fc_weight)) 

    x = tf.concat([hyp, evi], 1) # N, (Lh+Le), d
    x = tf.transpose(x, [1, 0, 2]) # (Le+Lh), N, d
    x = tf.reshape(x, [-1, vector_size]) # (Le+Lh)*N, d
    x = tf.split(x, l_seq,)
    rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm, lstm_back,
                                                            x, dtype=tf.float32)

    classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias

    with tf.variable_scope('Accuracy'):
        predicts = tf.cast(tf.argmax(classification_scores, 1), 'int32')
        y_label = tf.cast(tf.argmax(y, 1), 'int32')
        corrects = tf.equal(predicts, y_label)
        num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
        accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

    with tf.variable_scope("loss"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits = classification_scores, labels = y)
        loss = tf.reduce_mean(cross_entropy)
        total_loss = loss + weight_decay * tf.add_n(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)

    opt_op = optimizer.minimize(total_loss)
    # Initialize variables
    init = tf.global_variables_initializer()

    # Use TQDM if installed
    tqdm_installed = False
    try:
        from tqdm import tqdm
        tqdm_installed = True
    except:
        pass

    # Launch the Tensorflow session
    sess = tf.Session()
    sess.run(init)

    # training_iterations_count: The number of data pieces to train on in total
    # batch_size: The number of data pieces per batch
    training_iterations = range(0,training_iterations_count,batch_size)
    if tqdm_installed:
        # Add a progress bar if TQDM is installed
        training_iterations = tqdm(training_iterations)

    for i in training_iterations:
        if i % 1000 == 0:
        # Select indices for a random data subset
            batch = np.random.randint(data_feature_list[0].shape[0], size=batch_size)

        # Use the selected subset indices to initialize the graph's 
        #   placeholder values
        hyps, evis, ys = (data_feature_list[0][batch,:],
                          data_feature_list[1][batch,:],
                          correct_scores[batch])

        # Run the optimization with these initialized values
        sess.run([opt_op], feed_dict={hyp: hyps, evi: evis, y: ys})
        # display_step: how often the accuracy and loss should 
        #   be tested and displayed.
        if (i/batch_size) % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={hyp: hyps, evi: evis, y: ys})
            # Calculate batch loss
            tmp_loss = sess.run(loss, feed_dict={hyp: hyps, evi: evis, y: ys})
            # Display results
            print("Iter " + str(i/batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(tmp_loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))

    data_feature_list, correct_values, correct_scores = split_test_data_into_scores("data/dev.txt")


    hyps, evis, ys = (data_feature_list[0][:],
                      data_feature_list[1][:],
                      correct_scores)
    predictions = sess.run(classification_scores, feed_dict={hyp: hyps, evi: evis, y: ys})
    total = len(predictions)
    correct_predictions = 0
    for i,prediction in enumerate(predictions):
        if np.argmax(prediction[0])==np.argmax(ys[i]):
            correct_predictions += 1
    #     print(["Positive", "Neutral", "Negative"][np.argmax(prediction[0])]+
    #       " / " + ["Positive", "Neutral", "Negative"][np.argmax(ys[i])])
    acc = correct_predictions*100/total
    print("Acc: %s" % str(acc))
    return acc

In [None]:
import numpy as np
accs = [] # 128
batches = [128, 64, 32, 1, 256]
for batch in batches:
    batch_size = batch
    accs.append(create_model())

print(accs)
print("Best batch size %s" % str(batches[np.argmax(accs)]))


### TRAINING

In [55]:
tf.reset_default_graph()

In [56]:
lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
lstm_drop =  tf.contrib.rnn.DropoutWrapper(lstm, input_p, output_p)

In [57]:
# N: The number of elements in each of our batches, 
#   which we use to train subsets of data for efficiency's sake.
# l_h: The maximum length of a hypothesis, or the second sentence.  This is
#   used because training an RNN is extraordinarily difficult without 
#   rolling it out to a fixed length.
# l_e: The maximum length of evidence, the first sentence.  This is used
#   because training an RNN is extraordinarily difficult without 
#   rolling it out to a fixed length.
# D: The size of our used GloVe or other vectors.
hyp = tf.placeholder(tf.float32, [None, l_h, D], 'hypothesis')
evi = tf.placeholder(tf.float32, [None, l_e, D], 'evidence')
y = tf.placeholder(tf.float32, [None, 1], 'label')
# hyp: Where the hypotheses will be stored during training.
# evi: Where the evidences will be stored during training.
# y: Where correct scores will be stored during training.

# lstm_size: the size of the gates in the LSTM, 
#    as in the first LSTM layer's initialization.
# lstm_back = tf.contrib.rnn.BasicLSTMCell(lstm_size)
lstm_back = tf.contrib.rnn.BasicLSTMCell(lstm_size)

# lstm_back:  The LSTM used for looking backwards 
#   through the sentences, similar to lstm.

# input_p: the probability that inputs to the LSTM will be retained at each
#   iteration of dropout.
# output_p: the probability that outputs from the LSTM will be retained at 
#   each iteration of dropout.
lstm_drop_back = tf.contrib.rnn.DropoutWrapper(lstm_back, input_p, output_p)
# lstm_drop_back:  A dropout wrapper for lstm_back, like lstm_drop.


fc_initializer = tf.random_normal_initializer(stddev=0.1) 
# fc_initializer: initial values for the fully connected layer's weights.
# hidden_size: the size of the outputs from each lstm layer.  
#   Multiplied by 2 to account for the two LSTMs.
fc_weight = tf.get_variable('fc_weight', [2*hidden_size, 1], 
                            initializer = fc_initializer)
# fc_weight: Storage for the fully connected layer's weights.
fc_bias = tf.get_variable('bias', [1])
# fc_bias: Storage for the fully connected layer's bias.

# tf.GraphKeys.REGULARIZATION_LOSSES:  A key to a collection in the graph
#   designated for losses due to regularization.
#   In this case, this portion of loss is regularization on the weights
#   for the fully connected layer.
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, 
                     tf.nn.l2_loss(fc_weight)) 

x = tf.concat([hyp, evi], 1) # N, (Lh+Le), d
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2]) # (Le+Lh), N, d
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, vector_size]) # (Le+Lh)*N, d
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(x, l_seq,)

# x: the inputs to the bidirectional_rnn


# tf.contrib.rnn.static_bidirectional_rnn: Runs the input through
#   two recurrent networks, one that runs the inputs forward and one
#   that runs the inputs in reversed order, combining the outputs.
rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm, lstm_back,
                                                            x, dtype=tf.float32)
# rnn_outputs: the list of LSTM outputs, as a list. 
#   What we want is the latest output, rnn_outputs[-1]

classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias
# The scores are relative certainties for how likely the output matches
#   a certain entailment: 
#     0: Positive entailment
#     1: Neutral entailment
#     2: Negative entailment

In [58]:
with tf.variable_scope('Accuracy'):
    predicts = tf.cast(tf.argmax(classification_scores, 1), 'int32')
    y_label = tf.cast(tf.argmax(y, 1), 'int32')
    corrects = tf.equal(predicts, y_label)
    num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
    accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

with tf.variable_scope("loss"):
#     y = np.reshape(y, (len(y),1))
    loss = tf.losses.mean_squared_error(labels = y, predictions =classification_scores)
    total_loss = loss + weight_decay * tf.add_n(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

optimizer = tf.train.GradientDescentOptimizer(learning_rate)

opt_op = optimizer.minimize(total_loss)

In [59]:
# Initialize variables
init = tf.global_variables_initializer()

# Use TQDM if installed
# tqdm_installed = False
# try:
#     from tqdm import tqdm
#     tqdm_installed = True
# except:
#     pass

from tqdm import tqdm

# Launch the Tensorflow session
sess = tf.Session()
sess.run(init)

# training_iterations_count: The number of data pieces to train on in total
# batch_size: The number of data pieces per batch
training_iterations = range(0,training_iterations_count,batch_size)
# if tqdm_installed:
    # Add a progress bar if TQDM is installed
training_iterations = tqdm(training_iterations)
for j in range(25):
    print(j)
    for i in training_iterations:
        if i % 1000 == 0:
        # Select indices for a random data subset
            batch = np.random.randint(data_feature_list[0].shape[0], size=batch_size)

        # Use the selected subset indices to initialize the graph's 
        #   placeholder values
        hyps, evis, ys = (data_feature_list[0][batch,:],
                          data_feature_list[1][batch,:],
                          correct_scores[batch])

        # Run the optimization with these initialized values
        sess.run([opt_op], feed_dict={hyp: hyps, evi: evis, y: ys})
        # display_step: how often the accuracy and loss should 
        #   be tested and displayed.
    if (j) % display_step == 0:
        # Calculate batch accuracy
        acc = sess.run(accuracy, feed_dict={hyp: hyps, evi: evis, y: ys})
        # Calculate batch loss
        tmp_loss = sess.run(loss, feed_dict={hyp: hyps, evi: evis, y: ys})
        # Display results
        print("Iter " + str(j) + ", Minibatch Loss= " + \
              "{:.6f}".format(tmp_loss) + ", Training Accuracy= " + \
              "{:.5f}".format(acc))


  0%|                                                                                                                                                                                   | 0/196 [00:00<?, ?it/s]

0



  1%|▊                                                                                                                                                                          | 1/196 [00:01<04:04,  1.26s/it]
  1%|█▋                                                                                                                                                                         | 2/196 [00:01<03:19,  1.03s/it]
  2%|██▌                                                                                                                                                                        | 3/196 [00:02<02:49,  1.14it/s]
  2%|███▍                                                                                                                                                                       | 4/196 [00:02<02:28,  1.29it/s]
  3%|████▎                                                                                                                                                         

 20%|██████████████████████████████████▋                                                                                                                                       | 40/196 [00:22<01:27,  1.77it/s]
 21%|███████████████████████████████████▌                                                                                                                                      | 41/196 [00:23<01:27,  1.78it/s]
 21%|████████████████████████████████████▍                                                                                                                                     | 42/196 [00:23<01:26,  1.78it/s]
 22%|█████████████████████████████████████▎                                                                                                                                    | 43/196 [00:24<01:25,  1.79it/s]
 22%|██████████████████████████████████████▏                                                                                                                        

 40%|████████████████████████████████████████████████████████████████████▌                                                                                                     | 79/196 [00:46<01:08,  1.71it/s]
 41%|█████████████████████████████████████████████████████████████████████▍                                                                                                    | 80/196 [00:46<01:07,  1.71it/s]
 41%|██████████████████████████████████████████████████████████████████████▎                                                                                                   | 81/196 [00:47<01:06,  1.72it/s]
 42%|███████████████████████████████████████████████████████████████████████                                                                                                   | 82/196 [00:47<01:06,  1.71it/s]
 42%|███████████████████████████████████████████████████████████████████████▉                                                                                       

 60%|█████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                   | 118/196 [01:09<00:48,  1.61it/s]
 61%|██████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                  | 119/196 [01:10<00:47,  1.64it/s]
 61%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                 | 120/196 [01:10<00:46,  1.64it/s]
 62%|████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                | 121/196 [01:11<00:45,  1.65it/s]
 62%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                     

 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                 | 157/196 [01:33<00:24,  1.61it/s]
 81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                | 158/196 [01:34<00:24,  1.58it/s]
 81%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                | 159/196 [01:34<00:23,  1.57it/s]
 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                               | 160/196 [01:35<00:22,  1.58it/s]
 82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                    

Iter 0, Minibatch Loss= 0.997922, Training Accuracy= 1.00000
1
2
3
4
5
6
7
8
9
10
Iter 10, Minibatch Loss= 0.976258, Training Accuracy= 1.00000
11
12
13
14
15
16
17
18
19
20
Iter 20, Minibatch Loss= 0.583423, Training Accuracy= 1.00000
21
22
23
24


In [49]:
import numpy as np

def split_test_data_into_scores(file_path):
    import csv
    with open(file_path,"r") as data:
        train = csv.DictReader(data , delimiter='\t')
        evi_sentences = []
        hyp_sentences = []
        labels = []
        scores = []
        count = 1
        for row in train:
            hyp_sentences.append(np.vstack(
                    sentence2sequence(row["sentence_A"].lower())[0]))
            evi_sentences.append(np.vstack(
                    sentence2sequence(row["sentence_B"].lower())[0]))
            labels.append(row["entailment_judgment"])
            scores.append(float(row["relatedness_score"]))
        
        hyp_sentences = np.stack([fit_to_size(x, (max_hypothesis_length, vector_size))
                          for x in hyp_sentences])
        evi_sentences = np.stack([fit_to_size(x, (max_evidence_length, vector_size))
                          for x in evi_sentences])
                                 
        return (hyp_sentences, evi_sentences), labels, np.reshape(np.array(scores), (len(scores),1))

In [60]:
evidences = ["People wearing costumes are gathering in a forest and are looking in the same direction"]
hypotheses = ["Masked people are looking in the same direction in a forest"
]



sentence1 = [fit_to_size(np.vstack(sentence2sequence(evidence)[0]),
                         (30, 50)) for evidence in evidences]

sentence2 = [fit_to_size(np.vstack(sentence2sequence(hypothesis)[0]),
                         (30,50)) for hypothesis in hypotheses]



data_feature_list, correct_values, correct_scores = split_test_data_into_scores("data/test_labeled.txt")




hyps, evis, ys = (data_feature_list[0],
                  data_feature_list[1],
                  correct_scores)
predictions = sess.run(classification_scores, feed_dict={hyp: hyps, evi: evis, y: [[0]]})

print(stats.pearsonr(ys, predictions))



(array([0.58004148]), array([0.]))


In [61]:
with open("Results.txt", "r") as f:
    with open("results-0.58.txt", "w") as o:
        for i,line in enumerate(f.readlines()):
            a = line[:-1]
            o.write("%s\t%s\n" % (a, str(predictions[i][0])))
#             f.write("%s\t%s\n" % (str(IDS[i]), prediction[i]))



In [62]:
sess.close()

In [139]:
import numpy as np

def split_data(file_path):
    import csv
    with open(file_path,"r") as data:
        train = csv.DictReader(data , delimiter='\t')
        evi_sentences = []
        hyp_sentences = []
        labels = []
        scores = []
        count = 1
        for row in train:
            labels.append(row["entailment_judgment"])                          

#             labels.append((row["pair_ID"],row["entailment_judgment"]))                          
        return  labels

gold = split_data("data/test_labeled.txt")

with open("Results.txt", "r") as f:
    labels = []
    rows=[]
    for i,line in enumerate(f.readlines()):
        rows.append(line)
        labels.append(line.split("\t")[1])
    labels = labels[1:]
# print(gold[:10])
# print(labels[:10])
# print(rows[:10])
# print(labels[3635])
print(sum([1 for x,y in zip(labels,gold) if x==y])/len(gold))

0.5902171706921048
