In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import json
import random
from string import punctuation

import inflect
import numpy as np
import pandas as pd
import spacy

# NOTE: this rebinds the name `inflect` from the imported module to the object
# returned by `inflect.engine()`; after this line the module itself is no
# longer reachable under that name.
inflect = inflect.engine()

# Relative directory prefix that is prepended to the SNLI and pronoun-dictionary
# file names in the cells below.
data_path = "./../../data/"

In [4]:
# Load the 'en_coref_lg' spaCy pipeline once and bind the same object to both
# `nlp_lg` and the shorter alias `nlp`.
nlp = nlp_lg = spacy.load('en_coref_lg')

### SNLI

In [5]:
def _load_snli_pairs(tsv_path):
    """Read a tab-separated SNLI file and keep the gold label plus both sentences."""
    full_table = pd.read_csv(tsv_path, delimiter='\t', encoding='utf-8')
    return full_table[['gold_label', 'sentence1', 'sentence2']]


# Locations of the SNLI 1.0 train and dev files under `data_path`.
snli_train_tsv = data_path + 'snli/snli_1.0_train.txt'
snli_dev_tsv = data_path + 'snli/snli_1.0_dev.txt'

# Three-column frames: gold_label, sentence1, sentence2.
snli_train_data = _load_snli_pairs(snli_train_tsv)
snli_dev_data = _load_snli_pairs(snli_dev_tsv)

In [6]:
# Boolean masks selecting the rows whose gold label is exactly 'entailment'.
dev_is_entailment = snli_dev_data['gold_label'] == 'entailment'
train_is_entailment = snli_train_data['gold_label'] == 'entailment'

# Entailment-only subsets of each split.
snli_dev_data_entail = snli_dev_data.loc[dev_is_entailment]
snli_train_data_entail = snli_train_data.loc[train_is_entailment]

In [88]:
# Load the pronoun dictionary, which is stored as JSON in a file without an
# extension. The encoding is pinned to UTF-8 (as for the SNLI files read with
# `read_csv`) so decoding does not depend on the platform's default encoding.
with open(data_path + "pronoun_dict", 'r', encoding='utf-8') as f:
    pronoun_dict = json.load(f)

In [99]:
import sys
# Extend the module search path with the parent directory before importing
# the project module below (presumably where snli_parser.py lives — confirm).
sys.path.append("./../")

from snli_parser import SNLI_PARSER

# Build the augmenter from the loaded spaCy pipeline and the pronoun dictionary.
# NOTE: the instance is named `snli_parser`, the same name as the module it
# was imported from.
snli_parser = SNLI_PARSER(nlp_lg, pronoun_dict)

In [145]:
# Display the (rows, columns) shape of the dev-set entailment subset.
snli_dev_data_entail.shape

(3329, 3)

In [101]:
# Integer labels written into the augmented data set.
ENTAILMENT = 1
NOT_ENTAILMENT = 0

# Three parallel columns of the output frame, filled row by row below.
sentence1_list, sentence2_list, label_list = [], [], []
counter = 0

# Run the augmenter over every dev-set entailment pair. `counter` is the
# 1-based number of rows processed so far and is printed every 100 rows.
for counter, (ind, row) in enumerate(snli_dev_data_entail.iterrows(), start=1):
    result = snli_parser.augment(row['sentence1'], row['sentence2'])

    # `augment` returns None when it produces nothing for this pair.
    if result is not None:
        new_sent1, true_example, false_examples = result
        negatives = len(false_examples)

        # The rewritten premise is shared by the positive example and by
        # every negative example derived from it.
        sentence1_list.extend([new_sent1] * (1 + negatives))

        # Positive hypothesis first, then the negative ones, in order.
        sentence2_list.append(true_example)
        sentence2_list.extend(false_examples)

        label_list.append(ENTAILMENT)
        label_list.extend([NOT_ENTAILMENT] * negatives)

    if not counter % 100:
        print(counter)

# Assemble the augmented examples into a three-column frame.
snli_dev_df = pd.DataFrame(
    list(zip(sentence1_list, sentence2_list, label_list)),
    columns=['sentence1', 'sentence2', 'label'],
)

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300


### Try our WNLI scheme with the coref model on the augmented SNLI dev set


In [112]:
# `spacy` and `sys` are already imported by earlier cells; importing them
# again here is a no-op for an already-loaded module.
import spacy
import sys
# NOTE: an earlier cell already appended this same path, so this adds a
# second identical entry to sys.path.
sys.path.append("./../")
from wnli import WNLI

In [131]:
# Label encoding for the evaluation (same values as in the augmentation cell).
ENTAILMENT, NOT_ENTAILMENT = 1, 0
# Label passed to WNLI as its `majority` argument.
MAJORITY = ENTAILMENT

In [134]:
# Per-column non-null counts, first for the entailment examples (label 1),
# then for the non-entailment examples (label 0).
for label_value in (ENTAILMENT, NOT_ENTAILMENT):
    print(snli_dev_df[snli_dev_df['label'] == label_value].count())

sentence1    2507
sentence2    2507
label        2507
dtype: int64
sentence1    338
sentence2    338
label        338
dtype: int64


In [123]:
# Select the augmented example(s) whose hypothesis is exactly this sentence.
# NOTE(review): this expression is neither assigned nor the last expression in
# the cell, so its result is not displayed and not kept.
snli_dev_df[(snli_dev_df['sentence2'] == 'They got ready to cut the cake')]

# NOTE(review): 1221 is a hard-coded row position (presumably read off the
# lookup above — confirm); it will point at a different example whenever the
# augmentation output changes. This also rebinds the loop variable `row`.
row = snli_dev_df.iloc[1221]
print(row['sentence1'])
print(row['sentence2'])

People getting ready to cut a birthday cake. They got ready to cut the cake
They got ready to cut the cake


In [141]:
def _run_wnli(use_coref):
    """Build a WNLI model over `snli_dev_df`, predict, and score the predictions.

    Returns the model, its predicted labels and the resulting score, in that order.
    """
    model = WNLI(nlp_lg, snli_dev_df, majority=MAJORITY, use_coref=use_coref, debug=False)
    predicted = model.predict()
    return model, predicted, model.score(predicted)


# Run without the coref model first, then with it (same order as before).
snli_dev_majority, snli_dev_majority_labels, snli_dev_majority_score = _run_wnli(use_coref=False)
snli_dev, snli_dev_labels, snli_dev_score = _run_wnli(use_coref=True)

Could not use coref model for 789/2845 examples


In [144]:
# Score of the run without the coref model, then of the run with it, one per line.
print(snli_dev_majority_score, snli_dev_score, sep='\n')

0.8811950790861159
0.7065026362038664


In [147]:
# Print a random sample of the augmented examples for manual inspection.
# Each row is kept independently with probability SAMPLE_RATE. A dedicated,
# seeded generator makes the printed sample identical on every run and
# leaves the global `random` state untouched.
SAMPLE_RATE = 0.05
SAMPLE_SEED = 0
sample_rng = random.Random(SAMPLE_SEED)

for ind, row in snli_dev_df.iterrows():
    if sample_rng.random() < SAMPLE_RATE:
        print(row['sentence1'])
        print(row['sentence2'])
        print(row['label'])


A woman is doing a cartwheel while wearing a bikini in the sand next to the beach. She is doing a cartwheel.
A woman is doing a cartwheel.
1
A person powerwashing stairs leading down to a pool. She washes stairs
A person washes stairs
1
A brown a dog and a black dog in the edge of the ocean with a wave under them boats are on the water in the background. They are swimming among the boats.
The dogs are swimming among the boats.
1
A young child is jumping into the arms of a woman wearing a black swimming suit while in a pool. He leaping into arms of a woman wearing swimsuit in a pool.
Child leaping into arms of a woman wearing swimsuit in a pool.
1
A guy waxing a wooden floor with wax. He is waxing
A guy is waxing
1
Two children playing on the floor with toy trains. They played on the floor.
The children played on the floor.
1
Three men in a science lab watch one of them hold a burning object, one man on the side is reaching for the ground. They are in a laboratory doing a scientific stu

1
A man is scrapping paint off a window. He is removing paint.
A man is removing paint.
1
A group of young soccer players run down the field after the ball. It of people play soccer.
Field of people play soccer.
0
A street vendor in chef's clothing with a food cart, which has gathered people around it. She is outside.
A street vendor is outside.
1
A young boy wearing crocs tries to hit yellow golf balls into a large lawn overlooking a town. He was hitting golf balls.
A kid was hitting golf balls.
1
His back facing us, a young man photographs a lovely mountain lake scene under a hazy sky. He photographs a mountain lake scene.
A young man photographs a mountain lake scene.
1
A man in a blue shirt leans on a wall beside a road with a blue van and red car with water in the background. He is wearing a blue shirt.
The man is wearing a blue shirt.
1
A child poses his head through a hole in a painted wall. He is putting his head through a hole in the wall.
The child is putting his head through

### MNLI

### RTE (Entailment)

### 