In [13]:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import openai
import re
import time
from scripts.prepare_data_helpers import prepare_examples, create_query
import spacy
import textacy
from dotenv import load_dotenv

# Read key/value pairs from a .env file (if present) into the process environment.
load_dotenv()
# Load the spaCy English pipeline `en_core_web_sm` once for the whole notebook.
nlp = spacy.load('en_core_web_sm')
import re

from sklearn.feature_extraction.text import CountVectorizer
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, MultinomialNB
import xgboost as xgb
import shap

## Prepare Data

In [79]:
# Read the e-SNLI splits; the training data is stored in two CSV files.
train1 = pd.read_csv('../../e-SNLI/dataset/esnli_train_1.csv')
train2 = pd.read_csv('../../e-SNLI/dataset/esnli_train_2.csv')
dev = pd.read_csv('../../e-SNLI/dataset/esnli_dev.csv')
test = pd.read_csv('../../e-SNLI/dataset/esnli_test.csv')

# Stack both training files, discard rows missing either sentence or the
# first explanation, and renumber the rows from zero.
train = (
    pd.concat([train1, train2])
    .dropna(subset=['Sentence1', 'Sentence2', 'Explanation_1'])
    .reset_index(drop=True)
)

# Draw a reproducible sample of 1000 distinct test rows (fixed numpy seed).
np.random.seed(12345)
test_indices = list(np.random.choice(test.index.values, size=1000, replace=False))
test = test.loc[test_indices].reset_index(drop=True)

In [172]:
def _list_repr_to_text(list_repr):
    """Turn a stringified list such as "['a', 'b']" into the plain string "a b".

    Apostrophes and every space are removed, the surrounding square brackets
    are stripped, and the remaining commas are replaced by single spaces.

    Parameters
    ----------
    list_repr : str
        Text representation of a list as stored in the CSV column.

    Returns
    -------
    str
        The list items joined by single spaces.
    """
    return list_repr.replace("'", " ").replace(" ", "").strip("][").replace(",", " ")


gpt_pred = pd.read_csv("../output_data/svo_structure_4.csv", sep=";")
gpt2 = pd.read_csv("../output_data/svo_structure_gpt2.csv", sep=";")

# Transform list to single string

# GPT-3 predictions and the gold explanations are columns of the same frame.
for column in ["reduced_expl", "pos_expl", "reduced_gold_expl", "pos_gold_expl"]:
    gpt_pred[column] = [_list_repr_to_text(value) for value in gpt_pred[column]]

# GPT-2
for column in ["reduced_expl", "pos_expl"]:
    gpt2[column] = [_list_repr_to_text(value) for value in gpt2[column]]

gpt2["pred_label"] = gpt2.labels
gpt2_label_mapping = {0: "contradiction", 1: "neutral", 2: "entailment"}
# Derived from the forward mapping so the two dictionaries cannot drift apart.
inverse_gpt2_label_mapping = {label: code for code, label in gpt2_label_mapping.items()}

In [173]:
# Regular-expression templates, keyed by class name. They are matched against
# lower-cased explanations. All patterns are raw strings so that regex escapes
# such as `\.` are not interpreted as (invalid) Python string escapes.
# "general" stays a pandas Series, as before; the class-specific entries are lists.
original_patterns = {
    "general": pd.Series([
        r"^sentence 1 states .+\. sentence 2 is stating .+$",
        r"^sentence 2 states .+\. sentence 1 is stating .+$",
        r"^there is .+$",
    ]),
    "entailment": [
        r"implies",
        r"^if .+ then .+",
        r"is a rephrasing of",
        r"^in both sentences",
        r"would be",
        r"can also be said as",
        r"is a less specific rephrasing of",
        r"^this clarifies that",
        r"^if .+ it means",
        r"in both sentences\.?$",
        r"in both\.?$",
        r"is same as",
        r"is synonym of",
    ],
    "neutral": [
        r"^just because .+ doesn't mean",
        r"^cannot infer the",
        r"^one cannot assume",
        r"^one cannot infer that",
        r"^cannot assume",
        r"does not mean",
        r"^we don't know that .+",
        r"^the fact that .+ doesn't mean .+",
        r"^the fact that .+ does not imply .+",
        r"^the fact that .+ does not always mean .+",
        r"^the fact that .+ doesn't always imply .+",
    ],
    "contradiction": [
        r"^in sentence 1 .+ while in sentence 2 .+",
        r"^it can either be .+ or .+",
        r"^it cannot be .+ if .+",
        r"and other",
        r"after",
        r"is not the same as",
        r"is contradictory to",
        r"contradicts",
        r"cannot also be",
        r"^either .+ or .+",
        r"^either .+ or .+ not both at the same time",
        r".+ or .+ not both at the same time",
    ],
}

In [174]:
def find_pattern(expl, pattern_for_class, patterns=None):
    """Count how many templates of one class occur in an explanation.

    The explanation is lower-cased once and every regular expression stored
    under ``pattern_for_class`` is searched in it. Each template contributes
    at most 1 to the result, however often it occurs in the text.

    Parameters
    ----------
    expl : str
        The explanation text to inspect.
    pattern_for_class : str
        Key selecting the template collection (e.g. ``"entailment"``).
    patterns : mapping, optional
        Mapping from class key to an iterable of regex strings. Defaults to
        the notebook-level ``original_patterns``.

    Returns
    -------
    int
        Number of templates with at least one match (0 if none match).

    Raises
    ------
    KeyError
        If ``pattern_for_class`` is not a key of the pattern mapping.
    """
    if patterns is None:
        patterns = original_patterns
    lowered = expl.lower()
    # re.search stops at the first hit; only presence matters, not the number of hits.
    return sum(1 for template in patterns[pattern_for_class] if re.search(template, lowered) is not None)

## GPT-3

In [186]:
# For each class, take the GPT-3 explanations whose predicted label is that
# class and report the mean of find_pattern over them. find_pattern returns a
# count of matching templates, so this is an average count, not a proportion.
for cur_class in ("contradiction", "entailment", "neutral"):
    print(f"Explanations with template in class {cur_class}")
    template_hits = gpt_pred.loc[gpt_pred.pred_label == cur_class, "pred_explanation"].apply(
        find_pattern, pattern_for_class=cur_class
    )
    print(template_hits.mean())

Explanations with template in class contradiction
0.07954545454545454
Explanations with template in class entailment
0.39544513457556935
Explanations with template in class neutral
0.16463414634146342


In [176]:
# Average number of matches per contradiction template over all GPT-3
# explanations (one row per explanation, one column per template).
np.array([
    [len(re.findall(template, explanation.lower())) for template in original_patterns["contradiction"]]
    for explanation in gpt_pred.pred_explanation
]).mean(axis=0)

array([0.   , 0.   , 0.   , 0.   , 0.007, 0.018, 0.   , 0.008, 0.   ,
       0.   , 0.   , 0.002])

In [177]:
# Show the contradiction template stored at index 6.
# NOTE(review): in the recorded frequencies of the preceding cell the largest
# value sits at index 5, not 6 — confirm which template this lookup should show.
original_patterns["contradiction"][6]

'is contradictory to'

In [188]:
# Average number of matches per entailment template, computed only over the
# GPT-3 explanations whose predicted label is "entailment".
np.array([
    [len(re.findall(template, explanation.lower())) for template in original_patterns["entailment"]]
    for explanation in gpt_pred.pred_explanation[gpt_pred.pred_label == "entailment"]
]).mean(axis=0)

array([0.35403727, 0.02277433, 0.01242236, 0.        , 0.00828157,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        ])

In [179]:
# Show the entailment template stored at index 0.
original_patterns["entailment"][0]

'implies'

In [183]:
# Show the entailment template stored at index 1.
original_patterns["entailment"][1]

'^if .+ then .+'

In [180]:
# Average number of matches per neutral template over all GPT-3 explanations
# (one row per explanation, one column per template).
np.array([
    [len(re.findall(template, explanation.lower())) for template in original_patterns["neutral"]]
    for explanation in gpt_pred.pred_explanation
]).mean(axis=0)

array([0.01 , 0.   , 0.   , 0.   , 0.   , 0.018, 0.   , 0.   , 0.   ,
       0.   , 0.   ])

In [181]:
# Show the neutral template stored at index 5.
original_patterns["neutral"][5]

'does not mean'

## GPT-2

In [182]:
# For each class, take the GPT-2 generations whose numeric label maps to that
# class and report the mean of find_pattern over them. find_pattern returns a
# count of matching templates, so this is an average count, not a proportion.
for cur_class in ("contradiction", "entailment", "neutral"):
    print(f"Explanations with template in class {cur_class}")
    template_hits = gpt2.loc[gpt2.labels == inverse_gpt2_label_mapping[cur_class], "generated"].apply(
        find_pattern, pattern_for_class=cur_class
    )
    print(template_hits.mean())

Explanations with template in class contradiction
0.03867403314917127
Explanations with template in class entailment
0.1557632398753894
Explanations with template in class neutral
0.2082018927444795


In [148]:
# Average number of matches per contradiction template over all GPT-2
# generations (one row per generation, one column per template).
np.array([
    [len(re.findall(template, generation.lower())) for template in original_patterns["contradiction"]]
    for generation in gpt2.generated
]).mean(axis=0)

array([0.   , 0.   , 0.   , 0.   , 0.001, 0.006, 0.018, 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   ])

In [149]:
# Show the contradiction template stored at index 6.
# NOTE(review): the recorded output of this cell ('is not the same as') is
# index 5 of the list as currently defined, and the preceding cell's recorded
# array has 13 entries while the list holds 12 templates — the saved outputs
# appear to come from an earlier version of the list; re-run to confirm.
original_patterns["contradiction"][6]

'is not the same as'

In [152]:
# Average number of matches per entailment template over all GPT-2
# generations (one row per generation, one column per template).
np.array([
    [len(re.findall(template, generation.lower())) for template in original_patterns["entailment"]]
    for generation in gpt2.generated
]).mean(axis=0)

array([0.02 , 0.01 , 0.022, 0.   , 0.001, 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.005, 0.   ])

In [153]:
# Show the entailment template stored at index 2.
original_patterns["entailment"][2]

'is a rephrasing of'

In [155]:
# Average number of matches per neutral template over all GPT-2 generations
# (one row per generation, one column per template).
np.array([
    [len(re.findall(template, generation.lower())) for template in original_patterns["neutral"]]
    for generation in gpt2.generated
]).mean(axis=0)

array([0.02 , 0.   , 0.   , 0.   , 0.   , 0.062, 0.   , 0.   , 0.   ,
       0.   , 0.   ])

In [157]:
# Show the neutral template stored at index 5.
original_patterns["neutral"][5]

'does not mean'

## Gold Explanations

In [72]:
# For each class, take the first gold explanation of the dev rows carrying
# that gold label and report the mean of find_pattern over them. find_pattern
# returns a count of matching templates, so this is an average count, not a
# proportion.
for cur_class in ("contradiction", "entailment", "neutral"):
    print(f"Explanations with template in class {cur_class}")
    template_hits = dev.loc[dev.gold_label == cur_class, "Explanation_1"].apply(
        find_pattern, pattern_for_class=cur_class
    )
    print(template_hits.mean())

Explanations with template in class contradiction
0.06070774862721171
Explanations with template in class entailment
0.17452688495043556
Explanations with template in class neutral
0.1795981452859351


In [165]:
# Average number of matches per contradiction template over the Explanation_1
# column of gpt_pred (one row per explanation, one column per template).
np.array([
    [len(re.findall(template, explanation.lower())) for template in original_patterns["contradiction"]]
    for explanation in gpt_pred.Explanation_1
]).mean(axis=0)

array([0.   , 0.   , 0.   , 0.001, 0.004, 0.002, 0.001, 0.   , 0.003,
       0.008, 0.   , 0.002])

In [166]:
# Show the contradiction template stored at index 9.
original_patterns["contradiction"][9]

'^either .+ or .+'

In [167]:
# Average number of matches per entailment template over the Explanation_1
# column of gpt_pred (one row per explanation, one column per template).
np.array([
    [len(re.findall(template, explanation.lower())) for template in original_patterns["entailment"]]
    for explanation in gpt_pred.Explanation_1
]).mean(axis=0)

array([0.036, 0.013, 0.016, 0.   , 0.007, 0.   , 0.   , 0.   , 0.001,
       0.001, 0.001, 0.   , 0.   ])

In [168]:
# Show the entailment template stored at index 0.
original_patterns["entailment"][0]

'implies'

In [169]:
# Average number of matches per neutral template over the Explanation_1
# column of gpt_pred (one row per explanation, one column per template).
np.array([
    [len(re.findall(template, explanation.lower())) for template in original_patterns["neutral"]]
    for explanation in gpt_pred.Explanation_1
]).mean(axis=0)

array([0.016, 0.   , 0.   , 0.   , 0.   , 0.026, 0.   , 0.   , 0.   ,
       0.   , 0.   ])

In [170]:
# Show the neutral template stored at index 5.
original_patterns["neutral"][5]

'does not mean'