# Setup

## Load Packages

In [1]:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import openai
import re
import time
from scripts.prepare_data_helpers import prepare_examples, create_query
import spacy
import textacy
from dotenv import load_dotenv

load_dotenv()
nlp = spacy.load('en_core_web_sm')

  from .autonotebook import tqdm as notebook_tqdm


## Prepare Data

In [2]:
train1 = pd.read_csv('../../e-SNLI/dataset/esnli_train_1.csv')
train2 = pd.read_csv('../../e-SNLI/dataset/esnli_train_2.csv')
train = pd.concat([train1, train2])
dev = pd.read_csv('../../e-SNLI/dataset/esnli_dev.csv')
test = pd.read_csv('../../e-SNLI/dataset/esnli_test.csv')

train = train.dropna(subset=['Sentence1', 'Sentence2', 'Explanation_1'])
train = train.reset_index(drop=True)

np.random.seed(12345) # seed for numpy package
test_indices = list(np.random.choice(test.index.values, size=1000, replace=False))
test = test.loc[test_indices]
test = test.reset_index(drop=True)

In [3]:
gpt_pred = pd.read_csv('../output_data/test_predictions_4examples.csv', sep=';')

In [4]:
gpt2 = pd.read_json("../data/esnli_train.json")

## Prepare Input Sentences

In [12]:
def extract_SVO(txt):
    res = list()
    doc = nlp(txt)

    for cur_nsubj in list(textacy.extract.matches.token_matches(doc, [{"DEP": "nsubj"}])) + list(textacy.extract.matches.token_matches(doc, [{"DEP": "nsubjpass"}])):
        verbs = list()
        if cur_nsubj[0].head.pos_ == "AUX":
            for cur_verb in textacy.extract.matches.token_matches(doc, [{"POS": "VERB"}]):
                if cur_verb[0].head == cur_nsubj[0]:
                    verbs += [cur_verb[0]]
        else:
            verbs += [cur_nsubj[0].head]
        for cur_verb in verbs:
            dobjs = list(textacy.extract.matches.token_matches(doc, [{"DEP": "dobj"}]))
            if len(dobjs) == 0:
                res += [(cur_nsubj[0], cur_verb, None)]
            else:
                for cur_dobj in dobjs:
                    if cur_dobj[0].head == cur_verb:
                        res += [(cur_nsubj[0], cur_verb, cur_dobj[0])]
                    else:
                        res += [(cur_nsubj[0], cur_verb, None)]


    if len(res) == 0:
        cur_root = list(textacy.extract.matches.token_matches(doc, [{"DEP": "ROOT", "POS": "NOUN"}]))
        if len(cur_root) != 0:
            cur_root = cur_root[0][0]
            for cur_verb in textacy.extract.matches.token_matches(doc, [{"DEP": "acl"}]):
                if cur_verb[0].head == cur_root:
                    dobjs = list(textacy.extract.matches.token_matches(doc, [{"DEP": "dobj"}]))
                    if len(dobjs) == 0:
                        res += [(cur_root, cur_verb[0], None)]
                    for cur_dobj in dobjs:
                        if cur_dobj[0].head == cur_verb[0]:
                            res += [(cur_root, cur_verb[0], cur_dobj[0])]
                        else:
                            res += [(cur_root, cur_verb[0], None)]

    return res

In [None]:
gpt_pred['S1_SVO'] = [extract_SVO(gpt_pred.Sentence1.iloc[i]) for i in range(gpt_pred.shape[0])]
gpt_pred['S2_SVO'] = [extract_SVO(gpt_pred.Sentence2.iloc[i]) for i in range(gpt_pred.shape[0])]

In [None]:
gpt_pred[["S1_SVO", "S2_SVO", "Explanation_1"]]

In [None]:
has_svo_structure = [not (gpt_pred["S1_SVO"].iloc[i] == list() or gpt_pred["S2_SVO"].iloc[i] == list()) for i in range(gpt_pred.shape[0])]
gpt_pred_svo = gpt_pred[has_svo_structure]

In [None]:
np.mean(has_svo_structure)

In [None]:
gpt_pred_svo

# GPT-3 Explanations

In [None]:
n = gpt_pred.shape[0]
expls = [nlp(gpt_pred.pred_explanation.iloc[i]) for i in range(n)]

In [None]:
verbs_sentence = list()

cur_index = 0
verb_dict = dict()
root_term_dict = dict()

# loop through each row in the test data
for expl in expls:
    cur_verbs = list()
    neg_root = None
    contains_different = False
    contains_same = False

    # loop through each token in current explanation
    for token in expl:
        if token.lemma_ == 'different':
            contains_different = True
        if token.lemma_ == 'same':
            contains_same = True
        if token.dep_ == 'ROOT':
            cur_root = token.lemma_
        if token.dep_ == 'neg' and token.head.dep_ == 'ROOT':
            neg_root = token.lemma_ + " " + token.head.lemma_
    if neg_root:
        cur_root = neg_root
    if contains_different:
        cur_root += " different"
    if contains_same:
        cur_root += " same"
    if cur_root not in root_term_dict.keys():
        root_term_dict[cur_root] = [cur_index]
    else:
        root_term_dict[cur_root] += [cur_index]
    cur_index += 1

In [None]:
pd.Series(root_term_dict).apply(lambda x: len(x)).sort_values(ascending=False)

In [None]:
print("Max number of SVO structures in sentence 1: ", gpt_pred.S1_SVO.apply(lambda x: len(x)).max())
print("Max number of SVO structures in sentence 2: ", gpt_pred.S2_SVO.apply(lambda x: len(x)).max())

In [None]:
for i in root_term_dict.keys():
    doc = nlp(i)
    print([doc[j] for j in range(len(doc))])

In [None]:
gpt_pred["reduced_expl"] = None
for root_term in root_term_dict.keys():
    for i in root_term_dict[root_term]:
        try:
            temp = list()
            doc = nlp(gpt_pred.loc[i].pred_explanation)
            for token in doc:
                new_word = None
                add_S1S2 = False
                root_doc = nlp(root_term)
                if token.lemma_ in [root_doc[j].lemma_ for j in range(len(root_doc))]:
                    temp += [token.lemma_]
                for SVO_structure in gpt_pred.loc[i].S1_SVO:
                    if token.lemma_ == SVO_structure[0].lemma_:
                        new_word = "S1_Subj"
                    if not pd.isna(SVO_structure[1]):
                        if token.lemma_ == SVO_structure[1].lemma_:
                            new_word = "S1_Verb"
                    if not pd.isna(SVO_structure[2]):
                        if token.lemma_ == SVO_structure[2].lemma_:
                            new_word = "S1_Obj"
                for SVO_structure in gpt_pred.loc[i].S2_SVO:
                    if token.lemma_ == SVO_structure[0].lemma_:
                        if new_word:
                            temp += ["S1S2_Subj"]
                            add_S1S2 = True
                        else:
                            temp += ["S2_Subj"]
                    if not pd.isna(SVO_structure[1]):
                        if token.lemma_ == SVO_structure[1].lemma_:
                            if new_word:
                                temp += ["S1S2_Verb"]
                                add_S1S2 = True
                            else:
                                temp += ["S2_Verb"]
                    if not pd.isna(SVO_structure[2]):
                        if token.lemma_ == SVO_structure[2].lemma_:
                            if new_word:
                                temp += ["S1S2_Obj"]
                                add_S1S2 = True
                            else:
                                temp += ["S2_Obj"]
                if not pd.isna(new_word) and not add_S1S2:
                    temp += [new_word]
            gpt_pred['reduced_expl'].loc[i] = temp
        except:
            None

In [None]:
gpt_pred["pos_expl"] = None
for i, row in gpt_pred.iterrows():
    temp = list()
    doc_expl = nlp(row.pred_explanation)
    doc_S1 = nlp(row.Sentence1)
    doc_S2 = nlp(row.Sentence2)
    for token in doc_expl:
        if (token.lemma_ == "not" or token.lemma_ == "be" or not token.is_stop) and token.pos_ != "PUNCT":
            add_S1S2 = False
            new_word = None
            for S1_token in doc_S1:
                if S1_token.lemma_ == token.lemma_:
                    if token.lemma_ in [S2_token.lemma_ for S2_token in doc_S2]:
                        new_word = "S1S2_" + S1_token.pos_
                        add_S1S2 = True
                    else:
                        new_word = "S1_" + S1_token.pos_
                    # Avoid multiple POS get added
                    break
            if not add_S1S2:
                for S2_token in doc_S2:
                    if S2_token.lemma_ == token.lemma_:
                        new_word = "S2_" + S2_token.pos_
            if token.lemma_ == "be":
                new_word = "be"
            if pd.isna(new_word):
                new_word = token.lemma_
            temp += [new_word]
    gpt_pred.pos_expl.loc[i] = temp

In [None]:
gpt_pred[["S1_SVO", "S2_SVO", "Sentence1", "Sentence2", "pred_explanation", "reduced_expl", "pos_expl"]]

# Gold Explanation

In [None]:
n = gpt_pred.shape[0]
expls = [nlp(gpt_pred.Explanation_1.iloc[i]) for i in range(n)]

In [None]:
verbs_sentence = list()

cur_index = 0
verb_dict = dict()
root_term_dict = dict()

# loop through each row in the test data
for expl in expls:
    cur_verbs = list()
    neg_root = None
    contains_different = False
    contains_same = False

    # loop through each token in current explanation
    for token in expl:
        if token.lemma_ == 'different':
            contains_different = True
        if token.lemma_ == 'same':
            contains_same = True
        if token.dep_ == 'ROOT':
            cur_root = token.lemma_
        if token.dep_ == 'neg' and token.head.dep_ == 'ROOT':
            neg_root = token.lemma_ + " " + token.head.lemma_
    if neg_root:
        cur_root = neg_root
    if contains_different:
        cur_root += " different"
    if contains_same:
        cur_root += " same"
    if cur_root not in root_term_dict.keys():
        root_term_dict[cur_root] = [cur_index]
    else:
        root_term_dict[cur_root] += [cur_index]
    cur_index += 1

In [None]:
pd.Series(root_term_dict).apply(lambda x: len(x)).sort_values(ascending=False)

In [None]:
print("Max number of SVO structures in sentence 1: ", gpt_pred.S1_SVO.apply(lambda x: len(x)).max())
print("Max number of SVO structures in sentence 2: ", gpt_pred.S2_SVO.apply(lambda x: len(x)).max())

In [None]:
for i in root_term_dict.keys():
    doc = nlp(i)
    print([doc[j] for j in range(len(doc))])

In [None]:
gpt_pred["reduced_gold_expl"] = None
for root_term in root_term_dict.keys():
    for i in root_term_dict[root_term]:
        try:
            temp = list()
            doc = nlp(gpt_pred.loc[i].Explanation_1)
            for token in doc:
                new_word = None
                add_S1S2 = False
                root_doc = nlp(root_term)
                if token.lemma_ in [root_doc[j].lemma_ for j in range(len(root_doc))]:
                    temp += [token.lemma_]
                for SVO_structure in gpt_pred.loc[i].S1_SVO:
                    if token.lemma_ == SVO_structure[0].lemma_:
                        new_word = "S1_Subj"
                    if not pd.isna(SVO_structure[1]):
                        if token.lemma_ == SVO_structure[1].lemma_:
                            new_word = "S1_Verb"
                    if not pd.isna(SVO_structure[2]):
                        if token.lemma_ == SVO_structure[2].lemma_:
                            new_word = "S1_Obj"
                for SVO_structure in gpt_pred.loc[i].S2_SVO:
                    if token.lemma_ == SVO_structure[0].lemma_:
                        if new_word:
                            temp += ["S1S2_Subj"]
                            add_S1S2 = True
                        else:
                            temp += ["S2_Subj"]
                    if not pd.isna(SVO_structure[1]):
                        if token.lemma_ == SVO_structure[1].lemma_:
                            if new_word:
                                temp += ["S1S2_Verb"]
                                add_S1S2 = True
                            else:
                                temp += ["S2_Verb"]
                    if not pd.isna(SVO_structure[2]):
                        if token.lemma_ == SVO_structure[2].lemma_:
                            if new_word:
                                temp += ["S1S2_Obj"]
                                add_S1S2 = True
                            else:
                                temp += ["S2_Obj"]
                if not pd.isna(new_word) and not add_S1S2:
                    temp += [new_word]
            gpt_pred['reduced_gold_expl'].loc[i] = temp
        except:
            None

In [None]:
gpt_pred["pos_gold_expl"] = None
for i, row in gpt_pred.iterrows():
    temp = list()
    doc_expl = nlp(row.Explanation_1)
    doc_S1 = nlp(row.Sentence1)
    doc_S2 = nlp(row.Sentence2)
    for token in doc_expl:
        if (token.lemma_ == "not" or token.lemma_ == "be" or not token.is_stop) and token.pos_ != "PUNCT":
            add_S1S2 = False
            new_word = None
            for S1_token in doc_S1:
                if S1_token.lemma_ == token.lemma_:
                    if token.lemma_ in [S2_token.lemma_ for S2_token in doc_S2]:
                        new_word = "S1S2_" + S1_token.pos_
                        add_S1S2 = True
                    else:
                        new_word = "S1_" + S1_token.pos_
                    # Avoid multiple POS get added
                    break
            if not add_S1S2:
                for S2_token in doc_S2:
                    if S2_token.lemma_ == token.lemma_:
                        new_word = "S2_" + S2_token.pos_
            if token.lemma_ == "be":
                new_word = "be"
            if pd.isna(new_word):
                new_word = token.lemma_
            temp += [new_word]
    gpt_pred.pos_gold_expl.loc[i] = temp

In [None]:
gpt_pred[["S1_SVO", "S2_SVO", "Sentence1", "Sentence2", "pred_explanation", "reduced_expl", "pos_expl", "reduced_gold_expl", "pos_gold_expl"]]

# GPT-2 Explanations

In [4]:
gpt2 = pd.read_json("../data/esnli_train.json")
np.random.seed(12345) # seed for numpy package
test_indices = list(np.random.choice(gpt2.index.values, size=1000, replace=False))
gpt2 = gpt2.loc[test_indices]
gpt2.head()

Unnamed: 0,index,cls_labels,groups,premise,hypothesis,explanations,labels,data_id,generated
123852,123852,2,2023529314.jpg#4r1e,Man enjoying life while skydiving,A person is skydiving,"A man is a person. If a person is ""enjoying li...",2,esnli,A man is a person.
272061,272061,0,441489325.jpg#2r1c,A red-haired woman in a white and purple outfi...,A woman is next to a blue SUV.,A red sports car cannot be a blue SUV.,0,esnli,A red sports car is not a blue SUV.
140514,140514,2,188244881.jpg#3r1e,A man is standing at a bus stop reading a paper.,A person is at a bus stop.,A man is a person and standing at is same as at,2,esnli,A man is a person.
57451,57451,0,284028406.jpg#0r1c,"A man wearing a gray cap, gold jewelry and win...",There is a man swimming.,"One does not typically wear a gray cap, gold j...",0,esnli,"A man wearing a gray cap, gold jewelry and win..."
166846,166846,0,7599430126.jpg#4r1c,A man wearing a mask plays music by the side o...,A man is walking through an airport.,Playing music on the side of the road cannot t...,0,esnli,One cannot be walking through an airport and b...


In [7]:
n = gpt2.shape[0]
expls = [nlp(gpt2.generated.iloc[i]) for i in range(n)]

In [8]:
verbs_sentence = list()

cur_index = 0
verb_dict = dict()
root_term_dict = dict()

# loop through each row in the test data
for expl in expls:
    cur_verbs = list()
    neg_root = None
    contains_different = False
    contains_same = False

    # loop through each token in current explanation
    for token in expl:
        if token.lemma_ == 'different':
            contains_different = True
        if token.lemma_ == 'same':
            contains_same = True
        if token.dep_ == 'ROOT':
            cur_root = token.lemma_
        if token.dep_ == 'neg' and token.head.dep_ == 'ROOT':
            neg_root = token.lemma_ + " " + token.head.lemma_
    if neg_root:
        cur_root = neg_root
    if contains_different:
        cur_root += " different"
    if contains_same:
        cur_root += " same"
    if cur_root not in root_term_dict.keys():
        root_term_dict[cur_root] = [cur_index]
    else:
        root_term_dict[cur_root] += [cur_index]
    cur_index += 1

In [9]:
pd.Series(root_term_dict).apply(lambda x: len(x)).sort_values(ascending=False)

be                265
not be            126
not mean           81
not imply          58
not be same        39
                 ... 
not move            1
kiss                1
not climb           1
spray               1
not reach same      1
Length: 201, dtype: int64

In [13]:
gpt2['S1_SVO'] = [extract_SVO(gpt2.premise.iloc[i]) for i in range(gpt2.shape[0])]
gpt2['S2_SVO'] = [extract_SVO(gpt2.hypothesis.iloc[i]) for i in range(gpt2.shape[0])]

In [14]:
print("Max number of SVO structures in sentence 1: ", gpt2.S1_SVO.apply(lambda x: len(x)).max())
print("Max number of SVO structures in sentence 2: ", gpt2.S2_SVO.apply(lambda x: len(x)).max())

Max number of SVO structures in sentence 1:  25
Max number of SVO structures in sentence 2:  4


In [15]:
for i in root_term_dict.keys():
    doc = nlp(i)
    print([doc[j] for j in range(len(doc))])

[be]
[not, be]
[not, be, same]
[not, walk, same]
[not, mean]
[not, seat, same]
[not, read]
[not, eat]
[sit, same]
[not, imply]
[singe]
[not, sit, same]
[not, play]
[not, watch]
[perform]
[prepare]
[be, same]
[wear]
[not, sleep, same]
[not, swim, same]
[jump]
[not, hold, same]
[not, take, same]
[not, drive]
[be, different]
[not, relax]
[not, indicate]
[not, ride, same]
[infer]
[screw]
[enjoy]
[eat, same]
[repel]
[not, walk]
[imply]
[gather]
[swinge]
[not, find]
[dress]
[play]
[get]
[have]
[not, read, same]
[not, swim]
[make]
[not, sit]
[not, stand, same]
[not, look, different]
[not, replace]
[not, run, same]
[not, mean, same]
[walk]
[run]
[push]
[not, give]
[wait]
[not, build]
[not, kickboxe, same]
[work]
[look]
[read]
[try]
[hold]
[eat]
[not, enjoy]
[not, singe, same]
[not, greet, same]
[not, go, same]
[not, supervise]
[not, infer]
[mean]
[do]
[stand]
[carry]
[fly]
[expect]
[use]
[not, play, same]
[smile]
[mow]
[attempt]
[wave]
[not, wear]
[not, fight, same]
[not, ride]
[surfer]
[eithe

In [36]:
gpt2["reduced_expl"] = None
for root_term in root_term_dict.keys():
    for i in root_term_dict[root_term]:
        #try:
            temp = list()
            doc = nlp(gpt2.iloc[i].generated)
            for token in doc:
                new_word = None
                add_S1S2 = False
                root_doc = nlp(root_term)
                if token.lemma_ in [root_doc[j].lemma_ for j in range(len(root_doc))]:
                    temp += [token.lemma_]
                for SVO_structure in gpt2.iloc[i].S1_SVO:
                    if token.lemma_ == SVO_structure[0].lemma_:
                        new_word = "S1_Subj"
                    if not pd.isna(SVO_structure[1]):
                        if token.lemma_ == SVO_structure[1].lemma_:
                            new_word = "S1_Verb"
                    if not pd.isna(SVO_structure[2]):
                        if token.lemma_ == SVO_structure[2].lemma_:
                            new_word = "S1_Obj"
                for SVO_structure in gpt2.iloc[i].S2_SVO:
                    if token.lemma_ == SVO_structure[0].lemma_:
                        if new_word:
                            temp += ["S1S2_Subj"]
                            add_S1S2 = True
                        else:
                            temp += ["S2_Subj"]
                    if not pd.isna(SVO_structure[1]):
                        if token.lemma_ == SVO_structure[1].lemma_:
                            if new_word:
                                temp += ["S1S2_Verb"]
                                add_S1S2 = True
                            else:
                                temp += ["S2_Verb"]
                    if not pd.isna(SVO_structure[2]):
                        if token.lemma_ == SVO_structure[2].lemma_:
                            if new_word:
                                temp += ["S1S2_Obj"]
                                add_S1S2 = True
                            else:
                                temp += ["S2_Obj"]
                if not pd.isna(new_word) and not add_S1S2:
                    temp += [new_word]
            gpt2['reduced_expl'].iloc[i] = temp
        #except:
         #   None

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpt2['reduced_expl'].iloc[i] = temp


In [18]:
gpt2["pos_expl"] = None
for i, row in gpt2.iterrows():
    temp = list()
    doc_expl = nlp(row.generated)
    doc_S1 = nlp(row.premise)
    doc_S2 = nlp(row.hypothesis)
    for token in doc_expl:
        if (token.lemma_ == "not" or token.lemma_ == "be" or not token.is_stop) and token.pos_ != "PUNCT":
            add_S1S2 = False
            new_word = None
            for S1_token in doc_S1:
                if S1_token.lemma_ == token.lemma_:
                    if token.lemma_ in [S2_token.lemma_ for S2_token in doc_S2]:
                        new_word = "S1S2_" + S1_token.pos_
                        add_S1S2 = True
                    else:
                        new_word = "S1_" + S1_token.pos_
                    # Avoid multiple POS get added
                    break
            if not add_S1S2:
                for S2_token in doc_S2:
                    if S2_token.lemma_ == token.lemma_:
                        new_word = "S2_" + S2_token.pos_
            if token.lemma_ == "be":
                new_word = "be"
            if pd.isna(new_word):
                new_word = token.lemma_
            temp += [new_word]
    gpt2.pos_expl.loc[i] = temp

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpt2.pos_expl.loc[i] = temp


In [38]:
gpt2[:50]

Unnamed: 0,index,cls_labels,groups,premise,hypothesis,explanations,labels,data_id,generated,S1_SVO,S2_SVO,reduced_expl,pos_expl
123852,123852,2,2023529314.jpg#4r1e,Man enjoying life while skydiving,A person is skydiving,"A man is a person. If a person is ""enjoying li...",2,esnli,A man is a person.,"[(Man, enjoying, life)]","[(person, skydiving, None)]","[be, S2_Subj]","[man, be, S2_NOUN]"
272061,272061,0,441489325.jpg#2r1c,A red-haired woman in a white and purple outfi...,A woman is next to a blue SUV.,A red sports car cannot be a blue SUV.,0,esnli,A red sports car is not a blue SUV.,[],[],"[be, not]","[S1_ADJ, S1_NOUN, S1_NOUN, be, not, S2_ADJ, S2..."
140514,140514,2,188244881.jpg#3r1e,A man is standing at a bus stop reading a paper.,A person is at a bus stop.,A man is a person and standing at is same as at,2,esnli,A man is a person.,"[(man, standing, None)]",[],"[S1_Subj, be]","[S1_NOUN, be, S2_NOUN]"
57451,57451,0,284028406.jpg#0r1c,"A man wearing a gray cap, gold jewelry and win...",There is a man swimming.,"One does not typically wear a gray cap, gold j...",0,esnli,"A man wearing a gray cap, gold jewelry and win...","[(man, wearing, cap), (man, looking, None)]",[],"[S1_Subj, S1_Verb, S1_Obj, S1_Verb, be, not, s...","[S1S2_NOUN, S1_VERB, S1_ADJ, S1_NOUN, S1_NOUN,..."
166846,166846,0,7599430126.jpg#4r1c,A man wearing a mask plays music by the side o...,A man is walking through an airport.,Playing music on the side of the road cannot t...,0,esnli,One cannot be walking through an airport and b...,"[(man, plays, None), (man, plays, music)]","[(man, walking, None)]","[not, walk, S2_Verb, same]","[not, be, S2_VERB, S2_NOUN, S1_NOUN, time]"
143000,143000,2,4690240999.jpg#1r1e,There are a lot of people on this busy Asian s...,a lot of people on the street,If your on this busy asian street you can say ...,2,esnli,The street is busy because there are a lot of ...,[],[],"[be, be]","[S1S2_NOUN, be, S1_ADJ, be, S1S2_NOUN, S1S2_NOUN]"
43267,43267,1,2552438538.jpg#2r1n,Three little boys standing next to and on a fe...,The boys are outdoors.,The fact that boys standing next to and on fen...,1,esnli,The fence is outdoors.,"[(boys, standing, None)]",[],[be],"[S1_NOUN, be, S2_ADV]"
100479,100479,2,2092922849.jpg#0r1e,"A young adult is wearing shorts, tank top, san...",Someone is looking out of a door.,A young adult is someone looking out of a door.,2,esnli,A young adult is someone.,"[(adult, wearing, shorts)]","[(Someone, looking, None)]","[S1_Subj, be, S2_Subj]","[S1_ADJ, S1_NOUN, be]"
258234,258234,2,2086532897.jpg#2r1e,White water rafter goes through a rough spot.,River rafter in a rough patch of white water.,Answer; Patch refers to the rough spot.,2,esnli,The rafter is either in a patch or rough spot.,"[(rafter, goes, None)]",[],"[S1_Subj, be]","[S1S2_NOUN, be, S2_NOUN, S1S2_ADJ, S1_NOUN]"
90521,90521,0,7734754954.jpg#3r1c,A young man carefully prepares to take a pool ...,The man is old.,Three were young and old man,0,esnli,A young man is not an old man.,"[(man, prepares, None)]",[],"[S1_Subj, be, not, S1_Subj]","[S1_ADJ, S1S2_NOUN, be, not, S2_ADJ, S1S2_NOUN]"


# Store Data

In [None]:
gpt_pred.to_csv('../output_data/svo_structure_4.csv', sep=';')

In [39]:
gpt2.to_csv('../output_data/svo_structure_gpt2.csv', sep=';')