# Setup

## Load Packages

In [1]:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import openai
import re
import time
from scripts.prepare_data_helpers import prepare_examples, create_query
import spacy
import textacy
from dotenv import load_dotenv

load_dotenv()
nlp = spacy.load('en_core_web_sm')

  from .autonotebook import tqdm as notebook_tqdm


## Prepare Data

In [2]:
train1 = pd.read_csv('../../e-SNLI/dataset/esnli_train_1.csv')
train2 = pd.read_csv('../../e-SNLI/dataset/esnli_train_2.csv')
train = pd.concat([train1, train2])
dev = pd.read_csv('../../e-SNLI/dataset/esnli_dev.csv')
test = pd.read_csv('../../e-SNLI/dataset/esnli_test.csv')

train = train.dropna(subset=['Sentence1', 'Sentence2', 'Explanation_1'])
train = train.reset_index(drop=True)

np.random.seed(12345) # seed for numpy package
test_indices = list(np.random.choice(test.index.values, size=1000, replace=False))
test = test.loc[test_indices]
test = test.reset_index(drop=True)

In [4]:
gpt_pred = pd.read_csv('../output_data/test_predictions_4examples.csv', sep=';')

In [5]:
gpt_pred

Unnamed: 0.1,Unnamed: 0,pairID,gold_label,Sentence1,Sentence2,Explanation_1,Sentence1_marked_1,Sentence2_marked_1,Sentence1_Highlighted_1,Sentence2_Highlighted_1,...,Sentence1_Highlighted_2,Sentence2_Highlighted_2,Explanation_3,Sentence1_marked_3,Sentence2_marked_3,Sentence1_Highlighted_3,Sentence2_Highlighted_3,prompt,pred_explanation,pred_label
0,0,4813951931.jpg#3r1e,entailment,A young man in an orange hat walks his bicycle...,A man in an orange hat walks his bike down a s...,"A bicycle and a bike, are the same object.",A young man in an orange hat walks his *bicyc...,A man in an orange hat walks his *bike* down ...,9,8,...,912,81,The young man in an orange hart is the man wal...,A *young* *man* in *an* *orange* *hat* *walks...,A *man* in *an* *orange* *hat* *walks* *his* ...,124567891012,1543119876,Statement: Two african women bike down a dirt ...,A young man in an orange hat is a rephrasing o...,entailment
1,1,4678320536.jpg#1r1c,contradiction,A woman standing with smile on her face and sh...,A woman stands holding her baby and crying.,If a woman has a smile on her face then she is...,A woman standing with *smile* *on* *her* *face...,A woman stands holding her baby and *crying.*,4567,7,...,13,5,the woman is either crying or has a smile,A *woman* standing with *smile* on her face an...,A *woman* stands holding her baby and *crying.*,41,17,Statement: A man with a striped shirt and glas...,"The woman is smiling and holding a wood, not a...",contradiction
2,2,661749711.jpg#2r1e,entailment,A dog jumps to catch a red ball outside.,the dog is jumping.,Jumping is an action needed to catch a ball.,A *dog* *jumps* to *catch* a red *ball* *outs...,the *dog* is *jumping.*,27814,31,...,12,13,"If the dog jumps to catch, the dog is jumping.",A dog *jumps* to *catch* a red ball outside.,the dog is *jumping.*,24,3,Statement: a lone rock climber in a harness cl...,The statement implies that the dog is jumping ...,entailment
3,3,44904567.jpg#1r1c,contradiction,A man wielding an electric razor is gleefully ...,There is only one person present.,There cannot be one person present if one man ...,A *man* wielding an electric razor is gleefull...,There is only *one* *person* *present.*,11011,345,...,10,32,It cannot be claimed that there is only one pe...,A *man* wielding an electric razor is gleefull...,There is *only* *one* *person* present.,11011,234,Statement: A young girl is climbing on a woode...,"There are two people present, the man wielding...",contradiction
4,4,32542645.jpg#0r1n,neutral,A group of people stares at a wall that is fil...,A group of people analyze art together,Not necessary that they analyze the art.,A group of people stares at a wall that is fil...,A group of people *analyze* art together,{},4,...,{},45,The group may have just been admiring as oppos...,A group of people stares at a wall that is fil...,A group of people *analyze* art *together*,{},46,Statement: A girl in a red and pink shirt is c...,Staring at drawings is a form of analyzing art.,entailment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,995,2429284131.jpg#2r1c,contradiction,Three men are standing on stage performing.,Women are playing trumpets on the street.,"either there are men on stage, or women playin...",Three *men* are standing on stage performing.,*Women* are playing trumpets on the street.,1,0,...,1,0,There can be either men or women.,Three *men* are standing on stage performing.,*Women* are playing trumpets on the street.,1,0,"Statement: Boy in shorts, face down in shallow...",The statement describes three men on stage per...,contradiction
996,996,4460943467.jpg#0r1e,entailment,3 young man in hoods standing in the middle of...,Three hood wearing people stand in a street.,3 Man in hoods are hood wearing people.,*3* young *man* in *hoods* standing in the mi...,Three *hood* *wearing* *people* stand in a st...,204,123,...,102412,0147,If they are standing in the middle of a quiet ...,3 young man in hoods *standing* *in* *the* *m...,Three hood wearing people *stand* *in* *a* *s...,56789111012,4567,Statement: Two youth getting squirted by water...,The answer implies that the people in the stre...,entailment
997,997,2226343167.jpg#0r1n,neutral,"A woman in skirt, torn stockings and combat bo...",The man and woman are going to a movie in the ...,walking down a street does not mean they are g...,"A woman in skirt, torn stockings and combat bo...",The man and woman are going to a *movie* in t...,{},8,...,{},13811,Just because a man and woman are wearing theme...,"A woman in skirt, torn stockings and combat bo...",The man and woman are going to a *movie* in t...,{},8,Statement: A bride and groom stand in front of...,The clothing does not necessarily indicate tha...,neutral
998,998,3389707399.jpg#4r1e,entailment,Two men shining peoples shoes.,Two men shining shoes.,Two men are shining shoes if two men are shini...,Two *men* *shining* peoples shoes.,Two *men* *shining* shoes.,12,12,...,142,123,"""Two men shining shoes"" is a rephrasing of ""tw...",Two men *shining* *peoples* *shoes.*,Two men *shining* *shoes.*,234,23,Statement: A woman praying to her gods.\nState...,Shining shoes is a specific type of activity t...,entailment


# Prepare Input Sentences

In [6]:
def extract_SVO(txt):
    res = list()
    doc = nlp(txt)

    for cur_nsubj in list(textacy.extract.matches.token_matches(doc, [{"DEP": "nsubj"}])) + list(textacy.extract.matches.token_matches(doc, [{"DEP": "nsubjpass"}])):
        verbs = list()
        if cur_nsubj[0].head.pos_ == "AUX":
            for cur_verb in textacy.extract.matches.token_matches(doc, [{"POS": "VERB"}]):
                if cur_verb[0].head == cur_nsubj[0]:
                    verbs += [cur_verb[0]]
        else:
            verbs += [cur_nsubj[0].head]
        for cur_verb in verbs:
            dobjs = list(textacy.extract.matches.token_matches(doc, [{"DEP": "dobj"}]))
            if len(dobjs) == 0:
                res += [(cur_nsubj[0], cur_verb, None)]
            else:
                for cur_dobj in dobjs:
                    if cur_dobj[0].head == cur_verb:
                        res += [(cur_nsubj[0], cur_verb, cur_dobj[0])]
                    else:
                        res += [(cur_nsubj[0], cur_verb, None)]


    if len(res) == 0:
        cur_root = list(textacy.extract.matches.token_matches(doc, [{"DEP": "ROOT", "POS": "NOUN"}]))
        if len(cur_root) != 0:
            cur_root = cur_root[0][0]
            for cur_verb in textacy.extract.matches.token_matches(doc, [{"DEP": "acl"}]):
                if cur_verb[0].head == cur_root:
                    dobjs = list(textacy.extract.matches.token_matches(doc, [{"DEP": "dobj"}]))
                    if len(dobjs) == 0:
                        res += [(cur_root, cur_verb[0], None)]
                    for cur_dobj in dobjs:
                        if cur_dobj[0].head == cur_verb[0]:
                            res += [(cur_root, cur_verb[0], cur_dobj[0])]
                        else:
                            res += [(cur_root, cur_verb[0], None)]

    return res

In [7]:
gpt_pred['S1_SVO'] = [extract_SVO(gpt_pred.Sentence1.iloc[i]) for i in range(gpt_pred.shape[0])]
gpt_pred['S2_SVO'] = [extract_SVO(gpt_pred.Sentence2.iloc[i]) for i in range(gpt_pred.shape[0])]

In [8]:
gpt_pred[["S1_SVO", "S2_SVO", "Explanation_1"]]

Unnamed: 0,S1_SVO,S2_SVO,Explanation_1
0,"[(man, walks, bicycle)]","[(man, walks, bike)]","A bicycle and a bike, are the same object."
1,"[(woman, holding, wood), (she, holding, wood)]","[(woman, stands, None)]",If a woman has a smile on her face then she is...
2,"[(dog, jumps, None)]","[(dog, jumping, None)]",Jumping is an action needed to catch a ball.
3,"[(man, wielding, razor), (man, wielding, None)]",[],There cannot be one person present if one man ...
4,"[(group, stares, None), (that, filled, None)]","[(group, analyze, art)]",Not necessary that they analyze the art.
...,...,...,...
995,"[(men, standing, None)]","[(Women, playing, trumpets)]","either there are men on stage, or women playin..."
996,"[(man, standing, None)]","[(people, stand, None)]",3 Man in hoods are hood wearing people.
997,"[(woman, torn, stockings)]","[(man, going, None)]",walking down a street does not mean they are g...
998,"[(men, shining, shoes)]",[],Two men are shining shoes if two men are shini...


In [9]:
has_svo_structure = [not (gpt_pred["S1_SVO"].iloc[i] == list() or gpt_pred["S2_SVO"].iloc[i] == list()) for i in range(gpt_pred.shape[0])]
gpt_pred_svo = gpt_pred[has_svo_structure]

In [10]:
np.mean(has_svo_structure)

0.667

In [11]:
gpt_pred_svo

Unnamed: 0.1,Unnamed: 0,pairID,gold_label,Sentence1,Sentence2,Explanation_1,Sentence1_marked_1,Sentence2_marked_1,Sentence1_Highlighted_1,Sentence2_Highlighted_1,...,Explanation_3,Sentence1_marked_3,Sentence2_marked_3,Sentence1_Highlighted_3,Sentence2_Highlighted_3,prompt,pred_explanation,pred_label,S1_SVO,S2_SVO
0,0,4813951931.jpg#3r1e,entailment,A young man in an orange hat walks his bicycle...,A man in an orange hat walks his bike down a s...,"A bicycle and a bike, are the same object.",A young man in an orange hat walks his *bicyc...,A man in an orange hat walks his *bike* down ...,9,8,...,The young man in an orange hart is the man wal...,A *young* *man* in *an* *orange* *hat* *walks...,A *man* in *an* *orange* *hat* *walks* *his* ...,124567891012,1543119876,Statement: Two african women bike down a dirt ...,A young man in an orange hat is a rephrasing o...,entailment,"[(man, walks, bicycle)]","[(man, walks, bike)]"
1,1,4678320536.jpg#1r1c,contradiction,A woman standing with smile on her face and sh...,A woman stands holding her baby and crying.,If a woman has a smile on her face then she is...,A woman standing with *smile* *on* *her* *face...,A woman stands holding her baby and *crying.*,4567,7,...,the woman is either crying or has a smile,A *woman* standing with *smile* on her face an...,A *woman* stands holding her baby and *crying.*,41,17,Statement: A man with a striped shirt and glas...,"The woman is smiling and holding a wood, not a...",contradiction,"[(woman, holding, wood), (she, holding, wood)]","[(woman, stands, None)]"
2,2,661749711.jpg#2r1e,entailment,A dog jumps to catch a red ball outside.,the dog is jumping.,Jumping is an action needed to catch a ball.,A *dog* *jumps* to *catch* a red *ball* *outs...,the *dog* is *jumping.*,27814,31,...,"If the dog jumps to catch, the dog is jumping.",A dog *jumps* to *catch* a red ball outside.,the dog is *jumping.*,24,3,Statement: a lone rock climber in a harness cl...,The statement implies that the dog is jumping ...,entailment,"[(dog, jumps, None)]","[(dog, jumping, None)]"
4,4,32542645.jpg#0r1n,neutral,A group of people stares at a wall that is fil...,A group of people analyze art together,Not necessary that they analyze the art.,A group of people stares at a wall that is fil...,A group of people *analyze* art together,{},4,...,The group may have just been admiring as oppos...,A group of people stares at a wall that is fil...,A group of people *analyze* art *together*,{},46,Statement: A girl in a red and pink shirt is c...,Staring at drawings is a form of analyzing art.,entailment,"[(group, stares, None), (that, filled, None)]","[(group, analyze, art)]"
7,7,4911982235.jpg#0r1c,contradiction,A man in a plaid shirt is walking on a street ...,He is walking on his hands.,The man is either walking or walking on his ha...,A man in a plaid shirt is *walking* on a *stre...,He is *walking* on his *hands.*,710,25,...,A man cannot be walking on a street and walkin...,A man in a plaid shirt is *walking* *on* *a* *...,He is *walking* *on* *his* *hands.*,78910,2345,Statement: A woman wearing blue pants and a wh...,Walking on a street and walking on hands are t...,contradiction,"[(man, walking, None)]","[(He, walking, None)]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
994,994,3251646144.jpg#0r1n,neutral,A boy is running in a spinning metal tunnel in...,He is getting dizzy.,A boy can run without getting dizzy.,A boy is running in a spinning metal tunnel in...,He is getting *dizzy.*,{},3,...,Running in a spinning metal tunnel does not al...,A boy is running in a spinning metal tunnel in...,He is getting *dizzy.*,{},3,"Statement: A group of women in saris sit, talk...",We don't know if the boy is getting dizzy or not.,neutral,"[(boy, running, None)]","[(He, getting, None)]"
995,995,2429284131.jpg#2r1c,contradiction,Three men are standing on stage performing.,Women are playing trumpets on the street.,"either there are men on stage, or women playin...",Three *men* are standing on stage performing.,*Women* are playing trumpets on the street.,1,0,...,There can be either men or women.,Three *men* are standing on stage performing.,*Women* are playing trumpets on the street.,1,0,"Statement: Boy in shorts, face down in shallow...",The statement describes three men on stage per...,contradiction,"[(men, standing, None)]","[(Women, playing, trumpets)]"
996,996,4460943467.jpg#0r1e,entailment,3 young man in hoods standing in the middle of...,Three hood wearing people stand in a street.,3 Man in hoods are hood wearing people.,*3* young *man* in *hoods* standing in the mi...,Three *hood* *wearing* *people* stand in a st...,204,123,...,If they are standing in the middle of a quiet ...,3 young man in hoods *standing* *in* *the* *m...,Three hood wearing people *stand* *in* *a* *s...,56789111012,4567,Statement: Two youth getting squirted by water...,The answer implies that the people in the stre...,entailment,"[(man, standing, None)]","[(people, stand, None)]"
997,997,2226343167.jpg#0r1n,neutral,"A woman in skirt, torn stockings and combat bo...",The man and woman are going to a movie in the ...,walking down a street does not mean they are g...,"A woman in skirt, torn stockings and combat bo...",The man and woman are going to a *movie* in t...,{},8,...,Just because a man and woman are wearing theme...,"A woman in skirt, torn stockings and combat bo...",The man and woman are going to a *movie* in t...,{},8,Statement: A bride and groom stand in front of...,The clothing does not necessarily indicate tha...,neutral,"[(woman, torn, stockings)]","[(man, going, None)]"


# Analyse Predictions

In [4]:
docs = [nlp(test.iloc[i].Explanation_1) for i in range(20)]

In [5]:
for doc in docs:
    for chunk in doc.noun_chunks:
        print(chunk.text, '//', chunk.root.text, '//', chunk.root.dep_, '//', chunk.root.head.text)
    print('-----------------------------------------')

A bicycle // bicycle // nsubj // are
a bike // bike // conj // bicycle
the same object // object // attr // are
-----------------------------------------
a woman // woman // nsubj // has
a smile // smile // dobj // has
her face // face // pobj // on
she // she // nsubj // crying
-----------------------------------------
Jumping // Jumping // nsubj // is
an action // action // attr // is
a ball // ball // dobj // catch
-----------------------------------------
one person // person // attr // be
one man // man // nsubj // shaving
another man's hair // hair // dobj // shaving
-----------------------------------------
they // they // nsubj // analyze
the art // art // dobj // analyze
-----------------------------------------
A snowboarder // snowboarder // nsubj // is
a person // person // attr // is
a snowboarder // snowboarder // nsubj // sails
the air // air // pobj // through
they // they // nsubj // snowboarding
a person // person // nsubj // snowboarding
-----------------------------

In [74]:
doc = nlp(gpt_pred.iloc[9].Sentence2)
html = spacy.displacy.render(doc, style='dep', jupyter=False)
with open('data_vis' + 'test' + '.html', 'w') as f:
    f.write(html)

# Currently Explanation 1 -> Change to pred_explanation

In [12]:
n = gpt_pred.shape[0]
expls = [nlp(gpt_pred.pred_explanation.iloc[i]) for i in range(n)]

In [13]:
verbs_sentence = list()

cur_index = 0
verb_dict = dict()
root_term_dict = dict()

# loop through each row in the test data
for expl in expls:
    cur_verbs = list()
    neg_root = None
    contains_different = False
    contains_same = False

    # loop through each token in current explanation
    for token in expl:
        if token.lemma_ == 'different':
            contains_different = True
        if token.lemma_ == 'same':
            contains_same = True
        if token.dep_ == 'ROOT':
            cur_root = token.lemma_
        if token.dep_ == 'neg' and token.head.dep_ == 'ROOT':
            neg_root = token.lemma_ + " " + token.head.lemma_
    if neg_root:
        cur_root = neg_root
    if contains_different:
        cur_root += " different"
    if contains_same:
        cur_root += " same"
    if cur_root not in root_term_dict.keys():
        root_term_dict[cur_root] = [cur_index]
    else:
        root_term_dict[cur_root] += [cur_index]
    cur_index += 1

In [14]:
pd.Series(root_term_dict).apply(lambda x: len(x)).sort_values(ascending=False)

be                  184
imply               167
be different         64
not mean             49
not be same          34
                   ... 
not be different      1
include               1
smile                 1
go                    1
shine                 1
Length: 213, dtype: int64

In [36]:
print("Max number of SVO structures in sentence 1: ", gpt_pred.S1_SVO.apply(lambda x: len(x)).max())
print("Max number of SVO structures in sentence 2: ", gpt_pred.S2_SVO.apply(lambda x: len(x)).max())

Max number of SVO structures in sentence 1:  6
Max number of SVO structures in sentence 2:  4


In [37]:
for i in root_term_dict.keys():
    doc = nlp(i)
    print([doc[j] for j in range(len(doc))])

[be]
[smile]
[imply]
[have]
[be, different]
[walk]
[imply, different]
[infer]
[involve]
[not, know]
[have, same]
[not, run]
[provide]
[challenge]
[look]
[not, specify]
[cast]
[indicate]
[wear]
[not, be, same]
[lie]
[write]
[not, infer]
[not, fight]
[describe, different]
[not, be]
[not, mean]
[contradict]
[watch]
[not, imply]
[be, same]
[not, mention]
[not, do, different, same]
[not, sit]
[not, play]
[not, describe]
[not, assume]
[not, match]
[not, play, same]
[pass]
[mention]
[talk]
[not, need]
[touch]
[describe]
[fix]
[singe]
[dump]
[decorate]
[specify]
[sit]
[not, refer]
[married]
[not, cut]
[not, provide]
[race]
[donate]
[mean, same]
[eat]
[not, eat, same]
[hold]
[not, walk, same]
[kneeling]
[not, relate]
[show]
[lay]
[not, drink]
[not, dance]
[breakdance]
[describe, same]
[say]
[occur]
[block]
[depict]
[not, ride, same]
[associate]
[say, same]
[not, gather, same]
[point]
[not, state]
[take]
[chisel]
[rephrase]
[perform]
[not, do, same]
[not, pull]
[practice]
[cut]
[not, fight, same

In [44]:
root_term_dict

{'be': [0,
  3,
  4,
  5,
  20,
  23,
  44,
  48,
  50,
  53,
  55,
  56,
  57,
  62,
  68,
  72,
  82,
  86,
  93,
  97,
  98,
  123,
  128,
  140,
  147,
  156,
  164,
  175,
  177,
  180,
  181,
  184,
  185,
  189,
  198,
  203,
  209,
  211,
  212,
  231,
  232,
  237,
  239,
  243,
  244,
  247,
  252,
  256,
  259,
  269,
  271,
  281,
  296,
  306,
  314,
  328,
  331,
  336,
  337,
  349,
  356,
  357,
  367,
  368,
  369,
  370,
  372,
  387,
  389,
  391,
  402,
  406,
  407,
  416,
  424,
  430,
  438,
  440,
  442,
  447,
  453,
  466,
  479,
  480,
  483,
  486,
  488,
  493,
  495,
  499,
  515,
  521,
  522,
  523,
  524,
  528,
  529,
  538,
  551,
  555,
  560,
  564,
  566,
  567,
  568,
  584,
  585,
  586,
  593,
  604,
  608,
  617,
  618,
  620,
  625,
  633,
  640,
  651,
  661,
  663,
  666,
  671,
  674,
  681,
  685,
  689,
  690,
  691,
  697,
  706,
  710,
  713,
  716,
  721,
  734,
  735,
  740,
  750,
  753,
  761,
  764,
  766,
  777,
  783,
  785,
  78

In [38]:
gpt_pred["reduced_expl"] = None
for root_term in root_term_dict.keys():
    for i in root_term_dict[root_term]:
        try:
            temp = list()
            doc = nlp(gpt_pred.loc[i].pred_explanation)
            for token in doc:
                new_word = None
                add_S1S2 = False
                root_doc = nlp(root_term)
                if token.lemma_ in [root_doc[j].lemma_ for j in range(len(root_doc))]:
                    temp += [token.lemma_]
                for SVO_structure in gpt_pred.loc[i].S1_SVO:
                    if token.lemma_ == SVO_structure[0].lemma_:
                        new_word = "S1_Subj"
                    if not pd.isna(SVO_structure[1]):
                        if token.lemma_ == SVO_structure[1].lemma_:
                            new_word = "S1_Verb"
                    if not pd.isna(SVO_structure[2]):
                        if token.lemma_ == SVO_structure[2].lemma_:
                            new_word = "S1_Obj"
                for SVO_structure in gpt_pred.loc[i].S2_SVO:
                    if token.lemma_ == SVO_structure[0].lemma_:
                        if new_word:
                            temp += ["S1S2_Subj"]
                            add_S1S2 = True
                        else:
                            temp += ["S2_Subj"]
                    if not pd.isna(SVO_structure[1]):
                        if token.lemma_ == SVO_structure[1].lemma_:
                            if new_word:
                                temp += ["S1S2_Verb"]
                                add_S1S2 = True
                            else:
                                temp += ["S2_Verb"]
                    if not pd.isna(SVO_structure[2]):
                        if token.lemma_ == SVO_structure[2].lemma_:
                            if new_word:
                                temp += ["S1S2_Obj"]
                                add_S1S2 = True
                            else:
                                temp += ["S2_Obj"]
                if not pd.isna(new_word) and not add_S1S2:
                    temp += [new_word]
            gpt_pred['reduced_expl'].loc[i] = temp
        except:
            None

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpt_pred['reduced_expl'].loc[i] = temp


In [102]:
gpt_pred["pos_expl"] = None
for i, row in gpt_pred.iterrows():
    temp = list()
    doc_expl = nlp(row.pred_explanation)
    doc_S1 = nlp(row.Sentence1)
    doc_S2 = nlp(row.Sentence2)
    for token in doc_expl:
        if (token.lemma_ == "not" or token.lemma_ == "be" or not token.is_stop) and token.pos_ != "PUNCT":
            add_S1S2 = False
            new_word = None
            for S1_token in doc_S1:
                if S1_token.lemma_ == token.lemma_:
                    if token.lemma_ in [S2_token.lemma_ for S2_token in doc_S2]:
                        new_word = "S1S2_" + S1_token.pos_
                        add_S1S2 = True
                    else:
                        new_word = "S1_" + S1_token.pos_
                    # Avoid multiple POS get added
                    break
            if not add_S1S2:
                for S2_token in doc_S2:
                    if S2_token.lemma_ == token.lemma_:
                        new_word = "S2_" + S2_token.pos_
            if token.lemma_ == "be":
                new_word = "be"
            if pd.isna(new_word):
                new_word = token.lemma_
            temp += [new_word]
    gpt_pred.pos_expl.loc[i] = temp

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpt_pred.pos_expl.loc[i] = temp


In [103]:
gpt_pred.pred_explanation[1]

'The woman is smiling and holding a wood, not a baby.'

In [105]:
gpt_pred.pos_expl[1]

['S1S2_NOUN', 'be', 'S1_NOUN', 'S1S2_VERB', 'S1_NOUN', 'not', 'S2_NOUN']

In [106]:
gpt_pred[["S1_SVO", "S2_SVO", "Sentence1", "Sentence2", "pred_explanation", "reduced_expl", "pos_expl"]]

Unnamed: 0,S1_SVO,S2_SVO,Sentence1,Sentence2,pred_explanation,reduced_expl,pos_expl
0,"[(man, walks, bicycle)]","[(man, walks, bike)]",A young man in an orange hat walks his bicycle...,A man in an orange hat walks his bike down a s...,A young man in an orange hat is a rephrasing o...,"[S1S2_Subj, be]","[S1_ADJ, S1S2_NOUN, S1S2_ADJ, S1S2_NOUN, be, r..."
1,"[(woman, holding, wood), (she, holding, wood)]","[(woman, stands, None)]",A woman standing with smile on her face and sh...,A woman stands holding her baby and crying.,"The woman is smiling and holding a wood, not a...","[S1S2_Subj, smile, S1_Verb, S1_Obj]","[S1S2_NOUN, be, S1_NOUN, S1S2_VERB, S1_NOUN, n..."
2,"[(dog, jumps, None)]","[(dog, jumping, None)]",A dog jumps to catch a red ball outside.,the dog is jumping.,The statement implies that the dog is jumping ...,"[imply, S1S2_Subj, S1S2_Verb]","[statement, imply, S1S2_NOUN, be, S1S2_VERB, S..."
3,"[(man, wielding, razor), (man, wielding, None)]",[],A man wielding an electric razor is gleefully ...,There is only one person present.,"There are two people present, the man wielding...","[be, S1_Subj, S1_Verb, S1_Obj, S1_Subj, be, be]","[be, people, S2_ADJ, S1_NOUN, S1_VERB, S1_ADJ,..."
4,"[(group, stares, None), (that, filled, None)]","[(group, analyze, art)]",A group of people stares at a wall that is fil...,A group of people analyze art together,Staring at drawings is a form of analyzing art.,"[S1_Verb, be, S2_Verb, S2_Obj]","[S1_VERB, S1_NOUN, be, form, S2_VERB, S2_NOUN]"
...,...,...,...,...,...,...,...
995,"[(men, standing, None)]","[(Women, playing, trumpets)]",Three men are standing on stage performing.,Women are playing trumpets on the street.,The statement describes three men on stage per...,"[describe, S1_Subj, describe, S2_Subj, S2_Verb...","[statement, describe, S1_NOUN, S1_NOUN, S1_VER..."
996,"[(man, standing, None)]","[(people, stand, None)]",3 young man in hoods standing in the middle of...,Three hood wearing people stand in a street.,The answer implies that the people in the stre...,"[imply, S2_Subj, S1_Subj]","[answer, imply, S2_NOUN, S1S2_NOUN, be, S1_ADJ..."
997,"[(woman, torn, stockings)]","[(man, going, None)]","A woman in skirt, torn stockings and combat bo...",The man and woman are going to a movie in the ...,The clothing does not necessarily indicate tha...,"[not, indicate, S2_Verb]","[clothing, not, necessarily, indicate, be, S2_..."
998,"[(men, shining, shoes)]",[],Two men shining peoples shoes.,Two men shining shoes.,Shining shoes is a specific type of activity t...,"[shine, S1_Verb, S1_Obj, S1_Subj, shine, S1_Ve...","[S1S2_VERB, S1S2_NOUN, be, specific, type, act..."


In [107]:
gpt_pred.pos_expl.value_counts

<bound method IndexOpsMixin.value_counts of 0      [S1_ADJ, S1S2_NOUN, S1S2_ADJ, S1S2_NOUN, be, r...
1      [S1S2_NOUN, be, S1_NOUN, S1S2_VERB, S1_NOUN, n...
2      [statement, imply, S1S2_NOUN, be, S1S2_VERB, S...
3      [be, people, S2_ADJ, S1_NOUN, S1_VERB, S1_ADJ,...
4         [S1_VERB, S1_NOUN, be, form, S2_VERB, S2_NOUN]
                             ...                        
995    [statement, describe, S1_NOUN, S1_NOUN, S1_VER...
996    [answer, imply, S2_NOUN, S1S2_NOUN, be, S1_ADJ...
997    [clothing, not, necessarily, indicate, be, S2_...
998    [S1S2_VERB, S1S2_NOUN, be, specific, type, act...
999    [S1_NOUN, S1_VERB, S1_NOUN, imply, be, evening...
Name: pos_expl, Length: 1000, dtype: object>

In [108]:
gpt_pred.to_csv('../output_data/svo_structure_4.csv', sep=';')