# Comparing the Predictions of Graph and Text Entailment Models

In [15]:
import ast
import pandas as pd

In [37]:
# Column store for the comparison table; one list per output column.
# The key order determines the column order of the DataFrame built from it.
data = {
    "premise": [],
    "hypothesis": [],
    "label": [],
    "matchlstm": [],
    "graph": [],
    "matchlstm + graph": [],
    "matchlstm label prob": [],
    "graph label prob": [],
    "matchlstm + graph label prob": [],
}

# Prefix that marks a line carrying the input instance.
INPUT_PREFIX = "input:  "
# Every other non-blank line is treated as a prediction whose literal starts
# after a 13-character prefix (presumably "prediction:  " -- TODO confirm).
PREDICTION_OFFSET = 13


def _read_nonblank_lines(path):
    """Return the whitespace-stripped, non-empty lines of the text file at ``path``."""
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        return [line for line in stripped if line]


joint_lines = _read_nonblank_lines("./matchlstm_graph_predict.jsonl")
graph_lines = _read_nonblank_lines("./graph_twohopentities.jsonl")
text_lines = _read_nonblank_lines("./matchlstm_predict.jsonl")

# A plain zip() stops at the shortest input without complaint, which would
# silently drop rows; refuse to continue when the files do not line up.
if not len(joint_lines) == len(graph_lines) == len(text_lines):
    raise ValueError(
        "prediction files differ in length: joint={}, graph={}, text={}".format(
            len(joint_lines), len(graph_lines), len(text_lines)
        )
    )

for record_no, (jline, gline, tline) in enumerate(
    zip(joint_lines, graph_lines, text_lines), start=1
):
    # All three files must agree on whether this record is an input or a
    # prediction, otherwise rows from different examples would be mixed.
    input_flags = {line.startswith(INPUT_PREFIX) for line in (jline, gline, tline)}
    if len(input_flags) != 1:
        raise ValueError(
            "prediction files are misaligned at non-blank line {}".format(record_no)
        )

    if jline.startswith(INPUT_PREFIX):
        # The instance fields are taken from the graph file.
        instance = ast.literal_eval(gline[len(INPUT_PREFIX):])
        data["premise"].append(instance.get("premise"))
        data["hypothesis"].append(instance.get("hypothesis"))
        data["label"].append(instance.get("label"))
    else:
        jpred = ast.literal_eval(jline[PREDICTION_OFFSET:])
        gpred = ast.literal_eval(gline[PREDICTION_OFFSET:])
        tpred = ast.literal_eval(tline[PREDICTION_OFFSET:])
        data["matchlstm"].append(tpred.get("label"))
        data["graph"].append(gpred.get("label"))
        data["matchlstm + graph"].append(jpred.get("label"))
        data["matchlstm label prob"].append(tpred.get("label_probs"))
        data["matchlstm + graph label prob"].append(jpred.get("label_probs"))
        data["graph label prob"].append(gpred.get("label_probs"))

# Every input must have been followed by exactly one prediction; fail here
# with a readable message instead of inside the DataFrame constructor.
column_lengths = {key: len(values) for key, values in data.items()}
if len(set(column_lengths.values())) > 1:
    raise ValueError("unequal column lengths after parsing: {}".format(column_lengths))

In [38]:
# Turn the dict of equal-length column lists into a table (one row per example).
parsed = pd.DataFrame.from_dict(data)

# Bare last expression so the notebook renders the frame.
parsed

Unnamed: 0,premise,hypothesis,label,matchlstm,graph,matchlstm + graph,matchlstm label prob,graph label prob,matchlstm + graph label prob
0,"An introduction to atoms and elements, compoun...",Replace another in a molecule happens to atoms...,neutral,neutral,neutral,neutral,"[0.9996228814125061, 0.0003771277843043208]","[0.7815166711807251, 0.2184833586215973]","[0.7815166711807251, 0.2184833586215973]"
1,Wavelength The distance between two consecutiv...,Wavelength is the distance between two corresp...,entails,entails,entails,entails,"[0.0008741169585846364, 0.9991258978843689]","[0.3360331952571869, 0.6639667749404907]","[0.3360331952571869, 0.6639667749404907]"
2,humans normally have 23 pairs of chromosomes.,Humans typically have 23 pairs pairs of chromo...,entails,entails,entails,entails,"[1.3253340092056476e-10, 1.0]","[0.000630471738986671, 0.9993695020675659]","[0.000630471738986671, 0.9993695020675659]"
3,"Photosynthesis, fermentation, glycolysis, aero...",Glycolysis is a series of reactions that is co...,neutral,neutral,neutral,entails,"[0.9881460070610046, 0.011853997595608234]","[0.014235193841159344, 0.9857648015022278]","[0.014235193841159344, 0.9857648015022278]"
4,"The pungent smell of the yellow, toffee colore...",If a substance does not release molecules into...,neutral,neutral,neutral,neutral,"[0.9980006217956543, 0.001999374944716692]","[0.6388182640075684, 0.36118176579475403]","[0.6388182640075684, 0.36118176579475403]"
5,kinetic energy the energy of motion.,Kinetic energy is the energy of motion.,entails,entails,entails,neutral,"[1.885475064966613e-08, 1.0]","[0.9609870910644531, 0.039012935012578964]","[0.9609870910644531, 0.039012935012578964]"
6,A solution is a homogenous mixture of two or m...,Solution is the term for a homogeneous mixture...,entails,entails,entails,entails,"[8.338120238704505e-08, 0.9999999403953552]","[0.011291959322988987, 0.9887080192565918]","[0.011291959322988987, 0.9887080192565918]"
7,Solutions are special mixtures formed when one...,A solution forms when one substance dissolves ...,entails,entails,entails,entails,"[0.0002262640919070691, 0.9997737407684326]","[0.10376197844743729, 0.8962380290031433]","[0.10376197844743729, 0.8962380290031433]"
8,Upwelling The physical process in near-shore o...,Upwelling is the term for when deep ocean wate...,entails,entails,entails,neutral,"[0.00026894372422248125, 0.9997310638427734]","[0.9731948971748352, 0.026805095374584198]","[0.9731948971748352, 0.026805095374584198]"
9,To evaluate the proportion of felsic minerals.,"Felsic igneous rocks contain felsic minerals, ...",neutral,neutral,neutral,neutral,"[0.999969482421875, 3.04897221212741e-05]","[0.984960675239563, 0.015039301477372646]","[0.984960675239563, 0.015039301477372646]"


In [41]:
# Array form of the whole table; `p` is kept as a name in case other cells read it.
p = parsed.values

# Look column positions up by name instead of hard-coding the indices.
column_at = parsed.columns.get_loc
label = p[:, column_at("label")]
matchlstm = p[:, column_at("matchlstm")]
graph = p[:, column_at("graph")]
joint = p[:, column_at("matchlstm + graph")]

samples = len(label)
print("Total samples in dev set: {}".format(samples))

# Boolean masks marking the rows where each model disagrees with the label.
matchlstm_wrong = matchlstm != label
graph_wrong = graph != label
joint_wrong = joint != label

count = matchlstm_wrong.sum()
print("Matchlstm: {} incorrect predictions, accuracy: {}".format(count, 1 - count / samples))
count = graph_wrong.sum()
print("Graph: {} incorrect predictions, accuracy: {}".format(count, 1 - count / samples))
count = joint_wrong.sum()
print("Matchlstm + Graph: {} incorrect predictions, accuracy: {}".format(count, 1 - count / samples))

# Rows that both the graph model and the text model got wrong.
overlap = (graph_wrong & matchlstm_wrong).sum()
print("Overlapping incorrect predictions: {}".format(overlap))

Total samples in dev set: 1304
Matchlstm: 149 incorrect predictions, accuracy: 0.8857361963190185
Graph: 149 incorrect predictions, accuracy: 0.8857361963190185
Matchlstm + Graph: 342 incorrect predictions, accuracy: 0.7377300613496933
Overlapping incorrect predictions: 149


In [40]:
# Rows the graph model labels correctly while the text (matchlstm) model does not.
text_missed = matchlstm != label
graph_hit = graph == label
graph_only = graph_hit & text_missed

n_graph_only = graph_only.sum()
print("{} questions are able to be answered by graph but not text".format(n_graph_only))

parsed[graph_only]

0 questions are able to be answered by graph but not text


Unnamed: 0,premise,hypothesis,label,matchlstm,graph,matchlstm + graph,matchlstm label prob,graph label prob,matchlstm + graph label prob


In [29]:
# Rows the text (matchlstm) model labels correctly while the graph model does not.
graph_missed = graph != label
text_hit = matchlstm == label
text_only = graph_missed & text_hit

n_text_only = text_only.sum()
print("{} questions are able to be answered by text but not graph".format(n_text_only))

parsed[text_only]

76 questions are able to be answered by text but not graph


Unnamed: 0,premise,hypothesis,label,matchlstm,matchlstm + graph,matchlstm_label_prob,graph_label_prob
10,This energy comes ultimately from the sun via ...,The energy stored in the organic molecules of ...,neutral,neutral,entails,"[0.997641384601593, 0.0023586032912135124]","[0.23900815844535828, 0.7609918713569641]"
57,"As an adult, mature tree, the pine produces tw...",The end of a pine tree branch bears the male c...,entails,entails,neutral,"[0.07080996036529541, 0.9291900396347046]","[0.6503872871398926, 0.3496127128601074]"
73,Both the dominant and recessive mutations of m...,A mitotic spindle forms from the centrosomes.,neutral,neutral,entails,"[0.6341613531112671, 0.3658386468887329]","[0.33709824085235596, 0.662901759147644]"
82,The coastline can be divided into seven genera...,Mollusks can be divided into seven classes.,neutral,neutral,entails,"[0.9022113680839539, 0.09778861701488495]","[0.22430792450904846, 0.7756921052932739]"
83,"Although, these birds can be quite noisy as ma...",Birds pair up with the same bird in mating sea...,neutral,neutral,entails,"[0.9490692019462585, 0.05093081668019295]","[0.4962325692176819, 0.5037674307823181]"
130,"in helium, two protons and two neutrons, and i...",A carbon atom with 6 protons and 8 neutrons is...,neutral,neutral,entails,"[0.5385523438453674, 0.4614476263523102]","[0.36662206053733826, 0.6333779692649841]"
135,"When brakes are applied, each of the motors is...",Electric motors transform electrical energy in...,neutral,neutral,entails,"[0.9644596576690674, 0.03554033488035202]","[0.07721996307373047, 0.9227800369262695]"
171,Penis The penis is composed of three cylindric...,"The human penis contains the urethra, as well ...",entails,entails,neutral,"[0.3799351155757904, 0.6200648546218872]","[0.5071551203727722, 0.49284490942955017]"
172,Amines are compounds that are derivatives of t...,An amine is an organic compound that can be co...,entails,entails,neutral,"[0.35403677821159363, 0.6459632515907288]","[0.8712287545204163, 0.12877123057842255]"
174,Although his work was originally calibrated on...,Seismologists originally measured the intensit...,neutral,neutral,entails,"[0.9964210391044617, 0.003578950185328722]","[0.2042144536972046, 0.7957855463027954]"


In [30]:
# Intersection: rows that the graph model and the text model both label correctly.
graph_hit = graph == label
text_hit = matchlstm == label
both = graph_hit & text_hit

n_both = both.sum()
print("{} questions are able to be answered by both text and graph".format(n_both))

parsed[both]

1079 questions are able to be answered by both text and graph


Unnamed: 0,premise,hypothesis,label,matchlstm,matchlstm + graph,matchlstm_label_prob,graph_label_prob
0,"An introduction to atoms and elements, compoun...",Replace another in a molecule happens to atoms...,neutral,neutral,neutral,"[0.9996228814125061, 0.0003771277843043208]","[0.9825951457023621, 0.017404858022928238]"
1,Wavelength The distance between two consecutiv...,Wavelength is the distance between two corresp...,entails,entails,entails,"[0.0008741169585846364, 0.9991258978843689]","[0.004393246024847031, 0.9956067800521851]"
2,humans normally have 23 pairs of chromosomes.,Humans typically have 23 pairs pairs of chromo...,entails,entails,entails,"[1.3253340092056476e-10, 1.0]","[2.0228878572225995e-09, 1.0]"
3,"Photosynthesis, fermentation, glycolysis, aero...",Glycolysis is a series of reactions that is co...,neutral,neutral,neutral,"[0.9881460070610046, 0.011853997595608234]","[0.9963446259498596, 0.0036553910467773676]"
4,"The pungent smell of the yellow, toffee colore...",If a substance does not release molecules into...,neutral,neutral,neutral,"[0.9980006217956543, 0.001999374944716692]","[0.9987031817436218, 0.001296809408813715]"
5,kinetic energy the energy of motion.,Kinetic energy is the energy of motion.,entails,entails,entails,"[1.885475064966613e-08, 1.0]","[0.00035180666600354016, 0.9996482133865356]"
6,A solution is a homogenous mixture of two or m...,Solution is the term for a homogeneous mixture...,entails,entails,entails,"[8.338120238704505e-08, 0.9999999403953552]","[3.6951740867152694e-07, 0.9999996423721313]"
7,Solutions are special mixtures formed when one...,A solution forms when one substance dissolves ...,entails,entails,entails,"[0.0002262640919070691, 0.9997737407684326]","[0.001110701123252511, 0.9988893270492554]"
8,Upwelling The physical process in near-shore o...,Upwelling is the term for when deep ocean wate...,entails,entails,entails,"[0.00026894372422248125, 0.9997310638427734]","[0.02423986792564392, 0.9757601618766785]"
9,To evaluate the proportion of felsic minerals.,"Felsic igneous rocks contain felsic minerals, ...",neutral,neutral,neutral,"[0.999969482421875, 3.04897221212741e-05]","[0.9999986290931702, 1.3785981991532026e-06]"


In [31]:
# Union: rows that at least one of the two models labels correctly.
graph_hit = graph == label
text_hit = matchlstm == label
either = graph_hit | text_hit

n_either = either.sum()
print("{} questions are able to be answered by either text and graph".format(n_either))

parsed[either]

1206 questions are able to be answered by either text and graph


Unnamed: 0,premise,hypothesis,label,matchlstm,matchlstm + graph,matchlstm_label_prob,graph_label_prob
0,"An introduction to atoms and elements, compoun...",Replace another in a molecule happens to atoms...,neutral,neutral,neutral,"[0.9996228814125061, 0.0003771277843043208]","[0.9825951457023621, 0.017404858022928238]"
1,Wavelength The distance between two consecutiv...,Wavelength is the distance between two corresp...,entails,entails,entails,"[0.0008741169585846364, 0.9991258978843689]","[0.004393246024847031, 0.9956067800521851]"
2,humans normally have 23 pairs of chromosomes.,Humans typically have 23 pairs pairs of chromo...,entails,entails,entails,"[1.3253340092056476e-10, 1.0]","[2.0228878572225995e-09, 1.0]"
3,"Photosynthesis, fermentation, glycolysis, aero...",Glycolysis is a series of reactions that is co...,neutral,neutral,neutral,"[0.9881460070610046, 0.011853997595608234]","[0.9963446259498596, 0.0036553910467773676]"
4,"The pungent smell of the yellow, toffee colore...",If a substance does not release molecules into...,neutral,neutral,neutral,"[0.9980006217956543, 0.001999374944716692]","[0.9987031817436218, 0.001296809408813715]"
5,kinetic energy the energy of motion.,Kinetic energy is the energy of motion.,entails,entails,entails,"[1.885475064966613e-08, 1.0]","[0.00035180666600354016, 0.9996482133865356]"
6,A solution is a homogenous mixture of two or m...,Solution is the term for a homogeneous mixture...,entails,entails,entails,"[8.338120238704505e-08, 0.9999999403953552]","[3.6951740867152694e-07, 0.9999996423721313]"
7,Solutions are special mixtures formed when one...,A solution forms when one substance dissolves ...,entails,entails,entails,"[0.0002262640919070691, 0.9997737407684326]","[0.001110701123252511, 0.9988893270492554]"
8,Upwelling The physical process in near-shore o...,Upwelling is the term for when deep ocean wate...,entails,entails,entails,"[0.00026894372422248125, 0.9997310638427734]","[0.02423986792564392, 0.9757601618766785]"
9,To evaluate the proportion of felsic minerals.,"Felsic igneous rocks contain felsic minerals, ...",neutral,neutral,neutral,"[0.999969482421875, 3.04897221212741e-05]","[0.9999986290931702, 1.3785981991532026e-06]"


In [24]:
# Persist the analysis subsets as CSV files. Only the `graph_only` export is
# active; the other exports are left disabled below.
# parsed.to_csv("./analysis.csv")
graph_only_rows = parsed[graph_only]
graph_only_rows.to_csv("./graph+text_but_not_text.csv")
# parsed[text_only].to_csv("./text_but_not_graph.csv")
# parsed[both].to_csv("./both_correct_(intersection).csv")
# parsed[either].to_csv("./either_correct_(union).csv")

In [121]:
# Combine both models by averaging their per-class probabilities.
import numpy as np

# Select the probability columns by name. Positional indices into
# `parsed.values` are fragile: in the current column layout, positions 5 and 6
# hold the "matchlstm + graph" labels and the matchlstm probabilities, not the
# two models' probability vectors.
matchlstm_probs = np.array(parsed["matchlstm label prob"].tolist())
graph_probs = np.array(parsed["graph label prob"].tolist())

# Both arrays must have the same shape for the element-wise mean to line up.
assert matchlstm_probs.shape == graph_probs.shape, (
    "probability arrays differ in shape: {} vs {}".format(
        matchlstm_probs.shape, graph_probs.shape
    )
)

# Unweighted mean of the two models' probability vectors, row by row.
avg_probs = (matchlstm_probs + graph_probs) / 2
avg_probs

array([[8.90569776e-01, 1.09430243e-01],
       [1.68453656e-01, 8.31546336e-01],
       [3.15235936e-04, 9.99684751e-01],
       ...,
       [2.39314017e-01, 7.60685951e-01],
       [1.16817695e-02, 9.88318235e-01],
       [6.14544848e-03, 9.93854553e-01]])

In [122]:
# Label names addressed by argmax position: 0 -> "neutral", 1 -> "entails".
all_labels = np.array(["neutral", "entails"])

# Pick, per row, the class with the highest averaged probability.
winning_index = avg_probs.argmax(axis=1)
combined_predictions = all_labels[winning_index]

# Element-wise agreement between the labels and the combined prediction.
correct = label == combined_predictions
n_correct = correct.sum()
print("Correctly predicted {} samples. Accuracy: {}".format(n_correct, n_correct / samples))

Correctly predicted 1125 samples. Accuracy: 0.8627300613496932


In [123]:
# Rows the text model alone gets wrong but the averaged prediction gets right.
text_wrong = label != matchlstm
combined_right = label == combined_predictions
corrected_by_graph = text_wrong & combined_right

n_corrected = corrected_by_graph.sum()
print("{} questions are corrected by graph".format(n_corrected))

parsed[corrected_by_graph]

43 questions are corrected by graph


Unnamed: 0,premise,hypothesis,label,matchlstm,graph,matchlstm_label_prob,graph_label_prob
60,Photosynthetic organisms obtain their ener...,The energy stored in the organic molecules of ...,entails,neutral,entails,"[0.8672293424606323, 0.13277064263820648]","[0.026144130155444145, 0.9738558530807495]"
66,"During the breeding season, mourning doves are...",Birds pair up with the same bird in mating sea...,entails,neutral,entails,"[0.834547758102417, 0.1654522716999054]","[0.09661369025707245, 0.9033862948417664]"
138,They are completely dependant upon their host'...,Nucleic acids are found in all living cells an...,neutral,entails,neutral,"[0.0887107104063034, 0.9112892746925354]","[0.9996433258056641, 0.00035667995689436793]"
167,These three fatty acids are important in many ...,Three steps are involved in blood clotting.,neutral,entails,neutral,"[0.46478915214538574, 0.5352108478546143]","[0.9918527007102966, 0.008147316053509712]"
222,Ionic (compound) lattices exhibit Schottky def...,"When naming an ionic compound, the cation is w...",neutral,entails,neutral,"[0.38077402114868164, 0.6192259788513184]","[0.9027475714683533, 0.09725245833396912]"
256,A normal human somatic cell contains 46 chromo...,"Other than gametes, normal human cells have a ...",entails,neutral,entails,"[0.8169394135475159, 0.18306058645248413]","[0.028009910136461258, 0.9719901084899902]"
266,The level of interest in the calorie content o...,The energy content of foods is often expressed...,neutral,entails,neutral,"[0.1672455221414566, 0.8327544927597046]","[0.9999873638153076, 1.2622489521163516e-05]"
315,The reaction produces essentially quantitative...,The amount of product that may be produced by ...,neutral,entails,neutral,"[0.22596845030784607, 0.7740315794944763]","[0.8332105875015259, 0.16678939759731293]"
398,Roney shares her intimate journey with diabete...,Lifestyle diseases are diseases that are cause...,neutral,entails,neutral,"[0.4407925605773926, 0.5592073798179626]","[0.9845760464668274, 0.015423928387463093]"
435,Structure and function of ecosystems is also s...,Pyramid ecosystem is used to show energy flow ...,neutral,entails,neutral,"[0.273535817861557, 0.7264641523361206]","[0.9973110556602478, 0.0026889662258327007]"


In [124]:
# Rows the text model alone gets right but the averaged prediction gets wrong.
text_right = label == matchlstm
combined_wrong = label != combined_predictions
make_wrong_by_graph = text_right & combined_wrong

n_made_wrong = make_wrong_by_graph.sum()
print("{} questions are made wrong by graph".format(n_made_wrong))

parsed[make_wrong_by_graph]

73 questions are made wrong by graph


Unnamed: 0,premise,hypothesis,label,matchlstm,graph,matchlstm_label_prob,graph_label_prob
29,Calories The energy in food is called calories.,The energy content of foods is often expressed...,entails,entails,neutral,"[0.24008339643478394, 0.7599166035652161]","[0.7736102938652039, 0.22638970613479614]"
47,Intestinal microflora play a crucial role for...,We call the microflora that aid in the digesti...,entails,entails,neutral,"[0.04904358461499214, 0.950956404209137]","[0.9930771589279175, 0.006922862026840448]"
49,5.3.1.5 Inhibition of DNA synthesis in Chinese...,"During the synthesis phase, dna replication oc...",neutral,neutral,entails,"[0.749180018901825, 0.25081995129585266]","[0.10480958223342896, 0.895190417766571]"
57,"As an adult, mature tree, the pine produces tw...",The end of a pine tree branch bears the male c...,entails,entails,neutral,"[0.07080996036529541, 0.9291900396347046]","[0.9992147088050842, 0.0007852759445086122]"
71,"Aside from caffeine, the most commonly consume...",Caffeine and alcohol are two examples of a psy...,entails,entails,neutral,"[0.1701413244009018, 0.829858660697937]","[0.9606882929801941, 0.039311733096838]"
102,Quantum mechanics is the branch of mechanics t...,The specialized study of the motion of objects...,entails,entails,neutral,"[0.026874935254454613, 0.9731250405311584]","[0.9972912669181824, 0.0027087091002613306]"
130,"in helium, two protons and two neutrons, and i...",A carbon atom with 6 protons and 8 neutrons is...,neutral,neutral,entails,"[0.5385523438453674, 0.4614476263523102]","[0.0008400696679018438, 0.9991599321365356]"
158,Electrical energy can be converted into kineti...,Electric motors transform electrical energy in...,entails,entails,neutral,"[0.15158970654010773, 0.8484103083610535]","[0.9991410970687866, 0.0008589325589127839]"
159,The rules af placing electrons within shells i...,Aufbau principle gives the order of electron f...,entails,entails,neutral,"[0.08540257066488266, 0.9145974516868591]","[0.9738458395004272, 0.02615416795015335]"
171,Penis The penis is composed of three cylindric...,"The human penis contains the urethra, as well ...",entails,entails,neutral,"[0.3799351155757904, 0.6200648546218872]","[0.7941827178001404, 0.20581728219985962]"
