In [0]:
!pip install apache-beam --upgrade
!pip install apache-beam[interactive]

In [0]:
import apache_beam as beam
from apache_beam.runners.interactive import interactive_runner
import apache_beam.runners.interactive.interactive_beam as ib
from collections import defaultdict, ChainMap
import json
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
import numpy as np

In [0]:
# Switch to the project folder so the relative paths used in later cells
# ("DataFiles/...", "Saves/...", "negation/...") resolve against it.
# NOTE(review): the recorded output shows /content/drive/..., so this presumably
# needs Google Drive mounted first — no mount cell is visible in this notebook.
%cd drive/My Drive/in5550-exam

/content/drive/My Drive/in5550-exam


In [0]:
def calculate_metrics(x):
  """Score one sentence's predicted labels against its gold labels.

  Args:
    x: dict for a single joined record. The keys read here are "negation"
      (gold label sequence), "pred" (predicted label sequence), and "scope",
      "cue" and "form" (sequences whose items support ``len``).

  Returns:
    A new dict holding everything in ``x`` plus, in this order:
      "scopelen":  number of non-empty items in ``x["scope"]``.
      "cuelen":    number of non-empty items in ``x["cue"]``.
      "length":    ``len(x["form"])``.
      "acc":       confusion-matrix diagonal total divided by "length"
                   (1.0 when "length" is 0).
      "precision", "recall", "mac_f1": macro averages over the labels
                   "T", "F", "C", "A", computed with ``zero_division=1``.

  Raises:
    KeyError: if one of the keys listed above is missing from ``x``.
  """
  labels = ["T", "F", "C", "A"]
  gold, pred = x["negation"], x["pred"]
  # Keyword arguments shared by the three macro-averaged scores.
  macro = dict(labels=labels, average="macro", zero_division=1)

  # Build the result on a copy: this function runs inside beam.Map, and Beam
  # requires that user functions do not modify the element they are given.
  scored = dict(x)
  scored["scopelen"] = sum(1 for s in x["scope"] if len(s) > 0)
  scored["cuelen"] = sum(1 for s in x["cue"] if len(s) > 0)

  length = len(x["form"])
  scored["length"] = length

  # The diagonal of the confusion matrix counts the positions where the
  # predicted label equals the gold label (restricted to `labels`).
  n_corr = float(np.trace(confusion_matrix(gold, pred, labels=labels)))
  # Guard the division for an empty sentence; 1.0 follows the same
  # "undefined counts as 1" convention as zero_division=1 below.
  scored["acc"] = n_corr / length if length else 1.0

  scored["precision"] = float(precision_score(gold, pred, **macro))
  scored["recall"] = float(recall_score(gold, pred, **macro))
  scored["mac_f1"] = float(f1_score(gold, pred, **macro))

  return scored


In [0]:
# Pipeline executed by Beam's InteractiveRunner; the gold and per-model
# branches defined in the following cells are all attached to this `p`.
p = beam.Pipeline(interactive_runner.InteractiveRunner())

In [0]:
# Gold annotations: every input line is parsed as JSON and emitted as a
# (key, record) pair. The key concatenates the record's "id", its "source"
# and its "cue" items joined with "X"; the prediction branches build the
# same key so the two sides can be joined.
gold = (
    p
    | "Read Gold" >> beam.io.ReadFromText("DataFiles/cde.epe")
    | "Convert to Dict" >> beam.Map(json.loads)
    | "Extract Key" >> beam.Map(
        lambda record: (
            record["id"] + record["source"] + "X".join(record["cue"]),
            record,
        )
    )
)

# BiLSTM-C Baseline

In [0]:
# Model 1 predictions: parse each line, key it the same way as the gold
# records, and keep only the predicted label sequence under "pred".
# Step labels carry an "M1" suffix because Beam requires every label in a
# pipeline to be unique and the other model branches attach to the same `p`;
# without the suffix the pipeline only builds where the notebook integration
# rewrites labels per cell.
preds_m1 = (
    p
    | "Read Preds M1" >> beam.io.ReadFromText(
        "Saves/BiLSTM-C/Baseline/eval_pred_corr_tidy.epe"
        )
    | "JSON loads M1" >> beam.Map(json.loads)
    | "Create Key M1" >> beam.Map(
        lambda x: (x["id"]+x["source"]+"X".join(x["cue"]), x)
        )
    | "Get Preds Only M1" >> beam.Map(
        lambda x: (x[0], {"pred": x[1]["negation"]})
        )
    )

# Join gold and predictions on the shared key, merge each pair into one dict
# (gold fields plus "pred"), and score it.
# NOTE(review): "Clean" takes only the first gold and the first prediction per
# key; a key present on one side only would presumably fail on the `[0]`
# lookup — confirm both files cover the same keys.
res1 = (
    {"gold": gold, "preds": preds_m1}
    | "Join M1" >> beam.CoGroupByKey()
    | "Clean M1" >> beam.Map(
        lambda x: dict(x[1]["gold"][0], **x[1]["preds"][0])
        )
    | "Calculate Metrics M1" >> beam.Map(calculate_metrics)
    )


# BiLSTM-C Baseline - Larger hidden dimension

In [0]:
# Model 2 predictions: parse each line, key it the same way as the gold
# records, and keep only the predicted label sequence under "pred".
# Step labels carry an "M2" suffix because Beam requires every label in a
# pipeline to be unique and the other model branches attach to the same `p`;
# without the suffix the pipeline only builds where the notebook integration
# rewrites labels per cell.
preds_m2 = (
    p
    | "Read Preds M2" >> beam.io.ReadFromText(
        "Saves/BiLSTM-C/Baseline_incr_hidd/eval_pred_corr_tidy.epe"
        )
    | "JSON loads M2" >> beam.Map(json.loads)
    | "Create Key M2" >> beam.Map(
        lambda x: (x["id"]+x["source"]+"X".join(x["cue"]), x)
        )
    | "Get Preds Only M2" >> beam.Map(
        lambda x: (x[0], {"pred": x[1]["negation"]})
        )
    )

# Join gold and predictions on the shared key, merge each pair into one dict
# (gold fields plus "pred"), and score it.
# NOTE(review): "Clean" takes only the first gold and the first prediction per
# key; a key present on one side only would presumably fail on the `[0]`
# lookup — confirm both files cover the same keys.
res2 = (
    {"gold": gold, "preds": preds_m2}
    | "Join M2" >> beam.CoGroupByKey()
    | "Clean M2" >> beam.Map(
        lambda x: dict(x[1]["gold"][0], **x[1]["preds"][0])
        )
    | "Calculate Metrics M2" >> beam.Map(calculate_metrics)
    )


# BiLSTM-CE - BERT embeddings

In [0]:
# Model 3 predictions: parse each line, key it the same way as the gold
# records, and keep only the predicted label sequence under "pred".
# Step labels carry an "M3" suffix because Beam requires every label in a
# pipeline to be unique and the other model branches attach to the same `p`;
# without the suffix the pipeline only builds where the notebook integration
# rewrites labels per cell.
preds_m3 = (
    p
    | "Read Preds M3" >> beam.io.ReadFromText(
        "Saves/BiLSTM-CE/BERT_base/eval_pred_corr_tidy.epe"
        )
    | "JSON loads M3" >> beam.Map(json.loads)
    | "Create Key M3" >> beam.Map(
        lambda x: (x["id"]+x["source"]+"X".join(x["cue"]), x)
        )
    | "Get Preds Only M3" >> beam.Map(
        lambda x: (x[0], {"pred": x[1]["negation"]})
        )
    )

# Join gold and predictions on the shared key, merge each pair into one dict
# (gold fields plus "pred"), and score it.
# NOTE(review): "Clean" takes only the first gold and the first prediction per
# key; a key present on one side only would presumably fail on the `[0]`
# lookup — confirm both files cover the same keys.
res3 = (
    {"gold": gold, "preds": preds_m3}
    | "Join M3" >> beam.CoGroupByKey()
    | "Clean M3" >> beam.Map(
        lambda x: dict(x[1]["gold"][0], **x[1]["preds"][0])
        )
    | "Calculate Metrics M3" >> beam.Map(calculate_metrics)
    )


In [0]:
# Execute everything attached to `p` in one run, then fetch the output of
# each model branch from the run handle.
# NOTE(review): `result.get(...)` presumably returns the materialized elements
# of the given PCollection — confirm against the InteractiveRunner result API.
# The res_out* names are not read again in the cells visible here.
result = p.run()
res_out1 = result.get(res1)
res_out2 = result.get(res2)
res_out3 = result.get(res3)

In [0]:
# For each scored PCollection, build Beam's interactive visualization wrapper
# (pv*) and a DataFrame of its contents (pdf*) for the display cells below.
# `_to_dataframe` is a private helper of that wrapper.
_viz_cls = ib.ir.runners.interactive.display.pcoll_visualization.PCollectionVisualization
pv1, pv2, pv3 = [_viz_cls(scored) for scored in (res1, res2, res3)]
pdf1, pdf2, pdf3 = [viz._to_dataframe() for viz in (pv1, pv2, pv3)]

  return json_normalize(normalized_list).applymap(


# Model Predictions vs Gold Labels


## BiLSTM-C Baseline

In [0]:
# Render the first model's per-sentence results with the visualization
# wrapper's private "dive" display.
pv1._display_dive(pdf1)

In [0]:
# Show the same first-model results as a table (private display helper); the
# recorded output below lists one row per joined sentence with its metrics.
pv1._display_dataframe(pdf1)

Unnamed: 0,id,source,negations,form,lemma,xpos,negation,cue,scope,pred,scopelen,cuelen,length,acc,precision,recall,mac_f1
0,0,cardboard,0,"['In', 'choosing', 'a', 'few', 'typical', 'cas...","['In', 'choose', 'a', 'few', 'typical', 'case'...","['IN', 'VBG', 'DT', 'JJ', 'JJ', 'NNS', 'WDT', ...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...","['', '', '', '', '', '', '', '', '', '', '', '...","['', '', '', '', '', '', '', '', '', '', '', '...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...",0,0,47,1.0,0.25,0.25,0.25
1,1,cardboard,2,"['It', 'is', ',', 'however', ',', 'unfortunate...","['It', 'be', ',', 'however', ',', 'unfortunate...","['PRP', 'VBZ', ',', 'RB', ',', 'RB', 'JJ', 'RB...","['F', 'F', 'T', 'T', 'T', 'T', 'A', 'F', 'F', ...","['', '', '', '', '', '', 'im', '', '', '', '',...","['It', 'is', '', '', '', '', 'possible', 'enti...","['T', 'T', 'T', 'T', 'T', 'F', 'A', 'F', 'F', ...",11,1,63,0.952381,0.712788,0.695192,0.703383
2,1,cardboard,2,"['It', 'is', ',', 'however', ',', 'unfortunate...","['It', 'be', ',', 'however', ',', 'unfortunate...","['PRP', 'VBZ', ',', 'RB', ',', 'RB', 'JJ', 'RB...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...","['', '', '', '', '', '', '', '', '', '', '', '...","['', '', '', '', '', '', '', '', '', '', '', '...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...",6,1,63,0.888889,0.562147,0.532738,0.540338
3,2,cardboard,0,"['With', 'this', 'short', 'preface', 'I', 'sha...","['With', 'this', 'short', 'preface', 'I', 'sha...","['IN', 'DT', 'JJ', 'NN', 'PRP', 'MD', 'VB', 'T...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...","['', '', '', '', '', '', '', '', '', '', '', '...","['', '', '', '', '', '', '', '', '', '', '', '...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...",0,0,27,1.0,0.25,0.25,0.25
4,3,cardboard,0,"['It', 'was', 'a', 'blazing', 'hot', 'day', 'i...","['It', 'be', 'a', 'blazing', 'hot', 'day', 'in...","['PRP', 'VBD', 'DT', 'VBG', 'JJ', 'NN', 'IN', ...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T']","['', '', '', '', '', '', '', '', '']","['', '', '', '', '', '', '', '', '']","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T']",0,0,9,1.0,0.25,0.25,0.25
5,4,cardboard,0,"['Baker', 'Street', 'was', 'like', 'an', 'oven...","['Baker', 'Street', 'be', 'like', 'an', 'oven'...","['NNP', 'NNP', 'VBD', 'IN', 'DT', 'NN', ',', '...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...","['', '', '', '', '', '', '', '', '', '', '', '...","['', '', '', '', '', '', '', '', '', '', '', '...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...",0,0,29,1.0,0.25,0.25,0.25
6,5,cardboard,0,"['It', 'was', 'hard', 'to', 'believe', 'that',...","['It', 'be', 'hard', 'to', 'believe', 'that', ...","['PRP', 'VBD', 'JJ', 'TO', 'VB', 'IN', 'DT', '...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...","['', '', '', '', '', '', '', '', '', '', '', '...","['', '', '', '', '', '', '', '', '', '', '', '...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...",0,0,21,1.0,0.25,0.25,0.25
7,6,cardboard,0,"['Our', 'blinds', 'were', 'half-drawn', ',', '...","['Our', 'blind', 'be', 'half-drawn', ',', 'and...","['PRP$', 'NNS', 'VBD', 'JJ', ',', 'CC', 'NNP',...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...","['', '', '', '', '', '', '', '', '', '', '', '...","['', '', '', '', '', '', '', '', '', '', '', '...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...",0,0,27,1.0,0.25,0.25,0.25
8,7,cardboard,1,"['For', 'myself', ',', 'my', 'term', 'of', 'se...","['For', 'myself', ',', 'my', 'term', 'of', 'se...","['IN', 'PRP', ',', 'PRP$', 'NN', 'IN', 'NN', '...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...","['', '', '', '', '', '', '', '', '', '', '', '...","['', '', '', '', '', '', '', '', '', '', '', '...","['T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ...",6,1,28,0.892857,0.71875,0.625,0.65
9,8,cardboard,1,"['But', 'the', 'morning', 'paper', 'was', 'uni...","['But', 'the', 'morning', 'paper', 'be', 'unin...","['CC', 'DT', 'NN', 'NN', 'VBD', 'JJ', '.']","['T', 'F', 'F', 'F', 'F', 'A', 'T']","['', '', '', '', '', 'un', '']","['', 'the', 'morning', 'paper', 'was', 'intere...","['T', 'F', 'F', 'F', 'F', 'A', 'T']",5,1,7,1.0,0.75,0.75,0.75


## BiLSTM-C Baseline - Larger hidden dimension

In [0]:
# Render the second model's per-sentence results with the visualization
# wrapper's private "dive" display.
pv2._display_dive(pdf2)

## BiLSTM-CE - BERT embeddings

In [0]:
# Visualize the third model's results (`res3`), matching this section's
# heading. The cell previously referenced `res`, a name no cell defines, and
# stored its DataFrame in `pdf1`, clobbering the first model's table.
pv = ib.ir.runners.interactive.display.pcoll_visualization.PCollectionVisualization(res3)
pdf3 = pv._to_dataframe()
pv._display_dive(pdf3)

In [0]:
# Second pipeline: read the predictions of every saved model at once and put
# them next to the gold record, one column per model.
# This cell rebinds `p` and `gold`; the earlier per-model branches stay
# attached to the previous pipeline object.
p = beam.Pipeline(interactive_runner.InteractiveRunner())

fpattern = "Saves/*/*/eval_pred_corr_tidy.epe"


def _tag_with_model(named_line):
  """Parse one (file path, JSON line) pair and add a "model" field.

  The model name is the two directory names directly above the file, joined
  with "-". Indexing from the end of the path gives the same result as the
  former `[1:3]` slice for relative paths and also works if the runner
  reports absolute paths.
  """
  path, line = named_line
  model = "-".join(path.split("/")[-3:-1])
  return dict({"model": model}, **json.loads(line))


preds = (
    p
    | "Read Preds" >> beam.io.ReadFromTextWithFilename(fpattern)
    | "Map JSON" >> beam.Map(_tag_with_model)
    | "Create Key" >> beam.Map(
        lambda x: (x["id"]+x["source"]+"X".join(x["cue"]), x)
        )
    # Keep only {model name: predicted label sequence} for each record.
    | "Get Preds Only" >> beam.Map(
        lambda x: (x[0], {x[1]["model"]: x[1]["negation"]})
        )
    | "Join Preds" >> beam.GroupByKey()
    # Collapse the per-model dicts of one key into a single dict; with
    # ChainMap the first dict wins if a model name occurs twice.
    | "Merge Preds" >> beam.Map(
        lambda x: (x[0], dict(ChainMap(*x[1])))
        )
)

gold = (
    p
    | "Read Gold" >> beam.io.ReadFromText("DataFiles/cde.epe")
    | "Convert to Dict" >> beam.Map(json.loads)
    | "Extract Key" >> beam.Map(
        lambda x: (x["id"]+x["source"]+"X".join(x["cue"]), x)
        )
)

# Gold fields plus one entry per model name.
# NOTE(review): only the first gold and first merged-prediction entry per key
# are used; a key missing on either side would presumably fail on `[0]`.
res_full = (
    {"gold": gold, "preds": preds}
    | "Join" >> beam.CoGroupByKey()
    | "Clean" >> beam.Map(
        lambda x: dict(x[1]["gold"][0], **x[1]["preds"][0])
        )
)

In [0]:
# Display the combined gold + all-model table built in the previous cell.
# (`res` is not defined anywhere in this notebook; `res_full` is.)
ib.show(res_full)

In [0]:
# Run whichever pipeline `p` currently refers to — the second pipeline once
# the cell that rebinds `p` has executed. This also rebinds `result`.
result = p.run()

In [0]:
# Load the annotation file into `S`: one JSON document per line.
# The encoding is set explicitly so the result does not depend on the
# platform default, and blank lines (e.g. a trailing newline) are skipped
# instead of raising a JSON decode error.
with open("negation/cde.epe", encoding="utf-8") as f:
  S = [json.loads(line) for line in f if line.strip()]

In [0]:
def filter(S, id, source):
  """Print each token of the matching record(s) with its negation entries.

  For every record in ``S`` whose "id" and "source" equal the given values,
  each node's "form" is printed on its own line, followed by one line that
  holds the node's "negation" entries, each wrapped in tabs.

  Note: the function and its ``id`` parameter shadow Python builtins; the
  names are kept so existing calls continue to work.

  Args:
    S: iterable of dicts with "id" and "source" keys and, optionally, a
      "nodes" list of dicts.
    id: value compared against each record's "id".
    source: value compared against each record's "source".

  Returns:
    None. Output goes to stdout.
  """
  for sentence in S:
    if sentence["id"] != id or sentence["source"] != source:
      continue
    # A record without "nodes", or a node without "negation", is treated as
    # empty rather than failing on iteration over None.
    for node in sentence.get("nodes") or ():
      print(node.get("form"))
      for annotation in node.get("negation") or ():
        print("\t", annotation, end="\t")
      print("")

In [0]:
# Inspect one sentence: print the tokens and negation annotations of the
# record with id "50" from source "circle01".
filter(S, "50", "circle01")

He
	 {'id': 148}	
has
	 {'id': 148}	
been
	 {'id': 148}	
there
	 {'id': 148}	
for
	 {'id': 148}	
ten
	 {'id': 148}	
days
	 {'id': 148}	
,
	 {'id': 148}	
and
	 {'id': 148}	
neither
	 {'id': 148, 'cue': 'neither'}	
Mr.
	 {'id': 148, 'scope': 'Mr.'}	
Warren
	 {'id': 148, 'scope': 'Warren'}	
,
	 {'id': 148, 'scope': ','}	
nor
	 {'id': 148, 'cue': 'nor'}	
I
	 {'id': 148, 'scope': 'I'}	
,
	 {'id': 148, 'scope': ','}	
nor
	 {'id': 148, 'cue': 'nor'}	
the
	 {'id': 148, 'scope': 'the'}	
girl
	 {'id': 148, 'scope': 'girl'}	
has
	 {'id': 148, 'scope': 'has'}	
once
	 {'id': 148, 'scope': 'once'}	
set
	 {'id': 148, 'scope': 'set', 'event': 'set'}	
eyes
	 {'id': 148, 'scope': 'eyes', 'event': 'eyes'}	
upon
	 {'id': 148, 'scope': 'upon'}	
him
	 {'id': 148, 'scope': 'him'}	
.
	 {'id': 148}	


In [0]:
# Scratch check: list equality is order-sensitive (the recorded output is False).
[1, 2, 3] == [2, 1, 3]

False

In [0]:
# Scratch check: an empty string has length 0, which is what the
# `len(s) > 0` filters in calculate_metrics rely on.
len('')

0