In [1]:
import glob
import re
import json

In [2]:
def get_relations(annotation):
    """Build an argument graph from the text of a standoff annotation file.

    The line shapes parsed here look like the brat standoff format
    (``T<n>`` text-bound units, ``R<n>`` relations, ``A<n>`` attributes).

    Parameters
    ----------
    annotation : str
        Full contents of one annotation file.

    Returns
    -------
    dict
        ``{"nodes": {...}, "edges": [...]}`` where

        * ``nodes`` maps a unit number (the digits after ``T``, kept as a
          string) to ``(type, start, end, raw_line)`` with ``start``/``end``
          converted to ``int`` and ``raw_line`` being the matched ``T`` line;
        * ``edges`` is a list of ``(source, target, label)`` tuples holding
          the explicit relations, one stance edge from every ``Claim`` with a
          ``Stance`` attribute to every ``MajorClaim``, and ``"root"`` edges
          between major claims.

    Raises
    ------
    IndexError
        If a line starting like a ``T`` unit does not have the
        ``T<n>\\t<type> <start> <end>\\t<text>`` layout.
    ValueError
        If the start/end fields of a unit are not integers.
    """
    graph = {"nodes": {}, "edges": []}

    # Explicit relations, e.g. "supports Arg1:T4 Arg2:T3" -> ("4", "3", "supports").
    for label, source, target in re.findall(
        r"([^\s]+) Arg.+:T([0-9]+) Arg.+:T([0-9]+)", annotation
    ):
        graph["edges"].append((source, target, label))

    # Text-bound units: one node per "T<n>\t<type> <start> <end>\t<text>" line.
    for unit_line in re.findall(r"T[0-9]+\t.+", annotation):
        unit_id, unit_type, start, end, _text = re.findall(
            r"T([0-9]+)\t(.+) (.+) (.+)\t(.+)", unit_line
        )[0]
        graph["nodes"][unit_id] = (unit_type, int(start), int(end), unit_line)

    # Major claims stay in a list because their order drives the edge order
    # below; claims are only used for membership tests, so a set is enough.
    major_claims = [
        unit_id for unit_id, node in graph["nodes"].items() if node[0] == "MajorClaim"
    ]
    claims = {
        unit_id for unit_id, node in graph["nodes"].items() if node[0] == "Claim"
    }

    # A stance attribute on a claim links that claim to every major claim.
    for unit_id, stance in re.findall(r"A[0-9]+\tStance T([0-9]+) (\w+)", annotation):
        if unit_id in claims:
            for major_claim in major_claims:
                graph["edges"].append((unit_id, major_claim, stance))

    # "root" edges between major claims. The slice starts at ``index`` (not
    # ``index + 1``), so every major claim is also paired with itself.
    for index, first in enumerate(major_claims):
        for second in major_claims[index:]:
            graph["edges"].append((first, second, "root"))

    return graph

In [3]:
# Raw essay texts, raw annotation texts and the resulting graphs, all keyed
# by the first run of digits found in the file path.
txts = {}
anns = {}
graphs = {}


def keep_last_major_claim(graph):
    """Reduce ``graph`` in place to a single ``MajorClaim`` node.

    Here we make a simplification: every major claim except the last one (in
    node insertion order) is dropped, together with every edge that starts or
    ends at a dropped node, so that we can end up with a tree structure.
    Graphs with zero or one major claim are left untouched.
    """
    major_ids = [
        nid for nid, node in graph["nodes"].items() if node[0] == "MajorClaim"
    ]
    if len(major_ids) <= 1:
        return
    dropped = set(major_ids[:-1])
    for nid in dropped:
        del graph["nodes"][nid]
    # Slice assignment keeps the same list object while filtering it.
    graph["edges"][:] = [
        edge
        for edge in graph["edges"]
        if edge[0] not in dropped and edge[1] not in dropped
    ]


# sorted() makes the load order, and therefore the key order of ``graphs``,
# reproducible across runs; glob alone returns files in arbitrary order.
for txt_file in sorted(glob.glob("argumentative_essays/*")):
    id_found = re.search(r"[0-9]+", txt_file)
    if not id_found:
        continue
    essay_id = id_found[0]
    # Decide where the content goes before opening, so unrelated files that
    # merely contain digits in their name are never read.
    if txt_file.endswith("txt"):
        target = txts
    elif txt_file.endswith("ann"):
        target = anns
    else:
        continue
    # Explicit encoding: the platform default could decode non-ASCII text
    # differently and shift the character offsets stored in the annotations.
    with open(txt_file, encoding="utf-8") as f:
        target[essay_id] = f.read()

for essay_id, txt in txts.items():
    # A text without a matching annotation file raises KeyError here.
    graph = get_relations(anns[essay_id])
    keep_last_major_claim(graph)
    graph["text"] = txt
    graphs[essay_id] = graph

In [4]:
# Persist every essay graph (nodes, edges and text) as one JSON document.
with open("essay_graphs.json", mode="w") as out_file:
    json.dump(obj=graphs, fp=out_file)