In [204]:
import pandas as pd
from typing import List
import numpy as np
import json
import ast

In [263]:
# Read the tab-separated training splits of both datasets in one pass;
# the order of the paths matches the order of the unpacked names.
expla_df, copa_df = (
    pd.read_csv(tsv_path, sep="\t")
    for tsv_path in (
        "../data/explagraphs/train_v3.tsv",
        "../data/copa/train_v3.tsv",
    )
)

In [264]:
# Maps the label printed in the report to the DataFrame column it is read
# from. Every label currently equals its column name, so the mapping is built
# from a single tuple of names (insertion order is the reporting order).
columns = {
    name: name
    for name in (
        'linked_paths',
        'gold_graph',
        'generated_graph_linked',
        'generated_graph_gold',
        'retrieved_graph',
    )
}

In [277]:
def _is_sequence(obj) -> bool:
    """Return True when ``obj`` is a list or tuple, the only shapes accepted as a graph or a triple."""
    return isinstance(obj, (list, tuple))


def _rounded_mean(values: List) -> float:
    """Return the mean of ``values`` rounded to 3 decimals, or NaN (without a NumPy warning) when empty."""
    if not values:
        return float("nan")
    return round(float(np.mean(values)), 3)


def calculate_stats(l: List):
    """Summarise how well-formed a column of serialized graphs is.

    Each entry of ``l`` is expected to be the string form of a Python literal
    holding a list (or tuple) of triples, e.g. ``"[['a', 'rel', 'b']]"``.

    Flags collected for ``broken_triples`` (1 = broken, 0 = fine):

    * one ``1`` per entry that ``ast.literal_eval`` cannot parse, or that
      parses to something other than a list/tuple (this includes ``-1``);
    * for every parsable entry, one flag per element: ``1`` when the element
      is not a list/tuple, has more than three members, or contains a falsy
      member (e.g. an empty string), otherwise ``0``.

    Note that entry-level and triple-level flags are averaged together.

    Args:
        l: The raw column values (typically strings; non-string values such
            as NaN fail to parse and count as one broken entry).

    Returns:
        dict with two floats rounded to 3 decimals:
        ``'avg_triple_length'`` - mean number of elements per parsable entry,
        ``'broken_triples'`` - mean of the flags described above.
        Either value is NaN when nothing contributed to it.
    """
    # Exceptions documented for ast.literal_eval on malformed input; anything
    # else (e.g. KeyboardInterrupt) is deliberately allowed to propagate.
    parse_errors = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)

    triple_lengths = []
    broken_triples = []
    for en in l:
        try:
            e = ast.literal_eval(en)
        except parse_errors:
            broken_triples.append(1)
            continue

        # -1 is presumably a failure sentinel written upstream (TODO confirm);
        # it and any other non-sequence value count as one broken entry
        # instead of crashing or being iterated character by character.
        if not _is_sequence(e):
            broken_triples.append(1)
            continue

        for inner in e:
            if not _is_sequence(inner):
                broken_triples.append(1)
                continue
            has_empty_node = any(not node for node in inner)
            # NOTE(review): triples with fewer than three members are not
            # flagged here; confirm whether `!= 3` is the intended rule.
            broken_triples.append(1 if len(inner) > 3 or has_empty_node else 0)

        triple_lengths.append(len(e))

    return {
        'avg_triple_length': _rounded_mean(triple_lengths),
        'broken_triples': _rounded_mean(broken_triples),
    }

In [280]:
# Print the statistics of every configured graph column, one dataset after
# the other, each introduced by its banner line.
reports = (
    ("----EXPLA----", expla_df),
    ("----COPA-SSE----", copa_df),
)
for position, (banner, frame) in enumerate(reports):
    if position > 0:
        # Extra blank separator between consecutive dataset reports.
        print("\n")
    print(banner)
    for key, value in columns.items():
        stats = calculate_stats(frame[value].to_numpy().tolist())
        print(f"{key}: \n {stats}")
        print("\n")

----EXPLA----
linked_paths: 
 {'avg_triple_length': 3.031, 'broken_triples': 0.0}


gold_graph: 
 {'avg_triple_length': 4.238, 'broken_triples': 0.0}


generated_graph_linked: 
 {'avg_triple_length': 1.345, 'broken_triples': 0.217}


generated_graph_gold: 
 {'avg_triple_length': 1.589, 'broken_triples': 0.15}


retrieved_graph: 
 {'avg_triple_length': 2.999, 'broken_triples': 0.021}




----COPA-SSE----
linked_paths: 
 {'avg_triple_length': 2.904, 'broken_triples': 0.0}


gold_graph: 
 {'avg_triple_length': 2.128, 'broken_triples': 0.0}


generated_graph_linked: 
 {'avg_triple_length': 1.395, 'broken_triples': 0.202}


generated_graph_gold: 
 {'avg_triple_length': 1.644, 'broken_triples': 0.12}


retrieved_graph: 
 {'avg_triple_length': 2.902, 'broken_triples': 0.053}


