In [69]:
from read_write_files import read_json
from collections import Counter
import itertools
import pandas as pd

In [97]:
def create_sense_zip(alignments):
    attr = "Sense"
    attr_zip = []
    for alignment in alignments:
        line_attr = []
        line_attr += [alignment["gold"][attr][0]]
        
        for pars in alignment["parsers"]:
            if pars == None:
                line_attr += ["None"]
            else:
                line_attr += [pars[attr][0]]
        
        attr_zip += [tuple(line_attr)]
    return attr_zip,alignment["parser_names"]

In [98]:
def sense_statistics(sense_comparison,parser_names,not_mapped_rels):
    zip_sense_comparison = list(zip(*sense_comparison))
    gold_senses = zip_sense_comparison[0]
    #Count all combination of senses
    sense_counter = Counter(sense_comparison)
    #Different Senses
    set_senses = set(gold_senses)
    #How many parsers will be compared
    len_parser = len(zip_sense_comparison)-1
    sense_rows = []
    
    #not mappable predicted relations
    not_mapped_counter = Counter([(rel["Parser"],rel["Sense"][0]) for rel in not_mapped_rels])

    for sense in set_senses:
        tmp_senses = set_senses.copy()
        tmp_senses.remove(sense)
        possible_comb = [[sense]] + [tmp_senses for i in range(len_parser)]
        diff_preds = itertools.product(*possible_comb)

        equal_correct_parsing = sense_counter[tuple([sense]+[sense]*len_parser)]

        all_wrong_parsing = sum([
            sense_counter[tuple(diff_pred)] 
            for diff_pred in diff_preds])

        total_act_sense_count = equal_correct_parsing + sum([1 for i in gold_senses if i == sense])


        equal_wrong_parsing = sum([
            sense_counter[tuple([sense]+[other_sense]*len_parser)] 
            for other_sense in tmp_senses])
        
        parser_columns = []

        #Parser is better than all the other
        parser_better = []
        for index_parser in range(1,len_parser+1):
            tmp_possible_comb = possible_comb[:]
            column_name = parser_names[index_parser-1]
            
            tmp_possible_comb[index_parser] = [sense]
            other_diff_preds = itertools.product(*tmp_possible_comb)
            
            pars_better = sum([sense_counter[other_diff_pred] for other_diff_pred in other_diff_preds])
            total_pred_sense_count = Counter(zip_sense_comparison[index_parser])[sense]#sum([1 for i in zip_sense_comparison[index_parser] if i == sense])
            
            not_mapped = not_mapped_counter[(column_name,sense)]
            
            parser_better += [
                pars_better,
                not_mapped,
                total_pred_sense_count+not_mapped]
            
            
            parser_columns += [string.format(column_name) for string in [
                "{}: unique right",
                "{}: not mapped",
                "{}: total pred"]]
            
        #Two Parser share opinions
        for parser1,parser2 in itertools.combinations([i for i in range(1,len_parser+1)],2):
            tmp_possible_comb = possible_comb[:]
            tmp_possible_comb[parser1] = [sense]
            tmp_possible_comb[parser2] = [sense]
            
            
            pars_better = sum([sense_counter[comb] for comb in itertools.product(*tmp_possible_comb)])
            total_pred_sense_count = sum(
                [1 for pars1_pred,pars2_pred in zip(*[
                    zip_sense_comparison[parser1],
                    zip_sense_comparison[parser2]
                ])
                if (pars2_pred == sense) != (pars1_pred == sense)])
            
            parser_better += [pars_better,total_pred_sense_count]
            
            column_name = " and ".join([parser_names[parser1-1],parser_names[parser2-1]])
            parser_columns += ["{} right".format(column_name),"Total Pred ({})".format(column_name)]
        

        #At least one parser is correct
        correct_sense_poss = [[sense]]+[set_senses]*len_parser
        combination_correct = sum(
            [sense_counter[at_least_one_right] 
             for at_least_one_right in itertools.product(*correct_sense_poss) if sense in at_least_one_right[1:]])

        sense_rows += [[sense,equal_correct_parsing,all_wrong_parsing,total_act_sense_count,equal_wrong_parsing,combination_correct]+parser_better]


    return pd.DataFrame(
        columns=["Sense",
                 "Equal Correct",
                 "All Wrong",
                 "Total Act",
                 "Equal Wrong",
                 "At least one correct"]+parser_columns,
        data=sense_rows)


In [99]:
alignment_path = "data/project_files/total_alignment.json"
not_mappable_path = "data/project_files/not_mappable.json"

total_alignments = read_json(alignment_path)
not_mappable = read_json(not_mappable_path)

In [100]:
sense_zip,sense_zip_names = create_sense_zip(total_alignments)
sense_counter = Counter(sense_zip)

In [101]:

sense_statistics(sense_zip,sense_zip_names,not_mappable)

Unnamed: 0,Sense,Equal Correct,All Wrong,Total Act,Equal Wrong,At least one correct,oslopots: unique right,oslopots: not mapped,oslopots: total pred,nguyenlab: unique right,...,nguyenlab: total pred,steven: unique right,steven: not mapped,steven: total pred,oslopots and nguyenlab right,Total Pred (oslopots and nguyenlab),oslopots and steven right,Total Pred (oslopots and steven),nguyenlab and steven right,Total Pred (nguyenlab and steven)
0,Expansion.Conjunction,125,22,445,11,188,8,1,288,3,...,339,20,2,459,17,237,6,380,9,347
1,Temporal.Asynchronous.Precedence,14,10,64,5,19,0,1,29,0,...,33,0,4,43,5,13,0,37,0,36
2,Temporal.Synchrony,10,8,62,3,26,3,2,54,2,...,59,2,1,40,8,31,0,47,1,50
3,Temporal.Asynchronous.Succession,19,11,77,5,28,0,2,39,1,...,44,0,1,51,7,20,1,43,0,35
4,Contingency.Cause.Result,6,20,58,4,14,0,0,18,6,...,57,2,1,34,0,56,0,35,0,65
5,EntRel,29,12,229,1,112,21,0,298,10,...,248,3,3,213,33,243,8,298,8,255
6,Comparison.Concession,2,49,109,28,7,0,0,4,2,...,14,2,1,16,1,12,0,15,0,23
7,Contingency.Condition,12,0,38,0,13,0,3,22,0,...,25,0,2,38,1,9,0,25,0,24
8,Comparison.Contrast,10,14,64,6,19,0,0,80,2,...,112,1,3,130,5,57,0,113,1,128
9,Expansion.Instantiation,1,22,45,3,4,0,0,6,2,...,10,1,0,5,0,13,0,7,0,12


In [18]:
sense_counter.most_common(10)

[(('Expansion.Conjunction', 'Expansion.Conjunction', 'Expansion.Conjunction'),
  174),
 (('EntRel', 'EntRel', 'EntRel'), 73),
 (('Comparison.Concession', 'Comparison.Contrast', 'Comparison.Contrast'), 45),
 (('Temporal.Asynchronous.Succession',
   'Temporal.Asynchronous.Succession',
   'Temporal.Asynchronous.Succession'),
  28),
 (('Expansion.Conjunction', 'EntRel', 'EntRel'), 28),
 (('Temporal.Asynchronous.Precedence',
   'Temporal.Asynchronous.Precedence',
   'Temporal.Asynchronous.Precedence'),
  26),
 (('EntRel', 'EntRel', 'Expansion.Conjunction'), 24),
 (('Expansion.Conjunction', 'None', 'Expansion.Conjunction'), 23),
 (('Expansion.Restatement', 'EntRel', 'EntRel'), 22),
 (('Temporal.Synchrony', 'Temporal.Synchrony', 'Temporal.Synchrony'), 21)]