In [69]:
from read_write_files import read_json
from collections import Counter
import itertools
import pandas as pd

In [70]:
def create_sense_zip(alignments):
    attr = "Sense"
    attr_zip = []
    for alignment in alignments:
        line_attr = []
        line_attr += [alignment["gold"][attr][0]]
        
        for pars in alignment["parsers"]:
            if pars == None:
                line_attr += ["None"]
            else:
                line_attr += [pars[attr][0]]
        
        attr_zip += [tuple(line_attr)]
    return attr_zip,alignment["parser_names"]

In [93]:
def sense_statistics(sense_comparison,parser_names,not_mapped_rels):
    zip_sense_comparison = list(zip(*sense_comparison))
    gold_senses = zip_sense_comparison[0]
    #Count all combination of senses
    sense_counter = Counter(sense_comparison)
    #Different Senses
    set_senses = set(gold_senses)
    #How many parsers will be compared
    len_parser = len(zip_sense_comparison)-1
    sense_rows = []
    
    #not mappable predicted relations
    not_mapped_counter = Counter([(rel["Parser"],rel["Sense"][0]) for rel in not_mapped_rels])

    for sense in set_senses:
        tmp_senses = set_senses.copy()
        tmp_senses.remove(sense)
        possible_comb = [[sense]] + [tmp_senses for i in range(len_parser)]
        diff_preds = itertools.product(*possible_comb)

        equal_correct_parsing = sense_counter[tuple([sense]+[sense]*len_parser)]

        all_wrong_parsing = sum([
            sense_counter[tuple(diff_pred)] 
            for diff_pred in diff_preds])

        total_act_sense_count = equal_correct_parsing + sum([1 for i in gold_senses if i == sense])


        equal_wrong_parsing = sum([
            sense_counter[tuple([sense]+[other_sense]*len_parser)] 
            for other_sense in tmp_senses])
        
        parser_columns = []

        #Parser is better than all the other
        parser_better = []
        for index_parser in range(1,len_parser+1):
            tmp_possible_comb = possible_comb[:]
            column_name = parser_names[index_parser-1]
            
            tmp_possible_comb[index_parser] = [sense]
            other_diff_preds = itertools.product(*tmp_possible_comb)
            
            pars_better = sum([sense_counter[other_diff_pred] for other_diff_pred in other_diff_preds])
            total_pred_sense_count = Counter(zip_sense_comparison[index_parser])[sense]#sum([1 for i in zip_sense_comparison[index_parser] if i == sense])
            
            not_mapped = not_mapped_counter[(column_name,sense)]
            
            parser_better += [
                pars_better,
                not_mapped,
                total_pred_sense_count+not_mapped]
            
            
            parser_columns += [string.format(column_name) for string in [
                "{}: unique right",
                "{}: not mapped",
                "{}: total pred"]]
            
        #Two Parser share opinions
        for parser1,parser2 in itertools.combinations([i for i in range(1,len_parser+1)],2):
            tmp_possible_comb = possible_comb[:]
            tmp_possible_comb[parser1] = [sense]
            tmp_possible_comb[parser2] = [sense]
            
            
            pars_better = sum([sense_counter[comb] for comb in itertools.product(*tmp_possible_comb)])
            total_pred_sense_count = sum(
                [1 for pars1_pred,pars2_pred in zip(*[
                    zip_sense_comparison[parser1],
                    zip_sense_comparison[parser2]
                ])
                if (pars2_pred == sense) != (pars1_pred == sense)])
            
            parser_better += [pars_better,total_pred_sense_count]
            
            column_name = " and ".join([parser_names[parser1-1],parser_names[parser2-1]])
            parser_columns += ["{} right".format(column_name),"Total Pred ({})".format(column_name)]
        

        #At least one parser is correct
        correct_sense_poss = [[sense]]+[set_senses]*len_parser
        combination_correct = sum(
            [sense_counter[at_least_one_right] 
             for at_least_one_right in itertools.product(*correct_sense_poss) if sense in at_least_one_right[1:]])

        sense_rows += [[sense,equal_correct_parsing,all_wrong_parsing,total_act_sense_count,equal_wrong_parsing,combination_correct]+parser_better]


    return pd.DataFrame(
        columns=["Sense",
                 "Equal Correct",
                 "All Wrong",
                 "Total Act",
                 "Equal Wrong",
                 "At least one correct"]+parser_columns,
        data=sense_rows)


In [94]:
alignment_path = "data/project_files/total_alignment.json"
not_mappable_path = "data/project_files/not_mappable.json"

total_alignments = read_json(alignment_path)
not_mappable = read_json(not_mappable_path)

In [95]:
sense_zip,sense_zip_names = create_sense_zip(total_alignments)
sense_counter = Counter(sense_zip)

In [96]:
sense_statistics(sense_zip,sense_zip_names,not_mappable)

Unnamed: 0,Sense,Equal Correct,All Wrong,Total Act,Equal Wrong,At least one correct,oslopots: unique right,oslopots: not mapped,oslopots: total pred,nguyenlab: unique right,nguyenlab: not mapped,nguyenlab: total pred,oslopots and nguyenlab right,Total Pred (oslopots and nguyenlab)
0,Expansion.Conjunction,174,49,494,33,208,17,4,334,17,1,335,174,216
1,Temporal.Asynchronous.Precedence,26,11,76,5,29,3,2,35,0,1,33,26,11
2,Temporal.Synchrony,21,11,73,9,29,5,1,52,3,0,56,21,33
3,Temporal.Asynchronous.Succession,28,12,86,10,30,2,6,44,0,1,44,28,19
4,Contingency.Cause.Result,9,27,61,12,17,0,0,19,8,0,56,9,53
5,EntRel,73,30,273,13,131,40,1,332,18,0,239,73,254
6,Comparison.Concession,5,75,112,63,9,1,0,6,3,0,17,5,13
7,Contingency.Condition,20,0,46,0,20,0,4,28,0,2,25,20,5
8,Comparison.Contrast,18,21,72,12,23,0,3,89,5,1,110,18,49
9,Expansion.Instantiation,2,33,46,15,5,1,0,7,2,1,10,2,12


In [18]:
sense_counter.most_common(10)

[(('Expansion.Conjunction', 'Expansion.Conjunction', 'Expansion.Conjunction'),
  174),
 (('EntRel', 'EntRel', 'EntRel'), 73),
 (('Comparison.Concession', 'Comparison.Contrast', 'Comparison.Contrast'), 45),
 (('Temporal.Asynchronous.Succession',
   'Temporal.Asynchronous.Succession',
   'Temporal.Asynchronous.Succession'),
  28),
 (('Expansion.Conjunction', 'EntRel', 'EntRel'), 28),
 (('Temporal.Asynchronous.Precedence',
   'Temporal.Asynchronous.Precedence',
   'Temporal.Asynchronous.Precedence'),
  26),
 (('EntRel', 'EntRel', 'Expansion.Conjunction'), 24),
 (('Expansion.Conjunction', 'None', 'Expansion.Conjunction'), 23),
 (('Expansion.Restatement', 'EntRel', 'EntRel'), 22),
 (('Temporal.Synchrony', 'Temporal.Synchrony', 'Temporal.Synchrony'), 21)]