In [41]:
import copy
import json
import random

In [42]:
def read_parser_rel(file_path):
    """Load relations from a JSON-lines file.

    file_path -- path to a text file holding one JSON document per line.

    Returns a list with one decoded object per non-blank line, in file order.
    Raises whatever ``open`` or ``json.loads`` raise for a missing file or a
    malformed line.
    """
    # The context manager closes the handle deterministically instead of
    # leaving that to the garbage collector.
    with open(file_path) as rel_file:
        # Blank lines (typically a trailing newline at the end of the file)
        # carry no relation and would make json.loads fail, so skip them.
        return [json.loads(line) for line in rel_file if line.strip()]

In [43]:
def save_parser_rel(relations,file_path):
    """Write relations to ``file_path`` as JSON lines.

    relations -- iterable of JSON-serializable objects.
    file_path -- destination path; an existing file is overwritten.

    Each relation is serialized with ``json.dumps`` and terminated by a
    newline, so the result can be read back line by line.
    """
    with open(file_path, 'w') as out:
        out.writelines(json.dumps(relation) + "\n" for relation in relations)

In [49]:
def create_comparison_format1(relation,gold=True):
    """Flatten a relation into a compact dict used for comparison.

    relation -- dict with "DocID", "Sense", "Type", "Connective", and
                "Arg1"/"Arg2" entries that each hold a "TokenList".
    gold     -- True when ``relation`` is a gold relation, False when it is
                parser output.

    Returns a new dict with "DocID", "Sense", "Type", "Connective",
    "Change" (always "None"), "Arg1TokenList", "Arg2TokenList" and "Parser"
    ("Gold" or "Pred"). The input relation is not modified.
    """
    # Work on a copy so the caller's connective dict stays untouched.
    conn = dict(relation["Connective"])
    # `in`-style dict operations replace dict.has_key, which Python 3 removed.
    conn.pop("CharacterSpanList", None)
    conn.setdefault("RawText", "")

    dic = {
        "DocID": relation["DocID"],
        "Sense": relation["Sense"],
        "Type" : relation["Type"],
        "Connective": conn,
        "Change": "None"
    }

    if gold:
        # Gold token entries are sequences; only element 2 is kept.
        # Presumably that is the document-level token index - confirm against
        # the data format.
        dic["Arg1TokenList"] = [token[2] for token in relation["Arg1"]["TokenList"]]
        dic["Arg2TokenList"] = [token[2] for token in relation["Arg2"]["TokenList"]]
        dic["Parser"] = "Gold"
    else:
        # Parser token lists are passed through unchanged.
        dic["Arg1TokenList"] = relation["Arg1"]["TokenList"]
        dic["Arg2TokenList"] = relation["Arg2"]["TokenList"]
        dic["Parser"] = "Pred"

    return dic

In [53]:
def add_compare_attr(relation,gold=True):
    """Return a copy of ``relation`` with the comparison attributes added.

    relation -- relation dict containing a "Connective" dict.
    gold     -- True marks the copy as "Gold", False as "Pred".

    The copy gets "Change" set to "None", "Parser" set to "Gold"/"Pred", and
    a "RawText" of "" on its connective when the connective has none.
    Other nested values (e.g. "Arg1"/"Arg2") are still shared with the input.
    """
    new_rel = relation.copy()

    # relation.copy() is shallow, so the connective is copied separately;
    # otherwise adding "RawText" would also modify the caller's relation.
    conn = dict(new_rel["Connective"])
    # setdefault replaces the dict.has_key check, which Python 3 removed.
    conn.setdefault("RawText", "")
    new_rel["Connective"] = conn

    new_rel["Change"] = "None"
    new_rel["Parser"] = "Gold" if gold else "Pred"

    return new_rel

In [54]:
class Randomizer:
    """Create randomly perturbed variants of a parser's relation output.

    Parser relations are loaded from one JSON-lines file and gold relations
    from another. The gold relations supply the pools of connective texts,
    senses and types that perturbations draw from.

    Attributes set by the constructor:
      parser_rel     -- parser relations (after add_compare_attr, gold=False)
      gold_rel       -- gold relations (after add_compare_attr)
      unique_connect -- set of non-empty connective "RawText" values in gold
      unique_sense   -- set of sense labels in gold
      unique_type    -- set of relation types in gold
    """

    def __init__(self,parser_path,gold_path):
        """Load both relation files and collect the gold attribute pools.

        parser_path -- JSON-lines file with the parser's relations.
        gold_path   -- JSON-lines file with the gold relations.
        """
        self.parser_rel = [add_compare_attr(rel,gold=False) for rel in read_parser_rel(parser_path)]
        self.gold_rel = [add_compare_attr(rel) for rel in read_parser_rel(gold_path)]
        self.get_unique_attr(self.gold_rel)

    def get_unique_attr(self,relations):
        """Collect the distinct connectives, senses and types of ``relations``.

        Stores the result in ``unique_connect``, ``unique_sense`` and
        ``unique_type``. "Sense" is iterated, so each of its elements is added
        separately. The empty connective text is not kept.
        """
        self.unique_connect = set()
        self.unique_sense = set()
        self.unique_type = set()

        for rel_dic in relations:
            self.unique_connect.add(rel_dic["Connective"]["RawText"])
            self.unique_sense.update(rel_dic["Sense"])
            self.unique_type.add(rel_dic["Type"])

        # "" stands for "no connective" and must never be drawn as a new one.
        self.unique_connect.discard("")

    @staticmethod
    def _pick(candidates):
        """Return one random element of ``candidates``, or None if it is empty."""
        # random.sample() no longer accepts sets on Python 3.11+. Sorting also
        # gives a stable order, so a seeded run does not depend on set order.
        pool = sorted(candidates)
        return random.choice(pool) if pool else None

    def _extend_arg(self,relation,arg_name):
        """Append 1-10 consecutive token indices to ``relation[arg_name]``."""
        tokens = relation[arg_name]["TokenList"]
        if not tokens:
            # No last token to continue from; leave the argument as it is.
            return
        extra = random.randint(1,10)
        first_new = tokens[-1] + 1
        # Exactly `extra` new indices, extended in place.
        tokens.extend(range(first_new, first_new + extra))

    def randomize_rel(self,relation,change):
        """Apply one kind of perturbation to ``relation`` in place.

        relation -- parser relation whose "Arg1"/"Arg2" "TokenList" values are
                    lists of integers.
        change   -- "Arg1", "Arg2", "Args", "Type" or "Connective"; any other
                    value only updates the "Change" label.

        Returns the same (mutated) relation, with "Change" set to ``change``.
        """
        if change == "Arg1" or change == "Arg2":
            self._extend_arg(relation,change)
        elif change == "Args":
            self._extend_arg(relation,"Arg1")
            self._extend_arg(relation,"Arg2")
        elif change == "Type":
            old_type = relation["Type"]
            # Only offer types that differ from the current one.
            new_type = self._pick(self.unique_type.difference([old_type]))
            if new_type is not None:
                relation["Type"] = new_type
                if old_type == "Explicit":
                    # A relation that is no longer Explicit has no connective.
                    relation["Connective"]["RawText"] = ""
                    relation["Connective"]["TokenList"] = []
                elif new_type == "Explicit":
                    # A newly Explicit relation needs a connective.
                    relation = self.change_connective(relation)
        elif change == "Connective":
            relation = self.change_connective(relation)

        # Set last so that an "Args" change keeps its own label.
        relation["Change"] = change
        return relation

    def change_connective(self,relation):
        """Give ``relation`` a random gold connective, in place.

        The connective text is drawn from ``unique_connect``. Its token is a
        random index between the smallest and largest token of the two
        arguments. The relation is returned unchanged when no connective is
        known or both arguments are empty.
        """
        args = relation["Arg1"]["TokenList"] + relation["Arg2"]["TokenList"]
        if not args or not self.unique_connect:
            return relation

        relation["Connective"]["RawText"] = self._pick(self.unique_connect)
        relation["Connective"]["TokenList"] = [random.randint(min(args),max(args))]

        return relation

    def create_randomized_parser_output(self):
        """Return a randomly thinned and perturbed copy of the parser output.

        80, 85, 90 or 95 percent of the parser relations are selected at
        random. About 60 percent of those receive one random perturbation.
        ``self.parser_rel`` is left untouched.
        """
        percent = random.randrange(80,100,5)
        select_len = len(self.parser_rel) * percent // 100
        # Deep copy so repeated calls always start from the loaded relations.
        selected_rel = copy.deepcopy(random.sample(self.parser_rel,select_len))

        change_len = int(select_len*0.6)
        # Draw positions from the whole selection; a set keeps lookups O(1).
        change_rel_ind = set(random.sample(range(select_len),change_len))

        changes = ["Arg1","Arg2","Args","Type","Connective"]
        for ind,rel in enumerate(selected_rel):
            if ind in change_rel_ind:
                # Use this instance, not a notebook-level global.
                self.randomize_rel(rel,random.choice(changes))

        return selected_rel
        

In [55]:
# Build a randomized variant of the oslopots parser output and save it as JSON lines.
oslopots_path = "data/submissions/sense_only/blind/oslopots/output/output.json"
gold_path = "data/gold_standard/conll16st-en-03-29-16-blind-test/relations.json"
# The gold relations supply the connective/sense/type pools collected by Randomizer.
randomizer = Randomizer(oslopots_path,gold_path)
# NOTE(review): no random.seed call is visible in this notebook, so the selection
# and the perturbations presumably differ on every run - confirm if reproducibility matters.
rels = randomizer.create_randomized_parser_output()
save_parser_rel(rels,"data/submissions/randomized/oslopots.json")

In [56]:
# Build a randomized variant of the nguyenlab parser output and save it as JSON lines.
# The path variable is named after the submission it actually points to.
nguyenlab_path = "data/submissions/sense_only/blind/nguyenlab/output/output.json"
gold_path = "data/gold_standard/conll16st-en-03-29-16-blind-test/relations.json"
randomizer = Randomizer(nguyenlab_path,gold_path)
rels = randomizer.create_randomized_parser_output()
save_parser_rel(rels,"data/submissions/randomized/nguyenlab.json")

In [58]:
# Save the gold relations as held by the current Randomizer, i.e. after
# add_compare_attr added the "Change" and "Parser" fields (and a default "RawText").
save_parser_rel(randomizer.gold_rel,"data/gold_standard/blind/gold.json")

In [None]:
#Test

In [100]:
# Smoke test: apply one random "Type" or "Connective" perturbation to the first
# parser relation and print it before and after. The other supported kinds are
# "Arg1", "Arg2" and "Args".
changes = ["Type","Connective"]
change = random.choice(changes)
print(randomizer.parser_rel[0])
print(change)
# randomize_rel mutates nested dicts/lists in place. A shallow .copy() would
# share them with parser_rel[0], so a deep copy keeps the loaded data intact.
after = randomizer.randomize_rel(copy.deepcopy(randomizer.parser_rel[0]),change)
print(after)

{'Arg2TokenList': [42, 43, 44, 45, 46, 47, 48, 49, 50, 51], 'DocID': u'wikinews_101184', 'Parser': 'Pred', 'Connective': {u'RawText': '', u'TokenList': []}, 'Arg1TokenList': [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], 'Sense': [u'Temporal.Asynchronous.Succession'], 'Type': u'Explicit'}
Connective
Connective
{'Arg2TokenList': [42, 43, 44, 45, 46, 47, 48, 49, 50, 51], 'Arg1TokenList': [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], 'DocID': u'wikinews_101184', 'Sense': [u'Temporal.Asynchronous.Succession'], 'Parser': 'Pred', 'Type': u'Explicit', 'Connective': {u'RawText': u'As', u'TokenList': [36]}}
