In [4]:
import argparse
import csv
from conceptnet import ConceptNet,preprocess_conceptnet

class SWOW(object):
    def __init__(self,args):

        self.args = args
        swow_data = self.load_swow(args.swow_file)
        self.swow_data = swow_data
        self.swow_cue_responses = self.forward_associations(swow_data)


    def load_swow(self, input_file):
        cues, R1, R2, R3 = list(),list(),list(),list()
        reader =csv.DictReader(open(input_file))
        print(input_file)
        for row in reader:
            cues.append(row['cue'].lower())
            R1.append(row['R1'].lower())
            R2.append( row['R2'].lower())
            R3.append( row['R3'].lower())

        swow_data = list(zip(cues, R1, R2, R3))
        print("loading %d lines from %s"%(len(cues),input_file))
        return swow_data

    def swow_conceptnet_overlap(self,swow_data):
        net = ConceptNet(self.args.conceptnet_tuple_file)
        cue_r_in_net, cue_r_not_in_net = list(),list()
        for i, (cue, r1,r2,r3)in enumerate(swow_data):
            for r in [r1,r2,r3]:
                if r=='na' or r=='NA': #filter the useless responses
                    continue
                prob_in,rel = self.classify_relations(cue,r,net)
                frequency = self.swow_cue_responses[cue][r]
                out = (cue,r,rel,frequency)
                cue_r_in_net.append(out) if prob_in else cue_r_not_in_net.append(out)

        self.compute_ratio(cue_r_in_net, cue_r_not_in_net)

        return cue_r_in_net, cue_r_not_in_net

    def classify_relations(self,cue, r,net):
        '''judge where a pair of cue and resoponse is in the net'''
        in_prob=False
        cue_r_rel = net.get_relation(cue,r)
        #print(cue,r, cue_r_rel)
        in_prob=True if cue_r_rel !='<NULL>' else False

        return in_prob,cue_r_rel

    def compute_ratio(self,cue_r_in, cue_r_not_in):
        num_in = len(cue_r_in)
        num_not_in = len(cue_r_not_in)
        num_total = num_in+ num_not_in

        in_ratio = num_in/ num_total
        print("num_in/ num_not_in/ ratio: %d\t%d\t%.4f "%(num_in, num_not_in,in_ratio))



    def forward_associations(self, swow_data):
        cue_responses={}
        for i, (cue,r1,r2,r3) in enumerate(swow_data):

            if cue not in cue_responses.keys():
                cue_responses[cue]={r1:1, r2:1, r3:1}
            else:
                cue_responses = self.add_elements(cue_responses,cue,r1)
                cue_responses =  self.add_elements(cue_responses,cue,r2)
                cue_responses =  self.add_elements(cue_responses,cue,r3)
        return cue_responses

    def add_elements(self,outter, outter_key, inner_key):
        if inner_key not in outter[outter_key].keys():
            outter[outter_key].update({inner_key:1}) #there is update insetad of =
        else:
            outter[outter_key][inner_key]+=1
            #outter[outter_key].update({_key: outter[outter_key][inner_key]+1})
        return outter

def draw_frequency_instances(data):
    def construct_axis_data(data):
        import collections 
        freq_count = collections.OrderedDict()
        for (cue,r,rel,fre) in data:
            if fre not in freq_count:
                freq_count[fre]={(cue,r,rel)}
            else:
                freq_count.update({fre:(cue,r,rel)})
        return freq_count
    
        freq_count = construct_axis_data(data)
    
        x_axis = list(freq_count.keys())
        y_axis = [len(list(vs)) for vs in freq_count.values()]
        fig = {
              "data": [{"type": "bar",
              "x": x_axis,
              "y": y_axis}],
               "layout": {"title": {"text": "A Bar Chart"}}
               }
    # To display the figure defined by this dict, use the low-level plotly.io.show function
    import plotly.io as pio
    pio.show(fig)




if __name__=='__main__':
    parser= argparse.ArgumentParser()
    parser.add_argument('--swow_file',type=str, default='/Users/chunhua/Desktop/1.ResearchInUoM-NLP/1.ComSense/00-Learn_human_association_contextulized_concept_embedding/1.Data/SWOW/SWOW-EN.R100.csv')
    parser.add_argument('--process_conceptnet',action='store_true')
    parser.add_argument('--conceptnet_raw_file',type=str, default='../../0.Dataset/conceptnet-assertions-5.7.0.csv')
    parser.add_argument('--conceptnet_tuple_file',type=str, default='./data/concept.filter')
    parser.add_argument('--conceptnet_weight_threshold',type=float, default=1.0)
    args= parser.parse_args()


    if args.process_conceptnet:
        print("preprocessing the conceptnet raw file ...")
        preprocess_conceptnet(args.conceptnet_raw_file,args.conceptnet_tuple_file,args.conceptnet_weight_threshold)

    swow = SWOW(args)
    cue_r_in, cue_r_not_in = swow.swow_conceptnet_overlap(swow.swow_data)

    draw_frequency_instances(cue_r_in)
    

usage: ipykernel_launcher.py [-h] [--swow_file SWOW_FILE]
                             [--process_conceptnet]
                             [--conceptnet_raw_file CONCEPTNET_RAW_FILE]
                             [--conceptnet_tuple_file CONCEPTNET_TUPLE_FILE]
                             [--conceptnet_weight_threshold CONCEPTNET_WEIGHT_THRESHOLD]
ipykernel_launcher.py: error: unrecognized arguments: -f /Users/chunhua/Library/Jupyter/runtime/kernel-3da73e5d-556a-447f-b794-afc194cadfde.json


SystemExit: 2

In [3]:
%tb

SystemExit: 2