In [1]:
import networkx as nx
import json

import numpy as np

from collections import defaultdict
from itertools import groupby
from tqdm.notebook import tqdm

In [2]:
# Relation names grouped under the candidate side.
candidate_side = {
    "hasDegree",
    "hasSkill",
    "hasSubType",
    "inCurrentIndustry",
    "inCurrentType",
    "interactedWith",
    "wantsIndustry",
    "wantsType",
}

# Relation names grouped under the company side.
company_side = {
    "interactedBy",
    "isCurrentIndustryOf",
    "isCurrentTypeOf",
    "isHeldBy",
    "isMaxDegreeOf",
    "isRequiredDegreeOf",
    "isSubTypeOf",
    "isUserDegreeOf",
    "isWantedIndustryOf",
    "isWantedTypeOf",
}

# Relation name -> Dutch phrase (20 entries).
converter = dict(
    hasSubType="valt onder",
    isSubTypeOf="is overkoepelend over",
    requiresDegree="vereist diploma",
    maxDegree="maximaal diploma",
    hasDegree="heeft diploma",
    inCurrentIndustry="werkt binnen industrie",
    wantsIndustry="wil werken in industrie",
    inCurrentType="werkt binnen type",
    hasSkill="heeft vaardigheid",
    wantsType="wil type",
    isRequiredDegreeOf="is vereist diploma van",
    isMaxDegreeOf="is maximaal diploma van",
    isUserDegreeOf="heeft diploma",
    isCurrentIndustryOf="is huidige industrie van",
    isWantedIndustryOf="is gewilde industrie van",
    isCurrentTypeOf="is huidig type van",
    isWantedTypeOf="is gewild type van",
    isHeldBy="vaardigheid in bezit van",
    interactedWith="heeft vervuld",
    interactedBy="is vervuld door",
)

# Flip the mapping in place: from here on `converter` maps Dutch phrase -> relation name.
# NOTE(review): "heeft diploma" is the phrase of both hasDegree and isUserDegreeOf, so the
# flipped mapping has 19 entries and resolves that phrase to "isUserDegreeOf" (the later
# entry wins); "hasDegree" cannot be reached through it. Confirm this is intended.
converter = dict(zip(converter.values(), converter.keys()))

In [9]:
# Seed for the random baseline weights; set to None to draw fresh values on every run.
RANDOM_SEED = 42

# Per-pair fields copied unchanged from the input explanation into the output.
COPIED_FIELDS = ("gen_pred", "can_pred", "com_pred", "ground_truth")


def _edges_on_simple_paths(edges, source, target):
    """Return the set of (u, v) pairs lying on any simple path from source to target.

    edges: sequences whose first two items are the endpoints of a directed edge.
    Returns an empty set when an endpoint does not occur in any edge, instead of
    letting nx.all_simple_paths raise NodeNotFound for a missing source node.
    """
    graph = nx.DiGraph([(edge[0], edge[1]) for edge in edges])
    if source not in graph or target not in graph:
        return set()
    return {pair
            for path in nx.all_simple_paths(graph, source, target)
            for pair in nx.utils.pairwise(path)}


def _normalise_per_source(edges):
    """Rescale weights so that the outgoing weights of every source node sum to 1.

    edges: list of [source, target, weight]; weight is anything float() accepts.
    Returns new [source, target, weight] lists, with the weight always a string.
    A source whose weights sum to 0 cannot be rescaled, so its edges share the
    mass uniformly (1 / number of outgoing edges), which still sums to 1.
    """
    totals = defaultdict(float)
    counts = defaultdict(int)
    for source, _target, weight in edges:
        totals[source] += float(weight)
        counts[source] += 1

    normalised = []
    for source, target, weight in edges:
        if totals[source]:
            share = float(weight) / totals[source]
        else:
            share = 1 / counts[source]
        normalised.append([source, target, str(share)])
    return normalised


# One generator for the whole run, so both output files are reproducible for a fixed seed.
weights_rng = np.random.default_rng(RANDOM_SEED)

for diri in ["candidate", "company"]:
    new_data = {}

    # Only the load needs the file handle; processing happens after it is closed.
    with open(f"./data/{diri}_explanations.json") as f:
        exp = json.load(f)

    # Loop over all candidates
    for candidate in tqdm(exp):
        with open(f"./data/hits/{candidate}.json") as f2:
            hits = json.load(f2)

        with open(f"./data/misses/{candidate}.json") as f3:
            misses = json.load(f3)

        # Find metadata: merge once (entries from misses win on duplicate keys)
        # and shallow-copy each entry.
        hits_and_misses = {key: dict(meta) for key, meta in {**hits, **misses}.items()}

        # update all jobs
        for job in exp[candidate]:
            # NOTE(review): all_links is not used anywhere in this cell. The lookup is
            # kept because it raises KeyError when the job is missing from the
            # hits/misses metadata; drop it if no later cell relies on it.
            all_links = hits_and_misses[job]["links"]

            source_explanation = exp[candidate][job]["explanation"]
            updated = {field: source_explanation[field] for field in COPIED_FIELDS}

            # The direct candidate<->job edge is never part of an explanation path.
            direct_edges = {(candidate, job), (job, candidate)}

            for direction in ["candidate", "company"]:
                full_explanation = source_explanation[direction]

                # Candidate explanations run candidate -> job, company ones job -> candidate.
                if direction == "candidate":
                    start, end = candidate, job
                else:
                    start, end = job, candidate
                keep = _edges_on_simple_paths(full_explanation, start, end)

                filtered_explanation = sorted(
                    [edge[0], edge[1], edge[2]]
                    for edge in full_explanation
                    if (edge[0], edge[1]) in keep
                    and (edge[0], edge[1]) not in direct_edges
                )

                # Only look at edges for the current direction, and fix their values so that they add to 1
                filtered_explanations = _normalise_per_source(filtered_explanation)

                # Make random weights: one independent uniform [0, 1) draw per kept edge.
                # NOTE(review): unlike the real weights these are not normalised per
                # source node (that step was commented out in the original cell);
                # confirm this is intended.
                draws = weights_rng.random(size=len(filtered_explanations)).tolist()
                random_explanations = [[edge[0], edge[1], str(draw)]
                                       for edge, draw in zip(filtered_explanations, draws)]

                updated[direction] = {"real": filtered_explanations,
                                      "random": random_explanations}

            new_data.setdefault(candidate, {})[job] = {"explanation": updated}

    with open(f"./data/{diri}_explanations_updated.json", "w") as f_new:
        json.dump(new_data, f_new)

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]