In [1]:
import pickle, json, requests, csv, copy
from json import JSONDecodeError
import numpy as np
import pprint as pp
import urllib.request, urllib.parse

In [2]:
# Relation labels that may appear on a path edge. A '/rr/...' label is the
# corresponding '/r/...' relation followed from object to subject (see
# process_edges); '/r/RelatedTo' keeps the same label in both directions.
# The position of each label is used by get_index_from_edges, so the order
# of this list is significant.
rel_list = [
    '/r/IsA', '/rr/IsA',
    '/r/PartOf', '/rr/PartOf',
    '/r/AtLocation', '/rr/AtLocation',
    '/r/RelatedTo',
]

In [3]:
# English ConceptNet concept URIs of the classes for which vectors are built.
class_uri = ['/c/en/' + label for label in (
    'company',
    'education',
    'artist',
    'athlete',
    'officer',
    'transport',
    'building',
    'nature',
    'village',
    'animal',
    'plant',
    'album',
    'film',
    'writing',
)]

In [4]:
class Path:
    """A path in the graph: a sequence of nodes joined by labelled, weighted edges.

    Attributes:
        path_uri: textual form of the path, nodes and relations joined by '$'
            (for example 'a$/r/IsA$b'); a single-node path is just the node URI.
        start, end: first and last node of the path.
        length: number of edges.
        edges: relation label of every edge, in order.
        weights: weight of every edge, in order.
        nodes: every visited node, in order (always len(edges) + 1 entries
            for a non-empty path).
    """

    def __init__(self): # Create an empty path
        self.path_uri = ''
        self.start = None
        self.end = None
        self.length = 0
        self.edges = []
        self.weights = []
        self.nodes = []

    def __repr__(self):
        return self.path_uri + ' (' + ','.join([str(x) for x in self.weights]) + ')'

    @staticmethod
    def create_unit_node_path(uri):
        """Return the zero-edge path that starts and ends at `uri`."""
        p = Path()
        p.path_uri = uri
        p.start = uri
        p.end = uri
        p.length = 0
        p.edges = []
        p.weights = []
        p.nodes = [uri]
        return p

    @staticmethod
    def create_path_from_edge(sub, rel, obj, weight):
        """Return the one-edge path sub --rel--> obj carrying `weight`."""
        p = Path()
        p.path_uri = sub + '$' + rel + '$' + obj
        p.start = sub
        p.end = obj
        p.length = 1
        p.edges = [rel]
        p.weights = [weight]
        p.nodes = [sub, obj]
        return p

    @staticmethod
    def _clone(src):
        """Copy `src` so that the copy owns its own edge/weight/node lists."""
        p = copy.copy(src)
        p.edges = list(src.edges)
        p.weights = list(src.weights)
        p.nodes = list(src.nodes)
        return p

    @staticmethod
    def concatenate(pa, pb):
        """Return a new path made of `pa` followed by `pb`.

        `pa` must end where `pb` starts (AssertionError otherwise). Neither
        input is modified and the result never shares list objects with them.
        """
        assert pa.end == pb.start, "Cannot concatenate as the end of the first path is not the start of the second path"
        # A zero-length side contributes nothing; hand back an independent copy
        # of the other side (a plain copy.copy would share its lists).
        if pb.length == 0:
            return Path._clone(pa)
        if pa.length == 0:
            return Path._clone(pb)

        p = Path()
        # Drop pb's leading node from its uri: it is already the tail of pa's uri.
        p.path_uri = pa.path_uri + pb.path_uri[pb.path_uri.find('$'):]
        p.start = pa.start
        p.end = pb.end
        p.length = pa.length + pb.length
        p.edges = list(pa.edges)
        p.edges.extend(pb.edges)
        p.weights = list(pa.weights)
        p.weights.extend(pb.weights)
        p.nodes = list(pa.nodes)
        p.nodes.extend(pb.nodes[1:])
        assert len(p.edges) + 1 == len(p.nodes), "Nodes and edges are inconsistent"

        return p

    def is_simple_path(self): # Visit each node only once
        return len(set(self.nodes)) == len(self.nodes)

    def form_single_path_with(self, p):
        """Return True when self followed by `p` would still visit every node once.

        `p` must start where self ends (AssertionError otherwise).
        """
        assert self.end == p.start
        if (not self.is_simple_path()) or (not p.is_simple_path()):
            return False
        nodes = list(self.nodes)
        nodes.extend(p.nodes[1:])
        return len(set(nodes)) == len(nodes)

    def average_weight(self):
        """Return the geometric mean of the edge weights (1.0 for a path without edges)."""
        if not self.weights:
            return 1.0
        return Path.geo_mean(self.weights)

    @staticmethod
    def geo_mean(iterable):
        """Return the geometric mean of a non-empty sequence of numbers."""
        a = np.array(iterable)
        return a.prod()**(1.0/len(a))

In [5]:
# Memo table used by get_all_paths_from: PATHS_FROM[uri][hops] -> list of Path.
# NOTE: unpickling can execute arbitrary code, so only load a cache file that
# was written by this notebook.
try:
    with open("../wordEmbeddings/PATHS_FROM.pickle", "rb") as cache_file:
        PATHS_FROM = pickle.load(cache_file)
except FileNotFoundError:
    # No cache saved yet (fresh checkout / first run): start empty and let
    # get_all_paths_from fill it.
    PATHS_FROM = {}

In [6]:
def find_edges_of(uri, rel_list = None, assertions_path = '../wordEmbeddings/conceptnet-assertions-en-filter-5.6.0.csv'):
    """Scan the assertions file and return every edge that touches `uri`.

    A row touches `uri` when its subject (column 2) or object (column 3) is
    exactly `uri` or is `uri` followed by '/...' (e.g. '/c/en/company/n').
    The test is a prefix match, so a URI that merely contains `uri + '/'`
    somewhere in the middle is not returned.

    Args:
        uri: concept URI to look for.
        rel_list: when given, only rows whose relation (column 1) is in it
            are kept; None keeps every relation.
        assertions_path: tab-separated assertions file whose column 4 is a
            JSON object with a 'weight' key.

    Returns:
        A list of dicts with keys 'sub', 'rel', 'obj' and 'weight', in file order.
    """
    sense_prefix = uri + '/'

    def touches(node):
        return node == uri or node.startswith(sense_prefix)

    edges = []
    # newline='' is the mode the csv module asks for when reading files.
    with open(assertions_path, 'r', encoding = "utf8", newline = '') as csvfile:
        for line in csv.reader(csvfile, delimiter='\t'):
            if not (touches(line[2]) or touches(line[3])):
                continue
            if rel_list is not None and line[1] not in rel_list:
                continue
            details = json.loads(line[4])
            edges.append({'sub': line[2],
                          'rel': line[1],
                          'obj': line[3],
                          'weight': details['weight']})
    return edges

In [7]:
def get_neighbors(uri, rel_list = None):
    """Return the neighbours of `uri` as {neighbour_uri: {relation: weight}}.

    The edges come from find_edges_of (optionally restricted to `rel_list`)
    and are folded into a fresh dict by process_edges.
    """
    return process_edges({}, uri, find_edges_of(uri, rel_list))

In [8]:
def remove_word_sense(sub):
    """Strip the trailing '/<suffix>' from a URI that has exactly four slashes.

    URIs with at most three slashes are returned unchanged; one with more
    than four slashes is printed and rejected with an AssertionError.
    """
    slash_total = sub.count('/')
    if slash_total <= 3:
        return sub
    if slash_total > 4:
        print(sub)
        assert False, "URI error (with more than 4 slashes)"
    # Keep everything before the last slash.
    return sub.rsplit('/', 1)[0]

In [9]:
def process_edges(a_dict, uri, edge_data):
    """Fold the edges of `uri` into `a_dict` and return it.

    `a_dict` maps neighbour URI -> {relation: weight}. Subject and object are
    first passed through remove_word_sense. When `uri` is the subject the
    relation is stored as is; when `uri` is the object the relation is stored
    with '/rr/' in place of the leading '/r/', except '/r/RelatedTo' which is
    kept unchanged. If the same (neighbour, relation) occurs several times
    the largest weight is kept. An edge that has `uri` at neither end is
    printed and rejected with an AssertionError.
    """
    for edge in edge_data:
        head = remove_word_sense(edge['sub'])
        relation = edge['rel']
        tail = remove_word_sense(edge['obj'])
        strength = edge['weight']

        if head == uri:
            neighbour = tail
        elif tail == uri:
            neighbour = head
            if relation != '/r/RelatedTo': # Bi-directional
                relation = relation.replace('/r/', '/rr/', 1)
        else:
            print(edge)
            assert False, "This edge does not belong to the given uri"
            continue

        known = a_dict.setdefault(neighbour, {})
        known[relation] = max(known[relation], strength) if relation in known else strength
    return a_dict

In [10]:
def get_all_paths_from(uri, hops):
    """Return every path of exactly `hops` edges starting at `uri`, memoised.

    Results are stored in the module-level PATHS_FROM[uri][hops] and reused on
    later calls. Zero hops gives the single-node path, one hop gives one path
    per (neighbour, relation) reported by get_neighbors, and more hops extend
    each (hops - 1)-path by every one-hop path from its end node that keeps
    the path free of repeated nodes. A progress line is printed whenever a
    new entry is computed.
    """
    assert hops >= 0, "Invalid hops (less than 0)"

    by_hops = PATHS_FROM.setdefault(uri, {})
    if hops in by_hops:
        return by_hops[hops]

    if hops == 0:
        found = [Path.create_unit_node_path(uri)]
    elif hops == 1:
        found = [Path.create_path_from_edge(uri, relation, target, weight)
                 for target, relations in get_neighbors(uri).items()
                 for relation, weight in relations.items()]
    else: # hops > 1
        found = []
        for prefix in get_all_paths_from(uri, hops - 1):
            for step in get_all_paths_from(prefix.end, 1):
                if prefix.form_single_path_with(step):
                    found.append(Path.concatenate(prefix, step))

    by_hops[hops] = found
    print('Memorise all paths from %s with hops = %d, number of paths = %d' % (uri, hops, len(found)))

    return found

In [11]:
def get_kg_vectors_for_a_class(uri, max_hops, rel_list):
    """Build one vector per node reachable from `uri` within `max_hops` edges.

    Every path from `uri` (0..max_hops edges, from get_all_paths_from) is
    grouped by its end node. For each end node a zero vector is created; the
    zero-edge path adds 1 to the last slot, and every other path adds its
    average_weight() to the slot chosen by get_index_from_edges for its
    relation sequence.

    Args:
        uri: start node shared by all paths.
        max_hops: maximum number of edges per path.
        rel_list: relation labels a path edge may carry.

    Returns:
        dict mapping end-node URI -> numpy array of length
        1 + n + n**2 + ... + n**max_hops, with n = len(rel_list).
    """
    total_rel = len(rel_list)
    # Exact integer geometric sum. The closed form (n**(h+1) - 1) / (n - 1)
    # divides by zero for a single relation and goes through float division.
    vector_size = sum(total_rel ** i for i in range(max_hops + 1)) # Total size of vector

    all_paths = []
    for i in range(max_hops+1):
        all_paths.extend(get_all_paths_from(uri, i))

    end_dict = {}
    for p in all_paths:
        assert p.start == uri, "The start node is not the given class"
        end_dict.setdefault(p.end, []).append(p)

    vectors = {}
    for end, paths in end_dict.items():
        v = np.zeros(vector_size)
        for p in paths:
            if p.edges == []:
                v[-1] += 1 # The zero-edge path (the class itself) uses the last slot
            else:
                v[get_index_from_edges(p.edges, max_hops, rel_list)] += p.average_weight()
        vectors[end] = v

    return vectors

In [12]:
def get_index_from_edges(edges, max_hops, rel_list):
    """Map a sequence of relation labels to its slot in a path vector.

    Slots are laid out by decreasing path length, with n = len(rel_list):
    the n**max_hops paths of full length come first, then the
    n**(max_hops - 1) paths that are one edge shorter, and so on; the empty
    path gets the very last slot. Within one length the sequence is read as
    a base-n number whose digits are the positions of the labels in
    `rel_list` (first edge most significant).

    Without the per-length offset a short path would share its slot with a
    longer path that starts with rel_list[0]; e.g. with max_hops = 2 the path
    [r] and the path [rel_list[0], r] would both map to rel_list.index(r).

    Args:
        edges: relation labels of the path, in order (at most max_hops of them).
        max_hops: maximum path length the vector is laid out for.
        rel_list: supported relation labels.

    Returns:
        The integer slot index.

    Raises:
        AssertionError: if the path is longer than max_hops or uses a label
            that is not in `rel_list`.
    """
    assert len(edges) <= max_hops, "The path is longer than the given max_hops"
    total_rel = len(rel_list)

    # Skip the slots reserved for all strictly longer paths. For an empty
    # path this is n + n**2 + ... + n**max_hops, i.e. the last slot.
    index = sum(total_rel ** i for i in range(len(edges) + 1, max_hops + 1))

    for i, e in enumerate(reversed(edges)):
        if e not in rel_list:
            assert False, "Found an unsupported relation" + e
        index += (total_rel ** i) * rel_list.index(e)
    return index

In [13]:
def get_kg_vectors_for_classes(class_uri, max_hops, rel_list):
    """Run get_kg_vectors_for_a_class for every URI in `class_uri`.

    Returns a dict mapping each class URI to the result for that class and
    prints one progress line per finished class.
    """
    vectors_by_class = dict()
    for concept in class_uri:
        vectors_by_class[concept] = get_kg_vectors_for_a_class(concept, max_hops, rel_list)
        print('Finish producing vectors of class', concept, 'Max hops', max_hops)
    return vectors_by_class

In [None]:
# KG_VECTORS_1 = get_kg_vectors_for_classes(class_uri, max_hops = 1, rel_list = rel_list)
# pickle.dump(KG_VECTORS_1, open("../wordEmbeddings/KG_VECTORS_1.pickle", "wb"))
KG_VECTORS_2 = get_kg_vectors_for_classes(class_uri, max_hops = 2, rel_list = rel_list)
# Write through context managers so each file is flushed and closed as soon
# as its dump finishes, even if a later step fails.
with open("../wordEmbeddings/KG_VECTORS_2.pickle", "wb") as vectors_file:
    pickle.dump(KG_VECTORS_2, vectors_file)
# Persist the path cache as well so later runs can reuse it.
with open("../wordEmbeddings/PATHS_FROM.pickle", "wb") as cache_file:
    pickle.dump(PATHS_FROM, cache_file)

Memorise all paths from /c/en/takeover with hops = 1, number of paths = 32
Memorise all paths from /c/en/third_party_processor with hops = 1, number of paths = 9
Memorise all paths from /c/en/through_ticketing with hops = 1, number of paths = 9
Memorise all paths from /c/en/title_company with hops = 1, number of paths = 13
Memorise all paths from /c/en/tnc with hops = 1, number of paths = 9
Memorise all paths from /c/en/tour_operator with hops = 1, number of paths = 2
Memorise all paths from /c/en/traiteur with hops = 1, number of paths = 3
Memorise all paths from /c/en/travel_agency with hops = 1, number of paths = 16
Memorise all paths from /c/en/troop with hops = 1, number of paths = 99
Memorise all paths from /c/en/troupe with hops = 1, number of paths = 27
Memorise all paths from /c/en/trouper with hops = 1, number of paths = 8
Memorise all paths from /c/en/turnverein with hops = 1, number of paths = 5
Memorise all paths from /c/en/umbrella_company with hops = 1, number of paths =

Memorise all paths from /c/en/sputnik_moment with hops = 1, number of paths = 11
Memorise all paths from /c/en/educations with hops = 1, number of paths = 1
Memorise all paths from /c/en/virtue_most_people_value with hops = 1, number of paths = 1
Memorise all paths from /c/en/untutoredly with hops = 1, number of paths = 2
Memorise all paths from /c/en/abuse with hops = 1, number of paths = 137
Memorise all paths from /c/en/maleducation with hops = 1, number of paths = 5
Memorise all paths from /c/en/tirana with hops = 1, number of paths = 9
Memorise all paths from /c/en/autodidacticism with hops = 1, number of paths = 5
Memorise all paths from /c/en/psychopedagogy with hops = 1, number of paths = 4
Memorise all paths from /c/en/extracurricular_activity with hops = 1, number of paths = 3
Memorise all paths from /c/en/buenos_aires with hops = 1, number of paths = 16
Memorise all paths from /c/en/law with hops = 1, number of paths = 733
Memorise all paths from /c/en/kilkenny with hops = 1

Memorise all paths from /c/en/didactic with hops = 1, number of paths = 33
Memorise all paths from /c/en/benighted with hops = 1, number of paths = 14
Memorise all paths from /c/en/lore with hops = 1, number of paths = 34
Memorise all paths from /c/en/cesg with hops = 1, number of paths = 20
Memorise all paths from /c/en/homework with hops = 1, number of paths = 32
Memorise all paths from /c/en/educationless with hops = 1, number of paths = 3
Memorise all paths from /c/en/teaching with hops = 1, number of paths = 111
Memorise all paths from /c/en/film with hops = 1, number of paths = 1185
Memorise all paths from /c/en/mastering with hops = 1, number of paths = 5
Memorise all paths from /c/en/information with hops = 1, number of paths = 668
Memorise all paths from /c/en/student with hops = 1, number of paths = 358
Memorise all paths from /c/en/general_practitioner with hops = 1, number of paths = 23
Memorise all paths from /c/en/fact with hops = 1, number of paths = 245
Memorise all pat

Memorise all paths from /c/en/gallery_opening with hops = 1, number of paths = 39
Memorise all paths from /c/en/groomsian with hops = 1, number of paths = 3
Memorise all paths from /c/en/stuccoer with hops = 1, number of paths = 2
Memorise all paths from /c/en/master with hops = 1, number of paths = 224
Memorise all paths from /c/en/cinquecentist with hops = 1, number of paths = 10
Memorise all paths from /c/en/concretist with hops = 1, number of paths = 4
Memorise all paths from /c/en/keyframe with hops = 1, number of paths = 19
Memorise all paths from /c/en/claude_glass with hops = 1, number of paths = 6
Memorise all paths from /c/en/concert with hops = 1, number of paths = 147
Memorise all paths from /c/en/decorator with hops = 1, number of paths = 8
Memorise all paths from /c/en/ornamentalist with hops = 1, number of paths = 3
Memorise all paths from /c/en/vorticist with hops = 1, number of paths = 2
Memorise all paths from /c/en/maestro with hops = 1, number of paths = 10
Memorise

Memorise all paths from /c/en/cubist with hops = 1, number of paths = 11
Memorise all paths from /c/en/aquatinter with hops = 1, number of paths = 2
Memorise all paths from /c/en/pavement_artist with hops = 1, number of paths = 5
Memorise all paths from /c/en/photographer with hops = 1, number of paths = 42
Memorise all paths from /c/en/bohemian with hops = 1, number of paths = 31
Memorise all paths from /c/en/leonardeschi with hops = 1, number of paths = 4
Memorise all paths from /c/en/actor with hops = 1, number of paths = 237
Memorise all paths from /c/en/singing_telegram with hops = 1, number of paths = 4
Memorise all paths from /c/en/art_person with hops = 1, number of paths = 2
Memorise all paths from /c/en/person with hops = 1, number of paths = 3579
Memorise all paths from /c/en/white_soul with hops = 1, number of paths = 5
Memorise all paths from /c/en/verve with hops = 1, number of paths = 21
Memorise all paths from /c/en/self_portrait with hops = 1, number of paths = 11
Memo

Memorise all paths from /c/en/john_napier with hops = 1, number of paths = 2
Memorise all paths from /c/en/redshirt with hops = 1, number of paths = 24
Memorise all paths from /c/en/athleticism with hops = 1, number of paths = 6
Memorise all paths from /c/en/athletism with hops = 1, number of paths = 1
Memorise all paths from /c/en/volleyball_player with hops = 1, number of paths = 4
Memorise all paths from /c/en/free_agent with hops = 1, number of paths = 7
Memorise all paths from /c/en/corinthian with hops = 1, number of paths = 32
Memorise all paths from /c/en/fit with hops = 1, number of paths = 310
Memorise all paths from /c/en/sports_card with hops = 1, number of paths = 8
Memorise all paths from /c/en/skateboarder with hops = 1, number of paths = 17
Memorise all paths from /c/en/hurler with hops = 1, number of paths = 10
Memorise all paths from /c/en/carb_up with hops = 1, number of paths = 2
Memorise all paths from /c/en/coxswain with hops = 1, number of paths = 13
Memorise all

Memorise all paths from /c/en/weightlifter with hops = 1, number of paths = 12
Memorise all paths from /c/en/windsurfer with hops = 1, number of paths = 14
Memorise all paths from /c/en/triathletes with hops = 1, number of paths = 1
Memorise all paths from /c/en/sweater with hops = 1, number of paths = 57
Memorise all paths from /c/en/pentathlete with hops = 1, number of paths = 6
Memorise all paths from /c/en/road_running with hops = 1, number of paths = 2
Memorise all paths from /c/en/paratriathlete with hops = 1, number of paths = 2
Memorise all paths from /c/en/ironwoman with hops = 1, number of paths = 3
Memorise all paths from /c/en/amateur with hops = 1, number of paths = 77
Memorise all paths from /c/en/heptathlete with hops = 1, number of paths = 5
Memorise all paths from /c/en/tennis_player with hops = 1, number of paths = 27
Memorise all paths from /c/en/formula_one_racer with hops = 1, number of paths = 6
Memorise all paths from /c/en/relayer with hops = 1, number of paths 

Memorise all paths from /c/en/rabbi with hops = 1, number of paths = 57
Memorise all paths from /c/en/apposer with hops = 1, number of paths = 6
Memorise all paths from /c/en/undermate with hops = 1, number of paths = 1
Memorise all paths from /c/en/roster with hops = 1, number of paths = 19
Memorise all paths from /c/en/lieutenant_colonel with hops = 1, number of paths = 16
Memorise all paths from /c/en/person_with_authority with hops = 1, number of paths = 1
Memorise all paths from /c/en/groom with hops = 1, number of paths = 70
Memorise all paths from /c/en/whiffler with hops = 1, number of paths = 12
Memorise all paths from /c/en/lictor with hops = 1, number of paths = 5
Memorise all paths from /c/en/defensor with hops = 1, number of paths = 9
Memorise all paths from /c/en/sailing_master with hops = 1, number of paths = 1
Memorise all paths from /c/en/mariner with hops = 1, number of paths = 25
Memorise all paths from /c/en/aide with hops = 1, number of paths = 18
Memorise all path

Memorise all paths from /c/en/chief_legal_officer with hops = 1, number of paths = 6
Memorise all paths from /c/en/kirkman with hops = 1, number of paths = 4
Memorise all paths from /c/en/long_arm_of_law with hops = 1, number of paths = 5
Memorise all paths from /c/en/purveyor with hops = 1, number of paths = 15
Memorise all paths from /c/en/smokey_bear with hops = 1, number of paths = 17
Memorise all paths from /c/en/cuinage with hops = 1, number of paths = 7
Memorise all paths from /c/en/officerless with hops = 1, number of paths = 2
Memorise all paths from /c/en/servant with hops = 1, number of paths = 466
Memorise all paths from /c/en/operations_planner with hops = 1, number of paths = 2
Memorise all paths from /c/en/dispatch with hops = 1, number of paths = 43
Memorise all paths from /c/en/brevetcy with hops = 1, number of paths = 3
Memorise all paths from /c/en/officers with hops = 1, number of paths = 8
Memorise all paths from /c/en/board_of_appeals with hops = 1, number of path

Memorise all paths from /c/en/wardroom with hops = 1, number of paths = 9
Memorise all paths from /c/en/justiciary with hops = 1, number of paths = 50
Memorise all paths from /c/en/counter_roll with hops = 1, number of paths = 6
Memorise all paths from /c/en/mountie with hops = 1, number of paths = 5
Memorise all paths from /c/en/gangbuster with hops = 1, number of paths = 4
Memorise all paths from /c/en/cut with hops = 1, number of paths = 797
Memorise all paths from /c/en/halfpike with hops = 1, number of paths = 6
Memorise all paths from /c/en/oinker with hops = 1, number of paths = 15
Memorise all paths from /c/en/decurion with hops = 1, number of paths = 9
Memorise all paths from /c/en/officership with hops = 1, number of paths = 1
Memorise all paths from /c/en/penitentiary with hops = 1, number of paths = 38
Memorise all paths from /c/en/bureaucracy with hops = 1, number of paths = 70
Memorise all paths from /c/en/flag_officer with hops = 1, number of paths = 19
Memorise all path

Memorise all paths from /c/en/aquaglyceroporin with hops = 1, number of paths = 4
Memorise all paths from /c/en/particular with hops = 1, number of paths = 171
Memorise all paths from /c/en/flatbed_truck with hops = 1, number of paths = 9
Memorise all paths from /c/en/car_plane with hops = 1, number of paths = 1
Memorise all paths from /c/en/skip_hoist with hops = 1, number of paths = 9
Memorise all paths from /c/en/deliver with hops = 1, number of paths = 221
Memorise all paths from /c/en/train_tube with hops = 1, number of paths = 1
Memorise all paths from /c/en/railroad with hops = 1, number of paths = 143
Memorise all paths from /c/en/transportedness with hops = 1, number of paths = 1
Memorise all paths from /c/en/penal_colony with hops = 1, number of paths = 9
Memorise all paths from /c/en/hang_gliding with hops = 1, number of paths = 11
Memorise all paths from /c/en/line_haul with hops = 1, number of paths = 4
Memorise all paths from /c/en/taxiplane with hops = 1, number of paths

Memorise all paths from /c/en/fardage with hops = 1, number of paths = 3
Memorise all paths from /c/en/railhead with hops = 1, number of paths = 21
Memorise all paths from /c/en/greater_manchester with hops = 1, number of paths = 81
Memorise all paths from /c/en/carry with hops = 1, number of paths = 243
Memorise all paths from /c/en/sea with hops = 1, number of paths = 561
Memorise all paths from /c/en/dirac_cone with hops = 1, number of paths = 5
Memorise all paths from /c/en/passive_transport with hops = 1, number of paths = 7
Memorise all paths from /c/en/train with hops = 1, number of paths = 360
Memorise all paths from /c/en/transhydrogenase with hops = 1, number of paths = 6
Memorise all paths from /c/en/emotion with hops = 1, number of paths = 389
Memorise all paths from /c/en/chad with hops = 1, number of paths = 62
Memorise all paths from /c/en/paddy_wagon with hops = 1, number of paths = 9
Memorise all paths from /c/en/move_in with hops = 1, number of paths = 3
Memorise all 

Memorise all paths from /c/en/gig_bag with hops = 1, number of paths = 6
Memorise all paths from /c/en/overhead with hops = 1, number of paths = 63
Memorise all paths from /c/en/iminoglycinuria with hops = 1, number of paths = 15
Memorise all paths from /c/en/earthmover with hops = 1, number of paths = 6
Memorise all paths from /c/en/plasmodesma with hops = 1, number of paths = 10
Memorise all paths from /c/en/tórshavn with hops = 1, number of paths = 4
Memorise all paths from /c/en/lifeline with hops = 1, number of paths = 19
Memorise all paths from /c/en/car with hops = 1, number of paths = 823
Memorise all paths from /c/en/port with hops = 1, number of paths = 242
Memorise all paths from /c/en/landing_craft with hops = 1, number of paths = 18
Memorise all paths from /c/en/getting_around with hops = 1, number of paths = 5
Memorise all paths from /c/en/write with hops = 1, number of paths = 239
Memorise all paths from /c/en/high_density_lipoprotein with hops = 1, number of paths = 12


Memorise all paths from /c/en/train_truck with hops = 1, number of paths = 1
Memorise all paths from /c/en/transportive with hops = 1, number of paths = 2
Memorise all paths from /c/en/trains with hops = 1, number of paths = 21
Memorise all paths from /c/en/strong with hops = 1, number of paths = 239
Memorise all paths from /c/en/trucks with hops = 1, number of paths = 16
Memorise all paths from /c/en/towing with hops = 1, number of paths = 17
Memorise all paths from /c/en/slaver with hops = 1, number of paths = 16
Memorise all paths from /c/en/car_purpose with hops = 1, number of paths = 1
Memorise all paths from /c/en/move_something with hops = 1, number of paths = 1
Memorise all paths from /c/en/jetload with hops = 1, number of paths = 2
Memorise all paths from /c/en/bagman with hops = 1, number of paths = 13
Memorise all paths from /c/en/transported with hops = 1, number of paths = 16
Memorise all paths from /c/en/karaj with hops = 1, number of paths = 5
Memorise all paths from /c/

Memorise all paths from /c/en/loft with hops = 1, number of paths = 94
Memorise all paths from /c/en/main_entrance with hops = 1, number of paths = 8
Memorise all paths from /c/en/mouse with hops = 1, number of paths = 371
Memorise all paths from /c/en/movie_theater_seat with hops = 1, number of paths = 10
Memorise all paths from /c/en/nightclub with hops = 1, number of paths = 84
Memorise all paths from /c/en/offices with hops = 1, number of paths = 20
Memorise all paths from /c/en/parking_structure with hops = 1, number of paths = 10
Memorise all paths from /c/en/parlor with hops = 1, number of paths = 32
Memorise all paths from /c/en/passageway with hops = 1, number of paths = 52
Memorise all paths from /c/en/platform with hops = 1, number of paths = 252
Memorise all paths from /c/en/playroom with hops = 1, number of paths = 19
Memorise all paths from /c/en/post_office with hops = 1, number of paths = 104
Memorise all paths from /c/en/radio_studio with hops = 1, number of paths = 7


Memorise all paths from /c/en/convent with hops = 1, number of paths = 77
Memorise all paths from /c/en/crystal_palace with hops = 1, number of paths = 3
Memorise all paths from /c/en/dakota with hops = 1, number of paths = 15
Memorise all paths from /c/en/department_store with hops = 1, number of paths = 85
Memorise all paths from /c/en/detached_house with hops = 1, number of paths = 12
Memorise all paths from /c/en/dormitory with hops = 1, number of paths = 81
Memorise all paths from /c/en/duplex with hops = 1, number of paths = 50
Memorise all paths from /c/en/eiffel_tower with hops = 1, number of paths = 18
Memorise all paths from /c/en/eldorado with hops = 1, number of paths = 8
Memorise all paths from /c/en/empire_state_building with hops = 1, number of paths = 9
Memorise all paths from /c/en/farm_building with hops = 1, number of paths = 7
Memorise all paths from /c/en/farmhouse with hops = 1, number of paths = 19
Memorise all paths from /c/en/feedlot with hops = 1, number of pa

Memorise all paths from /c/en/sydney_opera_house with hops = 1, number of paths = 7
Memorise all paths from /c/en/taliesin with hops = 1, number of paths = 4
Memorise all paths from /c/en/tall_building with hops = 1, number of paths = 13
Memorise all paths from /c/en/tavern with hops = 1, number of paths = 50
Memorise all paths from /c/en/telecom_hotel with hops = 1, number of paths = 1
Memorise all paths from /c/en/temple with hops = 1, number of paths = 228
Memorise all paths from /c/en/thatched_roof_building with hops = 1, number of paths = 2
Memorise all paths from /c/en/theater with hops = 1, number of paths = 202
Memorise all paths from /c/en/train_station with hops = 1, number of paths = 84
Memorise all paths from /c/en/transamerica_pyramid with hops = 1, number of paths = 4
Memorise all paths from /c/en/trianon with hops = 1, number of paths = 8
Memorise all paths from /c/en/trinity with hops = 1, number of paths = 58
Memorise all paths from /c/en/tuileries_palace with hops = 1

Memorise all paths from /c/en/doors_windows with hops = 1, number of paths = 4
Memorise all paths from /c/en/dwelling with hops = 1, number of paths = 134
Memorise all paths from /c/en/generic with hops = 1, number of paths = 173
Memorise all paths from /c/en/human with hops = 1, number of paths = 1669
Memorise all paths from /c/en/human_construction with hops = 1, number of paths = 2
Memorise all paths from /c/en/is_structure with hops = 1, number of paths = 1
Memorise all paths from /c/en/office_skyscraper with hops = 1, number of paths = 1
Memorise all paths from /c/en/scrapers with hops = 1, number of paths = 2
Memorise all paths from /c/en/sky with hops = 1, number of paths = 357
Memorise all paths from /c/en/sky_scrapers with hops = 1, number of paths = 1
Memorise all paths from /c/en/tall with hops = 1, number of paths = 117
Memorise all paths from /c/en/tall_structure with hops = 1, number of paths = 2
Memorise all paths from /c/en/building_code with hops = 1, number of paths =

Memorise all paths from /c/en/ecclesia with hops = 1, number of paths = 8
Memorise all paths from /c/en/ecolodge with hops = 1, number of paths = 3
Memorise all paths from /c/en/edifice with hops = 1, number of paths = 32
Memorise all paths from /c/en/edified with hops = 1, number of paths = 2
Memorise all paths from /c/en/eightplex with hops = 1, number of paths = 5
Memorise all paths from /c/en/empire with hops = 1, number of paths = 180
Memorise all paths from /c/en/epigraph with hops = 1, number of paths = 20
Memorise all paths from /c/en/exercise with hops = 1, number of paths = 490
Memorise all paths from /c/en/exposed_services with hops = 1, number of paths = 7
Memorise all paths from /c/en/fabric with hops = 1, number of paths = 632
Memorise all paths from /c/en/facadectomy with hops = 1, number of paths = 7
Memorise all paths from /c/en/face_brick with hops = 1, number of paths = 2
Memorise all paths from /c/en/fall_out_shelter with hops = 1, number of paths = 3
Memorise all p

Memorise all paths from /c/en/make with hops = 1, number of paths = 373
Memorise all paths from /c/en/mansard with hops = 1, number of paths = 11
Memorise all paths from /c/en/mast_climbing with hops = 1, number of paths = 6
Memorise all paths from /c/en/masthouse with hops = 1, number of paths = 3
Memorise all paths from /c/en/material with hops = 1, number of paths = 795
Memorise all paths from /c/en/matroneum with hops = 1, number of paths = 5
Memorise all paths from /c/en/matter with hops = 1, number of paths = 396
Memorise all paths from /c/en/mausoleum with hops = 1, number of paths = 14


In [29]:
# Save the path cache; the context manager closes (and flushes) the file.
with open("../wordEmbeddings/PATHS_FROM.pickle", "wb") as cache_file:
    pickle.dump(PATHS_FROM, cache_file)

In [63]:
"""
v = get_kg_vectors_for_a_class('/c/en/company', 1, rel_list)
pp.pprint(v)
"""

{'/c/en/3m': array([0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ]),
 '/c/en/abandonware': array([0., 0., 0., 0., 0., 0., 1., 0.]),
 '/c/en/abb': array([0., 1., 0., 0., 0., 0., 0., 0.]),
 '/c/en/abbreviate': array([0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.216, 0.   ]),
 '/c/en/abbreviate_co': array([0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.216, 0.   ]),
 '/c/en/abbreviated': array([0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.73, 0.  ]),
 '/c/en/abbreviated_co': array([0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.73, 0.  ]),
 '/c/en/abc': array([0., 0., 0., 0., 0., 0., 1., 0.]),
 '/c/en/acc': array([0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ]),
 '/c/en/access': array([0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ]),
 '/c/en/accolade': array([0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ]),
 '/c/en/accompany': array([0., 0., 0., 0., 0., 0., 1., 0.]),
 '/c/en/acquire': array([0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ]),
 '/c/en/acquiree': array([0., 0., 0., 0., 0., 0., 1., 0.]),
 '/c/en/adidas': array([0. , 0.5, 0. , 0. , 

 '/c/en/golden_parachute': array([0., 0., 0., 0., 0., 0., 1., 0.]),
 '/c/en/goliath': array([0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ]),
 '/c/en/good': array([0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.15, 0.  ]),
 '/c/en/good_to_go': array([0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ]),
 '/c/en/goods': array([0., 0., 0., 0., 0., 0., 1., 0.]),
 '/c/en/google': array([0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ]),
 '/c/en/gravity': array([0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ]),
 '/c/en/group': array([0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 2.615, 0.   ]),
 '/c/en/group_people': array([0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.318, 0.   ]),
 '/c/en/gucci': array([0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ]),
 '/c/en/guest': array([0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.278, 0.   ]),
 '/c/en/guests': array([0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.806, 0.   ]),
 '/c/en/guests_visitors': array([0. , 0. , 0. , 0. , 0. , 0. , 0.1, 0. ]),
 '/c/en/guinea_pig_director': array([0., 0., 0., 0., 0., 0., 

In [8]:
"""
pp.pprint(get_neighbors('/c/en/company'))
"""

{'/c/en/3m': {'/rr/IsA': 0.5},
 '/c/en/abandonware': {'/r/RelatedTo': 1.0},
 '/c/en/abb': {'/rr/IsA': 1.0},
 '/c/en/abbreviate': {'/r/RelatedTo': 0.216},
 '/c/en/abbreviate_co': {'/r/RelatedTo': 0.216},
 '/c/en/abbreviated': {'/r/RelatedTo': 0.73},
 '/c/en/abbreviated_co': {'/r/RelatedTo': 0.73},
 '/c/en/abc': {'/r/RelatedTo': 1.0},
 '/c/en/acc': {'/rr/IsA': 0.5},
 '/c/en/access': {'/rr/IsA': 0.5},
 '/c/en/accolade': {'/rr/IsA': 0.5},
 '/c/en/accompany': {'/r/RelatedTo': 1.0},
 '/c/en/acquire': {'/rr/IsA': 0.5},
 '/c/en/acquiree': {'/r/RelatedTo': 1.0},
 '/c/en/adidas': {'/rr/IsA': 0.5},
 '/c/en/administration': {'/r/RelatedTo': 1.0},
 '/c/en/aeon': {'/rr/IsA': 0.5},
 '/c/en/aeros': {'/rr/IsA': 0.5},
 '/c/en/aerostar': {'/rr/IsA': 0.5},
 '/c/en/aetna': {'/rr/IsA': 0.5},
 '/c/en/agco': {'/rr/IsA': 0.5},
 '/c/en/agency': {'/r/RelatedTo': 0.25},
 '/c/en/agent_provocateur': {'/rr/IsA': 0.5},
 '/c/en/ahold': {'/rr/IsA': 0.5},
 '/c/en/airbud': {'/rr/IsA': 1.0},
 '/c/en/airbus': {'/rr/IsA': 1

In [78]:
"""
pp.pprint(find_edges_of('/c/en/company'))
"""

[{'obj': '/c/en/armed_forces',
  'rel': '/r/AtLocation',
  'sub': '/c/en/company',
  'weight': 1.0},
 {'obj': '/c/en/city',
  'rel': '/r/AtLocation',
  'sub': '/c/en/company',
  'weight': 1.0},
 {'obj': '/c/en/country',
  'rel': '/r/AtLocation',
  'sub': '/c/en/company',
  'weight': 1.0},
 {'obj': '/c/en/market_place',
  'rel': '/r/AtLocation',
  'sub': '/c/en/company',
  'weight': 1.0},
 {'obj': '/c/en/phone_book',
  'rel': '/r/AtLocation',
  'sub': '/c/en/company',
  'weight': 1.0},
 {'obj': '/c/en/yellow_pages',
  'rel': '/r/AtLocation',
  'sub': '/c/en/company',
  'weight': 2.0},
 {'obj': '/c/en/company',
  'rel': '/r/AtLocation',
  'sub': '/c/en/connection',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/AtLocation',
  'sub': '/c/en/employees',
  'weight': 3.464},
 {'obj': '/c/en/company',
  'rel': '/r/AtLocation',
  'sub': '/c/en/humans',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/AtLocation',
  'sub': '/c/en/leader',
  'weight': 1.0},
 {'obj': '/c/en/comp

  'sub': '/c/en/jive/n',
  'weight': 0.5},
 {'obj': '/c/en/company/n',
  'rel': '/r/IsA',
  'sub': '/c/en/jockey_club',
  'weight': 0.5},
 {'obj': '/c/en/company/n',
  'rel': '/r/IsA',
  'sub': '/c/en/john_deere',
  'weight': 0.5},
 {'obj': '/c/en/company/n',
  'rel': '/r/IsA',
  'sub': '/c/en/joint_stock_company/n',
  'weight': 2.0},
 {'obj': '/c/en/company/n',
  'rel': '/r/IsA',
  'sub': '/c/en/judd/n',
  'weight': 0.5},
 {'obj': '/c/en/company/n',
  'rel': '/r/IsA',
  'sub': '/c/en/junkers',
  'weight': 0.5},
 {'obj': '/c/en/company/n', 'rel': '/r/IsA', 'sub': '/c/en/jvc', 'weight': 0.5},
 {'obj': '/c/en/company/n', 'rel': '/r/IsA', 'sub': '/c/en/k_1', 'weight': 0.5},
 {'obj': '/c/en/company/n',
  'rel': '/r/IsA',
  'sub': '/c/en/k_line',
  'weight': 0.5},
 {'obj': '/c/en/company/n', 'rel': '/r/IsA', 'sub': '/c/en/k_s', 'weight': 0.5},
 {'obj': '/c/en/company/n',
  'rel': '/r/IsA',
  'sub': '/c/en/kappa/n',
  'weight': 0.5},
 {'obj': '/c/en/company/n',
  'rel': '/r/IsA',
  'sub': '/

  'sub': '/c/en/board',
  'weight': 0.437},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/board_meeting/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/board_member/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/body/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/body_corporate/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/booly/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/boss',
  'weight': 0.102},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/boss/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/brand_avatar/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/brandwidth/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/b

 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/intalk/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/intercompany/a',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/interoffice/a',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/intracompany/a',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/intrapreneur/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/introduce',
  'weight': 0.148},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/invoice',
  'weight': 0.101},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/ipcc',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/issue/n',
  'weight': 1.0},
 {'obj': '/c/en/company',
  'rel': '/r/RelatedTo',
  'sub': '/c/en/jeep',
  'weight': 0.227},
 {'obj': '/c/en/comp

In [18]:
"""
# Disabled demo of the Path helpers: build two single-edge paths that share the
# node '/c/en/company' (end of pa, start of pb).
pa = Path.create_path_from_edge('/c/en/beavertails', '/r/IsA', '/c/en/company', 0.5)
pb = Path.create_path_from_edge('/c/en/company', '/r/AtLocation', '/c/en/city', 1.0)
print(pa)
print(pb)
# Join the two paths only when Path.form_single_path_with accepts the pair;
# Path.concatenate itself asserts that pa.end == pb.start.
if pa.form_single_path_with(pb):
    pc = Path.concatenate(pa, pb)
    print(pc)
    # Report the simple-path check and the aggregated weight of the joined path.
    print(pc.is_simple_path(), pc.average_weight())
    print(pc.start, pc.end, pc.length)
"""

/c/en/beavertails$/r/IsA$/c/en/company (0.5)
/c/en/company$/r/AtLocation$/c/en/city (1.0)
/c/en/beavertails$/r/IsA$/c/en/company$/r/AtLocation$/c/en/city (0.5,1.0)
True 0.7071067811865476
/c/en/beavertails /c/en/city 2


In [2]:
"""
# Disabled copy of the target concept URIs; an active cell near the top of the
# notebook defines the same class_uri list.
class_uri = ['/c/en/company',
            '/c/en/education',
            '/c/en/artist',
            '/c/en/athlete',
            '/c/en/officer',
            '/c/en/transport',
            '/c/en/building',
            '/c/en/nature',
            '/c/en/village',
            '/c/en/animal',
            '/c/en/plant',
            '/c/en/album',
            '/c/en/film',
            '/c/en/writing']
# Relation ids that get_neighbors iterates over, issuing one query per relation.
rel_uri = ['/r/IsA', '/r/PartOf', '/r/AtLocation', '/r/RelatedTo']
"""

In [55]:
"""
def add_edges_of(uri, edges = None, rel = None):
    '''Fetch every edge touching `uri` from the ConceptNet query API and merge it into `edges`.

    All result pages are followed through the 'view' / 'nextPage' entries of
    each JSON response, and every page is handed to process_edges.

    Parameters:
        uri:   concept URI used as the `node` query parameter (e.g. '/c/en/company').
        edges: dictionary to merge the results into; a fresh dictionary is
               created for each call when omitted, so separate calls never
               share state through the default argument.
        rel:   optional relation id used as the `rel` filter of the query.

    Returns:
        The dictionary returned by process_edges after the last page.

    Raises:
        SystemExit: when a response body cannot be decoded as JSON (the raw
                    body is printed first).
    '''
    import sys  # local import: sys is not among the notebook's top-level imports

    if edges is None:
        edges = {}

    api_root = 'http://api.conceptnet.io'
    page_url = api_root + '/query?node=' + uri + '&other=/c/en'
    if rel is not None:
        page_url += '&rel=' + rel

    # A single loop serves the first page and every follow-up page alike.
    while page_url is not None:
        r = requests.get(page_url)
        try:
            json_data = r.json()
        except ValueError:
            # Every JSON decode error raised by requests (json, simplejson or
            # requests.exceptions.JSONDecodeError) derives from ValueError.
            print(r.text)
            print('Cannot decode the json')
            sys.exit(1)  # non-zero status: this is a failure, not a clean stop
        edges = process_edges(edges, uri, json_data['edges'])
        # 'view' is absent on single-page results and 'nextPage' is absent on
        # the last page; both cases end the pagination.
        next_path = json_data.get('view', {}).get('nextPage')
        page_url = api_root + next_path if next_path is not None else None
    return edges
"""

In [64]:
"""
def process_edges(a_dict, uri, edge_data):
    '''Merge a list of API edge records into the neighbor dictionary of `uri`.

    `a_dict` maps a neighbor URI to a {relation id: weight} dictionary and is
    updated in place. For each record the node on the other side of `uri`
    becomes the key. Edges pointing at `uri` get their relation prefix
    rewritten from '/r/' to '/rr/', except '/r/RelatedTo', which keeps its id
    in both directions. When the same neighbor/relation pair appears more
    than once, the largest weight is kept.

    Returns `a_dict`. A record that does not touch `uri` is printed and then
    trips an assertion.
    '''
    for edge in edge_data:
        source = edge['start']['term']
        relation = edge['rel']['@id']
        target = edge['end']['term']
        strength = edge['weight']

        if source == uri:
            neighbor = target
        elif target == uri:
            neighbor = source
            # Incoming edge: mark the relation as reversed unless it is RelatedTo.
            if relation != '/r/RelatedTo':
                relation = relation.replace('/r/', '/rr/', 1)
        else:
            print(edge)
            assert False, "This edge does not belong to the given uri"

        known = a_dict.setdefault(neighbor, {})
        if relation in known:
            known[relation] = max(known[relation], strength)
        else:
            known[relation] = strength
    return a_dict
"""

In [72]:
"""
def get_neighbors(uri):
    '''Collect the neighbors of `uri` across every relation listed in rel_uri.

    One add_edges_of query is issued per relation, all of them accumulating
    into the same dictionary, and a progress line is printed after each one.
    Returns the accumulated dictionary.
    '''
    collected = {}
    for relation in rel_uri:
        collected = add_edges_of(uri, edges = collected, rel = relation)
        print('Finish running', uri, relation)
    return collected
"""