In [4]:
import requests # For REST calls
import json # for modelling objects in the JSON format

In [7]:
# Read the notebook's settings from config.json, which sits in the same folder
# as the notebook; the file is opened read-only and closed as soon as it is parsed.
with open("config.json", 'r') as json_file:
    config = json.load(json_file)

# Token sent with every REST call made by this notebook.
KEY = config['d4science_KEY']

In [8]:
# URL of the TagMe "tag" service. Presumably the text-annotation endpoint; it is
# not called by any cell visible here — TODO confirm it is still needed.
TAGME_ENDPOINT = "https://tagme.d4science.org/tagme/tag"
# Language code sent as the "lang" parameter of the REST calls.
LANG = "en" # Also works in Italian and German

In [9]:
# URL of the TagMe relatedness service; query_relatedness() POSTs entity couples to it.
ENDPOINT_RELATEDNESS = "https://tagme.d4science.org/tagme/rel"

# If efficiency becomes a concern, the service can reportedly take batches of up
# to 100 couples per HTTP call; this helper sends exactly one couple per request.
def query_relatedness(e1, e2, timeout=30):
    """Ask the TagMe relatedness service how related two entities are.

    Parameters
    ----------
    e1, e2 : str
        Entity titles. Spaces inside each title are replaced by underscores,
        because the service uses a single space to separate the two titles.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up; forwarded to
        ``requests.post``. Defaults to 30.

    Returns
    -------
    dict
        The decoded JSON body of the response. In the example run of this
        notebook its ``'result'`` list holds one item per couple, carrying
        either a ``'rel'`` score or an ``'err'`` message. Note that an
        unparseable title still comes back with HTTP 200, so callers must
        check for ``'err'`` themselves.

    Raises
    ------
    Exception
        If the service answers with a status code other than 200.
    requests.exceptions.RequestException
        If the request cannot be completed (including a timeout).
    """
    # Entities require underscores in place of spaces; the single space left in
    # the string is the separator between entity one and entity two.
    tt = e1.replace(" ", "_") + " " + e2.replace(" ", "_")
    payload = {"tt": tt, "gcube-token": KEY, "lang": LANG}
    # Without an explicit timeout, requests waits forever on a stalled
    # connection, which would hang the notebook kernel.
    r = requests.post(ENDPOINT_RELATEDNESS, data=payload, timeout=timeout)
    if r.status_code != 200:
        raise Exception("Error on relatedness computation: {}\n{}".format(tt, r.text))
    return r.json()

In [18]:
# Query the same entity against two different second titles.
first = query_relatedness("Roberta Metsola", "SAU")
second = query_relatedness("Roberta Metsola", "Malta")
# Show only the 'result' part of each response, one per line.
print(first['result'], second['result'], sep="\n")

[{'couple': 'Roberta_Metsola SAU', 'err': 'Unable to parse second title'}]
[{'couple': 'Roberta_Metsola Malta', 'rel': 0.4323458671569824}]


In [19]:
import json
from wikipedia2vec import Wikipedia2Vec
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Path of the pretrained Wikipedia2Vec model, resolved relative to the working
# directory. Judging by the file name this is presumably the English Wikipedia
# dump of 2018-04-20 with 100-dimensional vectors — TODO confirm.
MODEL_FILE = "enwiki_20180420_100d.pkl"

# Load the model once; later cells look up entity vectors through `wiki2vec`.
# NOTE(review): the .pkl extension suggests pickle-style deserialization, so
# only load model files obtained from a trusted source.
wiki2vec = Wikipedia2Vec.load(MODEL_FILE)

   0.11309847]
 [-0.12473089 -0.6455471   0.13622157 ... -0.6347396   0.5410465
   0.32159257]
 [-0.05129661 -0.4987064  -0.09563554 ... -0.30148467  0.24280211
  -0.08308805]
 ...
 [-1.0583687  -0.83086014 -0.4427682  ... -1.0160557  -0.02469787
   0.99688977]
 [-1.0772408  -0.32139003 -0.79673034 ... -1.3150353  -0.12155519
   0.57841384]
 [-0.86319923 -0.5425662  -0.5506818  ... -1.1069874   0.26995388
  -1.15518689e-01  1.95024401e-01]
 [ 1.06650688e-01 -1.39282122e-01  1.28345221e-01 ... -2.58541018e-01
   1.06945192e-03  4.36867744e-01]
 [ 2.41883561e-01 -3.22016366e-02 -1.21213362e-01 ...  2.10319590e-02
  -3.88098627e-01 -7.62230903e-02]
 ...
 [-6.40646815e-01  2.35682666e-01  2.19560400e-01 ...  1.07549876e-01
  -1.31226122e+00  1.51679659e+00]
 [-2.97203183e-01  2.59723127e-01  2.77167350e-01 ... -1.70121565e-01
  -1.67249894e+00  1.12165082e+00]
 [-6.73093140e-01  2.12552413e-01  4.16892409e-01 ...  2.12317891e-02
 [649936 302186]
 [869658 382059]
 ...
 [     0      0]
 [   

In [20]:
def get_entity_vector(e):
    """Look up the embedding of an entity in the loaded Wikipedia2Vec model.

    Parameters
    ----------
    e : str
        Entity title, passed unchanged to ``wiki2vec.get_entity_vector``.

    Returns
    -------
    The vector the model returns for that entity.

    Raises
    ------
    Exception
        If the model lookup fails for any ordinary reason (typically because
        the title is unknown to the model). The underlying error is attached
        as ``__cause__`` so the real reason is not lost.
    """
    try:
        return wiki2vec.get_entity_vector(e)
    except Exception as err:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the cell instead of being
        # reported as a missing entity.
        raise Exception("Entity vector {} not found\n".format(e)) from err

def similarity(v1, v2):
    """Return the cosine similarity between two vectors as a scalar.

    Each input is copied into a NumPy array and reshaped to a single row,
    because ``cosine_similarity`` works on 2-D inputs; the only cell of the
    resulting 1x1 matrix is returned.
    """
    row_a, row_b = (np.array(vec).reshape(1, -1) for vec in (v1, v2))
    pairwise = cosine_similarity(row_a, row_b)
    return pairwise[0][0]

In [46]:
# Build one (title, vector) pair per entity, in the order listed.
v1, v2, v3 = (
    (title, get_entity_vector(title))
    for title in ("European Parliament", "Green Deal", "Agriculture")
)
# Display the first pair as the cell output.
v1

('European Parliament',
 memmap([ 0.32559958, -0.49910313,  1.0043432 ,  0.01733444, -0.12877917,
         -0.3833318 ,  0.9526427 ,  0.21671446, -0.699357  , -0.46911693,
         -0.4539235 ,  0.25402513,  0.5264579 , -1.3997765 ,  0.38581952,
          0.13243324, -0.25360656,  0.6015653 , -0.10148839, -0.24216361,
         -0.9591717 ,  2.1751957 , -0.7027072 , -0.1774086 , -0.53077173,
          0.2637302 ,  0.11219589, -1.2789522 ,  0.50882083,  0.61073834,
         -0.72055393, -0.28003865,  1.2260796 ,  1.1812795 , -0.00586406,
         -0.8349619 ,  0.5994383 ,  0.6358481 ,  0.7769877 , -0.16194658,
         -0.05701641, -0.9635012 ,  1.1561038 ,  0.01084604,  0.04601151,
         -1.3398298 , -0.11817376,  0.16577002,  0.58518296, -0.1766347 ,
         -0.1857903 ,  0.07901498, -0.34180877, -0.7495053 ,  0.549753  ,
          1.0031368 , -0.3288226 ,  0.06679147, -0.14424427,  0.03914331,
          0.5324565 ,  0.5775707 , -1.750892  ,  1.2654934 ,  1.9484026 ,
         -0.76

In [47]:
from itertools import combinations

print("======================================================================")
# Report the cosine similarity of every unordered pair of the three entities.
for (name_a, vec_a), (name_b, vec_b) in combinations([v1, v2, v3], 2):
    score = similarity(vec_a, vec_b)
    print("Cosine similarity between {} and {} is {:.2f}".format(name_a, name_b, score))

Cosine similarity between European Parliament and Green Deal is 0.39
Cosine similarity between European Parliament and Agriculture is 0.26
Cosine similarity between Green Deal and Agriculture is 0.24
