In [None]:
# Mount Google Drive inside the Colab runtime; the mount point is the path
# the next cell changes into.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Switch the working directory to the shared project folder so that the
# relative CSV paths used by later cells resolve against it.
import os

PROJECT_DIR = '/content/drive/Shared drives/TecnologieSemantiche'
os.chdir(PROJECT_DIR)

# Initialization

In [None]:
!pip install ampligraph
!pip install tensorflow==1.13.2

In [None]:
import numpy as np
import pandas as pd
import ampligraph
import tensorflow as tf
import requests
from ampligraph.datasets import load_from_csv

# Show the installed AmpliGraph version (the cell's last expression is echoed as its output).
ampligraph.__version__

# EX1: Load AmpliGraph Dataset

In [None]:
# Optional example: download AmpliGraph's Game of Thrones sample graph and load it.
# It is disabled by default. The original kept this code inside a bare
# triple-quoted string, which the notebook evaluates and echoes as cell output;
# a flag keeps the code syntax-checked and silent until it is switched on.
LOAD_GOT_EXAMPLE = False

if LOAD_GOT_EXAMPLE:
    import requests
    from ampligraph.datasets import load_from_csv

    url = 'https://ampligraph.s3-eu-west-1.amazonaws.com/datasets/GoT.csv'
    response = requests.get(url)
    # Fail loudly instead of saving an HTTP error page as if it were the CSV.
    response.raise_for_status()
    with open('GoT.csv', 'wb') as got_file:
        got_file.write(response.content)

    X_ = load_from_csv('.', 'GoT.csv', sep=',')
    print(X_)


# Load Our Dataset

## Generate DBpedia URIs for the dataset

In [None]:
# Turn every (subject, relation, object) row of Triple.csv into DBpedia-style
# URIs and write the result to HP_uri.csv.
#
# The output file is opened once and in write mode, so re-running this cell
# regenerates the file. The original reopened it in append mode for every row,
# which duplicated all triples on each re-run.
# NOTE(review): the loading cell below reads 'HP_uri_2.csv', not the
# 'HP_uri.csv' written here. Confirm which file is intended.
import csv

df = pd.read_csv('Triple.csv', names=["subject","relation","object"])

dbpedia = "http://dbpedia.org/resource/"

with open('HP_uri.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for subject, relation, obj in df.itertuples(index=False, name=None):
        # Spaces are not valid inside a URI; use underscores instead.
        writer.writerow([dbpedia + part.replace(" ", "_")
                         for part in (subject, relation, obj)])


In [None]:
# Load the URI triples into an (n, 3) array of [subject, relation, object] rows.
# load_from_csv opens the file itself, so the unused binary file handle that
# wrapped this call in the original has been removed.
X = load_from_csv('.', 'HP_uri_2.csv', sep=',')


def _local_name(uri):
    """Return the part of a URI after its last '#' or '/'."""
    return str(uri).replace('#', '/').rsplit('/', 1)[-1]


# The recorded output of this cell shows the CSV header
# (subject, relation, object) loaded as if it were a real triple.
# Drop that row when present so it does not end up in the knowledge graph.
if len(X) > 0 and [_local_name(cell) for cell in X[0]] == ['subject', 'relation', 'object']:
    X = X[1:]

X

array([['http://dbpedia.org/resource#subject',
        'http://dbpedia.org/resource#relation',
        'http://dbpedia.org/resource#object'],
       ['http://dbpedia.org/resource#central_character',
        'http://dbpedia.org/resource#is',
        'http://dbpedia.org/resource#Harry_Potter'],
       ['http://dbpedia.org/resource#central_character',
        'http://dbpedia.org/resource#is',
        'http://dbpedia.org/resource#wizard'],
       ...,
       ['http://dbpedia.org/resource#Harry',
        'http://dbpedia.org/resource#awakens_faces',
        'http://dbpedia.org/resource#Voldemort'],
       ['http://dbpedia.org/resource#curse',
        'http://dbpedia.org/resource#rebounds',
        'http://dbpedia.org/resource#Harry'],
       ['http://dbpedia.org/resource#lives',
        'http://dbpedia.org/resource#surviving',
        'http://dbpedia.org/resource#characters_effects_Voldemort']],
      dtype=object)

# Train/Test

In [None]:
from ampligraph.evaluation import train_test_split_no_unseen

# Hold out 20% of the triples as the test set.
num_test = int(len(X) * (20 / 100))

# Fixed seed so the split is reproducible across runs.
train_triples, test_triples = train_test_split_no_unseen(
    X,
    test_size=num_test,
    seed=0,
    allow_duplication=False,
)
data = {'train': train_triples, 'test': test_triples}

for label, split_name in (('Train set size: ', 'train'), ('Test set size: ', 'test')):
    print(label, data[split_name].shape)

Train set size:  (861, 3)
Test set size:  (215, 3)


In [None]:
from ampligraph.latent_features import HolE

# MRR values noted by the author while trying alternative settings:
#   optimizer   'adam'      -> MRR = 0.44
#   loss        'pairwise'  -> MRR = 0.36
#   regularizer 'p': 2      -> MRR = 0.44
#   lr          1e-4        -> MRR = 0.36
hole_params = dict(
    batches_count=100,
    seed=0,
    epochs=200,
    k=150,
    eta=5,
    optimizer='adam',
    optimizer_params={'lr': 1e-2},
    loss='multiclass_nll',
    regularizer='LP',
    regularizer_params={'p': 3, 'lambda': 1e-5},
    verbose=True,
)

# `model` is the name the Fit / Evaluate cells below operate on.
model = HolE(**hole_params)

# Other models

In [None]:
from ampligraph.latent_features import ComplEx

# Author's recorded result for this configuration: MRR = 0.37
complex_params = dict(
    batches_count=100,
    seed=0,
    epochs=200,
    k=150,
    eta=5,
    optimizer='adam',
    optimizer_params={'lr': 1e-3},  # performs worse with 1e-2 and 1e-5
    loss='multiclass_nll',
    regularizer='LP',
    regularizer_params={'p': 3, 'lambda': 1e-5},
    verbose=True,
)

# Running this cell rebinds `model`, replacing a model built by an earlier cell.
model = ComplEx(**complex_params)

In [None]:
from ampligraph.latent_features import TransE

# Author's recorded result for this configuration: MRR = 0.21
transe_params = dict(
    batches_count=100,
    seed=0,
    epochs=200,
    k=150,
    eta=5,
    optimizer='adam',
    optimizer_params={'lr': 1e-3},
    loss='multiclass_nll',
    regularizer='LP',
    regularizer_params={'p': 3, 'lambda': 1e-5},
    verbose=True,
)

# Running this cell rebinds `model`, replacing a model built by an earlier cell.
model = TransE(**transe_params)

In [None]:
from ampligraph.latent_features import DistMult

# Author's recorded result for this configuration: MRR = 0.34
distmult_params = dict(
    batches_count=100,
    seed=0,
    epochs=200,
    k=150,
    eta=5,
    optimizer='adam',
    optimizer_params={'lr': 1e-3},
    loss='multiclass_nll',
    regularizer='LP',
    regularizer_params={'p': 3, 'lambda': 1e-5},
    verbose=True,
)

# Running this cell rebinds `model`, replacing a model built by an earlier cell.
model = DistMult(**distmult_params)

# Fit

In [None]:
# Raise TensorFlow's log threshold to ERROR before training.
tf.logging.set_verbosity(tf.logging.ERROR)

# Train whichever model is currently bound to `model` on the training split,
# with early stopping switched off.
train_triples = data['train']
model.fit(train_triples, early_stopping=False)

Average HolE Loss:   0.275317: 100%|██████████| 200/200 [04:08<00:00,  1.24s/epoch]


# Evaluate Test

In [None]:
from ampligraph.evaluation import evaluate_performance

# All triples seen so far (train + test), passed to the evaluator as its filter.
filter_triples = np.concatenate((data['train'], data['test']))

evaluation_options = dict(
    model=model,
    filter_triples=filter_triples,
    use_default_protocol=True,  # corrupt subj and obj separately while evaluating
    verbose=True,
)
ranks = evaluate_performance(data['test'], **evaluation_options)



100%|██████████| 215/215 [00:01<00:00, 139.79it/s]


In [None]:
from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score

# Compute every metric from the test-set ranks first, then report them together.
mrr = mrr_score(ranks)
hits_10, hits_3, hits_1 = [hits_at_n_score(ranks, n=cutoff) for cutoff in (10, 3, 1)]

print("MRR: %.2f" % (mrr))
print("Hits@10: %.2f" % (hits_10))
print("Hits@3: %.2f" % (hits_3))
print("Hits@1: %.2f" % (hits_1))

MRR: 0.73
Hits@10: 0.77
Hits@3: 0.75
Hits@1: 0.71


# Evaluate Unseen triples

In [None]:
#GoT Unseen Triples
X_unseen = np.array([
    ['http://dbpedia.org/resource/Jorah_Mormont','http://dbpedia.org/resource/SPOUSE','http://dbpedia.org/resource/Daenerys_Targaryen'],
    ['http://dbpedia.org/resource/Tyrion_Lannister','http://dbpedia.org/resource/SPOUSE','http://dbpedia.org/resource/Missandei'],
    ["http://dbpedia.org/resource/King's_Landing",'http://dbpedia.org/resource/SEAT_OF','http://dbpedia.org/resource/House_Lannister_of_Casterly_Rock'],
    ['http://dbpedia.org/resource/Sansa_Stark','http://dbpedia.org/resource/SPOUSE','http://dbpedia.org/resource/Petyr_Baelish'],
    ['http://dbpedia.org/resource/Daenerys_Targaryen','http://dbpedia.org/resource/SPOUSE','http://dbpedia.org/resource/Jon_Snow'],
    ['http://dbpedia.org/resource/Daenerys_Targaryen','http://dbpedia.org/resource/SPOUSE','http://dbpedia.org/resource/Craster'],
    ['http://dbpedia.org/resource/House_Stark_of_Winterfell','http://dbpedia.org/resource/IN_REGION','http://dbpedia.org/resource/The_North'],
    ['http://dbpedia.org/resource/House_Stark_of_Winterfell','http://dbpedia.org/resource/IN_REGION','http://dbpedia.org/resource/Dorne'],
    ['http://dbpedia.org/resource/House_Tyrell_of_Highgarden','http://dbpedia.org/resource/IN_REGION','http://dbpedia.org/resource/Beyond_the_Wall'],
    ['http://dbpedia.org/resource/Brandon_Stark','http://dbpedia.org/resource/ALLIED_WITH','http://dbpedia.org/resource/House_Stark_of_Winterfell'],
    ['http://dbpedia.org/resource/Brandon_Stark','http://dbpedia.org/resource/ALLIED_WITH','http://dbpedia.org/resource/House_Lannister_of_Casterly_Rock'],    
    ['http://dbpedia.org/resource/Rhaegar_Targaryen','http://dbpedia.org/resource/PARENT_OF','http://dbpedia.org/resource/Jon_Snow'],
    ['http://dbpedia.org/resource/House_Hutcheson','http://dbpedia.org/resource/SWORN_TO','http://dbpedia.org/resource/House_Tyrell_of_Highgarden'],
    ['http://dbpedia.org/resource/Daenerys_Targaryen','http://dbpedia.org/resource/ALLIED_WITH','http://dbpedia.org/resource/House_Stark_of_Winterfell'],
    ['http://dbpedia.org/resource/Daenerys_Targaryen','http://dbpedia.org/resource/ALLIED_WITH','http://dbpedia.org/resource/House_Lannister_of_Casterly_Rock'],
    ['http://dbpedia.org/resource/Jaime_Lannister','http://dbpedia.org/resource/PARENT_OF','http://dbpedia.org/resource/Myrcella_Baratheon'],
    ['http://dbpedia.org/resource/Robert_I_Baratheon','http://dbpedia.org/resource/PARENT_OF','http://dbpedia.org/resource/Myrcella_Baratheon'],
    ['http://dbpedia.org/resource/Cersei_Lannister','http://dbpedia.org/resource/PARENT_OF','http://dbpedia.org/resource/Myrcella_Baratheon'],
    ['http://dbpedia.org/resource/Cersei_Lannister','http://dbpedia.org/resource/PARENT_OF','http://dbpedia.org/resource/Brandon_Stark'],
    ["http://dbpedia.org/resource/Tywin_Lannister",'http://dbpedia.org/resource/PARENT_OF','http://dbpedia.org/resource/Jaime_Lannister'],
    ["http://dbpedia.org/resource/Missandei",'http://dbpedia.org/resource/SPOUSE','http://dbpedia.org/resource/Grey_Worm'],
    ["http://dbpedia.org/resource/Brienne_of_Tarth",'http://dbpedia.org/resource/SPOUSE','http://dbpedia.org/resource/Jaime_Lannister']
])

In [None]:
#HP_unseen triples
# Read the Harry Potter candidate statements from 'Unseen_uri.csv'.
# This rebinds both `df` and `X_unseen`, replacing the GoT triples defined above.

triple_columns = ["subject","relation","object"]
df = pd.read_csv('Unseen_uri.csv', names=triple_columns)
X_unseen = df.to_numpy()

In [None]:
# Filter for the unseen evaluation: train + test + the candidate statements,
# de-duplicated by passing the rows through a set of tuples.
stacked_triples = np.vstack((filter_triples, X_unseen))
unseen_filter = np.array(list(set(map(tuple, stacked_triples))))

ranks_unseen = evaluate_performance(
    X_unseen,
    model=model,
    filter_triples=unseen_filter,
    corrupt_side='s+o',
    # Default protocol is off here, so the explicit corrupt_side above applies.
    # NOTE(review): presumably this gives one rank per statement. Confirm the
    # exact 's+o' semantics against the installed AmpliGraph version.
    use_default_protocol=False,
    verbose=True,
)

# Raw model scores for the same candidate statements.
scores = model.predict(X_unseen)

100%|██████████| 35/35 [00:00<00:00, 106.72it/s]


In [None]:
from scipy.special import expit

# Pass the raw scores through the logistic sigmoid.
probs = expit(scores)

# One row per candidate statement, ordered by ascending score.
statements = [' '.join(triple) for triple in X_unseen]
result_rows = list(zip(statements,
                       ranks_unseen,
                       np.squeeze(scores),
                       np.squeeze(probs)))
result_columns = ['statement', 'rank', 'score', 'prob']
output = pd.DataFrame(result_rows, columns=result_columns).sort_values("score")

output

Unnamed: 0,statement,rank,score,prob
11,http://dbpedia.org/resource#Ron http://dbpedia...,1523,-0.638364,0.345616
26,http://dbpedia.org/resource#Harry http://dbped...,1496,-0.517893,0.373345
34,http://dbpedia.org/resource#Lupin http://dbped...,1573,-0.495625,0.378569
32,http://dbpedia.org/resource#Snape http://dbped...,1448,-0.35088,0.413169
13,http://dbpedia.org/resource#Harry http://dbped...,1217,-0.294509,0.4269
5,http://dbpedia.org/resource#Harry_Potter http:...,1552,-0.260742,0.435181
25,http://dbpedia.org/resource#Tom_Marvolo_Riddle...,1378,-0.167682,0.458177
22,http://dbpedia.org/resource#Ron http://dbpedia...,1197,-0.136934,0.46582
14,http://dbpedia.org/resource#various_memories h...,1137,-0.090618,0.477361
7,http://dbpedia.org/resource#wizarding_Britain ...,1126,-0.089227,0.477708


# Save evaluated statements in N-Triples (.nt) format

In [None]:
##Convert triples in nt format
# Write every evaluated statement whose rank is below `th` to 'triples.nt',
# one "<subject> <predicate> <object> ." line per statement.
import pandas as pd

lines = []
th = 1000  # keep only statements ranked strictly better (lower) than this

for statement, rank in zip(output['statement'], output['rank']):
    if not rank < th:
        continue
    # `statement` is "subject predicate object" joined by single spaces.
    # After splitting on spaces no part can contain one, so the per-part
    # space-to-underscore replacement done originally was a no-op and is gone.
    parts = statement.split(" ")
    if len(parts) != 3:
        raise ValueError(
            "expected 'subject predicate object' separated by single spaces, got: %r"
            % (statement,))
    subject, predicate, objectt = parts
    lines.append("<" + subject + "> <" + predicate + "> <" + objectt + "> .")

# N-Triples documents are UTF-8 encoded; state it explicitly rather than
# relying on the platform default.
with open('triples.nt', 'w', encoding='utf-8') as f:
    f.write('\n'.join(lines))