# AMPLIGRAPH TUTORIAL

## Section 2.1

In [1]:
import numpy as np
import pandas as pd
import ampligraph
import requests
from ampligraph.datasets import load_from_csv

In [2]:
ampligraph.__version__

'1.4.0'

In [3]:
url = 'https://ampligraph.s3-eu-west-1.amazonaws.com/datasets/GoT.csv'
open('GoT.csv', 'wb').write(requests.get(url).content)
X = load_from_csv('.', 'GoT.csv', sep=',')
X[:5, ]

array([['Smithyton', 'SEAT_OF', 'House Shermer of Smithyton'],
       ['House Mormont of Bear Island', 'LED_BY', 'Maege Mormont'],
       ['Margaery Tyrell', 'SPOUSE', 'Joffrey Baratheon'],
       ['Maron Nymeros Martell', 'ALLIED_WITH',
        'House Nymeros Martell of Sunspear'],
       ['House Gargalen of Salt Shore', 'IN_REGION', 'Dorne']],
      dtype=object)

In [4]:
entities = np.unique(np.concatenate([X[:, 0], X[:, 2]]))
entities

array(['Abelar Hightower', 'Acorn Hall', 'Addam Frey', ..., 'the Antlers',
       'the Paps', 'unnamed tower'], dtype=object)

In [5]:
relations = np.unique(X[:, 1])
relations

array(['ALLIED_WITH', 'BRANCH_OF', 'FOUNDED_BY', 'HEIR_TO', 'IN_REGION',
       'LED_BY', 'PARENT_OF', 'SEAT_OF', 'SPOUSE', 'SWORN_TO'],
      dtype=object)

In [6]:
X_train, X_test = X[:3000], X[3000:]

In [7]:
print('Train set size: ', X_train.shape)
print('Test set size: ', X_test.shape)

Train set size:  (3000, 3)
Test set size:  (175, 3)


In [8]:
from ampligraph.latent_features import ComplEx, TransE, DistMult

In [9]:
model_cex = ComplEx(batches_count=100, 
                seed=0, 
                epochs=200, 
                k=150, 
                eta=5,
                optimizer='adam', 
                optimizer_params={'lr':1e-3},
                loss='multiclass_nll', 
                regularizer='LP', 
                regularizer_params={'p':3, 'lambda':1e-5}, 
                verbose=True)

In [10]:
model_te = TransE(batches_count=100, 
                seed=0, 
                epochs=200, 
                k=150, 
                eta=5,
                optimizer='adam', 
                optimizer_params={'lr':1e-3},
                loss='multiclass_nll', 
                regularizer='LP', 
                regularizer_params={'p':3, 'lambda':1e-5}, 
                verbose=True)

In [11]:
model_dm = DistMult(batches_count=100, 
                seed=0, 
                epochs=200, 
                k=150, 
                eta=5,
                optimizer='adam', 
                optimizer_params={'lr':1e-3},
                loss='multiclass_nll', 
                regularizer='LP', 
                regularizer_params={'p':3, 'lambda':1e-5}, 
                verbose=True)

In [12]:
positives_filter = X

In [13]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

model_cex.fit(X_train, early_stopping = False)
model_te.fit(X_train, early_stopping = False)
model_dm.fit(X_train, early_stopping = False)

2022-10-17 10:36:40.364861: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2022-10-17 10:36:40.380194: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f94f3591d40 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-10-17 10:36:40.380212: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
Average ComplEx Loss:   0.032386: 100%|██████████| 200/200 [02:42<00:00,  1.23epoch/s]
Average TransE Loss:   0.046736: 100%|██████████| 200/200 [00:57<00:00,  3.50epoch/s]
Average DistMult Loss:   0.031232: 100%|██████████| 200/200 [00:53<00:00,  3.75epoch/s]


In [14]:
from ampligraph.latent_features import save_model, restore_model

In [15]:
save_model(model_cex, './best_model_cex.pkl')
save_model(model_te, './best_model_te.pkl')
save_model(model_dm, './best_model_dm.pkl')

In [16]:
from ampligraph.evaluation import evaluate_performance

In [17]:
ranks_cex = evaluate_performance(X_test, 
                             model=model_cex, 
                             filter_triples=positives_filter,   # Corruption strategy filter defined above 
                             use_default_protocol=True, # corrupt subj and obj separately while evaluating
                             verbose=True)

ranks_te = evaluate_performance(X_test, 
                             model=model_te, 
                             filter_triples=positives_filter,   # Corruption strategy filter defined above 
                             use_default_protocol=True, # corrupt subj and obj separately while evaluating
                             verbose=True)

ranks_dm = evaluate_performance(X_test, 
                             model=model_dm, 
                             filter_triples=positives_filter,   # Corruption strategy filter defined above 
                             use_default_protocol=True, # corrupt subj and obj separately while evaluating
                             verbose=True)



100%|██████████| 119/119 [00:01<00:00, 73.97it/s]




100%|██████████| 119/119 [00:00<00:00, 306.43it/s]




100%|██████████| 119/119 [00:00<00:00, 309.82it/s]


In [18]:
from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score

In [19]:
mrr_cex = mrr_score(ranks_cex)
mrr_te = mrr_score(ranks_te)
mrr_dm = mrr_score(ranks_dm)
print("MRR for CompleX: %.2f" % (mrr_cex))
print("MRR for TransE: %.2f" % (mrr_te))
print("MRR for DistMult: %.2f" % (mrr_dm))
print("")


hits_10_cex = hits_at_n_score(ranks_cex, n=10)
hits_10_te = hits_at_n_score(ranks_te, n=10)
hits_10_dm = hits_at_n_score(ranks_dm, n=10)
print("Hits@10 for CompleX: %.2f" % (hits_10_cex))
print("Hits@10 for TransE: %.2f" % (hits_10_te))
print("Hits@10 for DistMult: %.2f" % (hits_10_dm))
print("")

hits_3_cex = hits_at_n_score(ranks_cex, n=3)
hits_3_te = hits_at_n_score(ranks_te, n=3)
hits_3_dm = hits_at_n_score(ranks_dm, n=3)
print("Hits@3 for CompleX: %.2f" % (hits_3_cex))
print("Hits@3 for TransE: %.2f" % (hits_3_te))
print("Hits@3 for DistMult: %.2f" % (hits_3_dm))
print("")


hits_1_cex = hits_at_n_score(ranks_cex, n=1)
hits_1_te = hits_at_n_score(ranks_te, n=1)
hits_1_dm = hits_at_n_score(ranks_dm, n=1)
print("Hits@1 for CompleX: %.2f" % (hits_1_cex))
print("Hits@1 for TransE: %.2f" % (hits_1_te))
print("Hits@1 for DistMult: %.2f" % (hits_1_dm))
print("")

MRR for CompleX: 0.49
MRR for TransE: 0.36
MRR for DistMult: 0.47

Hits@10 for CompleX: 0.62
Hits@10 for TransE: 0.55
Hits@10 for DistMult: 0.60

Hits@3 for CompleX: 0.52
Hits@3 for TransE: 0.43
Hits@3 for DistMult: 0.51

Hits@1 for CompleX: 0.43
Hits@1 for TransE: 0.22
Hits@1 for DistMult: 0.40



In [20]:
X_unseen = np.array([
    ['Jorah Mormont', 'SPOUSE', 'Daenerys Targaryen'],
    ['Tyrion Lannister', 'SPOUSE', 'Missandei'],
    ["King's Landing", 'SEAT_OF', 'House Lannister of Casterly Rock'],
    ['Sansa Stark', 'SPOUSE', 'Petyr Baelish'],
    ['Daenerys Targaryen', 'SPOUSE', 'Jon Snow'],
    ['Daenerys Targaryen', 'SPOUSE', 'Craster'],
    ['House Stark of Winterfell', 'IN_REGION', 'The North'],
    ['House Stark of Winterfell', 'IN_REGION', 'Dorne'],
    ['House Tyrell of Highgarden', 'IN_REGION', 'Beyond the Wall'],
    ['Brandon Stark', 'ALLIED_WITH', 'House Stark of Winterfell'],
    ['Brandon Stark', 'ALLIED_WITH', 'House Lannister of Casterly Rock'],    
    ['Rhaegar Targaryen', 'PARENT_OF', 'Jon Snow'],
    ['House Hutcheson', 'SWORN_TO', 'House Tyrell of Highgarden'],
    ['Daenerys Targaryen', 'ALLIED_WITH', 'House Stark of Winterfell'],
    ['Daenerys Targaryen', 'ALLIED_WITH', 'House Lannister of Casterly Rock'],
    ['Jaime Lannister', 'PARENT_OF', 'Myrcella Baratheon'],
    ['Robert I Baratheon', 'PARENT_OF', 'Myrcella Baratheon'],
    ['Cersei Lannister', 'PARENT_OF', 'Myrcella Baratheon'],
    ['Cersei Lannister', 'PARENT_OF', 'Brandon Stark'],
    ["Tywin Lannister", 'PARENT_OF', 'Jaime Lannister'],
    ["Missandei", 'SPOUSE', 'Grey Worm'],
    ["Brienne of Tarth", 'SPOUSE', 'Jaime Lannister']
])

In [21]:
unseen_filter = np.array(list({tuple(i) for i in np.vstack((positives_filter, X_unseen))}))

In [22]:
ranks_unseen_cex = evaluate_performance(
    X_unseen, 
    model=model_cex, 
    filter_triples=unseen_filter,   # Corruption strategy filter defined above 
    corrupt_side = 's+o',
    use_default_protocol=False, # corrupt subj and obj separately while evaluating
    verbose=True
)

ranks_unseen_te = evaluate_performance(
    X_unseen, 
    model=model_te, 
    filter_triples=unseen_filter,   # Corruption strategy filter defined above 
    corrupt_side = 's+o',
    use_default_protocol=False, # corrupt subj and obj separately while evaluating
    verbose=True
)

ranks_unseen_dm = evaluate_performance(
    X_unseen, 
    model=model_dm, 
    filter_triples=unseen_filter,   # Corruption strategy filter defined above 
    corrupt_side = 's+o',
    use_default_protocol=False, # corrupt subj and obj separately while evaluating
    verbose=True
)

100%|██████████| 22/22 [00:00<00:00, 78.01it/s]
100%|██████████| 22/22 [00:00<00:00, 197.12it/s]
100%|██████████| 22/22 [00:00<00:00, 195.50it/s]


In [23]:
ranks_unseen_cex

array([2410, 1993,  672,  471, 2469, 3298,    1, 1696, 3646,    1,  978,
        865,    2, 1242,  738,  774,  263,   21,  420,   13,   85,  282],
      dtype=int32)

In [24]:
scores_cex = model_cex.predict(X_unseen)
scores_te = model_te.predict(X_unseen)
scores_dm = model_dm.predict(X_unseen)

In [25]:
from scipy.special import expit

In [26]:
probs_cex = expit(scores_cex)
probs_te = expit(scores_te)
probs_dm = expit(scores_dm)

In [27]:
pd.DataFrame(list(zip([' '.join(x) for x in X_unseen], 
                      ranks_unseen_cex, 
                      np.squeeze(scores_cex),
                      np.squeeze(probs_cex))), 
             columns=['statement', 'rank', 'score', 'prob']).sort_values("score")

Unnamed: 0,statement,rank,score,prob
8,House Tyrell of Highgarden IN_REGION Beyond th...,3646,-0.865583,0.296174
5,Daenerys Targaryen SPOUSE Craster,3298,-0.720684,0.327242
4,Daenerys Targaryen SPOUSE Jon Snow,2469,-0.243896,0.439326
13,Daenerys Targaryen ALLIED_WITH House Stark of ...,1242,-0.180674,0.454954
1,Tyrion Lannister SPOUSE Missandei,1993,-0.073245,0.481697
0,Jorah Mormont SPOUSE Daenerys Targaryen,2410,-0.010857,0.497286
7,House Stark of Winterfell IN_REGION Dorne,1696,0.001867,0.500467
10,Brandon Stark ALLIED_WITH House Lannister of C...,978,0.058437,0.514605
14,Daenerys Targaryen ALLIED_WITH House Lannister...,738,0.082403,0.520589
11,Rhaegar Targaryen PARENT_OF Jon Snow,865,0.282471,0.570152


In [28]:
pd.DataFrame(list(zip([' '.join(x) for x in X_unseen], 
                      ranks_unseen_te, 
                      np.squeeze(scores_te),
                      np.squeeze(probs_dm))), 
             columns=['statement', 'rank', 'score', 'prob']).sort_values("score")

Unnamed: 0,statement,rank,score,prob
8,House Tyrell of Highgarden IN_REGION Beyond th...,1986,-31.161249,0.799314
5,Daenerys Targaryen SPOUSE Craster,2419,-30.02351,0.627172
7,House Stark of Winterfell IN_REGION Dorne,386,-25.503752,0.439759
18,Cersei Lannister PARENT_OF Brandon Stark,1133,-25.293221,0.641591
2,King's Landing SEAT_OF House Lannister of Cast...,205,-25.09688,0.703194
15,Jaime Lannister PARENT_OF Myrcella Baratheon,995,-24.875906,0.601366
0,Jorah Mormont SPOUSE Daenerys Targaryen,1058,-23.70072,0.377389
16,Robert I Baratheon PARENT_OF Myrcella Baratheon,65,-23.57692,0.78465
4,Daenerys Targaryen SPOUSE Jon Snow,592,-23.25827,0.722639
6,House Stark of Winterfell IN_REGION The North,26,-22.809088,0.97298


In [29]:
pd.DataFrame(list(zip([' '.join(x) for x in X_unseen], 
                      ranks_unseen_dm, 
                      np.squeeze(scores_dm),
                      np.squeeze(probs_dm))), 
             columns=['statement', 'rank', 'score', 'prob']).sort_values("score")

Unnamed: 0,statement,rank,score,prob
14,Daenerys Targaryen ALLIED_WITH House Lannister...,3653,-1.506466,0.181463
10,Brandon Stark ALLIED_WITH House Lannister of C...,2978,-0.74121,0.32274
0,Jorah Mormont SPOUSE Daenerys Targaryen,2870,-0.500647,0.377389
1,Tyrion Lannister SPOUSE Missandei,2482,-0.303652,0.424665
7,House Stark of Winterfell IN_REGION Dorne,2160,-0.242142,0.439759
21,Brienne of Tarth SPOUSE Jaime Lannister,2047,-0.107624,0.47312
19,Tywin Lannister PARENT_OF Jaime Lannister,2361,-0.045968,0.48851
11,Rhaegar Targaryen PARENT_OF Jon Snow,1732,0.03292,0.508229
17,Cersei Lannister PARENT_OF Myrcella Baratheon,1289,0.270324,0.567172
15,Jaime Lannister PARENT_OF Myrcella Baratheon,635,0.411162,0.601366


In [30]:
from ampligraph.utils import create_tensorboard_visualizations

In [31]:
create_tensorboard_visualizations(model_cex, 'GoT_embeddings_cex')
create_tensorboard_visualizations(model_te, 'GoT_embeddings_te')
create_tensorboard_visualizations(model_dm, 'GoT_embeddings_dm')

In [32]:
!tensorboard --logdir=./visualizations

Traceback (most recent call last):
  File "/Users/devyanbiswas/Desktop/DSCI558/Homework/558/bin/tensorboard", line 10, in <module>
    sys.exit(run_main())
  File "/Users/devyanbiswas/Desktop/DSCI558/Homework/558/lib/python3.7/site-packages/tensorboard/main.py", line 58, in run_main
    default.get_plugins() + default.get_dynamic_plugins(),
  File "/Users/devyanbiswas/Desktop/DSCI558/Homework/558/lib/python3.7/site-packages/tensorboard/default.py", line 110, in get_dynamic_plugins
    for entry_point in pkg_resources.iter_entry_points('tensorboard_plugins')
  File "/Users/devyanbiswas/Desktop/DSCI558/Homework/558/lib/python3.7/site-packages/tensorboard/default.py", line 110, in <listcomp>
    for entry_point in pkg_resources.iter_entry_points('tensorboard_plugins')
  File "/Users/devyanbiswas/Desktop/DSCI558/Homework/558/lib/python3.7/site-packages/pkg_resources/__init__.py", line 2470, in load
    self.require(*args, **kwargs)
  File "/Users/devyanbiswas/Desktop/DSCI558/Homework/558/l

## 2.2

NOTE: "The model returns a rank of 7. This tells us that while it’s not the highest likelihood true statement (which would be given a rank 1), it’s pretty likely." tells us that lowest will be most likely

In [33]:
def relation_pred(subject, object, model):
    possible_triples = list()
    for relation in relations:
        temp = list()
        temp.append(subject)
        temp.append(relation)
        temp.append(object)
        possible_triples.append(temp)

    possible_triples = np.array(possible_triples)

    unseen_filter = np.array(list({tuple(i) for i in np.vstack((possible_triples, X_unseen))}))
    
    ranks_unseen = evaluate_performance(
        possible_triples, 
        model=model, 
        filter_triples=unseen_filter,   # Corruption strategy filter defined above 
        corrupt_side = 's+o',
        use_default_protocol=False, # corrupt subj and obj separately while evaluating
        verbose=True
    )
    scores = model.predict(possible_triples)
    probs = expit(scores)

    df = pd.DataFrame(list(zip(possible_triples, 
                      ranks_unseen, 
                      np.squeeze(scores),
                      np.squeeze(probs))), 
             columns=['statement', 'rank', 'score', 'prob']).sort_values("score")

    row = df[df['rank']==df['rank'].min()]
    top_relation = row['statement'].tolist()[0][1]

    return top_relation


In [34]:
cex_relation_pred = relation_pred('Jorah Mormont', 'Daenerys Targaryen', model_cex)
te_relation_pred = relation_pred('Jorah Mormont', 'Daenerys Targaryen', model_te)
dm_relation_pred = relation_pred('Jorah Mormont', 'Daenerys Targaryen', model_dm)

print(cex_relation_pred)
print(te_relation_pred)
print(dm_relation_pred)

100%|██████████| 10/10 [00:00<00:00, 45.91it/s]
100%|██████████| 10/10 [00:00<00:00, 101.77it/s]
100%|██████████| 10/10 [00:00<00:00, 106.55it/s]

ALLIED_WITH
SPOUSE
FOUNDED_BY





## 2.3

In [35]:
from ampligraph.discovery import find_nearest_neighbours

In [36]:

arya_stark_neighbors_cex = find_nearest_neighbours(model_cex, entities=['Arya Stark'], n_neighbors=6)[0][0][1:]
arya_stark_neighbors_te = find_nearest_neighbours(model_te, entities=['Arya Stark'], n_neighbors=6)[0][0][1:]
arya_stark_neighbors_dm = find_nearest_neighbours(model_dm, entities=['Arya Stark'], n_neighbors=6)[0][0][1:]
print("CEX ARYA NEIGHBORS", arya_stark_neighbors_cex)
print("TE ARYA NEIGHBORS", arya_stark_neighbors_te)
print("DM ARYA NEIGHBORS", arya_stark_neighbors_dm)

CEX ARYA NEIGHBORS ['Edrick Stark' 'Desmond' 'Gariss' 'Poxy Tym' 'Edwyn Stark']
TE ARYA NEIGHBORS ['Hayhead' 'Jonos Stark' 'TomToo' 'Quent' 'Edrick Stark']
DM ARYA NEIGHBORS ['Desmond' 'Edrick Stark' 'Poxy Tym' 'Kyra' 'Gariss']
