# Importing Packages

In [1]:
!pip install -r requirements.txt

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import ampligraph
import pickle
from ampligraph.evaluation import evaluate_performance
from ampligraph.evaluation import train_test_split_no_unseen 
from ampligraph.utils import save_model
from ampligraph.utils import restore_model
from ampligraph.datasets import load_from_csv
from ampligraph.latent_features import ComplEx
# from google.colab import files
# from google.colab import drive
#drive.mount('/content/drive')

# Collect the environment report line by line (evaluated in the same order as
# they are shown) and emit it with a single print call.
environment_report = [
    f'• Tensorflow version: {tf.__version__}',
    f'• GPU device name: {tf.test.gpu_device_name()}',
    f'• Num GPUs Available: {len(tf.config.experimental.list_physical_devices("GPU"))}',
    f'• Ampligraph version: {ampligraph.__version__}',
]
print('\n'.join(environment_report))

• Tensorflow version: 1.15.3
• GPU device name: 
• Num GPUs Available: 0
• Ampligraph version: 1.3.2


In [None]:
# Mount Google Drive when the notebook runs on Colab; elsewhere this cell is a no-op.
# The import is done here because the `google.colab` import in the first code cell
# is commented out, which leaves `drive` undefined on a fresh kernel.
try:
    from google.colab import drive
except ImportError:
    drive = None  # not running on Colab: there is nothing to mount

if drive is not None:
    drive.mount('/content/drive')

In [3]:
def pickle_out(name, obj, default_path='./sub_result/'):
    """Serialize ``obj`` with pickle to ``default_path + name + '.pickle'``.

    Parameters
    ----------
    name : str
        File name without the ``.pickle`` extension.
    obj : object
        Any picklable object.
    default_path : str
        Prefix prepended verbatim to ``name`` (plain string concatenation, so a
        directory prefix must end with a path separator).

    Returns
    -------
    None

    Notes
    -----
    The parent directory of the target file is created when it does not exist,
    and the file handle is closed even if ``pickle.dump`` raises.
    """
    import os  # local import: `os` is not among the notebook-level imports

    target = default_path + name + '.pickle'

    # Create the destination folder up front so a missing directory cannot
    # make the save fail after an expensive computation.
    parent_dir = os.path.dirname(target)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(target, 'wb') as sink:
        pickle.dump(obj, sink)
    return

def pickle_in(name, default_path='./sub_result/'):
    """Load and return the object stored in ``default_path + name + '.pickle'``.

    Parameters
    ----------
    name : str
        File name without the ``.pickle`` extension.
    default_path : str
        Prefix prepended verbatim to ``name`` (plain string concatenation, so a
        directory prefix must end with a path separator).

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Raises
    ------
    FileNotFoundError
        If the file does not exist.
    """
    # SECURITY: unpickling can execute arbitrary code; only load files that
    # were produced by this notebook or another trusted source.
    # The context manager closes the handle even when pickle.load raises.
    with open(default_path + name + '.pickle', 'rb') as source:
        return pickle.load(source)

# Importing the dataset

In [6]:
# Triples file read from the working directory (tab-separated).
X = load_from_csv('.', 'kg_triples_freesound.txt', sep='\t')

# Train/test triples read from the Drive folder (tab-separated).
# NOTE(review): the split cell further down also assigns X_train / X_test, so
# whichever of the two cells runs last decides which split is used - confirm
# which one is intended.
drive_dir = './drive/My Drive'
X_train, X_test = (
    load_from_csv(drive_dir, split_file, sep='\t')
    for split_file in ('train_fs_re.txt', 'test_fs_re.txt')
)

# Loading the model

In [None]:
#model = restore_model(model_name_path = 'complex_model_opt.pkl')
#model = restore_model(model_name_path = 'complex_model_opt_mll.pkl')

In [None]:
# Distinct values appearing in the first or third column of X.
# np.unique flattens its input, so selecting both columns at once yields the
# same sorted set as concatenating them.
entities = np.unique(X[:, [0, 2]])
entities

In [None]:
# Distinct values of the middle (second) column of X.
predicate_column = X[:, 1]
relations = np.unique(predicate_column)
relations

# Defining train and test datasets

In [5]:
# Hold out 20% of the triples (rounded down) as the test set.
test_triples_count = int(len(X) * .2)
X_train, X_test = train_test_split_no_unseen(
    X,
    test_size=test_triples_count,
    seed=0,
    allow_duplication=False,
)

#### Saving train/test set

In [None]:
# Persist the training triples, one "subject<TAB>predicate<TAB>object" line each.
# The encoding is pinned to UTF-8 so the written file does not depend on the
# platform's locale default.
with open('./train.txt', 'w', encoding='utf-8') as train_file:
    train_file.writelines('\t'.join((s, p, o)) + '\n' for (s, p, o) in X_train)

In [None]:
# Persist the test triples, one "subject<TAB>predicate<TAB>object" line each.
# The encoding is pinned to UTF-8 so the written file does not depend on the
# platform's locale default.
with open('./test.txt', 'w', encoding='utf-8') as test_file:
    test_file.writelines('\t'.join((s, p, o)) + '\n' for (s, p, o) in X_test)

In [None]:
# Report the array shape of each split.
for label, split in (('Train set size: ', X_train), ('Test set size: ', X_test)):
    print(label, split.shape)

# Training with tuned params

In [7]:
EmbeddingMethod = ComplEx
positives_filter = X

# Hyperparameters handed to the embedding model constructor.
hyperparams = dict(
    batches_count=40,
    seed=0,
    epochs=1000,
    k=350,
    eta=5,
    optimizer='adam',
    optimizer_params={'lr': 1e-5},
    loss='multiclass_nll',
    regularizer='LP',
    regularizer_params={'p': 3, 'lambda': 1e-7},
    verbose=False,
)
model = EmbeddingMethod(**hyperparams)

# Only show TensorFlow log messages at ERROR level while fitting.
tf.logging.set_verbosity(tf.logging.ERROR)
with tf.device('/device:GPU:0'):
    model.fit(X_train, early_stopping=False)

# Store the fitted model on disk.
save_model(model, model_name_path='complex_model_opt_fs.pkl')
#files.download('complex_model_opt.pkl') 

# Evaluation

In [8]:
# Third-column values of the test triples whose middle column is 'downloaded'.
downloaded_mask = X_test[:, 1] == 'downloaded'
entities_subset = np.unique(X_test[downloaded_mask, 2])
positives_filter = X

# NOTE(review): entities_subset is built only from the 'downloaded' triples but
# is passed together with corrupt_side='s,o' for every test triple - confirm
# this is the intended evaluation protocol.
tf.logging.set_verbosity(tf.logging.ERROR)
with tf.device('/device:GPU:0'):
    ranks = evaluate_performance(
        X_test,
        model=model,
        entities_subset=entities_subset,
        filter_triples=positives_filter,
        corrupt_side='s,o',
        #use_default_protocol=True,
        verbose=True,
    )

# Keep the ranks on disk so the metrics can be recomputed without re-evaluating.
ranks_name = 'ranks_complex_opt_fs'
pickle_out(ranks_name, ranks)

In [9]:
# Reload the ranks pickled by the evaluation cell (default ./sub_result/ folder).
ranks = pickle_in(name='ranks_complex_opt_fs')

In [10]:
from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score

# Compute every metric first, then report them in a single pass.
mrr = mrr_score(ranks)
hits_10, hits_3, hits_1 = (
    hits_at_n_score(ranks, n=cutoff) for cutoff in (10, 3, 1)
)

# Each score is printed rounded to two decimals.
for template, score in (
    ("MRR: %.2f", mrr),
    ("Hits@10: %.2f", hits_10),
    ("Hits@3: %.2f", hits_3),
    ("Hits@1: %.2f", hits_1),
):
    print(template % (score))

MRR: 0.53
Hits@10: 0.53
Hits@3: 0.53
Hits@1: 0.52
