## Install

In [3]:
!pip install 'pykeen[tensorboard]'

Collecting pykeen[tensorboard]
  Downloading pykeen-1.10.2-py3-none-any.whl (703 kB)
[K     |████████████████████████████████| 703 kB 5.6 MB/s eta 0:00:01
[?25hCollecting dataclasses-json
  Downloading dataclasses_json-0.6.6-py3-none-any.whl (28 kB)
Collecting pystow>=0.4.3
  Downloading pystow-0.5.4-py3-none-any.whl (32 kB)
Collecting torch-ppr>=0.0.7
  Downloading torch_ppr-0.0.8-py3-none-any.whl (12 kB)
Collecting click-default-group
  Downloading click_default_group-1.2.4-py2.py3-none-any.whl (4.1 kB)
Collecting more-click
  Downloading more_click-0.1.2-py3-none-any.whl (6.7 kB)
Collecting docdata
  Downloading docdata-0.0.3-py3-none-any.whl (5.8 kB)
Collecting torch-max-mem>=0.1.1
  Downloading torch_max_mem-0.1.3-py3-none-any.whl (10 kB)
Collecting torch>=2.0
  Downloading torch-2.2.2-cp39-none-macosx_10_9_x86_64.whl (150.8 MB)
[K     |████████████████████████████████| 150.8 MB 14.4 MB/s eta 0:00:01
[?25hCollecting more-itertools
  Downloading more_itertools-10.2.0-py3-none-a

Installing collected packages: torch, Mako, typing-inspect, torch-max-mem, more-itertools, marshmallow, colorlog, alembic, torch-ppr, pystow, optuna, more-click, docdata, dataclasses-json, click-default-group, class-resolver, pykeen
  Attempting uninstall: torch
    Found existing installation: torch 1.12.1
    Uninstalling torch-1.12.1:
      Successfully uninstalled torch-1.12.1
Successfully installed Mako-1.3.5 alembic-1.13.1 class-resolver-0.4.3 click-default-group-1.2.4 colorlog-6.8.2 dataclasses-json-0.6.6 docdata-0.0.3 marshmallow-3.21.2 more-click-0.1.2 more-itertools-10.2.0 optuna-3.6.1 pykeen-1.10.2 pystow-0.5.4 torch-2.2.2 torch-max-mem-0.1.3 torch-ppr-0.0.8 typing-inspect-0.9.0


## Librerie

In [20]:
import json
import requests
import os
import pandas as pd
import numpy as np
import tarfile
import tqdm
#from pykeen.pipeline import pipeline
#from pykeen.predict import predict_all
#import torch
#from pykeen.evaluation import RankBasedEvaluator
#from pykeen.triples import TriplesFactory

IQS = False

## Moduli

In [22]:
def fetch_labels_in_batches(ids, batch_size=50):
    # Prepare batches
    batches = [ids[i:i + batch_size] for i in range(0, len(ids), batch_size)]
    
    # Dictionary to hold the results
    labels_dict = {}
    
    for batch in batches:
        # Join IDs with '|' to form a single string for the API request
        batch_ids = '|'.join(batch)
        url = f"https://www.wikidata.org/w/api.php?action=wbgetentities&ids={batch_ids}&format=json&languages=en&props=labels"
        response = requests.get(url)
        data = response.json()
        
        # Extract labels for each entity in the batch
        for wikidata_id in batch:
            try:
                label = data['entities'][wikidata_id]['labels']['en']['value']
                labels_dict[wikidata_id] = label
            except KeyError:
                labels_dict[wikidata_id] = 'Label not found'  # Handle missing labels

    return labels_dict

In [78]:
def fetch_related_items(subject_qid):
    endpoint_url = "https://query.wikidata.org/sparql"
    query = f"""
    SELECT ?item ?itemLabel WHERE {{
      ?item wdt:P31/wdt:P279* wd:{subject_qid}.
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
    }}
    LIMIT 10
    """

    headers = {"Accept": "application/json"}
    response = requests.get(endpoint_url, headers=headers, params={'query': query, 'format': 'json'})
    
    results = []
    if response.status_code == 200:
        data = response.json()
        results = [{'name': binding['itemLabel']['value'], 'qid': binding['item']['value'].split('/')[-1]}
                   for binding in data['results']['bindings']]

    return results

In [2]:
def download_file(url, destination):
    try:
        response = requests.get(url)
        if response.status_code == 200:
            with open(destination, 'wb') as file:
                file.write(response.content)
            print("Download completato con successo.")
        else:
            print(f"Errore durante il download del file. Codice di stato: {response.status_code}")
    except Exception as e:
        print(f"Si è verificato un errore: {e}")


def decompress_tar_gz(file_path, destination_folder):
    try:

        with tarfile.open(file_path, 'r:gz') as tar:
            if not os.path.exists(destination_folder):
              os.mkdir(destination_folder)

            tar.extractall(destination_folder)
        print("Decompressione completata con successo.")
        return True
    except Exception as e:
        print(f"Si è verificato un errore durante la decompressione: {e}")
        return False


def read_firstlines(file_path, nlines):
    try:
        with open(file_path, 'r') as file:
            lines = file.readlines()[:nlines]
            for line in lines:
                print(line.rstrip())
    except FileNotFoundError:
        print(f"Il file '{file_path}' non esiste.")
    except Exception as e:
        print(f"Si è verificato un errore durante la lettura del file: {e}")


def itemQualityScore(item2search):
    inference_url = 'https://api.wikimedia.org/service/lw/inference/v1/models/wikidatawiki-itemquality:predict'

    try:
        req = requests.post(f"https://www.wikidata.org/w/api.php?action=query&format=json&formatversion=2&prop=revisions|entityterms&titles={item2search}&origin=*")
        jsonReq = req.json()

        data = {"rev_id": jsonReq['query']['pages'][0]['revisions'][0]['revid'] }
        response = requests.post(inference_url, headers=headers, data=json.dumps(data))
        prob = response.json()
        prediction = prob['wikidatawiki']['scores'][str(data['rev_id'])]['itemquality']['score']['prediction']
        probabilityFromModel = prob['wikidatawiki']['scores'][str(data['rev_id'])]['itemquality']['score']['probability'][prediction]
    except Exception as e:
        prediction = "NotFound"
        probabilityFromModel = 0


    return prediction, probabilityFromModel


In [73]:
def check_link_to_entity(entity_ids, subject_entity_id, batch_size=50):
    endpoint_url = "https://query.wikidata.org/sparql"
    linked_entities = []

    # Convert numpy array to list if needed
    if isinstance(entity_ids, np.ndarray):
        entity_ids = entity_ids.tolist()

    # Helper function to generate formatted entity strings
    def format_entities(batch):
        return " ".join(f"wd:{entity}" for entity in batch)

    # Process in batches
    for i in range(0, len(entity_ids), batch_size):
        batch = entity_ids[i:i + batch_size]
        formatted_entities = format_entities(batch)

        # SPARQL query to check link to specified subject entity
        query = f"""
        SELECT DISTINCT ?item WHERE {{
          VALUES ?item {{{formatted_entities}}}
          ?item (wdt:P31/wdt:P279*)|wdt:P279* wd:{subject_entity_id}.
        }}
        """

        # Prepare headers for request
        headers = {"Accept": "application/json"}

        # Perform the request
        response = requests.get(endpoint_url, headers=headers, params={'query': query, 'format': 'json'})
        
        if response.status_code == 200:
            data = response.json()
            linked_entities.extend([result['item']['value'].split('/')[-1] for result in data['results']['bindings']])

    return linked_entities

In [148]:
def get_wikipedia_title(wikidata_id):
    """Fetch Wikipedia page title using Wikidata ID"""
    url = f"https://www.wikidata.org/w/api.php?action=wbgetentities&ids={wikidata_id}&props=sitelinks&sitefilter=enwiki&format=json"
    response = requests.get(url)
    data = response.json()
    title = data['entities'][wikidata_id]['sitelinks']['enwiki']['title']
    return title.replace(' ', '_')  # Replace spaces with underscores for the URL

def get_pageviews(title):
    """Fetch page views for a Wikipedia page title"""
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    url = f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/{title}/daily/20230101/20230131"
    response = requests.get(url, headers = {'User-Agent': user_agent})
    views = sum(day['views'] for day in response.json()['items'])
    return views

## Dowload file wikidata

### Triple codificate

In [3]:
url = "https://www.dropbox.com/s/6sbhm0rwo4l73jq/wikidata5m_transductive.tar.gz?dl=1"
destination = "wiki_tran.tar.gz"
download_file(url, destination)
decompress_tar_gz(destination, "./data")
os.remove("./wiki_tran.tar.gz")

Download completato con successo.
Decompressione completata con successo.


### Triple con sinonimi

In [4]:
url = "https://www.dropbox.com/s/7jp4ib8zo3i6m10/wikidata5m_text.txt.gz?dl=1"
# Sempre triple ma con i nomi
destination = "corpus.tar.gz"

download_file(url, destination)
decompress_tar_gz(destination, "./data")
os.remove("./corpus.tar.gz")

Download completato con successo.
Si è verificato un errore durante la decompressione: invalid header


### Alias delle entità

In [5]:
url = "https://www.dropbox.com/s/lnbhc8yuhit4wm5/wikidata5m_alias.tar.gz?dl=1"
destination = "alias.tar.gz"
download_file(url, destination)
decompress_tar_gz(destination, "./data")
os.remove("./alias.tar.gz")

Download completato con successo.
Decompressione completata con successo.


### Stampa delle prime righe dei file

In [6]:
for file in os.listdir("./data"):
    print(f"Prime righe di {file}")
    read_firstlines(f"./data/{file}", 3)
    print("\n")

Prime righe di wikidata5m_transductive_train.txt
Q29387131	P31	Q5
Q326660	P1412	Q652
Q7339549	P57	Q1365729


Prime righe di wikidata5m_relation.txt
P489	currency symbol description
P834	train depot	railway depot	depot	rail yard
P2629	BBFC rating	BBFC certificate


Prime righe di wikidata5m_transductive_valid.txt
Q3576734	P495	Q30
Q641724	P1412	Q1860
Q959357	P39	Q49476


Prime righe di wikidata5m_entity.txt
Q5196650	Cut Your Hair	cut your hair
Q912600	Straumur-Burðarás	Straumur	straumur–burðarás investment bank	straumur	Straumur-Burðarás Investment Bank	straumur-burðarás investment bank	straumur investment bank	straumur-burðarás fjárf.banki	Straumur-Burðarás Fjárf.banki	straumur-burðarás	Straumur Investment Bank	Straumur–Burðarás Investment Bank
Q47551	ditiano	tipciano	titiaen geovene	Tizzianello	Called, Titian Tiziano Vecelli Cavaliere	called titian veccellio	genannt Vecelli Titian	Veccelli Titian genoemd Titiaan	titiano da cadore	ttiziano	dit Le Titien Titianus Vecellio	Tiziano Vecell

## API wikidata

In [7]:
headers = {
    'Authorization': 'Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI1ZDEzYzQ4ZTg3YzQ4YzJhYWRmZGFlYzA1MjhkYTU0MSIsImp0aSI6IjgzZWE4MmVkMjI3ZDgzZDkwMzBlNGUyNzJiNTBmOTViYjhlODA3NDI4NTI5YjVhMzBlYjRkNTJmZDNhY2JkNTZmMmU4ZTZiNzI3ZDMxNTQ2IiwiaWF0IjoxNzEyODMzOTg0Ljk0NTI2NiwibmJmIjoxNzEyODMzOTg0Ljk0NTI3MSwiZXhwIjozMzI2OTc0Mjc4NC45NDM3NTIsInN1YiI6Ijc1NDA2OTAyIiwiaXNzIjoiaHR0cHM6Ly9tZXRhLndpa2ltZWRpYS5vcmciLCJyYXRlbGltaXQiOnsicmVxdWVzdHNfcGVyX3VuaXQiOjUwMDAsInVuaXQiOiJIT1VSIn0sInNjb3BlcyI6WyJiYXNpYyJdfQ.NnXfiaq4StcjEq9fqjVRUrJnn5m-eIDy6jQtLTwprH_huGrzm8Z6HduKUiE93a3zwQ3T3t2c8EV4VzDHWv-fPSfozhNVVhwzBhNI80qyMYJQb2ieSu0Jq0mobtY443ygnPDWhADV0QhMpgLpafUPA8QpjnfzmSpXzfWkiEj1oWnuMJ25cr4Y7jqTUjlLQ_wHHSS43DWX1ZURmC0tQb2fZJ0o1NdoiC5MTtcaEqB3t2zF_C1BNGB_jLcrqkYKMZwjDL4SSaph5vIVS9mpYJHaoPlkroKnsc66_75541eSrt5D5YHwkYNavNrvFYpDWVnzUCp5kfX5POaqMquHYEYIaTNJImVuY-2hXIvwkF4FG9rh6rt8Y7WY67Sf_wFhVN9LYqMZVlR3JnZzeI-wfPy_MZ2Wh-WeTb6FHJDow66hwBNX8ua5EfOEF3XOj1N4k5iwYSRSCW6Ko1we-MlExWRMRKFS9t1Rfaw1C8wJCWJRfm0TIZkl-1nP5ACr5w9d1n9S1qjp95HEMnb25aUG3N1q0zwg9nS7xoAqWHXoojoLg31ocSpvpe6Qb1lrolKJwI6GnSjJJAYG03tTejgTjLfBON4uJlEaHFi5rk867L5Hiz25HY3h-OlS5DabwMdH9hZnvipPPeslG9e4X3TgqtidVlPWSk3DSH3Fm_ZeHuZ-7EY',
    'Content-type': 'Content-type',
    'User-Agent': 'tumnus7@gmail.com'
}

### Test API prediction

In [8]:
item2search = "Q35610"

In [9]:
pred, prob = itemQualityScore(item2search)

In [10]:
print(pred, prob)

A 0.9308966801692856


## Data Loading e scrematura

In [91]:
#Set a min count to have a smaller dataset and avoid non-meaningful embedding
min_occ = 30

In [11]:
df_org_train = pd.read_csv('data/wikidata5m_transductive_train.txt', sep='\t', header=None, names=["head", "rel", "tail"])
df_org_valid = pd.read_csv('data/wikidata5m_transductive_valid.txt', sep='\t', header=None, names=["head", "rel", "tail"])
df_org_test = pd.read_csv('data/wikidata5m_transductive_test.txt', sep='\t', header=None, names=["head", "rel", "tail"])

In [95]:
(df_org_train.groupby("rel").count()['tail'] > min_occ).sum()

563

In [98]:
#Element in head more common than 'min_occ'
grouped_head = df_org_train.groupby("head").count().rel 
most_common_head = np.array(grouped_head[grouped_head > min_occ ].index)

#Element in tail more common than 'min_occ'
grouped_tail = df_org_train.groupby("tail").count().rel 
most_common_tail = np.array(grouped_tail[grouped_tail> min_occ ].index)

#Element in rel more common than 'min_occ'
grouped_rel = df_org_train.groupby("rel").count()['tail'] 
most_common_rel = np.array(grouped_rel[grouped_rel > min_occ ].index)

In [106]:
len(most_common_tail)

44434

In [101]:
df_org_train_reduced = df_org_train[(df_org_train["head"].isin(most_common_head)) & (df_org_train["rel"].isin(most_common_rel)) & (df_org_train["tail"].isin(most_common_tail))]
df_org_valid_reduced = df_org_valid[(df_org_valid["head"].isin(most_common_head)) & (df_org_valid["rel"].isin(most_common_rel)) & (df_org_valid["tail"].isin(most_common_tail))]
df_org_test_reduced = df_org_test[(df_org_test["head"].isin(most_common_head)) & (df_org_test["rel"].isin(most_common_rel)) & (df_org_test["tail"].isin(most_common_tail))]

In [102]:
df_org_train_reduced

Unnamed: 0,head,rel,tail
52,Q898840,P161,Q450109
62,Q2530270,P1346,Q317358
70,Q1867,P190,Q1475
79,Q107761,P921,Q9402
125,Q152824,P1412,Q1860
...,...,...,...
20614024,Q2155090,P31,Q11424
20614088,Q1064978,P1346,Q425821
20614149,Q259778,P123,Q94912
20614220,Q238866,P161,Q205707


In [162]:
# List of subjects
subjects_to_wikidata = {
    "Geography": "Q82794",
    "History": "Q309",
    "Mathematics": "Q395",
    "Literature": "Q7725634",
    "Computer Science": "Q21198",
    "Biology": "Q420",
    "Statistics": "Q12483",
    "Physics": "Q413",
    "Chemistry": "Q2329",
    "Medicine": "Q11190",
    "Economics": "Q8134",
    "Philosophy": "Q5891",
    "Psychology": "Q9418",
    "Music": "Q8255",
    "Cinema": "Q1458269",
    "Television": "Q15416",
    "Sport": "Q349"
}

In [151]:
unique_rel = np.unique(df_org_train_reduced.rel)
len(unique_rel)

335

In [152]:
unique_object = np.unique(list(df_org_train_reduced['head']) + list(df_org_train_reduced['tail']))
len(unique_object)

26347

In [153]:
rel_labels = fetch_labels_in_batches(unique_rel)
rel_labels

{'P1001': 'applies to jurisdiction',
 'P101': 'field of work',
 'P1018': 'language regulatory body',
 'P102': 'member of political party',
 'P1027': 'conferred by',
 'P103': 'native language',
 'P1038': 'relative',
 'P1040': 'film editor',
 'P1049': 'worshipped by',
 'P1050': 'medical condition',
 'P1056': 'product or material produced or service provided',
 'P106': 'occupation',
 'P1064': 'track gauge',
 'P1066': 'student of',
 'P1071': 'location of creation',
 'P1072': 'readable file format',
 'P1073': 'writable file format',
 'P1075': 'rector',
 'P108': 'employer',
 'P1080': 'from narrative universe',
 'P110': 'illustrator',
 'P112': 'founded by',
 'P113': 'airline hub',
 'P114': 'airline alliance',
 'P1142': 'political ideology',
 'P118': 'league',
 'P119': 'place of burial',
 'P1192': 'connecting service',
 'P1196': 'manner of death',
 'P121': 'item operated',
 'P122': 'basic form of government',
 'P123': 'publisher',
 'P126': 'maintained by',
 'P1269': 'facet of',
 'P127': 'owned

In [None]:
entity_labels = fetch_labels_in_batches(unique_object)
entity_labels

In [None]:
rel_dataframe = pd.DataFrame({
    "ID" : rel_labels.keys(),
    "name" : rel_labels.values(),
}).set_index('ID')
rel_dataframe.to_csv("common_relations.csv")
rel_dataframe

In [None]:
entity_dataframe = pd.DataFrame({
    "ID" : entity_labels.keys(),
    "name" : entity_labels.values(),
}).set_index('ID')
entity_dataframe

In [None]:
#for subject in subjects_to_wikidata:
#    # Fetch related items for Geography
#    related_items = fetch_related_items(subjects_to_wikidata[subject])
#    print(f"Related items to {subject}:")
#   for item in related_items:
#        print(f"{item['name']} (QID: {item['qid']})")

In [None]:
for subject in subjects_to_wikidata:
    linked_to_entity = check_link_to_entity(entity_dataframe.index, subjects_to_wikidata[subject], batch_size=50)
    print(subject, " : ", len(linked_to_entity))
    entity_dataframe[subject] = [1 if idx in linked_to_entity else 0 for idx in entity_dataframe.index]

In [None]:
entity_dataframe

In [None]:
for column in entity_dataframe.columns:
    if column != 'name':
        print(column, " : ", entity_dataframe[column].sum())

In [None]:
entity_dataframe.to_csv("common_entity_subjects.csv")

In [149]:
# Map Wikidata IDs to Wikipedia titles
titles = [get_wikipedia_title(wd_id) for wd_id in wikidata_ids]

# Get page views for each title
pageviews = {title: get_pageviews(title) for title in titles}

print(pageviews)

{'Douglas_Adams': 39271, 'Berlin': 149093, 'London': 485607}


In [16]:
wiki_train_path = 'data/wikidata5m_transductive_train_red.tsv'
df_org_train_reduced.to_csv(wiki_train_path, sep='\t', index=False)

wiki_validation_path = 'data/wikidata5m_transductive_valid_red.tsv'
#df_org_valid.to_csv(wiki_validation_path, sep='\t', index=False)

wiki_test_path = 'data/wikidata5m_transductive_test_red.tsv'
#df_org_test.to_csv(wiki_test_path, sep='\t', index=False)

### Aggiungo ItemQualityScore

In [17]:
if IQS:
  parole_Q = []
  pred_Q = []

  checkpoint_interval = 500
  contatore_iterazioni = 0

  for pathData in [wiki_train_path, wiki_validation_path, wiki_test_path]:
    with open(pathData, 'r') as file:
        for riga in tqdm.tqdm(file):
            item2search = riga.split()[0]
            pred, prob = itemQualityScore(item2search)
            if pred == 'A' or pred == 'B':
                parole_Q.append(item2search)
                pred_Q.append(pred)

            contatore_iterazioni += 1

            if contatore_iterazioni % checkpoint_interval == 0:
                df_checkpoint = pd.DataFrame({'Entity': parole_Q, 'Score': pred_Q})

                df_checkpoint.to_csv(f"./data/checkpoint_{contatore_iterazioni}_{pathData.split('/')[1].split('.')[0]}_entity_score.csv", index=False)

                parole_Q = []
                pred_Q = []

    if parole_Q:
        df_checkpoint = pd.DataFrame({'Entity': parole_Q, 'Score': pred_Q})
        df_checkpoint.to_csv(f"./data/checkpoint_{contatore_iterazioni}_{pathData.split('/')[1].split('.')[0]}_entity_score.csv", index=False)

    df_finale = pd.concat([pd.read_csv(f"checkpoint_{i * checkpoint_interval}_{pathData.split('/')[1].split('.')[0]}_entity_score.csv") for i in range(1, (contatore_iterazioni // checkpoint_interval) + 1)], ignore_index=True)
    df_finale.to_csv(f"./data/{pathData.split('/')[1].split('.')[0]}_entity_score.csv", index=False)

## Embedding

Spiegazione delle metriche [*qui*](https://docs.ampligraph.org/en/2.0.0/ampligraph.evaluation.html#metrics)

In [6]:
if 'df_org_train_reduced' not in globals():
    df_org_train_reduced = pd.read_csv('data/wikidata5m_transductive_train_red.tsv', sep='\t', header=None, names=["head", "rel", "tail"])

In [7]:
df4emb = df_org_train_reduced #pd.concat([df_org_train_reduced, df_org_valid, df_org_test], ignore_index=True)

triples_factory = TriplesFactory.from_labeled_triples(triples=df4emb[['head', 'rel', 'tail']].values)

training = triples_factory
validation = triples_factory
testing = triples_factory

d=training
id_to_entity={v: k for k, v in d.entity_to_id.items()}
id_to_relation={v: k for k, v in d.relation_to_id.items()}

triples_factory.triples



array([['Q100', 'P1376', 'Q771'],
       ['Q100', 'P17', 'Q30'],
       ['Q100', 'P190', 'Q1492'],
       ...,
       ['Q994', 'P190', 'Q656'],
       ['Q994', 'P30', 'Q46'],
       ['head', 'rel', 'tail']], dtype='<U9')

In [None]:
evaluator = RankBasedEvaluator()
emb_dict={}
emb_list = ["PairRE", "ConvE", "QuatE"]
df_metrics=pd.DataFrame(columns=["Model", "Hits@1", "Hits@3", "Hits@5", "Hits@10", "MRR"])
for emb in emb_list:
    print(f"\nStart with {emb}\n")
    try:
        result = pipeline(
            training=training,
            validation=validation,
            testing=testing,
            model=emb,
            device='gpu',
            random_seed=42,
            result_tracker='tensorboard',
            result_tracker_kwargs=dict(
                experiment_name=f'./log/wiki_{emb}',
            ),
        )
        result.save_to_directory(f'./evalModel/wiki_{emb}')
        metrics = evaluator.evaluate(result.model, testing.mapped_triples, additional_filter_triples=[training.mapped_triples, validation.mapped_triples])
        df_metrics['Model'] = emb
        df_metrics['Hits@1'] = metrics.get_metric('hits@1')
        df_metrics['Hits@3'] = metrics.get_metric('hits@3')
        df_metrics['Hits@5'] = metrics.get_metric('hits@5')
        df_metrics['Hits@10'] = metrics.get_metric('hits@10')
        df_metrics['MRR'] = metrics.get_metric('mean_reciprocal_rank')
        
        # Prediction
        pack = predict_all(model=result.model)
        prediction_all_triple = pack.process(factory=result.training)
        prediction_all_annotated = prediction_all_triple.add_membership_columns(training=result.training)

        df_metrics['MaxScore'] = prediction_all_annotated.df['score'].max()
        df_metrics['MinScore'] = prediction_all_annotated.df['score'].min()
        emb_dict[emb] = prediction_all_annotated
        
        triple_not_in_train = prediction_all_annotated.df[prediction_all_annotated.df['in_training']==False]

        triple_not_in_train.to_csv(f"./data/scored_predicted_triple_{emb}_notInTrain.csv", index=False)

    except Exception as e:
        print(f"Si è verificato un errore: {e}")
        continue