# Partie 1: Data Preparation and Exploration

### Exercice 1: Loading the Turtle File

In [23]:
from rdflib import Graph, URIRef
from collections import defaultdict

# Charger le fichier Turtle
human_file = "data/human.ttl"
graph = Graph()
graph.parse(human_file, format="turtle")

# Question 1: How many unique entities and relations are in human.ttl?
entities = set()
relations = set()

for sujet, predicat, objet in graph:
    entities.add(sujet)
    entities.add(objet)
    relations.add(predicat)

print(f"Nombre d'entités uniques : {len(entities)}")
print(f"Nombre de relations uniques : {len(relations)}")

# Question 2: What is the size of the graph in terms of the number of triples?
num_triples = len(graph)
print(f"Nombre de triples dans le graphe : {num_triples}")

# Question 3: Identify some entities and relationships that could illustrate interesting link cases
entity_relations = defaultdict(set)
for sujet, predicat, objet in graph:
    if isinstance(sujet, URIRef) and isinstance(objet, URIRef):
        entity_relations[sujet].add(predicat)

print("\nExemples d'entités et leurs relations :")
for entity, rels in list(entity_relations.items())[:5]:  # Affiche les 5 premières entités pour l'exemple
    print(f"Entité: {entity}")
    for rel in rels:
        print(f"  Relation: {rel}")



Nombre d'entités uniques : 65
Nombre de relations uniques : 15
Nombre de triples dans le graphe : 95

Exemples d'entités et leurs relations :
Entité: http://www.inria.fr/human/data#John
  Relation: http://www.inria.fr/human#hasParent
  Relation: http://www.w3.org/1999/02/22-rdf-syntax-ns#type
Entité: http://www.inria.fr/human/data#Eve
  Relation: http://www.inria.fr/human#hasSpouse
  Relation: http://www.w3.org/1999/02/22-rdf-syntax-ns#type
  Relation: http://www.inria.fr/human#hasFriend
Entité: http://www.inria.fr/human/data#Jennifer
  Relation: http://www.inria.fr/human#hasSpouse
  Relation: http://www.w3.org/1999/02/22-rdf-syntax-ns#type
Entité: http://www.inria.fr/human/data#Karl
  Relation: http://www.inria.fr/human#hasSpouse
  Relation: http://www.w3.org/1999/02/22-rdf-syntax-ns#type
  Relation: http://www.inria.fr/human#hasFriend
Entité: http://www.inria.fr/human/data#Gaston
  Relation: http://www.inria.fr/human#hasChild
  Relation: http://www.w3.org/1999/02/22-rdf-syntax-ns#typ

### Exercice 2: Preparing Data for Learning

In [24]:
from rdflib import Graph
import pandas as pd
import random
from sklearn.model_selection import train_test_split
import numpy as np


triples = list(graph)

# Split triples into training and test sets
train_triples, test_triples = train_test_split(
    triples, test_size=0.2, random_state=42
)

# Generate negative samples
def generate_negative_samples(positive_triples, num_neg_samples=1):
    # Extract unique entities from the graph
    entities = set()
    for sujet, predicat, objet in graph:
        if isinstance(sujet, URIRef) and isinstance(objet, URIRef):
            entities.add(sujet)
            entities.add(objet)
    
    negative_samples = []
    entities_list = list(entities)
    
    for s, p, o in positive_triples:
        for _ in range(num_neg_samples):
            # Randomly decide to corrupt subject or object
            corrupt_subject = np.random.random() < 0.5
            
            if corrupt_subject:
                # Replace subject with a random entity
                neg_s = np.random.choice(entities_list)
                neg_sample = (neg_s, p, o)
            else:
                # Replace object with a random entity
                neg_o = np.random.choice(entities_list)
                neg_sample = (s, p, neg_o)
            
            # Ensure negative sample is not in original graph
            while neg_sample in graph:
                if corrupt_subject:
                    neg_s = np.random.choice(entities_list)
                    neg_sample = (neg_s, p, o)
                else:
                    neg_o = np.random.choice(entities_list)
                    neg_sample = (s, p, neg_o)
            
            negative_samples.append(neg_sample)
    
    return negative_samples

# Generate negative samples for training set
train_negative_samples = generate_negative_samples(train_triples)

# Print some information about the data preparation
print("\nData Preparation Summary:")
print(f"Total triples: {len(triples)}")
print(f"Training triples: {len(train_triples)}")
print(f"Test triples: {len(test_triples)}")
print(f"Negative samples generated: {len(train_negative_samples)}")


Data Preparation Summary:
Total triples: 95
Training triples: 76
Test triples: 19
Negative samples generated: 76


# Partie 2: Implementation of Embedding Models

In [25]:
from rdflib import Graph, URIRef
import pandas as pd
import random
import torch
from sklearn.model_selection import train_test_split
import numpy as np
from pykeen.pipeline import pipeline
from pykeen.models import TransE, DistMult, ComplEx, RotatE
from pykeen.triples import TriplesFactory

# Load the Turtle files
graph = Graph()
graph.parse("data/human.ttl", format="turtle")
graph.parse("data/humanrdfs.ttl", format="turtle")

# Extract triples
triples = [(str(s), str(p), str(o)) for s, p, o in graph]

# Convert triples to a NumPy array
triples_array = np.array(triples, dtype=object)

# Split triples into training and test sets
train_triples, test_triples = train_test_split(
    triples_array, test_size=0.2, random_state=42
)

# Create TriplesFactory for training and testing
training = TriplesFactory.from_labeled_triples(train_triples)
testing = TriplesFactory.from_labeled_triples(test_triples, entity_to_id=training.entity_to_id, relation_to_id=training.relation_to_id)

# Function to train and evaluate a model
def train_and_evaluate_model(model_class, training, testing, embedding_dim=200):
    result = pipeline(
        model=model_class,
        training=training,
        testing=testing,
        model_kwargs=dict(embedding_dim=embedding_dim),
        training_kwargs=dict(num_epochs=50),
        evaluation_kwargs=dict(use_tqdm=False),
    )
    print(f"\nResults for {model_class.__name__} with embedding_dim={embedding_dim}:")
    metric_results_df = result.metric_results.to_df()
    print(metric_results_df)
    return metric_results_df

# Train and evaluate TransE
transE_results = train_and_evaluate_model(TransE, training, testing)

# Train and evaluate DistMult
distMult_results = train_and_evaluate_model(DistMult, training, testing)

# Train and evaluate ComplEx
complEx_results = train_and_evaluate_model(ComplEx, training, testing)

# Train and evaluate RotatE
rotatE_results = train_and_evaluate_model(RotatE, training, testing)

# Inspect available metrics for TransE
available_metrics = transE_results['Metric'].unique()
print("\nAvailable metrics for TransE:", available_metrics)

# Choose a valid metric for tuning (e.g., 'hits_at_10')
target_metric = 'hits_at_10'  # Specify the target metric

# Tune embedding dimensions for TransE as an example
best_score = 0
best_embedding_dim = 0

for embedding_dim in [100, 200, 300]:
    result = pipeline(
        model=TransE,
        training=training,
        testing=testing,
        model_kwargs=dict(embedding_dim=embedding_dim),
        training_kwargs=dict(num_epochs=50),
        evaluation_kwargs=dict(use_tqdm=False),
    )
    result_df = result.metric_results.to_df()
    print(f"\nResults for TransE with embedding_dim={embedding_dim}:")
    print(result_df)
    if target_metric in result_df['Metric'].values:
        mrr_score = result_df[result_df['Metric'] == target_metric]['Value'].values[0]  # Access the metric score correctly
        if mrr_score > best_score:
            best_score = mrr_score
            best_embedding_dim = embedding_dim
    else:
        print(f"{target_metric} not found in results for embedding_dim={embedding_dim}")

print(f"\nBest TransE {target_metric}: {best_score} with embedding_dim={best_embedding_dim}")

INFO:pykeen.pipeline.api:Using device: None
Training epochs on cpu: 100%|██████████| 50/50 [00:05<00:00,  9.60epoch/s, loss=0.0456, prev_loss=0.0864]
INFO:pykeen.evaluation.evaluator:Evaluation took 0.02s seconds
INFO:pykeen.pipeline.api:Using device: None



Results for TransE with embedding_dim=200:
     Side    Rank_type              Metric      Value
0    head   optimistic  standard_deviation  40.943986
1    tail   optimistic  standard_deviation  42.768534
2    both   optimistic  standard_deviation  45.424876
3    head    realistic  standard_deviation  40.943985
4    tail    realistic  standard_deviation  42.768532
..    ...          ...                 ...        ...
220  tail    realistic  adjusted_hits_at_k   0.089906
221  both    realistic  adjusted_hits_at_k   0.008391
222  head  pessimistic  adjusted_hits_at_k  -0.073323
223  tail  pessimistic  adjusted_hits_at_k   0.089906
224  both  pessimistic  adjusted_hits_at_k   0.008391

[225 rows x 4 columns]


Training epochs on cpu: 100%|██████████| 50/50 [00:05<00:00,  9.64epoch/s, loss=0.985, prev_loss=0.985]
INFO:pykeen.evaluation.evaluator:Evaluation took 0.02s seconds
INFO:pykeen.pipeline.api:Using device: None



Results for DistMult with embedding_dim=200:
     Side    Rank_type              Metric      Value
0    head   optimistic  standard_deviation  39.355146
1    tail   optimistic  standard_deviation  43.637598
2    both   optimistic  standard_deviation  41.564220
3    head    realistic  standard_deviation  39.355145
4    tail    realistic  standard_deviation  43.637600
..    ...          ...                 ...        ...
220  tail    realistic  adjusted_hits_at_k  -0.017164
221  both    realistic  adjusted_hits_at_k  -0.018409
222  head  pessimistic  adjusted_hits_at_k  -0.019657
223  tail  pessimistic  adjusted_hits_at_k  -0.017164
224  both  pessimistic  adjusted_hits_at_k  -0.018409

[225 rows x 4 columns]


Training epochs on cpu: 100%|██████████| 50/50 [00:05<00:00,  9.19epoch/s, loss=9.08, prev_loss=7.93]
INFO:pykeen.evaluation.evaluator:Evaluation took 0.02s seconds
INFO:pykeen.pipeline.api:Using device: None



Results for ComplEx with embedding_dim=200:
     Side    Rank_type              Metric      Value
0    head   optimistic  standard_deviation  42.345572
1    tail   optimistic  standard_deviation  43.568194
2    both   optimistic  standard_deviation  43.012527
3    head    realistic  standard_deviation  42.345570
4    tail    realistic  standard_deviation  43.568195
..    ...          ...                 ...        ...
220  tail    realistic  adjusted_hits_at_k   0.089906
221  both    realistic  adjusted_hits_at_k   0.115592
222  head  pessimistic  adjusted_hits_at_k   0.141342
223  tail  pessimistic  adjusted_hits_at_k   0.089906
224  both  pessimistic  adjusted_hits_at_k   0.115592

[225 rows x 4 columns]


Training epochs on cpu: 100%|██████████| 50/50 [00:05<00:00,  9.49epoch/s, loss=0.462, prev_loss=0.448]
INFO:pykeen.evaluation.evaluator:Evaluation took 0.02s seconds
INFO:pykeen.pipeline.api:Using device: None



Results for RotatE with embedding_dim=200:
     Side    Rank_type              Metric      Value
0    head   optimistic  standard_deviation  10.256705
1    tail   optimistic  standard_deviation  21.230815
2    both   optimistic  standard_deviation  16.795070
3    head    realistic  standard_deviation  10.256705
4    tail    realistic  standard_deviation  21.230814
..    ...          ...                 ...        ...
220  tail    realistic  adjusted_hits_at_k   0.518186
221  both    realistic  adjusted_hits_at_k   0.329994
222  head  pessimistic  adjusted_hits_at_k   0.141342
223  tail  pessimistic  adjusted_hits_at_k   0.518186
224  both  pessimistic  adjusted_hits_at_k   0.329994

[225 rows x 4 columns]

Available metrics for TransE: ['standard_deviation' 'harmonic_mean_rank' 'z_arithmetic_mean_rank'
 'inverse_median_rank' 'variance' 'inverse_harmonic_mean_rank'
 'inverse_arithmetic_mean_rank' 'median_absolute_deviation'
 'z_inverse_harmonic_mean_rank' 'adjusted_geometric_mean_rank_

Training epochs on cpu: 100%|██████████| 50/50 [00:05<00:00,  9.92epoch/s, loss=0.108, prev_loss=0.186]
INFO:pykeen.evaluation.evaluator:Evaluation took 0.02s seconds
INFO:pykeen.pipeline.api:Using device: None



Results for TransE with embedding_dim=100:
     Side    Rank_type              Metric      Value
0    head   optimistic  standard_deviation  40.270678
1    tail   optimistic  standard_deviation  40.962910
2    both   optimistic  standard_deviation  41.663766
3    head    realistic  standard_deviation  40.270679
4    tail    realistic  standard_deviation  40.962910
..    ...          ...                 ...        ...
220  tail    realistic  adjusted_hits_at_k   0.036371
221  both    realistic  adjusted_hits_at_k   0.008391
222  head  pessimistic  adjusted_hits_at_k  -0.019657
223  tail  pessimistic  adjusted_hits_at_k   0.036371
224  both  pessimistic  adjusted_hits_at_k   0.008391

[225 rows x 4 columns]


Training epochs on cpu: 100%|██████████| 50/50 [00:05<00:00,  9.76epoch/s, loss=0.0227, prev_loss=0.105]  
INFO:pykeen.evaluation.evaluator:Evaluation took 0.02s seconds
INFO:pykeen.pipeline.api:Using device: None



Results for TransE with embedding_dim=200:
     Side    Rank_type              Metric      Value
0    head   optimistic  standard_deviation  40.776096
1    tail   optimistic  standard_deviation  50.628154
2    both   optimistic  standard_deviation  48.541838
3    head    realistic  standard_deviation  40.776096
4    tail    realistic  standard_deviation  50.628155
..    ...          ...                 ...        ...
220  tail    realistic  adjusted_hits_at_k   0.089906
221  both    realistic  adjusted_hits_at_k   0.035192
222  head  pessimistic  adjusted_hits_at_k  -0.019657
223  tail  pessimistic  adjusted_hits_at_k   0.089906
224  both  pessimistic  adjusted_hits_at_k   0.035192

[225 rows x 4 columns]


Training epochs on cpu: 100%|██████████| 50/50 [00:05<00:00,  9.60epoch/s, loss=0.0544, prev_loss=0.0966]
INFO:pykeen.evaluation.evaluator:Evaluation took 0.02s seconds



Results for TransE with embedding_dim=300:
     Side    Rank_type              Metric      Value
0    head   optimistic  standard_deviation  42.433801
1    tail   optimistic  standard_deviation  35.504084
2    both   optimistic  standard_deviation  44.390701
3    head    realistic  standard_deviation  42.433804
4    tail    realistic  standard_deviation  35.504082
..    ...          ...                 ...        ...
220  tail    realistic  adjusted_hits_at_k   0.250511
221  both    realistic  adjusted_hits_at_k   0.088792
222  head  pessimistic  adjusted_hits_at_k  -0.073323
223  tail  pessimistic  adjusted_hits_at_k   0.250511
224  both  pessimistic  adjusted_hits_at_k   0.088792

[225 rows x 4 columns]

Best TransE hits_at_10: 0.05 with embedding_dim=100


#### TransE vs. ComplEx :

TransE :

Représentations des Entités : TransE représente les entités et les relations comme des vecteurs dans un espace vectoriel continu.\
Il utilise une opération de traduction simple pour prédire les liens manquants : ( h + r ≈ t ).  
Performance : D'après les résultats, TransE a un rang moyen arithmétique relativement élevé et des scores plus faibles en hits@10 comparés à ComplEx.\
Cela suggère que TransE a du mal à capturer des relations complexes entre les entités.  
\
ComplEx :

Représentations des Entités : ComplEx étend TransE en utilisant des embeddings à valeurs complexes.\
Il représente les entités et les relations comme des vecteurs complexes, ce qui lui permet de capturer des interactions et des relations plus complexes.  
Performance : ComplEx a des rangs moyens arithmétiques significativement plus bas et des scores hits@10 plus élevés comparés à TransE.\
Cela indique que ComplEx est plus performant pour capturer des relations complexes et fournir des prédictions plus précises.

In [None]:
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import seaborn as sns

# Create a mapping from entity to category based on their relationships
entity_to_category = defaultdict(list)
for subj, pred, obj in graph:
    entity_to_category[str(subj)].append(str(pred))
    entity_to_category[str(obj)].append(str(pred))

# For simplicity, assign the most common relation as the category
entity_category = {entity: max(set(rels), key=rels.count) for entity, rels in entity_to_category.items()}

# Extract entity embeddings
entity_embeddings = result.model.entity_representations[0]().cpu().detach().numpy()
entity_ids = list(result.training.entity_to_id.keys())

# Reduce dimensions to 2D using t-SNE
tsne = TSNE(n_components=2, random_state=42)
embeddings_2d = tsne.fit_transform(entity_embeddings)

# Prepare colors for each category
categories = list(set(entity_category.values()))
palette = sns.color_palette('hsv', len(categories))
color_map = {category: palette[i] for i, category in enumerate(categories)}

# Assign colors to entities based on their category
colors = [color_map.get(entity_category.get(entity_id, 'Other'), (0, 0, 0)) for entity_id in entity_ids]

# Plot the embeddings with colors
plt.figure(figsize=(12, 12))
plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], c=colors, alpha=0.6)

# Optional: Add legend (may be crowded if many categories)
from matplotlib.lines import Line2D
legend_elements = [Line2D([0], [0], marker='o', color='w', label=category,
                          markerfacecolor=color_map[category], markersize=10)
                   for category in categories]
plt.legend(handles=legend_elements, bbox_to_anchor=(1.05, 1), loc='upper left')

plt.title('Entity Embeddings Visualized with t-SNE')
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.show()

In [None]:
from sklearn.decomposition import PCA

# Reduce dimensions to 2D using PCA
pca = PCA(n_components=2)
embeddings_2d = pca.fit_transform(entity_embeddings)

# Plot the embeddings with colors
plt.figure(figsize=(12, 12))
plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], c=colors, alpha=0.6)

# Optional: Add legend (may be crowded if many categories)
from matplotlib.lines import Line2D
legend_elements = [Line2D([0], [0], marker='o', color='w', label=category,
                          markerfacecolor=color_map[category], markersize=10)
                   for category in categories]
plt.legend(handles=legend_elements, bbox_to_anchor=(1.05, 1), loc='upper left')

plt.title('Entity Embeddings Visualized with PCA')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()

1. Are entities with similar relationships clustered in embedding space?
    - Non, les entités avec des relations similaires ne sont pas nécessairement regroupées dans l'espace d'embedding. Que ce soit avec l'analyse PCA ou t-SNE. 

In [None]:
from sklearn.cluster import KMeans

# Determine the number of clusters (you might set it equal to the number of categories)
num_clusters = len(categories)

# Perform K-Means clustering
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
cluster_labels = kmeans.fit_predict(embeddings_2d)

# Plot the clustered embeddings
plt.figure(figsize=(12, 12))
scatter = plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], c=cluster_labels, cmap='tab10', alpha=0.6)

# Add legend for clusters
plt.legend(*scatter.legend_elements(), title="Clusters")
plt.title('Entity Embeddings Clusters Visualized with PCA')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score

# Encode actual categories
label_encoder = LabelEncoder()
true_labels = label_encoder.fit_transform([entity_category.get(entity_id, 'Other') for entity_id in entity_ids])

# Evaluate clustering performance
homogeneity = homogeneity_score(true_labels, cluster_labels)
completeness = completeness_score(true_labels, cluster_labels)
v_measure = v_measure_score(true_labels, cluster_labels)

print(f"Homogeneity Score: {homogeneity:.2f}")
print(f"Completeness Score: {completeness:.2f}")
print(f"V-Measure Score: {v_measure:.2f}")

2. Identify and analyze clusters of entities. Do these clusters correspond to coherent entity
types?

# PARTIE 3: Model Evaluation and Result Analysis

### Exercice 1: Evaluation Metrics

In [None]:
def explain_evaluation_metrics():
    """
    Detailed explanation of link prediction evaluation metrics
    """
    metrics_explanation = {
        "Mean Rank": {
            "Définition": "Rang moyen de l'entité correcte parmi toutes les entités possibles lors de la prédiction d'un lien manquant.",
            "Interprétation": "Des valeurs plus faibles indiquent une meilleure performance du modèle. Un rang de 1 signifie que le modèle prédit parfaitement l'entité correcte.",
            "Calcul": "Pour chaque triple test, classez les entités candidates et calculez la moyenne de ces classements pour tous les triples tests."
        },
        "Mean Reciprocal Rank (MRR)": {
            "Définition": "Moyenne des rangs réciproques des entités correctes.",
            "Interprétation": "Des valeurs élevées indiquent une meilleure performance. Les valeurs sont comprises entre 0 et 1, la valeur 1 correspondant à une prédiction parfaite.",
            "Calcul": "1 / rang pour chaque triple test, puis moyenne sur l'ensemble des triples tests."
        },
        "Hits@K": {
            "Définition": "Proportion de triplets de test où l'entité correcte est classée dans les K premières positions.",
            "Interprétation": "Des valeurs élevées indiquent une meilleure performance. Mesuré pour différentes valeurs de K (1, 3, 10).",
            "Calcul": "Pourcentage de triplets de test où l'entité correcte se trouve parmi les K premières entités prédites."
        }
    }
    
    return metrics_explanation

def comprehensive_model_evaluation(models_results):
    results_summary = []
    
    for name, result in models_results.items():
        metrics_df = result
        
        # Initialize default values
        model_metrics = {
            'model_name': name,
            'Mean Rank': None,
            'MRR': None,
            'Hits@1': None,
            'Hits@3': None,
            'Hits@10': None
        }
        
        # Safely get metrics with error handling
        def get_metric_value(metric_name):
            filtered = metrics_df[metrics_df['Metric'] == metric_name]['Value']
            return filtered.values[0] if not filtered.empty else None
        
        # Update metrics if they exist
        metric_mappings = {
            'arithmetic_mean_rank': 'Mean Rank',
            'mean_reciprocal_rank': 'MRR',
            'hits_at_1': 'Hits@1',
            'hits_at_3': 'Hits@3',
            'hits_at_10': 'Hits@10'
        }
        
        for metric_key, metric_name in metric_mappings.items():
            value = get_metric_value(metric_key)
            if value is not None:
                model_metrics[metric_name] = value
        
        results_summary.append(model_metrics)
    
    # Convert to DataFrame for easy comparison
    results_df = pd.DataFrame(results_summary)
    
    # Sort by different metrics to identify top performers
    print("Top Models by Different Metrics:")
    print("\nSorted by Mean Rank (Lower is Better):")
    print(results_df.sort_values('Mean Rank'))
    
    print("\nSorted by MRR (Higher is Better):")
    print(results_df.sort_values('MRR', ascending=False))
    
    print("\nSorted by Hits@10 (Higher is Better):")
    print(results_df.sort_values('Hits@10', ascending=False))
    
    return results_df

metrics_explanation = explain_evaluation_metrics()
for metric, details in metrics_explanation.items():
    print(f"\n{metric}:")
    for key, value in details.items():
        print(f"  {key}: {value}")

# Evaluate the models using comprehensive_model_evaluation with the train dataset

transE_results = train_and_evaluate_model(TransE, training, training)
distMult_results = train_and_evaluate_model(DistMult, training, training)
complEx_results = train_and_evaluate_model(ComplEx, training, training)
rotatE_results = train_and_evaluate_model(RotatE, training, training)

models_results = {
    "TransE": transE_results,
    "DistMult": distMult_results,
    "ComplEx": complEx_results,
    "RotatE": rotatE_results
}

train_evaluation = comprehensive_model_evaluation(models_results)

# Evaluate the models using comprehensive_model_evaluation with the test dataset

transE_results = train_and_evaluate_model(TransE, training, testing)
distMult_results = train_and_evaluate_model(DistMult, training, testing)
complEx_results = train_and_evaluate_model(ComplEx, training, testing)
rotatE_results = train_and_evaluate_model(RotatE, training, testing)

models_results = {
    "TransE": transE_results,
    "DistMult": distMult_results,
    "ComplEx": complEx_results,
    "RotatE": rotatE_results
}

test_evaluation = comprehensive_model_evaluation(models_results)

1. Define each metric and explain how they assess link prediction quality.

    <b>Mean Rank</b>:  
        Rang moyen de l'entité correcte parmi toutes les entités possibles lors de la prédiction d'un lien manquant.  
        Interprétation: Des valeurs plus faibles indiquent une meilleure performance du modèle. Un rang de 1 signifie que le modèle prédit parfaitement l'entité correcte.  
        Calcul: Pour chaque triple test, classez les entités candidates et calculez la moyenne de ces classements pour tous les triples tests.
    
    <b>Mean Reciprocal Rank (MRR)</b>:  
        Moyenne des rangs réciproques des entités correctes.  
        Interprétation: Des valeurs élevées indiquent une meilleure performance. Les valeurs sont comprises entre 0 et 1, la valeur 1 correspondant à une prédiction parfaite.  
        Calcul: 1 / rang pour chaque triple test, puis moyenne sur l'ensemble des triples tests.
    
    <b>Hits@K</b>:  
        Proportion de triplets de test où l'entité correcte est classée dans les K premières positions.  
        Interprétation: Des valeurs élevées indiquent une meilleure performance. Mesuré pour différentes valeurs de K (1, 3, 10).   
        Calcul: Pourcentage de triplets de test où l'entité correcte se trouve parmi les K premières entités prédites.
    

2. Interpret the results for each model and identify the top-performing models for this dataset
((a) with the train dataset, (b) with the test dataset).

In [30]:
print("\nEvaluation Metrics Comparison:")
print("\nTrain Dataset:")
print(train_evaluation)
print("\nTest Dataset:")
print(test_evaluation)


Evaluation Metrics Comparison:

Train Dataset:
  model_name  Mean Rank   MRR    Hits@1    Hits@3   Hits@10
0     TransE   2.855670  None  0.000000  0.845361  0.979381
1   DistMult   1.061856  None  0.963918  0.994845  1.000000
2    ComplEx  52.541237  None  0.010309  0.030928  0.092784
3     RotatE   1.603093  None  0.798969  0.917526  0.989691

Test Dataset:
  model_name  Mean Rank   MRR  Hits@1  Hits@3  Hits@10
0     TransE      60.00  None     0.0    0.00      0.1
1   DistMult      73.60  None     0.0    0.05      0.1
2    ComplEx      85.15  None     0.0    0.00      0.0
3     RotatE      19.80  None     0.1    0.10      0.2


- DistMult obtient des résultats exceptionnels sur l'ensemble d'apprentissage pour toutes les mesures, mais ne se généralise pas bien à l'ensemble de test.
- RotatE présente un bon équilibre entre les performances d'entraînement et de test, avec les meilleurs résultats de test pour toutes les mesures.
- TransE présente des performances raisonnables sur l'ensemble d'apprentissage, mais une baisse significative des performances sur l'ensemble de test.
- ComplEx obtient des résultats médiocres à la fois sur l'ensemble d'entraînement et sur l'ensemble de test.
  
Dans l'ensemble, RotatE semble être le meilleur modèle en termes de généralisation à l'ensemble de test, tandis que DistMult est le meilleur sur l'ensemble d'apprentissage, mais overfits.

### Exercice 2: Comparative Analysis
