In [None]:
import torch
import pykeen
import pandas as pd
from pykeen import predict
from pykeen.pipeline import pipeline
from pykeen.hpo import hpo_pipeline
from pykeen.triples import TriplesFactory
import numpy as np
from pykeen.utils import set_random_seed

# Set random seed for reproducibility.
# One constant drives both the global RNG seeding and the dataset splits, so
# changing the seed in a single place re-seeds the whole notebook.
RANDOM_SEED = 2025
set_random_seed(RANDOM_SEED)

# Load the triples from a tab-delimited file (path is relative to the
# notebook's working directory).
file_path = 'C.1_query.tsv'
tf = TriplesFactory.from_path(file_path, delimiter="\t")

# Two-stage split: hold out 15% for testing, then carve 20% of the remaining
# 85% off as validation (roughly 68% / 17% / 15% train / validation / test).
training, testing = tf.split([0.85, 0.15], random_state=RANDOM_SEED)
training, validation = training.split([0.8, 0.2], random_state=RANDOM_SEED)

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
def run_pipeline(model_name, model_kwargs_range, random_seed=2025):
    """Run a PyKEEN hyperparameter-optimization study for a single model.

    The study uses the module-level ``training``, ``testing`` and
    ``validation`` triples and the module-level ``device``. Apart from the
    model and its search space, every setting (number of trials, optimizer,
    negative sampler, evaluation and early-stopping options) is fixed here so
    that different models are tuned under the same protocol.

    Args:
        model_name: Name of the PyKEEN model to optimize (e.g. ``"TransH"``).
        model_kwargs_range: Search-space specification for the model's
            hyperparameters, forwarded as ``model_kwargs_ranges``.
        random_seed: Seed applied to the global RNGs via ``set_random_seed``
            and to the Optuna sampler.

    Returns:
        The object returned by ``hpo_pipeline`` for the finished study.

    Raises:
        ValueError: If ``model_name`` or ``model_kwargs_range`` is ``None``.
    """
    # Validate before touching any global state, so a bad call fails fast and
    # does not re-seed the RNGs as a side effect.
    if model_name is None or model_kwargs_range is None:
        raise ValueError("Both model name and model parameter ranges must be provided.")

    # Seed the global RNGs for reproducibility.
    set_random_seed(random_seed)

    # Run the hyperparameter optimization pipeline
    result = hpo_pipeline(
        training=training,
        testing=testing,
        validation=validation,

        # Hyperparameter optimization configuration.
        # The sampler gets the same seed so that the sequence of proposed
        # hyperparameters is tied to ``random_seed`` as well.
        sampler="tpe",
        sampler_kwargs=dict(seed=random_seed),
        n_trials=50,

        # Training parameter ranges
        training_kwargs_ranges=dict(
            num_epochs=dict(type="int", low=50, high=150, step=25)
        ),

        # Model and its parameter search space
        model=model_name,
        model_kwargs_ranges=model_kwargs_range,

        # Optimizer configuration (both ranges searched on a log scale)
        optimizer="adam",
        optimizer_kwargs_ranges=dict(
            lr=dict(type="float", low=0.0001, high=0.001, log=True),
            weight_decay=dict(type="float", low=1e-4, high=1e-3, log=True)
        ),

        # Negative sampling configuration
        negative_sampler="basic",
        negative_sampler_kwargs_ranges=dict(
            num_negs_per_pos=dict(type="int", low=1, high=10, step=3)
        ),

        # Evaluation configuration
        # NOTE(review): these keys are forwarded unchanged. Confirm that the
        # installed PyKEEN version accepts `filtered`, `k` and `mode` as
        # evaluation kwargs (`filtered` may belong in `evaluator_kwargs`).
        evaluation_kwargs=dict(
            filtered=True,
            batch_size=128,
            k=5,
            mode='both'
        ),

        # Early stopping settings
        stopper="early",
        stopper_kwargs=dict(
            patience=10,
            frequency=5,
            metric="hits@5",
            relative_delta=0.002,
            larger_is_better=True,
        ),

        device=device,
    )

    return result

# TransH Model

In [None]:
# Search space for TransH: only the embedding dimension is tuned.
model_name = "TransH"
model_kwargs_range = {
    "embedding_dim": {"type": "int", "low": 128, "high": 256, "step": 64},
}

# Launch the HPO study for this model and keep its result.
resultTransH = run_pipeline(model_name, model_kwargs_range)

# RotatE Model

In [None]:
# Search space for RotatE: only the embedding dimension is tuned.
model_name = "RotatE"
model_kwargs_range = {
    "embedding_dim": {"type": "int", "low": 128, "high": 256, "step": 64},
}

# Launch the HPO study for this model and keep its result.
resultRotatE = run_pipeline(model_name, model_kwargs_range)

# ComplEx Model

In [None]:
# Search space for ComplEx: only the embedding dimension is tuned.
model_name = "ComplEx"
model_kwargs_range = {
    "embedding_dim": {"type": "int", "low": 128, "high": 256, "step": 64},
}

# Launch the HPO study for this model and keep its result.
resultComplEx = run_pipeline(model_name, model_kwargs_range)

# ConvKB Model

In [None]:
# The model is ConvKB; the previous value "ConvB" was a misspelling of it.
model_name = "ConvKB"

# Search space for ConvKB: embedding size plus the convolution-specific
# dropout rate and number of filters.
model_kwargs_range =dict(
            embedding_dim=dict(type="int", low=128, high=256, step=64),
            hidden_dropout_rate=dict(type="float", low=0.3, high=0.5, step=0.1),
            num_filters=dict(type="int", low=64, high=128, step=32),
        )

# Run the HPO pipeline. The result variable keeps its original name so that
# any later cell referring to `resultConvB` continues to work.
resultConvB = run_pipeline(model_name, model_kwargs_range)