In [None]:
from helpers import *


# Generation of Data Structures

# Parameter Scan on weight_decay and lr

In [None]:
def run_experiment(n, m, d, p, s, device, lr, weight_decay, reps=5):
    """
    Run repeated matrix-factorization experiments on synthetic BTL preference
    data for a single (lr, weight_decay) setting.

    Every repetition generates fresh embeddings and a fresh preference dataset,
    trains a new ``MatrixFactorization`` model with Adam, and evaluates it on
    the test split produced by ``split_dataset``.

    Args:
        n: First size argument of ``generate_embeddings`` and
            ``MatrixFactorization`` (the scan cell passes the number of users).
        m: Second size argument of the same helpers (the scan cell passes the
            number of items).
        d: Embedding dimension used for both the generated embeddings and the
            trained model.
        p: Pairwise density; the dataset holds ``int(n * m * p / 2)`` datapoints.
        s: Forwarded as ``scale`` to ``BTLPreferenceDataset`` and to
            ``compute_reconstruction_error``.
        device: Device handed to the helpers and used for ``model.to(device)``.
        lr: Learning rate for ``torch.optim.Adam``.
        weight_decay: Weight decay for ``torch.optim.Adam``.
        reps: Number of independent repetitions to run.

    Returns:
        A tuple of five lists with one entry per repetition, in this order:
        ``(reconstruction_errors, log_likelihoods, accuracy,
        gt_log_likelihoods, gt_accuracy)``. The log-likelihood entries are the
        negated losses returned by ``evaluate_model`` and
        ``compute_ground_truth_metrics``.
    """
    num_datapoints = int(n * m * p / 2)
    reconstruction_errors, log_likelihoods, accuracy = [], [], []
    gt_accuracy, gt_log_likelihoods = [], []

    for rep in range(reps):
        # perf_counter is monotonic, so the reported durations cannot be
        # distorted by wall-clock adjustments during a long scan.
        start_time = time.perf_counter()
        print(f"\n### Experiment {rep+1}/{reps} started with lr={lr}, weight_decay={weight_decay}... ###")

        # Step 1: Generate embeddings
        U, V = generate_embeddings(n, m, d, device)

        # Step 2: Create preference dataset
        dataset = BTLPreferenceDataset(U, V, num_datapoints, scale=s)

        # Step 3: Split dataset into train, validation, and test sets
        train_loader, val_loader, test_loader = split_dataset(dataset, num_datapoints)

        # Step 4: Initialize the matrix factorization model
        model = MatrixFactorization(n, m, d).to(device)

        # Step 5: Define optimizer with scanned parameters
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

        # Step 6: Train the model
        train_start_time = time.perf_counter()
        train_model(model, train_loader, val_loader, optimizer, device)
        # `rep` counts repetitions of the whole experiment, not training
        # epochs, so the message is labelled accordingly.
        print(f"Experiment {rep+1}/{reps}: Training completed in {time.perf_counter() - train_start_time:.4f} seconds")

        # Step 7: Evaluate the model on the test set
        test_loss, test_accuracy = evaluate_model(model, test_loader, device)

        # Step 8: Compute reconstruction error
        reconstruction_error = compute_reconstruction_error(model, U, V, s)

        # Step 9: Compute ground truth metrics
        gt_loss, gt_acc = compute_ground_truth_metrics(test_loader, U, V, device)

        # Store results for this repetition (losses are negated to obtain
        # log-likelihoods)
        reconstruction_errors.append(reconstruction_error)
        log_likelihoods.append(-test_loss)
        accuracy.append(test_accuracy)
        gt_log_likelihoods.append(-gt_loss)
        gt_accuracy.append(gt_acc)

        print(f"### Experiment {rep+1}/{reps} completed in {time.perf_counter() - start_time:.4f} seconds ###")

    return reconstruction_errors, log_likelihoods, accuracy, gt_log_likelihoods, gt_accuracy



In [None]:
# Define parameter grid
lr_values = [1e-3, 5e-4, 1e-4]  # Different learning rates to test
weight_decay_values = [1e-3, 5e-3, 1e-2]  # Different weight decay values to test

# Experiment settings
n_users = 100
m_items = 200
embedding_dim = 20
pairwise_density = 0.5
scale_factor = 2.0
reps = 1

# The mean accuracy below divides by the number of repetitions; fail here with
# a clear message instead of with a ZeroDivisionError after a full training run.
if reps < 1:
    raise ValueError(f"reps must be at least 1, got {reps}")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Run parameter scan
# Start from -inf so that a setting whose mean accuracy is exactly 0 can still
# be selected; NaN means never compare greater and are therefore skipped.
best_accuracy = float("-inf")
best_params = None
results_dict = {}

# NOTE(review): `accuracy` is the test-set accuracy collected by
# run_experiment, so the "best" setting is selected on the test split and its
# reported accuracy is optimistically biased.
for lr, weight_decay in itertools.product(lr_values, weight_decay_values):
    print(f"\n>>> Running experiment with lr={lr}, weight_decay={weight_decay}...")
    reconstruction_errors, log_likelihoods, accuracy, gt_log_likelihoods, gt_accuracy = run_experiment(
        n_users, m_items, embedding_dim, pairwise_density, scale_factor, device, lr, weight_decay, reps
    )

    mean_accuracy = sum(accuracy) / len(accuracy)
    results_dict[(lr, weight_decay)] = mean_accuracy

    if mean_accuracy > best_accuracy:
        best_accuracy = mean_accuracy
        best_params = (lr, weight_decay)

# Print results
print("\n### Parameter Scan Results ###")
# Distinct loop names keep the scan variables `lr` / `weight_decay` intact.
for (scanned_lr, scanned_wd), acc in results_dict.items():
    print(f"LR: {scanned_lr:.0e}, Weight Decay: {scanned_wd:.0e} -> Accuracy: {acc:.4f}")

# best_params stays None when no run produced a comparable accuracy (e.g. all
# NaN); report that instead of failing on best_params[0].
if best_params is None:
    print("\n>>> No valid accuracy was recorded; best parameters are undefined.")
else:
    print(f"\n>>> Best Parameters: LR={best_params[0]}, Weight Decay={best_params[1]} with Accuracy={best_accuracy:.4f}")



# Parameter Scan on p, s, d, lr, weight_decay

In [None]:



def run_experiment(n, m, d, p, s, device, lr, weight_decay, reps=5):
    """
    Run repeated matrix-factorization experiments on synthetic BTL preference
    data for a single (d, p, s, lr, weight_decay) setting.

    NOTE: this definition shadows the earlier ``run_experiment`` cell in this
    notebook. The start banner here additionally reports ``d``, ``p`` and ``s``.

    Every repetition generates fresh embeddings and a fresh preference dataset,
    trains a new ``MatrixFactorization`` model with Adam, and evaluates it on
    the test split produced by ``split_dataset``.

    Args:
        n: First size argument of ``generate_embeddings`` and
            ``MatrixFactorization`` (the scan cell passes the number of users).
        m: Second size argument of the same helpers (the scan cell passes the
            number of items).
        d: Embedding dimension used for both the generated embeddings and the
            trained model.
        p: Pairwise density; the dataset holds ``int(n * m * p / 2)`` datapoints.
        s: Forwarded as ``scale`` to ``BTLPreferenceDataset`` and to
            ``compute_reconstruction_error``.
        device: Device handed to the helpers and used for ``model.to(device)``.
        lr: Learning rate for ``torch.optim.Adam``.
        weight_decay: Weight decay for ``torch.optim.Adam``.
        reps: Number of independent repetitions to run.

    Returns:
        A tuple of five lists with one entry per repetition, in this order:
        ``(reconstruction_errors, log_likelihoods, accuracy,
        gt_log_likelihoods, gt_accuracy)``. The log-likelihood entries are the
        negated losses returned by ``evaluate_model`` and
        ``compute_ground_truth_metrics``.
    """
    num_datapoints = int(n * m * p / 2)
    reconstruction_errors, log_likelihoods, accuracy = [], [], []
    gt_accuracy, gt_log_likelihoods = [], []

    for rep in range(reps):
        # perf_counter is monotonic, so the reported durations cannot be
        # distorted by wall-clock adjustments during a long scan.
        start_time = time.perf_counter()
        print(f"\n### Experiment {rep+1}/{reps} started with d={d}, p={p}, s={s}, lr={lr}, weight_decay={weight_decay}... ###")

        # Step 1: Generate embeddings
        U, V = generate_embeddings(n, m, d, device)

        # Step 2: Create preference dataset
        dataset = BTLPreferenceDataset(U, V, num_datapoints, scale=s)

        # Step 3: Split dataset into train, validation, and test sets
        train_loader, val_loader, test_loader = split_dataset(dataset, num_datapoints)

        # Step 4: Initialize the matrix factorization model
        model = MatrixFactorization(n, m, d).to(device)

        # Step 5: Define optimizer with scanned parameters
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

        # Step 6: Train the model
        train_start_time = time.perf_counter()
        train_model(model, train_loader, val_loader, optimizer, device)
        # `rep` counts repetitions of the whole experiment, not training
        # epochs, so the message is labelled accordingly.
        print(f"Experiment {rep+1}/{reps}: Training completed in {time.perf_counter() - train_start_time:.4f} seconds")

        # Step 7: Evaluate the model on the test set
        test_loss, test_accuracy = evaluate_model(model, test_loader, device)

        # Step 8: Compute reconstruction error
        reconstruction_error = compute_reconstruction_error(model, U, V, s)

        # Step 9: Compute ground truth metrics
        gt_loss, gt_acc = compute_ground_truth_metrics(test_loader, U, V, device)

        # Store results for this repetition (losses are negated to obtain
        # log-likelihoods)
        reconstruction_errors.append(reconstruction_error)
        log_likelihoods.append(-test_loss)
        accuracy.append(test_accuracy)
        gt_log_likelihoods.append(-gt_loss)
        gt_accuracy.append(gt_acc)

        print(f"### Experiment {rep+1}/{reps} completed in {time.perf_counter() - start_time:.4f} seconds ###")

    return reconstruction_errors, log_likelihoods, accuracy, gt_log_likelihoods, gt_accuracy



In [None]:

# Define parameter grid
d_values = [10, 20, 30]  # Different latent dimensions
p_values = [0.3, 0.5, 0.7]  # Different proportions of user-item interactions
s_values = [1.0, 2.0, 3.0]  # Different BTL scale factors
lr_values = [1e-3, 5e-4, 1e-4]  # Different learning rates
weight_decay_values = [1e-3, 5e-3, 1e-2]  # Different weight decay values

# Experiment settings
n_users = 100
m_items = 200
reps = 1

# The mean accuracy below divides by the number of repetitions; fail here with
# a clear message instead of with a ZeroDivisionError after a full training run.
if reps < 1:
    raise ValueError(f"reps must be at least 1, got {reps}")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Run parameter scan
# Start from -inf so that a setting whose mean accuracy is exactly 0 can still
# be selected; NaN means never compare greater and are therefore skipped.
best_accuracy = float("-inf")
best_params = None
results_dict = {}

# The full grid is 3**5 = 243 combinations, each trained `reps` times.
# NOTE(review): d, p and s are fed to the data generation inside
# run_experiment, so accuracies across them come from different synthetic
# tasks; the "best" entry may simply be the easiest dataset rather than the
# best optimizer setting. The accuracy is also measured on the test split.
for d, p, s, lr, weight_decay in itertools.product(d_values, p_values, s_values, lr_values, weight_decay_values):
    print(f"\n>>> Running experiment with d={d}, p={p}, s={s}, lr={lr}, weight_decay={weight_decay}...")
    reconstruction_errors, log_likelihoods, accuracy, gt_log_likelihoods, gt_accuracy = run_experiment(
        n_users, m_items, d, p, s, device, lr, weight_decay, reps
    )

    mean_accuracy = sum(accuracy) / len(accuracy)
    results_dict[(d, p, s, lr, weight_decay)] = mean_accuracy

    if mean_accuracy > best_accuracy:
        best_accuracy = mean_accuracy
        best_params = (d, p, s, lr, weight_decay)

# Print results
print("\n### Parameter Scan Results ###")
# Distinct loop names keep the scan variables (d, p, s, lr, weight_decay) intact.
for (scanned_d, scanned_p, scanned_s, scanned_lr, scanned_wd), acc in results_dict.items():
    print(f"d: {scanned_d}, p: {scanned_p}, s: {scanned_s}, LR: {scanned_lr:.0e}, Weight Decay: {scanned_wd:.0e} -> Accuracy: {acc:.4f}")

# best_params stays None when no run produced a comparable accuracy (e.g. all
# NaN); report that instead of failing on best_params[0].
if best_params is None:
    print("\n>>> No valid accuracy was recorded; best parameters are undefined.")
else:
    print(f"\n>>> Best Parameters: d={best_params[0]}, p={best_params[1]}, s={best_params[2]}, LR={best_params[3]}, Weight Decay={best_params[4]} with Accuracy={best_accuracy:.4f}")
