In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize

# Ensure FAISS is installed
!pip install faiss-cpu

def extract_ground_truth(interaction_matrix):
    """
    Builds the per-user list of recipes each user has interacted with.

    A recipe counts as "interacted with" when its entry in the user's row
    is non-zero.

    Parameters:
    - interaction_matrix (csr_matrix): User-recipe interaction matrix
      (one row per user, one column per recipe).

    Returns:
    - ground_truth (dict): {user_id: [column indices of non-zero entries]}
    """
    n_users = interaction_matrix.shape[0]
    # nonzero() on a single sparse row returns (row_idx, col_idx); only the
    # column indices identify recipes.
    return {
        row: interaction_matrix[row].nonzero()[1].tolist()
        for row in range(n_users)
    }

def generate_recommendations(interaction_matrix, num_users, k=5):
    """
    Collects the top-K recommendations of every user into one dictionary.

    Each user id in ``range(num_users)`` is passed to ``recommend_recipes``,
    which is not defined in this block and must already exist in the
    enclosing namespace.

    Parameters:
    - interaction_matrix (csr_matrix): User-recipe interaction matrix.
      Accepted for interface compatibility; this function does not read it.
    - num_users (int): Number of users.
    - k (int): Number of recommendations requested per user.

    Returns:
    - recommendations (dict): {user_id: result of recommend_recipes(user_id, k=k)}
    """
    return {uid: recommend_recipes(uid, k=k) for uid in range(num_users)}

def leave_one_out_cross_validation(ground_truth, seed=None):
    """
    Performs Leave-One-Out Cross-Validation (LOO) by holding out one interaction per user.

    Users with at least two distinct interactions get exactly one of them
    (chosen uniformly at random) moved to the test set. Users with fewer
    keep everything in the training set and get an empty test list.

    Parameters:
    - ground_truth (dict): User interactions, {user_id: [item_ids]}. It is not modified.
    - seed (int, optional): Seed for a private random generator so the split
      can be reproduced. When None (default) the module-level ``random``
      state is used.

    Returns:
    - train_gt (dict): Training set interactions, in their original order
      and without duplicates. Lists are always new objects.
    - test_gt (dict): Held-out interactions per user (a list of 0 or 1 items).
    """
    rng = random if seed is None else random.Random(seed)

    train_gt, test_gt = {}, {}
    for user, items in ground_truth.items():
        # De-duplicate while keeping first-seen order, so the training list
        # is deterministic and a repeated item cannot leave the user with a
        # test item but an empty training list.
        unique_items = list(dict.fromkeys(items))
        if len(unique_items) > 1:
            held_out = rng.choice(unique_items)  # Hold out one interaction
            test_gt[user] = [held_out]
            train_gt[user] = [item for item in unique_items if item != held_out]
        else:
            # Nothing can be held out; return a copy rather than the caller's list.
            train_gt[user] = unique_items
            test_gt[user] = []
    return train_gt, test_gt

def random_negative_sampling(ground_truth, num_recipes, neg_samples=100, seed=None):
    """
    Generates random negative samples for each user.

    A negative sample is a recipe index in ``range(num_recipes)`` that does
    not appear in the user's positive interactions. Samples are drawn
    without replacement.

    Parameters:
    - ground_truth (dict): User interactions, {user_id: [item_ids]}.
    - num_recipes (int): Total number of recipes.
    - neg_samples (int): Number of negative samples per user. Users with
      fewer candidates than this receive all of their candidates.
    - seed (int, optional): Seed for a private random generator so the
      samples can be reproduced. When None (default) the module-level
      ``random`` state is used.

    Returns:
    - negative_samples (dict): {user_id: [list of negative recipe samples]}
    """
    rng = random if seed is None else random.Random(seed)

    negative_samples = {}
    for user, pos_items in ground_truth.items():
        positives = set(pos_items)
        # Build candidates in ascending order so the population handed to
        # sample() does not depend on set iteration order, and the full
        # catalogue set is not rebuilt for every user.
        candidates = [recipe for recipe in range(num_recipes) if recipe not in positives]
        sample_size = min(len(candidates), neg_samples)
        negative_samples[user] = rng.sample(candidates, sample_size)
    return negative_samples

# --- Evaluation data preparation ---
# Prerequisites: `interaction_matrix`, `num_recipes` and `num_users` are not
# defined in this cell and must already exist in the session; the NameError
# recorded at the end of this cell is what happens when they do not.

# Extract ground truth user interactions: {user_id: [recipe column indices]}
ground_truth = extract_ground_truth(interaction_matrix)

# Apply Leave-One-Out Cross Validation: one randomly chosen interaction per
# user goes to test_gt, the remaining ones to train_gt. The draw uses the
# `random` module, so the split changes between runs unless it is seeded.
train_gt, test_gt = leave_one_out_cross_validation(ground_truth)

# Apply Random Negative Sampling (default: up to 100 recipes per user that
# are absent from that user's ground truth).
# NOTE(review): negative_samples is not read anywhere else in this cell —
# confirm whether a later cell uses it.
negative_samples = random_negative_sampling(ground_truth, num_recipes)

# Generate top-10 recommendations using the existing recommend_recipes function
# NOTE(review): train_gt is not passed on here, so recommend_recipes works from
# whatever data it was built on. If that data includes the held-out items, the
# evaluation below may be biased — confirm against its definition.
recommendations = generate_recommendations(interaction_matrix, num_users, k=10)

def compute_evaluation_metrics(recommendations, ground_truth, k_values):
    """
    Computes Precision@K, Hit Rate@K, NDCG@K, and MAP@K.

    Every user in ``recommendations`` contributes one score per K to each
    metric. Users that have no ground-truth items contribute 0 everywhere.
    A user with fewer than K recommendations is scored on the
    recommendations that exist; Precision@K is still divided by K.

    Parameters:
    - recommendations (dict): {user_id: [recommended_item_ids]}, best first.
    - ground_truth (dict): {user_id: [actual_item_ids]}
    - k_values (list): List of K values.

    Returns:
    - (precision, hit_rate, ndcg, map): four pandas Series indexed by K,
      each holding the metric averaged over all users.
    """
    precision_scores = {k: [] for k in k_values}
    hit_rate_scores = {k: [] for k in k_values}
    ndcg_scores = {k: [] for k in k_values}
    map_scores = {k: [] for k in k_values}

    for user, recs in recommendations.items():
        actual_items = set(ground_truth.get(user, []))
        for k in k_values:
            top_k_recs = recs[:k]
            relevant_items = set(top_k_recs) & actual_items

            precision_scores[k].append(len(relevant_items) / k if k > 0 else 0)
            hit_rate_scores[k].append(1 if relevant_items else 0)

            # One pass over the recommendations that actually exist. Looping
            # over range(k) would index past the end of a short list.
            dcg = 0.0
            ap = 0.0
            hits_so_far = set()  # distinct relevant items seen up to this rank
            for rank, item in enumerate(top_k_recs):
                if item not in actual_items:
                    continue
                dcg += 1 / np.log2(rank + 2)
                hits_so_far.add(item)
                ap += len(hits_so_far) / (rank + 1)  # precision at this rank

            # Ideal DCG: all relevant items (at most k) ranked first.
            idcg = sum(1 / np.log2(idx + 2) for idx in range(min(len(actual_items), k)))
            ndcg_scores[k].append(dcg / idcg if idcg > 0 else 0)

            # Guard the denominator so K == 0 or an empty ground truth yields 0
            # instead of a division by zero.
            ap_denominator = min(k, len(actual_items))
            map_scores[k].append(ap / ap_denominator if ap_denominator > 0 else 0)

    df_precision = pd.DataFrame(precision_scores).mean()
    df_hit_rate = pd.DataFrame(hit_rate_scores).mean()
    df_ndcg = pd.DataFrame(ndcg_scores).mean()
    df_map = pd.DataFrame(map_scores).mean()

    return df_precision, df_hit_rate, df_ndcg, df_map

# Define the K values: K = 1..10, matching the k=10 recommendations
# generated per user earlier in this cell
k_values = np.arange(1, 11)

# Compute evaluation metrics against the held-out (test) interactions.
# test_gt holds at most one item per user, so Precision@K cannot exceed 1/K.
df_precision, df_hit_rate, df_ndcg, df_map = compute_evaluation_metrics(recommendations, test_gt, k_values)

# Combine results into one DataFrame: one row per K, one column per metric
df_results = pd.concat([df_precision, df_hit_rate, df_ndcg, df_map], axis=1)
df_results.columns = ["Precision@K", "Hit Rate@K", "NDCG@K", "MAP@K"]
df_results.index.name = "K"

# Display table in notebook
from IPython.display import display
display(df_results)

# Generate Bar Chart for Precision@K
# A new figure is opened first; the following plt.* calls then act on it.
plt.figure(figsize=(10, 6))
df_precision.plot(kind='bar', color='b', alpha=0.7)
plt.xlabel('K')
plt.ylabel('Mean Precision@K')
plt.title('Average Precision@K')
plt.xticks(rotation=0)  # keep the K tick labels horizontal
plt.grid(axis='y')
# Save before show(): the figure is written to the current working directory
plt.savefig("precision_bar_chart.pdf", format="pdf")  # Save as vector graphic
plt.show()

# Generate Line Chart for All Metrics (one line per metric, K on the x-axis)
plt.figure(figsize=(10, 6))
plt.plot(k_values, df_precision, marker='o', linestyle='-', label='Precision@K')
plt.plot(k_values, df_hit_rate, marker='s', linestyle='-', label='Hit Rate@K')
plt.plot(k_values, df_ndcg, marker='^', linestyle='-', label='NDCG@K')
plt.plot(k_values, df_map, marker='d', linestyle='-', label='MAP@K')
plt.xlabel('K')
plt.ylabel('Metric Value')
plt.title('Evaluation Metrics Across K')
plt.legend()
plt.grid()
# Save before show(): the figure is written to the current working directory
plt.savefig("evaluation_metrics_graph.pdf", format="pdf")  # Save as vector graphic
plt.show()



NameError: name 'interaction_matrix' is not defined