# Neccessary Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from Data_Splitter import split_users, holdout_interactions
from Fairness_Metrics import compute_recGap, compute_compounding_factor
from sklearn.metrics.pairwise import cosine_similarity

# Data Processing

In [2]:
# Build the data
def build_data(df):
    # Split users into train/validation/test groups (based solely on user_id)
    train_users, val_users, test_users = split_users(df, train_frac=0.6, val_frac=0.2, test_frac=0.2)

    # For train users, use all interactions.
    df_train_all = df[df['user_id'].isin(train_users)]

    # For validation and test users, further split each user's interactions (80% train, 20% holdout)
    df_val_all = df[df['user_id'].isin(val_users)]
    df_test_all = df[df['user_id'].isin(test_users)]

    df_val_train, df_val_holdout = holdout_interactions(df_val_all, holdout_frac=0.2)
    df_test_train, df_test_holdout = holdout_interactions(df_test_all, holdout_frac=0.2)

    # Build the overall training data by combining all train users and the training portion for validation/test users.
    df_model_train = pd.concat([df_train_all, df_val_train, df_test_train]).reset_index(drop=True)

    return df_model_train, df_val_holdout, df_test_holdout

In [3]:
# Create a binarized user–track interaction matrix from the training data.
def create_user_track_matrix(df_model_train):
    user_track_matrix = pd.crosstab(df_model_train['user_id'], df_model_train['track_id'])
    user_track_matrix = (user_track_matrix > 0).astype(int)

    # For an item-based KNN recommender, we treat each track (item) as a vector of user interactions.
    # Transpose the matrix and create a sparse representation (tracks x users).
    sparse_item_matrix = csr_matrix(user_track_matrix.T.values)
    track_list = list(user_track_matrix.columns)

    return user_track_matrix, sparse_item_matrix, track_list

# KNNItem Helper Functions

In [4]:
def get_item_similarities(knn_model, track_id, sparse_item_matrix, track_list, n_neighbors=5):
    """
    Retrieve the n most similar tracks for a given track_id.
    """
    if track_id not in track_list:
        return []
    track_index = track_list.index(track_id)
    track_vector = sparse_item_matrix[track_index]
    # Retrieve neighbors (n_neighbors + 1 because the track itself is returned)
    distances, indices = knn_model.kneighbors(track_vector, n_neighbors=n_neighbors + 1)
    # Skip the first index if it is the track itself.
    similar_indices = [i for i in indices.flatten() if i != track_index]
    similar_tracks = [track_list[i] for i in similar_indices]
    return similar_tracks

def get_recommendations_for_user(knn_model, user_id, user_track_matrix, sparse_item_matrix, track_list, top_n=10, n_neighbors=10):
    """
    For a given user, aggregate similar items from the items the user has interacted with in the training data.
    Only recommend items the user has not interacted with.
    """
    if user_id not in user_track_matrix.index:
        return []
    # Get the set of tracks the user has interacted with (training interactions)
    user_history = set(user_track_matrix.loc[user_id][lambda row: row == 1].index)
    
    candidate_scores = {}
    # For each item in the user history, get similar items and sum a simple frequency score.
    for item in user_history:
        similar_items = get_item_similarities(knn_model, item, sparse_item_matrix, track_list, n_neighbors=n_neighbors)
        for sim_item in similar_items:
            if sim_item in user_history:
                continue
            candidate_scores[sim_item] = candidate_scores.get(sim_item, 0) + 1
                
    ranked_items = sorted(candidate_scores.items(), key=lambda x: x[1], reverse=True)
    recommended_items = [item for item, score in ranked_items][:top_n]
    return recommended_items

## KNNItem Evaluation Using NDCG

In [5]:
def ndcg_at_k(relevances, k):
    """
    Compute NDCG@k given a list of binary relevance scores.
    """
    relevances = np.asfarray(relevances)[:k]
    if relevances.size == 0:
        return 0.0
    # Discount factors (log2-based)
    discounts = np.log2(np.arange(2, relevances.size + 2))
    dcg = np.sum(relevances / discounts)
    # Ideal DCG (sorted relevances)
    ideal_relevances = np.sort(relevances)[::-1]
    idcg = np.sum(ideal_relevances / discounts)
    return dcg / idcg if idcg > 0 else 0.0

def evaluate_ndcg(knn_model, df, holdout_df, user_track_matrix, sparse_item_matrix, track_list, top_n=10, n_neighbors=10):
    """
    Evaluate recommendations using NDCG@k for each user in a holdout set.
    Returns overall NDCG and NDCG by gender.
    """
    # Create mapping from user_id to their holdout (ground truth) track_ids.
    user_holdout = holdout_df.groupby('user_id')['track_id'].apply(set).to_dict()
    # Get user genders from the original data (assuming 'gender' column exists).
    user_gender = df.set_index('user_id')['gender'].to_dict()
    
    ndcg_scores = {}    # per user scores
    ndcg_by_gender = {} # aggregated scores per gender
    
    for user, true_items in user_holdout.items():
        # Generate recommendations using the training data.
        recs = get_recommendations_for_user(knn_model, user, user_track_matrix, sparse_item_matrix, track_list, top_n=top_n, n_neighbors=n_neighbors)
        # Binary relevance: 1 if the recommended item is in the holdout set, 0 otherwise.
        relevances = [1 if rec in true_items else 0 for rec in recs]
        ndcg = ndcg_at_k(relevances, top_n)
        ndcg_scores[user] = ndcg
        
        gender = user_gender.get(user, 'unknown')
        if gender not in ndcg_by_gender:
            ndcg_by_gender[gender] = []
        ndcg_by_gender[gender].append(ndcg)
    
    overall_ndcg = np.mean(list(ndcg_scores.values())) if ndcg_scores else 0.0
    avg_ndcg_by_gender = {gender: np.mean(scores) for gender, scores in ndcg_by_gender.items()}

    print("\nSet Evaluation:")
    print(f"Overall NDCG@{top_n}: {overall_ndcg:.4f}")
    print("NDCG by gender:", avg_ndcg_by_gender)

    return overall_ndcg, avg_ndcg_by_gender


def grid_search_validation(knn_model, user_track_matrix, sparse_item_matrix, track_list, df, val_holdout_df, candidate_neighbors, candidate_top_n):
    """
    Perform grid search over n_neighbors and top_n parameters on the validation holdout set.
    Returns the best hyperparameters (those that achieve the highest overall NDCG) and grid search results.
    """
    best_ndcg = -1.0
    best_params = None
    grid_results = []  # Store tuples: (n_neighbors, top_n, overall_ndcg)
    
    for n_neighbors_param in candidate_neighbors:
        for top_n_param in candidate_top_n:
            overall_ndcg_val, _ = evaluate_ndcg(knn_model, df, val_holdout_df, user_track_matrix, sparse_item_matrix, track_list, top_n=top_n_param, n_neighbors=n_neighbors_param)
            grid_results.append((n_neighbors_param, top_n_param, overall_ndcg_val))
            print(f"n_neighbors: {n_neighbors_param}, top_n: {top_n_param} => NDCG: {overall_ndcg_val:.4f}")
            if overall_ndcg_val > best_ndcg:
                best_ndcg = overall_ndcg_val
                best_params = (n_neighbors_param, top_n_param)

    print("\nBest hyperparameters (n_neighbors, top_n):", best_params)
    print("Best overall NDCG on validation set:", best_ndcg)
    
    return best_params, best_ndcg, grid_results

# Other Metrics

In [17]:
def compute_diversity_for_list(recommended_tracks, sparse_item_matrix, track_list):
    """
    Compute intra-list diversity: average dissimilarity among all pairs of recommended tracks.
    Dissimilarity is defined as (1 - cosine similarity) for each pair.
    """
    if len(recommended_tracks) < 2:
        return 0.0

    # Retrieve indices for the recommended tracks from track_list.
    indices = [track_list.index(t) for t in recommended_tracks if t in track_list]
    
    # Extract the corresponding item vectors from the sparse matrix.
    vectors = sparse_item_matrix[indices]
    
    # Compute pairwise cosine similarity.
    sim_matrix = cosine_similarity(vectors)
    
    # Compute average pairwise similarity (ignoring the diagonal)
    sum_similarity = 0.0
    count = 0
    n = len(indices)
    for i in range(n):
        for j in range(i+1, n):
            sum_similarity += sim_matrix[i, j]
            count += 1
    
    avg_similarity = sum_similarity / count if count > 0 else 0.0
    # Diversity is defined as the complement of similarity.
    return 1 - avg_similarity

def evaluate_metrics(knn_model, df, holdout_df, user_track_matrix, sparse_item_matrix, track_list, top_n=10, n_neighbors=10):
    """
    Evaluate recommendations for all users in the holdout set using Recall@10, Coverage@10, and Diversity@10.
    Also, compute the metrics for each gender subgroup.
    
    Parameters:
        knn_model         : The trained KNN model.
        holdout_df        : DataFrame with ground-truth interactions (must include 'user_id' and 'track_id').
        user_track_matrix : Training user-by-item matrix.
        sparse_item_matrix: Sparse representation of item vectors.
        track_list        : List of track IDs.
        df                : The original DataFrame containing user attributes (e.g., 'gender').
        top_n             : Number of recommendations per user.
        n_neighbors       : Number of neighbors to consider.
        
        Returns:
        overall_recall, overall_coverage, overall_diversity, and a dictionary `gender_metrics`
        that contains per-gender averages for recall, coverage, and diversity.
    """
    # Mapping from user_id to their ground truth track_ids.
    user_holdout = holdout_df.groupby('user_id')['track_id'].apply(set).to_dict()
    # Mapping from user_id to gender.
    user_gender = df.set_index('user_id')['gender'].to_dict()
    
    recall_scores = {}
    diversity_scores = {}
    # For coverage per gender, maintain a set of recommended items per gender.
    coverage_by_gender = {}
    
    for user, ground_truth in user_holdout.items():
        recs = get_recommendations_for_user(knn_model, user, user_track_matrix, sparse_item_matrix, track_list, top_n=top_n, n_neighbors=n_neighbors)
        
        # Compute Recall@10.
        if ground_truth:
            recall = len(set(recs).intersection(ground_truth)) / len(ground_truth)
        else:
            recall = 0.0
        recall_scores[user] = recall
        
        # Compute Diversity@10.
        diversity = compute_diversity_for_list(recs, sparse_item_matrix, track_list)
        diversity_scores[user] = diversity
        
        # Collect recommended items per gender for Coverage.
        gender = user_gender.get(user, 'unknown')
        if gender not in coverage_by_gender:
            coverage_by_gender[gender] = set()
        coverage_by_gender[gender].update(recs)
    
    overall_recall = np.mean(list(recall_scores.values()))
    overall_diversity = np.mean(list(diversity_scores.values()))
    overall_coverage = len(set().union(*(set(recs) for recs in coverage_by_gender.values()))) / len(track_list)
    
    # Compute per-gender averages.
    recall_by_gender = {}
    diversity_by_gender = {}
    coverage_metrics_by_gender = {}
    
    # Organize per-user metrics by gender.
    for user, rec in recall_scores.items():
        gender = user_gender.get(user, 'unknown')
        if gender not in recall_by_gender:
            recall_by_gender[gender] = []
        recall_by_gender[gender].append(rec)
    
    for user, div in diversity_scores.items():
        gender = user_gender.get(user, 'unknown')
        if gender not in diversity_by_gender:
            diversity_by_gender[gender] = []
        diversity_by_gender[gender].append(div)
    
    for gender, rec_set in coverage_by_gender.items():
        coverage_metrics_by_gender[gender] = len(rec_set) / len(track_list)
    
    avg_recall_by_gender = {g: np.mean(scores) for g, scores in recall_by_gender.items()}
    avg_diversity_by_gender = {g: np.mean(scores) for g, scores in diversity_by_gender.items()}

    print("\nEvaluation Metrics @ {}:".format(top_n))
    print("\nOverall Recall: {:.4f}".format(overall_recall))
    print("Recall by gender:", avg_recall_by_gender)

    print("\nOverall Coverage: {:.4f}".format(overall_coverage))
    print("Coverage by gender:", coverage_metrics_by_gender)

    print("\nOverall Diversity: {:.4f}".format(overall_diversity))
    print("Diversity by gender:", avg_diversity_by_gender)
    
    gender_metrics = {
        'recall': avg_recall_by_gender,
        'coverage': coverage_metrics_by_gender,
        'diversity': avg_diversity_by_gender
    }
    
    return overall_recall, overall_coverage, overall_diversity, gender_metrics

def recGap_CF_results(df, gender_metrics):
    for key, value in gender_metrics.items():
        print(f"\nFor the {key} metric")
        compute_recGap(value)
        compute_compounding_factor(df, value)

# Main Functions

In [18]:
def Evaluate_KNN(knn_model, user_track_matrix, sparse_item_matrix, track_list, df, df_val_holdout, df_test_holdout):
    # Define candidate hyperparameters.
    candidate_neighbors = [5, 10, 15]
    candidate_top_n = [10]
    best_params, best_ndcg, grid_results = grid_search_validation(knn_model, user_track_matrix, sparse_item_matrix, track_list, df, df_val_holdout, candidate_neighbors, candidate_top_n)
    best_n_neighbors, best_top_n = best_params
    overall_ndcg_test, ndcg_by_gender_test = evaluate_ndcg(knn_model, df, df_test_holdout, user_track_matrix, sparse_item_matrix, track_list, top_n=best_top_n, n_neighbors=best_n_neighbors)
    overall_recall, overall_coverage, overall_diversity, gender_metrics = evaluate_metrics(knn_model, df, df_test_holdout, user_track_matrix, sparse_item_matrix, track_list, top_n=10, n_neighbors=10)
    gender_metrics['ndcg'] = ndcg_by_gender_test 
    print(gender_metrics)
    recGap_CF_results(df, gender_metrics)
    return

In [8]:
def build_and_evaluate_knn(df):
    df_model_train, df_val_holdout, df_test_holdout = build_data(df)
    user_track_matrix, sparse_item_matrix, track_list = create_user_track_matrix(df_model_train)

    # Train a KNN model on the item (track) vectors.
    knn_model = NearestNeighbors(metric='cosine', algorithm='brute')
    knn_model.fit(sparse_item_matrix)

    Evaluate_KNN(knn_model, user_track_matrix, sparse_item_matrix, track_list, df, df_val_holdout, df_test_holdout)

    return

# Running The Algorithm

In [11]:
# Load Data
df = pd.read_csv('data/LFM-1b-DemoBiasSub-10k.csv', header=0)
df_SMOTE = pd.read_csv('data/LFM-1b-DemoBiasSub-10k-SMOTE.csv', header=0)
df_resampled = pd.read_csv('data/LFM-1b-DemoBiasSub-10k-Resampled.csv', header=0)

In [22]:
build_and_evaluate_knn(df)


Set Evaluation:
Overall NDCG@10: 0.1665
NDCG by gender: {'m': 0.16873887963637296, 'f': 0.16029092281791665}
n_neighbors: 5, top_n: 10 => NDCG: 0.1665

Set Evaluation:
Overall NDCG@10: 0.1613
NDCG by gender: {'m': 0.16309032354503913, 'f': 0.15645769099064857}
n_neighbors: 10, top_n: 10 => NDCG: 0.1613

Set Evaluation:
Overall NDCG@10: 0.1591
NDCG by gender: {'m': 0.16149819846488775, 'f': 0.15255109670918537}
n_neighbors: 15, top_n: 10 => NDCG: 0.1591

Best hyperparameters (n_neighbors, top_n): (5, 10)
Best overall NDCG on validation set: 0.1664797411405441

Set Evaluation:
Overall NDCG@10: 0.1624
NDCG by gender: {'m': 0.16615608516935818, 'f': 0.15200149805676685}

Evaluation Metrics @ 10:

Overall Recall: 0.1266
Recall by gender: {'m': 0.12713856915277838, 'f': 0.12509383567976654}

Overall Coverage: 0.8301
Coverage by gender: {'m': 0.7821, 'f': 0.5344}

Overall Diversity: 0.9358
Diversity by gender: {'m': 0.9352221131847291, 'f': 0.9374286046984478}
{'recall': {'m': 0.127138569152

In [23]:
build_and_evaluate_knn(df_resampled)


Set Evaluation:
Overall NDCG@10: 0.1311
NDCG by gender: {'m': 0.17106645297937007, 'f': 0.03281492641683718}
n_neighbors: 5, top_n: 10 => NDCG: 0.1311

Set Evaluation:
Overall NDCG@10: 0.1241
NDCG by gender: {'m': 0.16223132004611746, 'f': 0.030498970041845486}
n_neighbors: 10, top_n: 10 => NDCG: 0.1241

Set Evaluation:
Overall NDCG@10: 0.1256
NDCG by gender: {'m': 0.16461395505395032, 'f': 0.02958040476094506}
n_neighbors: 15, top_n: 10 => NDCG: 0.1256

Best hyperparameters (n_neighbors, top_n): (5, 10)
Best overall NDCG on validation set: 0.13109800217219233

Set Evaluation:
Overall NDCG@10: 0.1284
NDCG by gender: {'m': 0.166356275771398, 'f': 0.03002186182458126}

Evaluation Metrics @ 10:

Overall Recall: 0.0970
Recall by gender: {'m': 0.13003709918633072, 'f': 0.011583962980876501}

Overall Coverage: 0.8312
Coverage by gender: {'m': 0.7819, 'f': 0.52}

Overall Diversity: 0.9362
Diversity by gender: {'m': 0.9357073085682485, 'f': 0.9375656741996905}
{'recall': {'m': 0.1300370991863

In [24]:
build_and_evaluate_knn(df_SMOTE)

  num_cells = num_rows * num_columns


IndexError: index 222388562 is out of bounds for axis 0 with size 222325840