In [7]:
import pandas as pd
import networkx as nx

from preprocess import load_movielens_data, create_bipartite_graph

# Directory holding the MovieLens files, relative to the notebook's working directory.
datapath = "../data/ml-1m"

# The loader returns three objects, unpacked as ratings, movies and users
# (presumably pandas DataFrames — confirm in preprocess.py).
rating, movies, users = load_movielens_data(datapath)

# Build the user-movie graph from the ratings only; `movies` and `users` are not used here.
user_movie_graph = create_bipartite_graph(rating)

Graph created with 9746 nodes and 1000209 edges.


In [None]:
import pandas as pd
import networkx as nx
import numpy as np
from collections import defaultdict
import random

from random_walk_p3 import rank_movies_with_penalty, perform_random_walks
from evaluation import calculate_top_k_accuracy, calculate_correctly_placed_pairs

def evaluate_recommender(graph, walk_length=3, penalty_type="b", k=10, max_users=20):
    """
    Evaluate the recommender system using random walks and penalties.

    For every evaluated user, 10 of the movies they watched are hidden: the
    corresponding edges are removed from a copy of the graph, recommendations
    are computed on that copy, and the hidden movies are used to score them.

    Parameters:
    - graph: Bipartite graph. User nodes carry the attribute bipartite == 0,
      movie nodes bipartite == 1.
    - walk_length: Length of each random walk.
    - penalty_type: Type of penalty for unvisited movies.
    - k: Number of top recommendations to consider.
    - max_users: Maximum number of user nodes to examine, taken in node order.
      Users with 10 or fewer watched movies count towards this limit but are
      skipped. Pass None to examine every user.

    Returns:
    - avg_top_k_accuracy: Average Top-K accuracy across evaluated users.
    - avg_pair_accuracy: Average pairwise accuracy across evaluated users.
    - results: List of per-user dicts with the keys "user",
      "top_k_accuracy" and "pair_accuracy".
    Both averages are 0.0 (and results is empty) when no user qualifies.

    Raises:
    - ValueError: If max_users is negative.
    """
    if max_users is not None and max_users < 0:
        raise ValueError("max_users must be non-negative or None")

    users = [node for node, data in graph.nodes(data=True) if data.get("bipartite") == 0]
    # Cap the evaluation by slicing the user list; the previous
    # `for user in users and cnt <= 20` iterated over a boolean and raised TypeError.
    candidate_users = users if max_users is None else users[:max_users]

    # Number of watched movies held out per user; a user needs strictly more
    # than this so that at least one movie remains for the walk.
    num_hidden = 10

    total_top_k_accuracy = 0.0
    total_pair_accuracy = 0.0
    results = []

    for user in candidate_users:
        # Get movies watched by user
        watched_movies = {
            neighbor
            for neighbor in graph.neighbors(user)
            if graph.nodes[neighbor].get("bipartite") == 1
        }
        if len(watched_movies) <= num_hidden:
            continue

        # Sort before sampling so the draw depends only on the RNG state,
        # not on set iteration order.
        hidden_movies = set(random.sample(sorted(watched_movies), num_hidden))

        # Create temporary graph with the hidden edges removed.
        # NOTE(review): copying the whole graph per user is expensive on large
        # graphs; removing and restoring the edges in place would avoid it.
        temp_graph = graph.copy()
        for movie in hidden_movies:
            temp_graph.remove_edge(user, movie)

        # Perform random walks and rank movies
        movie_visits = perform_random_walks(temp_graph, start_node=user, walk_length=walk_length)
        recommendations = rank_movies_with_penalty(
            temp_graph, movie_visits, walk_length=walk_length, penalty_type=penalty_type
        )

        # Calculate accuracy metrics
        top_k_accuracy = calculate_top_k_accuracy(recommendations, hidden_movies, k)
        # NOTE(review): the reference ranking is just the iteration order of the
        # hidden set, which is arbitrary — confirm this is what
        # calculate_correctly_placed_pairs is meant to receive.
        hidden_ranking = {movie: idx for idx, movie in enumerate(hidden_movies)}
        pair_accuracy = calculate_correctly_placed_pairs(recommendations, hidden_ranking)

        print(user, top_k_accuracy, pair_accuracy, sep='\n')

        total_top_k_accuracy += top_k_accuracy
        total_pair_accuracy += pair_accuracy
        results.append({
            "user": user,
            "top_k_accuracy": top_k_accuracy,
            "pair_accuracy": pair_accuracy
        })

    # Average the accuracies; guard against the case where nobody qualified,
    # which previously raised ZeroDivisionError.
    num_users = len(results)
    if num_users == 0:
        return 0.0, 0.0, results

    avg_top_k_accuracy = total_top_k_accuracy / num_users
    avg_pair_accuracy = total_pair_accuracy / num_users

    return avg_top_k_accuracy, avg_pair_accuracy, results


# Seed the global RNGs so the evaluation is reproducible across kernel restarts:
# evaluate_recommender draws the hidden-movie split with random.sample.
# Seeding NumPy as well is harmless and covers helpers that may use its
# global RNG — TODO confirm which RNG random_walk_p3 actually draws from.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

avg_top_k_accuracy, avg_pair_accuracy, results = evaluate_recommender(
    user_movie_graph, walk_length=3, penalty_type="b", k=10
)

print(f"Average Top-K Accuracy: {avg_top_k_accuracy:.4f}")
print(f"Average Pairwise Accuracy: {avg_pair_accuracy:.4f}")

In [None]:
import matplotlib.pyplot as plt

def visualize_results(avg_top_k_accuracy, avg_pair_accuracy, results):
    """
    Draw a grouped bar chart of per-user accuracies and print the averages.

    Parameters:
    - avg_top_k_accuracy: Average Top-K accuracy across users.
    - avg_pair_accuracy: Average pairwise accuracy across users.
    - results: Per-user metrics; each entry is indexed by the keys "user",
      "top_k_accuracy" and "pair_accuracy".
    """
    def column(key):
        # Pull one field out of every per-user record, preserving order.
        return [entry[key] for entry in results]

    user_labels = column("user")
    top_k_scores = column("top_k_accuracy")
    pair_scores = column("pair_accuracy")

    # One slot per user; the two metrics sit side by side around each slot.
    positions = np.arange(len(user_labels))
    bar_width = 0.4
    half_width = bar_width / 2

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.bar(positions - half_width, top_k_scores, width=bar_width, label="Top-K Accuracy")
    ax.bar(positions + half_width, pair_scores, width=bar_width, label="Pairwise Accuracy")

    ax.set_xlabel("Users")
    ax.set_ylabel("Accuracy")
    ax.set_title("Per-User Accuracy Metrics")
    # Label each slot with its user id, rotated so long ids do not overlap.
    ax.set_xticks(positions)
    ax.set_xticklabels(user_labels, rotation=90)
    ax.legend()
    fig.tight_layout()
    plt.show()

    # Overall averages
    print(f"Average Top-K Accuracy: {avg_top_k_accuracy:.4f}")
    print(f"Average Pairwise Accuracy: {avg_pair_accuracy:.4f}")



# Render the per-user bar chart and print the overall averages
visualize_results(
    avg_top_k_accuracy=avg_top_k_accuracy,
    avg_pair_accuracy=avg_pair_accuracy,
    results=results,
)