In [19]:
import networkx as nx
from networkx.algorithms import bipartite
import numpy as np
import random
from scipy.stats import spearmanr
from statsmodels.stats.multitest import multipletests

# Loading data pathes and I/O functions from script
from scripts.io import load_movie_titles, load_raw_bipartite, save_projection, load_projection, save_edgelist, projection_path

# Loading reccomendation function
from scripts.recommend import evaluation_recommendation, sort_average_weight

## Loading dicts and graphs

In [2]:
G = load_raw_bipartite("full_bipartite.p")

simple_weights_movies = load_projection("simple_weights_movies.p")
rating_allocation_movies = load_projection("rating_allocation_movies.p")

Graph loaded.
Projection loaded.
Projection loaded.


# Evaluation algorithm
    Cross-validation of movie recommendations from graph with empirical ratings by users.

    For each user:
        1. Sample k liked (highest rated) movies, M, from list of rated movies, L ((movie_node, rating) list).
        2. Get movie recommendations, R ((movie_node, average_weight) list), based on M, for each movie in L.
        3. Sort R and L by average_weight/rating, then discard movie_node from both.
        5. Compute and store spearman rank correlation between R and L. If correlation has p>0.05, it's assumed to be 0.
        
    Output average rank correlation for all users.

In [30]:
# Testing p-value correction
correlations = [0.0001, 0.001, 0.01, 0.02, 0.03, 0.04, 0.05, 0.1]
_, corrected_correlations, _, _ = multipletests(correlations, method='fdr_bh')

# Print the original and corrected correlations
print("Original Correlations:", correlations)
print("Corrected Correlations:", corrected_correlations)

Original Correlations: [0.0001, 0.001, 0.01, 0.02, 0.03, 0.04, 0.05, 0.1]
Corrected Correlations: [0.0008     0.004      0.02666667 0.04       0.048      0.05333333
 0.05714286 0.1       ]


In [7]:
def evaluate(movie_graph, k=3, G=G, seed=None, default_rating='zero'):
    """
    Cross-validation of movie recommendations from graph with empirical ratings of users.
    
    parameters:
        movie_graph (nx.Graph or nx.DiGraph) graph from which to recommend movies by highest weight neighbors
        k (int) number of liked movies to sample from each user for recommendation
        G (nx.Graph) bipartite graph of users and movies
        seed (int) enables reproducibility of evaluation randomness
        default_rating (str) determines for each user the assumed rating of non-rated movies. Can be 'zero' or 'mean' (user's average rating).
    returns:
        average_correlation 
    """

    # Validating and setting parameters
    if type(movie_graph) not in [nx.Graph, nx.DiGraph] or type(G) not in [nx.Graph, nx.DiGraph] or type(k) != int or type(seed) != int:
        raise TypeError("Called evaluate() with argument of wrong type.")
    if default_rating not in ['zero', 'mean']:
        raise ValueError("default_rating argument must be 'zero' or 'mean'. Default is 'zero'.")
    if default_rating == 'zero':
            default_rating = 0
    if seed:
        random.seed(seed)

    # Getting users and movies from bipartite graph (this could be moved outside evaluate() to save time)
    user_nodes, movie_nodes = nx.algorithms.bipartite.basic.sets(G)

    # Correlation (and p-value) between each user's ratings and recommendations
    correlations = []  
    p_values = []
    
    # Proportion of correlations which have p>0.05
    not_significant_correlations = 0

    ##### For each user #####
    for user_node in user_nodes:

        ##### 1. Sample k liked (highest rated) movies #####
        
        # All rated movies and ratings of user
        movie_rating_tuples = [(movie_node,attr_dict['weight']) for movie_node, attr_dict in dict(G[user_node]).items()]  

        # Determining default rating

        if default_rating == 'mean':
            ratings = [weight for movie_node, weight in movie_rating_tuples]
            default_rating = np.mean(ratings)

        # Adding not rated movies with rating default_rating
        not_rated_movies = [movie for movie in movie_nodes if movie not in rated_movies]

        
        # Movies are shuffled, to randomize order of movies with the same rating
        random.shuffle(movie_rating_tuples)  
        
        # Movies are sorted by rating
        movie_rating_tuples = sorted(movie_rating_tuples, reverse=True, key=lambda x:x[1])
        rated_movies = [movie_node for movie_node, weight in movie_rating_tuples]

        
            
        
        # k highest rated movies are sampled
        k_most_liked_movie_nodes = rated_movies[:k]
        
        # Removing sampled nodes from liked movies
        rated_movies = rated_movies[k:]
        
        
        ##### 2. Get movie recommendations #####
        
        recommended_movie_nodes = evaluation_recommendation(movie_graph, k_most_liked_movie_nodes)

        # Extending recommendations with missing nodes (this is necessary in simple weights, since not all movies are connected)
        if len(recommended_movie_nodes) < len(movie_graph.nodes):
            missing_nodes = [node for node in movie_graph.nodes if node not in recommended_movie_nodes]
            random.shuffle(missing_nodes)  # randomizing order of non-recommended nodes
            recommended_movie_nodes.extend(missing_nodes)
        
        
        
        assert len(liked_movie_nodes) == len(recommended_movie_nodes), f"Lengths don't match: {len(liked_movie_nodes)} != {len(recommended_movie_nodes)}."
        assert set(liked_movie_nodes) == set(recommended_movie_nodes), "Node sets don't match."

        
        # ##### 3. Sort R and L by node #####
        # liked_movie_nodes = sorted(liked_movie_nodes, key=lambda x:x[0])
        # recommended_movie_nodes = sorted(recommended_movie_nodes, key=lambda x:x[0])

        # Considering only top k nodes
        # liked_movie_nodes = liked_movie_nodes[:k]
        # recommended_movie_nodes = recommended_movie_nodes[:k]
        
        ##### 4. Compute and store spearman rank correlation (and p value) between R and L.
        # Null hypothesis is that the correlation is not positive, alternative hypothesis is that correlation is positive.
        # Because some people have rated few movies, we use permutation test like in 'Examples' at https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html
        
        ref = stats.permutation_test((x,), statistic, alternative='greater', permutation_type='pairings')
        
        r, p = spearmanr(liked_movie_nodes, recommended_movie_nodes, alternative='greater')
        correlations.append(r)
        p_values.append(p)
        
    
    _, corrected_p_values, _, _ = multipletests(p_values, method='fdr_bh')  # Using Benjamini/Hochberg FDR correction (because we test many pre-detemined hypotheses) 

    # if p>0.05, correlation is set to 0
    for i, p_value in enumerate(corrected_p_values):
        if p_value>0.05:
            correlations[i] = 0

    # Computing and printing result
    mean_correlation = np.mean(correlations)
    std_correlation = np.std(correlations)
    non_significant_proportion = np.mean(correlations==0)
    print(f"Average correlation ± std: {mean_correlation}±{std_correlation}, non-significant: {non_significant_proportion}")
 
def statistic(x):  # explore all possible pairings by permuting `x`
    rs = stats.spearmanr(x, y).statistic  # ignore pvalue
    transformed = rs * np.sqrt(dof / ((rs+1.0)*(1.0-rs)))
    return transformed
    


In [8]:
# evaluate(simple_weights_movies, k=3, seed=0)

AssertionError: Lengths don't match: 21 != 1682.

In [54]:
r, p = spearmanr([1,2,3,4,5,6,7,8,9],[2,3,1,4,5,6,7,8,9], alternative='greater')
r, p

(0.9500000000000001, 4.381261982543088e-05)

In [49]:
correlations = [0, 0.1, 0.5, 0.3, 0.3, 0.0]

mean_correlation = np.mean(correlations)
std_correlation = np.std(correlations)
non_significant_proportion = np.mean(np.array(correlations)==0)
print(f"Average correlation ± std: {mean_correlation:.3f} ± {std_correlation:.3f}, {non_significant_proportion:.0%} non-significant.")

Average correlation ± std: 0.200 ± 0.183, 33% non-significant.
