In [1]:
import networkx as nx
from networkx.algorithms import bipartite
import numpy as np
import random
from scipy.stats import spearmanr

# Loading data pathes and I/O functions from script
from scripts.io import load_movie_titles, load_raw_bipartite, save_projection, load_projection, save_edgelist, projection_path

# Loading reccomendation function
from scripts.recommend import evaluation_recommendation, sort_average_weight

## Loading dicts and graphs

In [2]:
G = load_raw_bipartite("full_bipartite.p")

simple_weights_movies = load_projection("simple_weights_movies.p")
rating_allocation_movies = load_projection("rating_allocation_movies.p")

Graph loaded.
Projection loaded.
Projection loaded.


# Evaluation
    Cross-validation of movie recommendations from graph with empirical ratings by users.

    For each user:
        1. Sample k liked (highest rated) movies, M, from list of liked movies, L ((movie_node, weight) list).
        2. Get movie recommendations, R ((movie_node, weight) list), for all movie nodes, based on M.
        3. Sort R and L by movie_node, then discard movie_node from both.
        5. Compute and store spearman rank correlation between R and L.
        
    Output average rank correlation for all users.

In [3]:
def evaluate(movie_graph, k=3, G=G, seed=None):
    """
    Cross-validation of movie recommendations from graph with empirical ratings of users.
    
    parameters:
        movie_graph (nx.Graph or nx.DiGraph) graph from which to recommend movies by highest weight neighbors
        k (int) number of liked movies to sample from each user for recommendation
        G (nx.Graph) bipartite graph of users and movies
        seed (int) enables reproducibility of evaluation randomness
    
    returns:
        average_correlation 
    """
  
    if seed:
        random.seed(seed)

    user_nodes, movie_nodes = nx.algorithms.bipartite.basic.sets(G)

    correlations = []  # Correlation of each user's ratings and recommendations

    # Proportion of correlations which have p>0.05
    not_significant_correlations = 0

    ##### For each user #####
    for user_node in user_nodes:

        ##### 1. Sample k liked (highest rated) movies #####
        
        # All rated movies and ratings of user
        movie_rating_tuples = [(movie_node,attr_dict['weight']) for movie_node, attr_dict in dict(G[user_node]).items()]  
        
        # Movies are shuffled, to randomize order of movies with the same rating
        random.shuffle(movie_rating_tuples)  
        
        # Movies are sorted by rating
        movie_rating_tuples = sorted(movie_rating_tuples, reverse=True, key=lambda x:x[1])  

        # k highest rated movies are sampled
        liked_movie_nodes = [movie_node for movie_node, weight in movie_rating_tuples]
        k_most_liked_movie_nodes = liked_movie_nodes[:k]
        
        ##### 2. Get movie recommendations #####
        
        recommended_movie_nodes = evaluation_recommendation(movie_graph, k_most_liked_movie_nodes)

        # Extending recommendations with missing nodes (this is necessary in simple weights, since not all movies are connected)
        if len(recommended_movie_nodes) < len(movie_graph.nodes):
            missing_nodes = [node for node in movie_graph.nodes if node not in recommended_movie_nodes]
            random.shuffle(missing_nodes)  # randomizing order 
            recommended_movie_nodes.extend(missing_nodes)
        

        
        # Removing sampled nodes from liked movies
        liked_movie_nodes = [node for node in liked_movie_nodes if node not in k_most_liked_movie_nodes]

        
        
        assert len(liked_movie_nodes) == len(recommended_movie_nodes), f"{len(liked_movie_nodes)}, {len(recommended_movie_nodes)}"
    
        ##### 3. Sort R and L by node #####
        liked_movie_nodes = sorted(liked_movie_nodes, key=lambda x:x[0])
        recommended_movie_nodes = sorted(recommended_movie_nodes, key=lambda x:x[0])

        # Considering only top k nodes
        # liked_movie_nodes = liked_movie_nodes[:k]
        # recommended_movie_nodes = recommended_movie_nodes[:k]
        # assert len(liked_movie_nodes) == len(recommended_movie_nodes), f"{len(liked_movie_nodes)}, {len(recommended_movie_nodes)}"

        
        ##### 4. Compute and store spearman rank correlation (and p value) between R and L.
        r, _ = spearmanr(liked_movie_nodes, recommended_movie_nodes)
        if p > 0.05:
            r = 0
        correlations.append(r)

    ##### Print average correlation between recommendations and preferences.
    print(f"avg. correlation: {np.mean(correlations)}, sd: {np.std(correlations)}, not-significant: {np.mean(correlations==0)}")

In [4]:
evaluate(simple_weights_movies, k=3, seed=0)

AssertionError: 21, 1655

In [16]:
user_nodes, movie_nodes = nx.algorithms.bipartite.basic.sets(G)
movie_graph = simple_weights_movies
k_most_liked_movie_nodes = [1,2,3]

recommended_movie_nodes = evaluation_recommendation(movie_graph, k_most_liked_movie_nodes)

if len(recommended_movie_nodes) < len(movie_graph.nodes):
    missing_nodes = [node for node in movie_graph.nodes if node not in recommended_movie_nodes]
    random.shuffle(missing_nodes)  # randomizing order 
    recommended_movie_nodes.extend(missing_nodes)

assert len(recommended_movie_nodes) == len(movie_graph.nodes)
assert set(recommended_movie_nodes) == set(movie_graph.nodes)
len(recommended_movie_nodes), len(movie_graph.nodes)

(1682, 1682)