In [75]:
import networkx as nx
from networkx.algorithms import bipartite
import numpy as np
import random
from scipy.stats import spearmanr

# Load data paths and I/O functions from the project scripts
from scripts.io import load_movie_titles, load_raw_bipartite, save_projection, load_projection, save_edgelist, projection_path

# Load recommendation functions
from scripts.recommend import k_recommend_from_list, sort_average_weight

## Loading dicts and graphs

In [4]:
# Movie title lookups: load_movie_titles returns two objects, unpacked here.
# Presumably a title -> node mapping and a node -> title mapping, judging by
# the names — TODO confirm against scripts.io.
title_dict, node_dict = load_movie_titles("movie-titles.txt")

# Raw bipartite graph; this is also the object captured as the default `G`
# argument of evaluate() when that cell is run.
G = load_raw_bipartite("full_bipartite.p")

# Two movie projections loaded from disk (presumably saved earlier with
# save_projection — verify). Each one can be passed to evaluate() as movie_graph.
simple_weights_movies = load_projection("simple_weights_movies.p")
rating_allocation_movies = load_projection("rating_allocation_movies.p")

Movie titles loaded.
Graph loaded.
Projection loaded.
Projection loaded.


# Evaluation
    Cross-validation of movie recommendations from graph with empirical ratings by users.

    For each user:
        1. Sample k liked (highest rated) movies, M, from list of liked movies, L ((movie_node, weight) list).
        2. Get movie recommendations, R ((movie_node, weight) list), for all movie nodes, based on M.
        3. Sort R and L by movie_node, then discard movie_node from both.
        4. Compute and store Spearman rank correlation between R and L.
        
    Output average rank correlation for all users.

In [146]:
def evaluate(movie_graph, k=3, G=G, seed=None):
    """
    Cross-validation of movie recommendations from graph with empirical ratings of users.

    NOTE: work in progress. Steps 1 and 2 below are implemented; steps 3 and 4
    and the final averaging are still placeholders, so the function currently
    returns None.

    params:
        movie_graph (nx.Graph or nx.DiGraph) graph from which to recommend movies by highest weight neighbors
        k (int) number of liked movies to sample from each user for recommendation
        G (nx.Graph) bipartite graph of users and movies; every edge must carry a
            'weight' attribute (the rating). The default is the notebook-level G
            as it was when this cell was executed, so re-run this cell after
            reloading G.
        seed (optional) seed passed to random.seed for reproducibility. Any value
            other than None is applied, including 0.

    returns:
        None for now (intended: average_correlation over all users)
    """

    # Optional reproducibility of evaluation using random seed.
    # Compared against None (not truthiness) so that seed=0 is honoured too.
    if seed is not None:
        random.seed(seed)

    # NOTE(review): bipartite.sets() only returns the two sides of the
    # bipartition; this unpacking assumes the first side is the users — confirm
    # for G. movie_nodes is not used yet (presumably needed for step 3).
    user_nodes, movie_nodes = nx.algorithms.bipartite.basic.sets(G)

    # list of correlation of each user's ratings and recommendations
    # (never filled yet: step 4 is not implemented)
    correlations = []

    # For each user
    for user_node in user_nodes:

        ##### 1. Sample k liked (highest rated) movies, M, from list of liked movies, L ((movie_node, weight) list)

        # All rated movies and ratings of user
        movie_rating_tuples = [(movie_node, attr_dict['weight']) for movie_node, attr_dict in dict(G[user_node]).items()]

        # Movies are shuffled, to randomize order of movies with the same rating
        # (the sort below is stable, so ties keep this shuffled order)
        random.shuffle(movie_rating_tuples)

        # Movies are sorted by rating, highest first
        movie_rating_tuples = sorted(movie_rating_tuples, reverse=True, key=lambda x: x[1])

        # k highest rated movies are sampled (fewer if the user rated fewer than k)
        liked_movies = [movie_node for movie_node, weight in movie_rating_tuples[:k]]

        ##### 2. Get movie recommendations, R ((movie_node, weight) list), for all movie nodes, based on M.

        # TODO: the result is currently discarded; it has to be captured once
        # steps 3 and 4 are implemented.
        k_recommend_from_list(k, movie_graph, liked_movies)

        ##### 3. Sort R and L by movie_node, then discard movie_node from both.
        # TODO: not implemented.

        ##### 4. Compute and store spearman rank correlation between R and L.
        # TODO: not implemented.

    ##### Return average correlation between recommendations and preferences.
    # TODO: not implemented; the function falls through and returns None.

        



In [96]:
# Scratch cell: (neighbour_node, rating) pairs for node 61 of the bipartite
# graph (presumably a user node — verify).
movie_ratings = [
    (neighbour, edge_attrs['weight'])
    for neighbour, edge_attrs in dict(G[61]).items()
]
# rating_values = [r[1]['weight'] for r in movie_ratings]
# np.percentile(rating_values, 50)
# movie_ratings

In [143]:
# Same value as evaluate()'s default k; presumably the number of top-rated
# movies to sample in the scratch experiments below — TODO confirm.
k = 3


[(10645, 5), (10763, 5), (10862, 5)]

In [98]:
  # sort_average_weight
# sorted(movie_ratings, reverse=True, key=lambda x:x[1])
# movie_ratings