In [37]:
import numpy as np
import pandas as pd
import scipy.sparse as sp
from collections import Counter
import random

# Generating rating data randomly

In [38]:
SEED = 42                   # Fixed seed so the notebook reproduces on Restart & Run All
np.random.seed(SEED)        # Global NumPy generator: used by sp.rand below (no random_state given)
random.seed(SEED)           # Seed the stdlib generator as well, since the notebook imports `random`
n_users = 50
n_items = 100
sparsity = 0.9  # 90% of entries empty

# Ratings go from 0 to 5, where 0 means it's not rated
ratings_sparse = (sp.rand(n_users, n_items, density = 1-sparsity)  # Uniformly distributed values.
                  .multiply(5)                                     # From 0-1 to 0-5 floats
                  .tocsr())                                        # Sparse format

ratings_sparse.data = np.ceil(ratings_sparse.data)                 # Round stored values up to integer numbers

print("Shape: ", ratings_sparse.shape)
print("Stored ratings: ", ratings_sparse.nnz)                      # Number of stored values, including explicit zeros.

# Dense users x items frame; kept under its own name so `ratings` is always a DataFrame
ratings = pd.DataFrame(ratings_sparse.toarray())
ratings.head(10)                                                   # Preview only instead of dumping every row

Shape:  (50, 100)
Shape:  500


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0
8,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
9,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Random walker

In [39]:
def random_walk_recomendation(user_id, ratings, r = 10, max_steps = None):
    '''
    Run one random walk over the user-item ratings and collect items that the
    starting user has not rated yet.

    Each step picks an item of the current user with probability proportional
    to its rating, then jumps to a uniformly chosen user who rated that item.

    user_id -> User where to start the random walk (a label of ratings.index)
    ratings -> Matrix ratings in a df format (rows are users, columns are items,
               0 means not rated)
    r -> Number of item recommendations per walk
    max_steps -> Upper bound on the number of walk steps. None means 100 * r.
                 The walk stops at this bound even when fewer than r items were
                 found, which guarantees termination.

    Returns the recommended item labels in the order they were discovered. The
    list holds at most r items and is empty when the starting user has no
    ratings. All randomness comes from np.random, so np.random.seed makes the
    walk reproducible.
    '''
    if max_steps is None:
        max_steps = 100 * r

    walk_recommendations = []
    current_user_id = user_id

    # Loop invariant: the starting user's row decides which items are "unrated"
    start_user_ratings = ratings.loc[user_id, :]

    steps = 0
    while len(walk_recommendations) < r and steps < max_steps:
        steps += 1

        # Normalizing current user rates
        user_ratings = ratings.loc[current_user_id, :]
        total_rating = user_ratings.sum()
        if not total_rating > 0:
            # No ratings to sample from: dividing would give NaN probabilities
            # and np.random.choice would raise. Stop the walk instead.
            break
        norm_user_ratings = user_ratings / total_rating

        # Choose an item based on normalized ratings
        current_item = np.random.choice(norm_user_ratings.index, p=norm_user_ratings.values)

        # Add it to our recommendation list if not rated nor already recommended in this walk
        if start_user_ratings.loc[current_item] == 0 and current_item not in walk_recommendations:
            walk_recommendations.append(current_item)

        # Filter the users who rated this item (index labels only, no frame copy)
        users_who_rated = ratings.index[(ratings[current_item] > 0).to_numpy()]

        # If none rated, rerun to choose another item.
        # The step bound above keeps this from looping forever.
        if len(users_who_rated) == 0:
            continue

        # Choose randomly a new user from those that rated this item.
        # Drawing a position keeps the original label type and uses the same
        # generator as the item choice.
        # TODO Adopt weights to choose
        current_user_id = users_who_rated[np.random.randint(len(users_who_rated))]

    return walk_recommendations

def get_top_N_recommendations(user_id, N, ratings, n_walks = 1000, r = 10):
    '''
    Aggregate many random walks into a ranked list of item recommendations.

    user_id -> User to recommend for; every walk starts at this user
    N -> Number of top items to return
    ratings -> Matrix ratings in a df format, passed through to each walk
    n_walks -> Number of independent random walks to run
    r -> Number of item recommendations requested from each walk

    Returns a tuple (top_N_recommendations, recommendations_frequency):
    the N most frequently recommended items in descending order of frequency,
    and a Counter mapping every recommended item to the number of times the
    walks returned it.
    '''
    recommendations_frequency = Counter()

    # Run the walks and count each recommended item as it arrives
    for _ in range(n_walks):
        recommendations_frequency.update(random_walk_recomendation(user_id, ratings, r = r))

    # Get top N
    top_N_recommendations = [item for item, _ in recommendations_frequency.most_common(N)]

    return top_N_recommendations, recommendations_frequency


In [40]:
# Try the recommender: top 5 items for user 1
top_N, rf = get_top_N_recommendations(1, 5, ratings)
top_N

[71, 0, 78, 1, 28]

# Pixie extra implementations
- Biasing the Pixie Random Walk
- Multiple Query Pins with Weights
- Multi-hit Booster
- Early Stopping