In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import os
from joblib import Parallel, delayed
# Shell out to `taskset -p 0xff <pid>` to set this kernel process's CPU affinity
# mask to 0xff (CPUs 0-7). `taskset` is a Linux utility, so this call only has an
# effect where that command exists.
# NOTE(review): presumably a workaround so the joblib workers used later are not
# pinned to a single core after the numeric imports above - confirm it is still needed.
# As the last expression of the cell, the exit status from os.system is displayed.
os.system("taskset -p 0xff %d" % os.getpid())

### Create the graph as dictionaries

In [None]:
# Load the triplet table. Later cells read the first three columns positionally
# as (user id, song id, listen count) and sort on a column named 'weight'.
# NOTE(review): presumably 'weight' is that third column - confirm against the CSV header.
df = pd.read_csv('./data/selected_triplet_new.csv') 
df.head()

In [None]:
# Display the rows ordered by descending 'weight'. sort_values returns a new
# DataFrame and the result is not assigned, so `df` keeps its original row order.
df.sort_values('weight', ascending=False)

In [None]:
# Adjacency dictionaries of the bipartite user-song graph:
#   user_dic[user_id][song_id] -> listen count
#   song_dic[song_id][user_id] -> the same listen count, indexed from the song side
# If a (user, song) pair occurs more than once, the last row wins.
user_dic = {}
song_dic = {}
for edge in df.values.tolist():
    user_id = edge[0]
    song_id = edge[1]
    listened_count = edge[2]

    # setdefault creates the inner dict the first time a user / song is seen.
    user_dic.setdefault(user_id, {})[song_id] = listened_count
    song_dic.setdefault(song_id, {})[user_id] = listened_count


In [None]:
# Print a single (user, songs) entry to sanity-check the structure built above.
for user in user_dic:
    songs = user_dic[user]
    print(user, songs)
    break

In [None]:
def cal_adj_weights(dic):
    '''
    Sum the edge weights adjacent to one node of the bipartite graph.

    dic: mapping of neighbour -> weight (the edges adjacent to a user or a song).
    Returns the total weight as a float; an empty mapping gives 0.0.
    '''
    weight_sum = 0.0
    # Only the weights matter here, the neighbour keys are ignored.
    for edge_weight in dic.values():
        weight_sum += edge_weight
    return weight_sum

def mean_ru(user_u):
    '''
    Average number of listens per song for user u.

    user_u: key into the module-level user_dic (song -> listen count for that user).
    Returns 1 / |S_u| * sum_s r_(u,s), i.e. the user's total listens divided by the
    number of distinct songs the user has listened to.
    Raises KeyError if user_u is not in user_dic.
    '''
    listened = user_dic[user_u]
    # total listens of u divided by |S_u|
    return cal_adj_weights(listened) / len(listened)


def rec_power_u_v(user_u, user_v):
    '''
    2-step random-walk recommendation power from user u to user v.

    The graph is bipartite, so v is always reached from u through a song s:
    P(u -> s -> v), summed over every song that both users listened to.

        P(step from user u to song s) = r_(u,s) / R_u
        P(step from song s to user v) = r_(v,s) / R_s

    where R_u is the total listen count of user u and R_s is the total listen
    count of song s. The value returned is

        (1 / R_u) * sum_s r_(v,s) * r_(u,s) / R_s

    user_u, user_v: keys into the module-level user_dic.
    Returns a float; 0.0 when the two users share no song.
    Raises KeyError if either user is not in user_dic.
    '''
    songs_u = user_dic[user_u]
    songs_v = user_dic[user_v]

    listen_count_u = cal_adj_weights(songs_u)  # R_u

    running_value = 0.0
    for s, user_u_listens_s in songs_u.items():  # r_{u,s}
        users_s = song_dic[s]
        if user_v not in users_s:
            continue
        # R_s is only needed for shared songs, so it is computed after the
        # membership test instead of once per song of u.
        listen_count_s = cal_adj_weights(users_s)  # R_s
        user_v_listens_s = songs_v[s]  # r_{v,s}
        running_value += user_v_listens_s * user_u_listens_s / listen_count_s
    return running_value / listen_count_u



def predict_listens(user_u, song_s, total_users):
    '''
    Predict the number of listens that user u will give to song s.

    The prediction is u's own mean listen count plus, for every user v in
    total_users who has listened to song_s, rec_power_u_v(u, v) times how far
    v's listen count for song_s lies from v's own mean.

    user_u: key into the module-level user_dic.
    song_s: key into the module-level song_dic.
    total_users: iterable of candidate users v; those without song_s are skipped.
    Returns the predicted listen count as a float.
    '''
    baseline = mean_ru(user_u)
    adjustment = 0.0
    for v in total_users:
        # Users that never listened to song_s contribute nothing.
        if v not in song_dic[song_s]:
            continue
        neighbour_mean = mean_ru(v)
        deviation = user_dic[v][song_s] - neighbour_mean
        adjustment += rec_power_u_v(user_u, v) * deviation
    return baseline + adjustment

In [None]:
# One prediction as a smoke test, with every known user as a candidate neighbour.
rating = predict_listens(
    user_u='a263000355e6a46de29ec637820771ac7620369f',
    song_s='SONSTND12AB018516E',
    total_users=list(user_dic),
)
print(rating)

In [None]:
def predict_wrapper(user_u, song_s, total_users):
    '''
    Run predict_listens and tag the result with its song.

    Returns a (song_s, predicted_listens) tuple so results gathered from several
    calls can still be matched to the song they belong to.
    '''
    return (song_s, predict_listens(user_u, song_s, total_users))

def song_wrapper(user, n, song_dic, user_dic, n_jobs=10):
    '''
    Parallel wrapper that predicts how often `user` will listen to each of n songs.

    user: the user the predictions are made for.
    n: number of songs to score - the first n keys of song_dic in iteration order.
    song_dic: mapping whose keys are the candidate song ids.
    user_dic: iterable of candidate neighbour users, forwarded to predict_wrapper
        as total_users.
    n_jobs: number of joblib workers (defaults to 10, the previously fixed value).

    Returns a list of (song_id, predicted_listens) tuples, one per scored song.
    '''
    song_dic_subset = list(song_dic)[:n]
    tasks = (
        delayed(predict_wrapper)(user_u=user, song_s=song, total_users=user_dic)
        for song in song_dic_subset
    )
    # Only the backend is passed: joblib ignores the `prefer` hint once `backend`
    # is given, so the threading backend is what actually runs the tasks.
    listens = Parallel(n_jobs=n_jobs, verbose=20, backend='threading')(tasks)
    return listens

In [None]:
# Score the first 100 songs for one user.
data = song_wrapper(
    user='a263000355e6a46de29ec637820771ac7620369f',
    n=100,
    song_dic=song_dic,
    user_dic=user_dic,
)

In [None]:
# Ascending by predicted listens, so the highest predictions are printed last.
print(sorted(data, key=lambda song_and_listens: song_and_listens[1]))