In [None]:
#COLLABORATIVE BPR ALS FILTERING CELL

#CALCULATION OF CSR MATRIX
#CSR MATRIX CREATION FOR IMPLICIT RATINGS
import implicit
import random
from scipy.sparse import csr_matrix

# One entry per distinct (user, book) pair found in the training set; every
# observed pair is stored with an implicit rating of 1.
users_books = df_train.groupby(by=['person_id','title']).apply(lambda x: 1).to_dict()

# Index maps: users -> matrix rows, books -> matrix columns, numbered in the
# order the values are returned by unique().
map_u = {person: position for position, person in enumerate(df_train['person_id'].unique())}
map_l = {title: position for position, title in enumerate(df_train['title'].unique())}

# Matrix dimensions: number of distinct users / distinct books
nu = len(map_u)
nl = len(map_l)

# Split the (user, book) keys into two parallel sequences
row, col = zip(*users_books)  # row -> users, col -> books

# Translate ids/titles into matrix coordinates
row_idx = list(map(map_u.__getitem__, row))
col_idx = list(map(map_l.__getitem__, col))
data = np.fromiter(users_books.values(), dtype=np.float32, count=len(users_books))

# Sparse user x book interaction matrix
rating_matrix = csr_matrix((data, (row_idx, col_idx)), shape=(nu, nl))

#BPR and ALS models are created with Implicit in the evaluation cells below

In [None]:
#BPR / ALS EVALUATION
#N is intended as the number of considered users (the function below currently does not use it)

#RETRIEVES THE AVERAGE HIT USERS, THE AVERAGE HIT RECOMMENDATIONS AND THE HIT GENRES
# Candidate latent-factor sizes. NOTE: the training loop in this cell iterates
# over [1] and uses fixed factor counts, so this list is not read here.
possible_latent_factors = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

def evaluate_users_recomms_bpr(dataset, csr_matrix, mapping_users, mapping_books, N, number_of_validations, als_or_bpr):
    """Measure how often a model's top-20 recommendations hit books a user has in ``dataset``.

    For every user in ``dataset`` the selected model produces 20 recommendations.
    A "hit" is a recommended title that also appears among that user's titles in
    ``dataset``. Users that have no entry in ``mapping_users`` cannot be scored
    by the model and are skipped (they do not contribute to the averages).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Interactions with ``person_id`` and ``title`` columns. The ``genre``
        column is read (and evaluated as a Python literal string) only for
        books that were hit.
    csr_matrix : sparse matrix
        Interaction matrix indexed by the user indices of ``mapping_users``;
        row ``u`` is handed to ``recommend``. Note that this parameter name
        shadows ``scipy.sparse.csr_matrix`` inside the function.
    mapping_users : dict
        person_id -> row index.
    mapping_books : dict
        title -> column index (inverted here to turn recommended ids into titles).
    N : int
        Currently unused: the recommendation list length is fixed at 20. Kept
        so existing calls keep working.
    number_of_validations : int
        How many times the full pass over all users is repeated.
    als_or_bpr : {'als', 'bpr'}
        Selects the module-level ``model_als`` or ``model_bpr``.

    Returns
    -------
    tuple
        ``(mean hits per user, fraction of users with at least one hit,
        genre -> hit count dict)``. The two means are averaged over the
        validation passes; the genre counts accumulate across all passes.

    Raises
    ------
    ValueError
        If ``als_or_bpr`` is neither ``'als'`` nor ``'bpr'``.
    """
    # Resolve the model once; only the requested global is looked up.
    if als_or_bpr == 'bpr':
        model = model_bpr
    elif als_or_bpr == 'als':
        model = model_als
    else:
        raise ValueError(f"als_or_bpr must be 'als' or 'bpr', got {als_or_bpr!r}")

    list_users = dataset['person_id'].unique()
    user_genre_counter = {}
    inverse_mapping_books = {v: k for k, v in mapping_books.items()}

    # Group the titles per user once instead of re-filtering the frame for every user.
    titles_by_user = {
        person: np.asarray(titles)
        for person, titles in dataset.groupby('person_id', sort=False)['title']
    }
    # Parsed genres per hit book, filled lazily so each book is looked up once.
    genres_by_book = {}

    total_average_users = []
    total_average_recomms = []
    for i in range(0, number_of_validations):
        recomms = []
        users = []
        for index, user in enumerate(list_users):
            if index % 10000 == 0:
                print(index)

            read_book_list = titles_by_user.get(user)
            if read_book_list is None or read_book_list.size == 0:
                continue

            user_idx = mapping_users.get(user)
            if user_idx is None:
                # User is absent from the mapping, so the model has no row for them.
                continue

            recommendations = model.recommend(user_idx, csr_matrix[user_idx], N=20)
            recomm_list_titles = np.asarray([inverse_mapping_books[item] for item in recommendations[0]])

            intersection = np.intersect1d(read_book_list, recomm_list_titles)

            # Count hits by size; truthiness of the titles themselves is irrelevant.
            recomm_count = intersection.size
            user_flag = 1 if recomm_count > 0 else 0

            for book in intersection:
                if book not in genres_by_book:
                    genre_book = dataset.loc[dataset['title'] == book, 'genre'].to_list()[0]
                    # SECURITY NOTE(review): eval() executes arbitrary code found in the
                    # genre column. If that column is not fully trusted, switch to
                    # ast.literal_eval after confirming the stored format.
                    genres_by_book[book] = list(eval(genre_book))
                for genre in genres_by_book[book]:
                    user_genre_counter[genre] = user_genre_counter.get(genre, 0) + 1

            recomms.append(recomm_count)
            users.append(user_flag)
        recomms = np.mean(np.asarray(recomms))
        users = np.mean(np.asarray(users))
        total_average_users.append(users)
        total_average_recomms.append(recomms)
        print(f"Average users for iteration {i} was {users}")
        print(f"Average recomms for iteration {i} was {recomms}")
    return np.mean(np.asarray(total_average_recomms)), np.mean(np.asarray(total_average_users)), user_genre_counter

# Metric histories; one value is appended per trained configuration.
average_recomm_values_factors_bpr, average_users_values_factors_bpr = [], []
average_recomm_values_factors_als, average_users_values_factors_als = [], []

for latent in [1]:
    # BPR is built with 100 latent factors, ALS with 30; both run 300 iterations.
    model_bpr = implicit.bpr.BayesianPersonalizedRanking(factors=100, iterations=300)
    model_als = implicit.als.AlternatingLeastSquares(factors=30, iterations=300)

    # Fit both models on the sparse interaction matrix (BPR first, then ALS).
    model_bpr.fit(rating_matrix, show_progress=False)
    model_als.fit(rating_matrix, show_progress=False)

    # Score ALS, then BPR, against the test split.
    (avg_recomms_als,
     avg_users_als,
     user_genre_counter_als) = evaluate_users_recomms_bpr(
        df_test, rating_matrix, map_u, map_l, 1000, 1, 'als')
    (avg_recomms_bpr,
     avg_users_bpr,
     user_genre_counter_bpr) = evaluate_users_recomms_bpr(
        df_test, rating_matrix, map_u, map_l, 1000, 1, 'bpr')

    # Record the results of this configuration.
    average_users_values_factors_als += [avg_users_als]
    average_users_values_factors_bpr += [avg_users_bpr]
    average_recomm_values_factors_als += [avg_recomms_als]
    average_recomm_values_factors_bpr += [avg_recomms_bpr]

print(average_users_values_factors_als, average_users_values_factors_bpr, average_recomm_values_factors_als, average_recomm_values_factors_bpr)
print(user_genre_counter_bpr, user_genre_counter_als, sep='\n')

In [None]:
#AVERAGE RANK ALS BPR 
def avg_rank_als_bpr(dataset, csr_matrix, mapping_users, mapping_books, N, number_of_validations, als_or_bpr):
    """Average position of the first hit in each user's top-300 recommendation list.

    For every user in ``dataset`` the selected model produces 300
    recommendations. The user's rank is the 0-based position of the first
    recommended title that is also among the user's titles in ``dataset``;
    users without any hit are assigned 300. Users that have no entry in
    ``mapping_users`` cannot be scored by the model and are skipped.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Interactions with ``person_id`` and ``title`` columns.
    csr_matrix : sparse matrix
        Interaction matrix indexed by the user indices of ``mapping_users``;
        row ``u`` is handed to ``recommend``. Note that this parameter name
        shadows ``scipy.sparse.csr_matrix`` inside the function.
    mapping_users : dict
        person_id -> row index.
    mapping_books : dict
        title -> column index (inverted here to turn recommended ids into titles).
    N : int
        Currently unused: the recommendation list length is fixed at 300. Kept
        so existing calls keep working.
    number_of_validations : int
        How many times the full pass over all users is repeated; the result is
        the mean over the passes.
    als_or_bpr : {'als', 'bpr'}
        Selects the module-level ``model_als`` or ``model_bpr``.

    Returns
    -------
    float
        Mean rank over all scored users, averaged over the validation passes.

    Raises
    ------
    ValueError
        If ``als_or_bpr`` is neither ``'als'`` nor ``'bpr'``.
    """
    # Resolve the model once; only the requested global is looked up.
    if als_or_bpr == 'bpr':
        model = model_bpr
    elif als_or_bpr == 'als':
        model = model_als
    else:
        raise ValueError(f"als_or_bpr must be 'als' or 'bpr', got {als_or_bpr!r}")

    # Length of the recommendation list; also the rank assigned when nothing hits.
    list_length = 300

    list_users = dataset['person_id'].unique().tolist()
    inverse_mapping_books = {v: k for k, v in mapping_books.items()}

    # Group the titles per user once instead of re-filtering the frame for every user.
    titles_by_user = {
        person: np.asarray(titles)
        for person, titles in dataset.groupby('person_id', sort=False)['title']
    }

    total_average_rank = []
    for i in range(0, number_of_validations):
        avg_rank = []
        for index, user in enumerate(list_users):
            if index % 10000 == 0:
                print(index)

            read_book_list = titles_by_user.get(user)
            user_idx = mapping_users.get(user)
            if read_book_list is None or user_idx is None:
                # No titles to match, or the model has no row for this user.
                continue

            recommendations = model.recommend(user_idx, csr_matrix[user_idx], N=list_length)
            recomm_list_titles = np.asarray([inverse_mapping_books[item] for item in recommendations[0]])

            # Positions (0-based) of recommended titles the user actually has.
            hit_positions = np.flatnonzero(np.isin(recomm_list_titles, read_book_list))

            # Test for emptiness explicitly: a hit at position 0 is the best
            # possible rank and must not be mistaken for "no hit".
            if hit_positions.size > 0:
                ranks_min = hit_positions.min()
            else:
                ranks_min = list_length
            avg_rank.append(ranks_min)
        ranks_total = np.mean(np.asarray(avg_rank))
        total_average_rank.append(ranks_total)
        print(f'Average rank for interaction {i} was {ranks_total}')
    # Return only after every validation pass has run.
    return np.mean(np.asarray(total_average_rank))

# Average-rank histories; one value is appended per trained configuration.
average_ranks_als = []
average_ranks_bpr = []
possible_latent_factors = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

for latent in [1]:
    # BPR is built with 100 latent factors, ALS with 30; both run 300 iterations.
    model_bpr = implicit.bpr.BayesianPersonalizedRanking(factors=100, iterations=300)
    model_als = implicit.als.AlternatingLeastSquares(factors=30, iterations=300)

    #train the model on a sparse matrix of item/user/confidence weights
    model_bpr.fit(rating_matrix, show_progress=False)
    model_als.fit(rating_matrix, show_progress=False)
    avg_ranks_bpr = avg_rank_als_bpr(df_test, rating_matrix, map_u, map_l, 1000, 1, 'bpr')
    avg_ranks_als = avg_rank_als_bpr(df_test, rating_matrix, map_u, map_l, 1000, 1, 'als')
    # Each history receives its own model's result (they used to be crossed:
    # the BPR value went into the ALS list and vice versa).
    average_ranks_als.append(avg_ranks_als)
    average_ranks_bpr.append(avg_ranks_bpr)

print(average_ranks_als, average_ranks_bpr)

In [None]:
#PLOT GRAPH
# Master switch for the whole plotting cell; the nested `if False:` blocks
# toggle the individual figures.
if False:
    #ALS AND BPR USERS AND RECOMM PLOT
    import matplotlib.pyplot as plt

    # Recorded results per latent-factor setting. Defined at this level so each
    # figure toggle below can be enabled independently of the others.
    possible_latent_factors = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    values_als_users = [0.41711253592748176, 0.42668582799027194, 0.43325226619500334, 0.4298474463851426, 0.4253813840371435] + [0.42443068759672786, 0.4191244749060358, 0.4175326110988282, 0.41322131328764095, 0.4100818041123148]
    values_bpr_users = [0.32363475569312405, 0.3750608003537475, 0.3997789078045545, 0.41512270616847224, 0.4239442847667477] + [0.4271059031616184, 0.4309971258014592, 0.4297590095069644, 0.4327216449259341, 0.43420296263541897]
    values_als_recomm = [0.6344019456113199, 0.657594516913553, 0.6764978996241433, 0.6747070528410347, 0.6734247181074509] + [0.6713464514702631, 0.6633208047755914, 0.6510944063674552, 0.6442405483086447, 0.640592527083794]
    values_bpr_recomm = [0.46082246296705726, 0.5471368560689808, 0.586292283882379, 0.6218881273491046, 0.6396197214238337] + [0.6448374972363475, 0.6574397523767411, 0.6565996020340482, 0.6621047977006411, 0.6651779792173337]

    if False:
        #PLOT AVERAGE HIT USERS AND RECOMM. TEST BPR/ALS
        X_axis = np.arange(2)
        plt.figure()
        plt.bar(X_axis - 0.2, [0.14790209141351346, 0.2607679032048546], 0.4, label = 'average hit users')
        plt.bar(X_axis + 0.2, [0.1724720247926216, 0.3449811642882328], 0.4, label = 'average hit recomm.')
        plt.xticks(X_axis, ['ALS', 'BPR'])
        plt.ylabel('Metric value')
        plt.legend()
        plt.show()
        plt.close()

        #HIT GENRES TEST
        plt.figure()
        # Use one shared genre axis (union of both counters, first-seen order) so
        # the ALS and BPR bars for a genre sit under the same tick label; a genre
        # missing from one counter is drawn as 0.
        genre_names = list(dict.fromkeys([*user_genre_counter_als, *user_genre_counter_bpr]))
        X_axis_2 = np.arange(len(genre_names))
        values = [user_genre_counter_als.get(genre, 0) for genre in genre_names]
        values_2 = [user_genre_counter_bpr.get(genre, 0) for genre in genre_names]
        plt.bar(X_axis_2 - 0.2, values, 0.4, label = 'ALS')
        plt.bar(X_axis_2 + 0.2, values_2, 0.4, label = 'BPR')
        plt.ylabel('Hit count')
        plt.legend()
        plt.xticks(X_axis_2, genre_names, rotation='vertical')
        plt.show()
        plt.close()


    if False:
        #AVERAGE RANK TEST
        plt.figure()
        plt.bar(['BPR', 'ALS'], [121.81826785681147, 193.67754746042644])
        plt.ylabel('Average rank')
        plt.show()
        plt.close()
    if False:
        #ALS AND BPR USERS AND RECOMM PLOT
        plt.figure()
        plt.plot(possible_latent_factors, values_als_users, label='ALS average hit users')
        plt.plot(possible_latent_factors, values_bpr_users, label='BPR average hit users')
        plt.plot(possible_latent_factors, values_als_recomm, label='ALS average hit recomm.')
        plt.plot(possible_latent_factors, values_bpr_recomm, label='BPR average hit recomm.')
        plt.xlabel('Latent factors')
        plt.legend()
        plt.show()
        plt.close()

    average_rank_bpr = [98.48003537475127, 98.66002653106345, 99.24656201636083, 101.1456113199204, 103.2809418527526, 104.99498120716339, 107.49560026531063, 108.96522219765642, 110.51591863807208, 112.92184390891002]
    average_rank_als = [106.75332743754146, 100.23829316825116, 98.73736458103029, 99.2442184390891, 99.82252929471589, 99.53128454565554, 99.70504090205615, 100.38971921291179, 100.57176652664161, 100.68912226398407]

    # Labels are attached to each curve so the legend cannot drift from the plot
    # order (the legend used to list ['ALS', 'BPR'] while the BPR curve was drawn first).
    # NOTE(review): confirm the two hard-coded lists above were recorded under
    # the right model names.
    plt.figure()
    plt.plot(possible_latent_factors, average_rank_bpr, label='BPR')
    plt.plot(possible_latent_factors, average_rank_als, label='ALS')
    plt.xlabel('Latent factors')
    plt.ylabel('Average rank')
    plt.legend()
    plt.show()
    plt.close()