In [1]:
import pandas as pd
import numpy as np
from lenskit import batch, topn, util
from lenskit.algorithms import Recommender, als, user_knn, item_knn, svd, user_knn
from lenskit.algorithms.ranking import TopN
from lenskit.algorithms.basic import UnratedItemCandidateSelector, Popular
from lenskit import crossfold as cf

from collections import deque
import random
import gc

In [10]:
pd.set_option('mode.chained_assignment', None)

In [3]:
ratings=pd.read_parquet("Data/Experiment_Data/full_training_subset_of_select_tracks_experiment.parquet")
ratings.columns=['user', 'item', 'rating']

In [11]:
user_age_index_df=pd.read_parquet("Data/Experiment_Data/user_age_index_basic_for_experiment_updated.parquet")
user_age_index_df=user_age_index_df.reset_index()
artists_index_df=pd.read_parquet("Data/Experiment_Data/tracks_artist_index_for_experiment.parquet")

#Convert the user age and artist data into a lookup dictonary
user_age_index=dict(zip(list(user_age_index_df['user']), list(user_age_index_df['user_age'])))
artists_index=dict(zip(list(artists_index_df['item']), list(artists_index_df['artist'])))

In [5]:
user_counts=ratings.groupby('user').count().reset_index()
users_admited=user_counts[user_counts['item']>20]
users_admited=pd.DataFrame({'user':users_admited.user.unique()})
ratings=ratings.merge(users_admited,how='inner',on='user')

In [12]:
def user_similarity(user1,user2,items_per_user):
    user1_item_set=items_per_user[user1]
    user2_item_set=items_per_user[user2]
    return len(user1_item_set.intersection(user2_item_set))/len(user1_item_set.union(user2_item_set))
def claculate_similarities():
    age_ranges=[(10,20),(16,26),(46,56),(49,59),(49,55),(55,61)]  

    ratings_w_ages=ratings.merge(user_age_index_df,how='inner',on='user')

    user_items=ratings.groupby('user').item.unique()

    for i in user_items.index:
        user_items[i]=set(user_items[i])

    for age_range_start,age_range_end in age_ranges:

        similarity_sum_all_in_group=0
        similarity_sum_with_similar_in_group=0

        sim_user_number_sum=0

        similarity_sum_all_outof_group=0
        similarity_sum_with_similar_outof_group=0

        users_with_no_similarity_in_group=0
        users_with_no_similarity_outof_group=0
        users_with_no_similarity_general=0

        users_in_age_range=ratings_w_ages[(ratings_w_ages['user_age']>=age_range_start) & (ratings_w_ages['user_age']<=age_range_end)]
        users_in_age_range=users_in_age_range.user.unique()

        print('Age Range: ',age_range_start,'-',age_range_end)
        print('Number of users in age range: ',len(users_in_age_range))
        other_users=set(ratings_w_ages.user.unique()).difference(set(users_in_age_range))
        for user1 in users_in_age_range:

            #Test Within age group
            current_user_similarity_sum=0
            users_with_similarity=0
            for user2 in users_in_age_range:

                if(user1==user2):
                    continue

                similarity=user_similarity(user1,user2,user_items)

                current_user_similarity_sum=current_user_similarity_sum+similarity

                if similarity>0:
                    users_with_similarity=users_with_similarity+1

            similarity_sum_all_in_group=similarity_sum_all_in_group+(current_user_similarity_sum/len(users_in_age_range))
            similarity_sum_with_similar_in_group=similarity_sum_with_similar_in_group+(current_user_similarity_sum/users_with_similarity)

            sim_user_number_sum=sim_user_number_sum+users_with_similarity

            if current_user_similarity_sum==0:
                users_with_no_similarity_in_group=users_with_no_similarity_in_group+1

            current_user_similarity_sum_passed=current_user_similarity_sum


        print('Number of users in age range with no similarity to other users in their age group: ',users_with_no_similarity_in_group)
        print('Average similiarity of users between themselved within the age range: ',similarity_sum_all_in_group/len(users_in_age_range))
        print('Average similiarity of only similar users within the age range: ',similarity_sum_with_similar_in_group/(len(users_in_age_range)-users_with_no_similarity_in_group))
        print("The average number of users a users is similar with within the age group:",sim_user_number_sum/len(users_in_age_range))

  

In [8]:
claculate_similarities()
gc.collect()

Age Range:  10 - 20
Number of users in age range:  11069
Number of users in age range with no similarity to other users in their age group:  0
Average similiarity of users between themselved within the age range:  0.008431282034049657
Average similiarity of only similar users within the age range:  0.013672836159205488
The average number of users a users is similar with within the age group: 6489.645857801066
Age Range:  16 - 26
Number of users in age range:  26208
Number of users in age range with no similarity to other users in their age group:  0
Average similiarity of users between themselved within the age range:  0.00855732158513584
Average similiarity of only similar users within the age range:  0.01252244192007138
The average number of users a users is similar with within the age group: 16900.90544871795
Age Range:  46 - 56
Number of users in age range:  795
Number of users in age range with no similarity to other users in their age group:  0
Average similiarity of users betwee

0

In [13]:

def ajdust_diversity_user(userID, diversity, topN_list, n=100,k=50):
    
    similar_track_removal_probability=int(np.floor(diversity*100))
    #print(similar_track_removal_probability)
    artist_already_pressent=[]
    removed_tracks_indices=[]
    
    for i in range(0,len(topN_list)):
        candidate=topN_list.iloc[i]
        item=int(candidate['item'])
        rank=int(candidate['rank'])
        item_artist=artists_index[item]
        
        if (item_artist in artist_already_pressent):
            removal_chance=random.randrange(1,100)
            
            if(removal_chance<similar_track_removal_probability):
                removed_tracks_indices.append(i)
                #print(removal_chance, " removed artist ",item_artist)
        else:
            artist_already_pressent.append(item_artist)
            #print("New artist ",item_artist)
        
        if i == k:
            break
    
    topN_list_final=topN_list.drop(removed_tracks_indices).reset_index(drop=True)
    topN_list_final['rank']=list(range(1,len(topN_list_final)+1))
    
    topN_list_final=topN_list_final.head(k)
    
    if len(topN_list_final)>0:
        return len(artist_already_pressent)/len(topN_list_final), topN_list_final
    else:
        return 0, topN_list_final



def adjust_divesities(users, diversity, TopN_lists_persanolized, n=100,k=50):
    final_list=None
    set_list=True
    
    diversities=[]
    
    #users_processed=0
    
    for user in users:
        
        user_list=TopN_lists_persanolized[TopN_lists_persanolized['user']==user]
        user_list=user_list.reset_index(drop=True)
        
        diversitiy, user_adujusted_list=ajdust_diversity_user(user, diversity, user_list, n=n,k=k)
        diversities.append(diversitiy)
        if set_list:
            final_list=user_adujusted_list
            set_list=False
        else:
            final_list=final_list.append(user_adujusted_list)
        
    diversities=list(filter(lambda x: x != 0, diversities))       
    return sum(diversities)/len(diversities), final_list

In [14]:
def convert_to_DF(series):
    return pd.DataFrame(series).T

def generate_recommendations(user_set, algorithm, n=100):
    
    recommendations = batch.recommend(algorithm, user_set, n ,n_jobs=25)
        
    return recommendations

def evaluate_recomendations(recomendations, truth, k=100):
    
    analysis = topn.RecListAnalysis()
    analysis.add_metric(topn.ndcg,k=k)
    analysis.add_metric(topn.precision,k=k)
    analysis.add_metric(topn.recall,k=k)
    analysis.add_metric(topn.hit)
    results = analysis.compute(recomendations, truth)
    
    return results


def shanon_enthropy(rec_list):
    
    rec_list=rec_list.groupby('item').count().reset_index()
    rec_list=list(rec_list['user'])
    
    total_recs=sum(rec_list)
    probs=np.array(rec_list)/total_recs
    
    enthropy=sum(probs*np.log2(probs))*-1
    
    return enthropy



In [15]:
def generate_single_split(lenght, folds=5):
    
    adjustment_for_remainder_after_split=lenght % folds 
    split_length=int((lenght-adjustment_for_remainder_after_split)/folds)
    
    
    split=[]
    
    
    for i in range(1,6):
        if adjustment_for_remainder_after_split >0:
            split=split+[i]*(split_length+1)
            adjustment_for_remainder_after_split=adjustment_for_remainder_after_split - 1
        else:
            split=split+[i]*(split_length)
    
    return split

def split_by_users(ratings, folds=5):
    
    #Sampling 100% of the dataset just returnes the datased shuffled 
    ratings=ratings.sample(frac=1)
    splited_ratings=None
    
    set_inital=True

    for _,user in ratings.groupby('user', sort=False):
        user['split']=generate_single_split(len(user),folds=folds)
        
        if set_inital:
            splited_ratings=user
            set_inital=False
        else:
            splited_ratings=splited_ratings.append(user)
    return splited_ratings

In [52]:
#Split Ratings per User

ratings=split_by_users(ratings, folds=5)


In [59]:
#Save and load ratings with split if needed

#ratings.to_parquet("Data/Experiment_Data/Ratings_with_Split.parquet")
#ratings=pd.read_parquet("Data/Experiment_Data/Ratings_with_Split.parquet")

In [17]:
## Crossvalidate sliding window

#Evaluate Recomenders
age_ranges=[(10,64),(10,20),(16,26),(26,36),(36,46),(46,56),(49,59),(49,55),(55,61)] 
neighbours_set=[6,8,12,18,24,36,50,60,70,100,110,120,150]
diversities=[0,0.2,0.4,0.6]
list_lengths=[10]
n=50


results=None
set_results=True
cv_iter=0

for cv_iter in range(1,5+1):
    
    test=ratings[ratings['split']==cv_iter]
    train=ratings.drop(test.index)
    
    test=test.drop('split', axis=1)
    train=train.drop('split', axis=1)
    
    print("Testing in CV iteration:",cv_iter)
    
    for neighbours in neighbours_set:
        print("Traning recommender with",neighbours,"neighbours") #, save_nbrs=100
        predictor = user_knn.UserUser(neighbours,min_nbrs=neighbours,center=False,feedback='implicit',use_ratings=False)#
        Unseen_item_selector = UnratedItemCandidateSelector()
        recommender = TopN(predictor, Unseen_item_selector)    
        predictor.fit(train)
        Unseen_item_selector.fit(train)
        
        recomendations_all=generate_recommendations(test.user.unique(),recommender,n=n)
        
        for k in list_lengths:
            print("Testing at",k,"recommendations")
            for diversity in diversities:  
                for age_range_start,age_range_end in age_ranges:

                    
                    age_range_str=str(age_range_start)+'-'+str(age_range_end)

                    users_in_age_range=user_age_index_df[(user_age_index_df['user_age']>=(age_range_start)) & 
                                            (user_age_index_df['user_age']<=(age_range_end))]                

                    info= "age range "+age_range_str+" with "+str(neighbours)+" neighbours and " + str(diversity) + " diversity" 
                    print('Processing',info,"in itteration",cv_iter)

                    truth_in_age_range=test.merge(users_in_age_range[['user']], how='inner', on='user')

                    recs_in_age_range = recomendations_all.merge(users_in_age_range[['user']], how='inner', on='user')

                    print("============")
                    print("WE ARE RECCOEMEDNING TO")
                    print("============")
                    
                    testing=recs_in_age_range.groupby('user').count().reset_index()
                    print(len(testing[testing['score']>0]))
                    
                    print("USERS")
                    print("============")
                    Intralist_diversity_avg, recs_in_age_range = adjust_divesities(users_in_age_range.user.unique(), diversity, 
                                                          recs_in_age_range, n=n,k=k)


                    results_i=evaluate_recomendations(recs_in_age_range, truth_in_age_range, k=k)
                    
                    
                    filename='User_Results_'+age_range_str+'_'+str(neighbours)+'_neig_'+str(int(10*diversity))+"_diver_CViter_"+str(cv_iter)+'.csv'
                    
                    results_i=results_i.reset_index()
                    results_i.to_csv('Data/Experiment_Data/CV_iters/'+filename, index=False)
                    
                    
                    results_i=results_i[["ndcg","precision","recall","hit"]].mean()
                    results_i=convert_to_DF(results_i)

                    diversity_pressent=shanon_enthropy(recs_in_age_range)

                    print('Final Evaluation for',k, 'recomendations in', info, ", with Inta-user",Intralist_diversity_avg,'and',diversity_pressent,"measured Shannon diversity")
                    print(results_i)


    
                    results_i['List_Len']=[k]*len(results_i)
                    results_i['Neighbours']=[neighbours]*len(results_i)
                    results_i['Diversity_adjustment']=[diversity]*len(results_i)
                    results_i['Intralist_Diversity_calculated']=[Intralist_diversity_avg]*len(results_i)
                    results_i['Shannon_diversity_pressent']=[diversity_pressent]*len(results_i)
                    results_i['Age_range']=[age_range_str]*len(results_i)
                    results_i['CV_iter']=[cv_iter]*len(results_i)
                    if set_results:
                        results=results_i
                        set_results=False
                    else:
                        results=results.append(results_i)
                #break
            #break
        #break
    #break
    sifix="_at_fold_"+str(cv_iter)
    results.to_parquet("Data/Experiment_Data/Results_Cross_validationNew_5_10_2"+sifix+".parquet")

results.to_parquet("Data/Experiment_Data/Results_Cross_validationNew_5_10_2_Final.parquet")    
    
    
results.head()                


Testing in CV iteration: 1
Traning recommender with 8 neighbours


BLAS using multiple threads - can cause oversubscription
found 1 potential runtime problems - see https://boi.st/lkpy-perf


Testing at 10 recommendations
Processing age range 26-36 with 8 neighbours and 0 diversity in itteration 1
WE ARE RECCOEMEDNING TO
9891
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 8 neighbours and 0 diversity , with Inta-user 0.819441916894153 and 11.315085073123736 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.105119   0.184774  0.187265  0.785664
Processing age range 36-46 with 8 neighbours and 0 diversity in itteration 1
WE ARE RECCOEMEDNING TO
2302
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 8 neighbours and 0 diversity , with Inta-user 0.8437011294526449 and 10.912458651108418 measured Shannon diversity
      ndcg  precision    recall       hit
0  0.10087   0.155734  0.159758  0.715465
Processing age range 26-36 with 8 neighbours and 0.2 diversity in itteration 1
WE ARE RECCOEMEDNING TO
9891
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 8 neighbours and 0.2 diversity , with Inta-u

Final Evaluation for 10 recomendations in age range 36-46 with 24 neighbours and 0 diversity , with Inta-user 0.8103388357949588 and 10.06677461638796 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.115148   0.168679  0.172854  0.736316
Processing age range 26-36 with 24 neighbours and 0.2 diversity in itteration 1
WE ARE RECCOEMEDNING TO
9891
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 24 neighbours and 0.2 diversity , with Inta-user 0.7874127995147118 and 10.448190352619363 measured Shannon diversity
      ndcg  precision    recall       hit
0  0.12386   0.208553  0.211188  0.817309
Processing age range 36-46 with 24 neighbours and 0.2 diversity in itteration 1
WE ARE RECCOEMEDNING TO
2302
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 24 neighbours and 0.2 diversity , with Inta-user 0.8103388357949588 and 10.067835995248606 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.114473   0.1

Final Evaluation for 10 recomendations in age range 36-46 with 70 neighbours and 0.2 diversity , with Inta-user 0.7987831377661854 and 9.242184232756921 measured Shannon diversity
       ndcg  precision  recall       hit
0  0.110374   0.160409  0.1638  0.713168
Processing age range 26-36 with 70 neighbours and 0.4 diversity in itteration 1
WE ARE RECCOEMEDNING TO
9890
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 70 neighbours and 0.4 diversity , with Inta-user 0.7757027300303356 and 9.628063498092116 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.121878   0.204874  0.207184  0.812538
Processing age range 36-46 with 70 neighbours and 0.4 diversity in itteration 1
WE ARE RECCOEMEDNING TO
2301
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 70 neighbours and 0.4 diversity , with Inta-user 0.7987831377661854 and 9.240681303755435 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.110434   0.159

Final Evaluation for 10 recomendations in age range 36-46 with 18 neighbours and 0.4 diversity , with Inta-user 0.8166811468288425 and 10.25492325963738 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.109395    0.16351  0.167792  0.719809
Traning recommender with 24 neighbours
Testing at 10 recommendations
Processing age range 26-36 with 24 neighbours and 0 diversity in itteration 2
WE ARE RECCOEMEDNING TO
9891
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 24 neighbours and 0 diversity , with Inta-user 0.7819735112728784 and 10.433358068481812 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.123758   0.208452  0.211264  0.807502
Processing age range 36-46 with 24 neighbours and 0 diversity in itteration 2
WE ARE RECCOEMEDNING TO
2302
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 24 neighbours and 0 diversity , with Inta-user 0.8087749782797546 and 10.044486363096887 measured Shannon diver

Final Evaluation for 10 recomendations in age range 26-36 with 70 neighbours and 0 diversity , with Inta-user 0.7710515672396429 and 9.644633946775496 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.123551   0.206754  0.209262  0.800708
Processing age range 36-46 with 70 neighbours and 0 diversity in itteration 2
WE ARE RECCOEMEDNING TO
2300
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 70 neighbours and 0 diversity , with Inta-user 0.7956521739130404 and 9.252990953412906 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.109416   0.163304  0.167132  0.703043
Processing age range 26-36 with 70 neighbours and 0.2 diversity in itteration 2
WE ARE RECCOEMEDNING TO
9890
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 70 neighbours and 0.2 diversity , with Inta-user 0.7710515672396429 and 9.640498726518773 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.123246   0.20492

Final Evaluation for 10 recomendations in age range 26-36 with 18 neighbours and 0.2 diversity , with Inta-user 0.7926498837326913 and 10.661010694349727 measured Shannon diversity
       ndcg  precision   recall       hit
0  0.120136   0.199899  0.20287  0.806895
Processing age range 36-46 with 18 neighbours and 0.2 diversity in itteration 3
WE ARE RECCOEMEDNING TO
2302
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 18 neighbours and 0.2 diversity , with Inta-user 0.8167245873153742 and 10.260961749067313 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.105835   0.160469  0.164657  0.710252
Processing age range 26-36 with 18 neighbours and 0.4 diversity in itteration 3
WE ARE RECCOEMEDNING TO
9891
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 18 neighbours and 0.4 diversity , with Inta-user 0.7926498837326913 and 10.655456945961687 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.119444   

Final Evaluation for 10 recomendations in age range 26-36 with 50 neighbours and 0.4 diversity , with Inta-user 0.7746107178968699 and 9.891972279605936 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.122543   0.202154  0.204963  0.808392
Processing age range 36-46 with 50 neighbours and 0.4 diversity in itteration 3
WE ARE RECCOEMEDNING TO
2302
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 50 neighbours and 0.4 diversity , with Inta-user 0.8036055603822742 and 9.50135171954838 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.105598   0.159383  0.163319  0.705908
Traning recommender with 70 neighbours
Testing at 10 recommendations
Processing age range 26-36 with 70 neighbours and 0 diversity in itteration 3
WE ARE RECCOEMEDNING TO
9890
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 70 neighbours and 0 diversity , with Inta-user 0.7736501516683559 and 9.643536713285318 measured Shannon dive

Testing at 10 recommendations
Processing age range 26-36 with 18 neighbours and 0 diversity in itteration 4
WE ARE RECCOEMEDNING TO
9890
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 18 neighbours and 0 diversity , with Inta-user 0.7939939332659277 and 10.670727577452293 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.121028   0.203084  0.206649  0.801112
Processing age range 36-46 with 18 neighbours and 0 diversity in itteration 4
WE ARE RECCOEMEDNING TO
2302
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 18 neighbours and 0 diversity , with Inta-user 0.8217202432667221 and 10.287708073656201 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.109902   0.165161  0.170358  0.715465
Processing age range 26-36 with 18 neighbours and 0.2 diversity in itteration 4
WE ARE RECCOEMEDNING TO
9890
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 18 neighbours and 0.2 diversity , wi

Final Evaluation for 10 recomendations in age range 36-46 with 50 neighbours and 0 diversity , with Inta-user 0.8050825369244095 and 9.526361026430763 measured Shannon diversity
       ndcg  precision   recall       hit
0  0.111951   0.165334  0.17034  0.719809
Processing age range 26-36 with 50 neighbours and 0.2 diversity in itteration 4
WE ARE RECCOEMEDNING TO
9890
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 50 neighbours and 0.2 diversity , with Inta-user 0.7770070778564232 and 9.90479653577426 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.124547   0.205228  0.208542  0.797978
Processing age range 36-46 with 50 neighbours and 0.2 diversity in itteration 4
WE ARE RECCOEMEDNING TO
2302
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 50 neighbours and 0.2 diversity , with Inta-user 0.8050825369244095 and 9.514650892401896 measured Shannon diversity
      ndcg  precision    recall      hit
0  0.11117   0.162685 

Final Evaluation for 10 recomendations in age range 36-46 with 12 neighbours and 0.2 diversity , with Inta-user 0.8313640312771465 and 10.593026124446187 measured Shannon diversity
      ndcg  precision    recall       hit
0  0.10848   0.159209  0.164404  0.717637
Processing age range 26-36 with 12 neighbours and 0.4 diversity in itteration 5
WE ARE RECCOEMEDNING TO
9891
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 12 neighbours and 0.4 diversity , with Inta-user 0.8048731169750305 and 10.970733808456353 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.112986    0.19176  0.195102  0.797998
Processing age range 36-46 with 12 neighbours and 0.4 diversity in itteration 5
WE ARE RECCOEMEDNING TO
2302
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 12 neighbours and 0.4 diversity , with Inta-user 0.8313640312771465 and 10.581040063888485 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.107592   

Final Evaluation for 10 recomendations in age range 36-46 with 36 neighbours and 0.4 diversity , with Inta-user 0.8049522154648087 and 9.765213802453134 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.112806   0.163076  0.168337  0.722415
Traning recommender with 50 neighbours
Testing at 10 recommendations
Processing age range 26-36 with 50 neighbours and 0 diversity in itteration 5
WE ARE RECCOEMEDNING TO
9890
USERS
Final Evaluation for 10 recomendations in age range 26-36 with 50 neighbours and 0 diversity , with Inta-user 0.7740950455005091 and 9.897085927942024 measured Shannon diversity
       ndcg  precision    recall       hit
0  0.125004   0.207118  0.210552  0.804752
Processing age range 36-46 with 50 neighbours and 0 diversity in itteration 5
WE ARE RECCOEMEDNING TO
2302
USERS
Final Evaluation for 10 recomendations in age range 36-46 with 50 neighbours and 0 diversity , with Inta-user 0.8010317115551658 and 9.535126035381882 measured Shannon diversi

Unnamed: 0,ndcg,precision,recall,hit,List_Len,Neighbours,Diversity_adjustment,Intralist_Diversity_calculated,Shannon_diversity_pressent,Age_range,CV_iter
0,0.105119,0.184774,0.187265,0.785664,10,8,0.0,0.819442,11.315085,26-36,1
0,0.10087,0.155734,0.159758,0.715465,10,8,0.0,0.843701,10.912459,36-46,1
0,0.104663,0.182883,0.185378,0.786877,10,8,0.2,0.819442,11.319685,26-36,1
0,0.100387,0.154257,0.158285,0.715899,10,8,0.2,0.843701,10.91114,36-46,1
0,0.103855,0.181438,0.183813,0.789405,10,8,0.4,0.819442,11.321365,26-36,1


In [22]:


#results=pd.read_parquet("Data/Experiment_Data/Results_Cross_validationNew_5_10_2_at_fold_1.parquet")
#for f in range(2,6):
#    r=pd.read_parquet("Data/Experiment_Data/Results_Cross_validationNew_5_10_2_at_fold_"+str(f)+".parquet")
#    results=results.append(r)        

results_means_all=results.groupby(['Age_range','Neighbours','Diversity_adjustment','List_Len']).mean().reset_index()

print_l=False
list_lengths=[10]

add=None
set_add=True
for list_len in list_lengths:
    
    results_means=results_means_all[results_means_all['List_Len']==list_len]
    for age_r in results.Age_range.unique():
        print("Age Range",age_r)
        results_age_r=results_means[results_means['Age_range']==age_r].sort_values(by='ndcg',ascending=False).head(4)
        print(results_age_r)

        if set_add:
            add=results_age_r
            set_add=False
        else:
            add=add.append(results_age_r)

filename="Data/Experiment_Data/Results_Cross_validation_MiddleAges_Summarized.csv"
add.to_csv(filename)

Age Range 26-36
   Age_range  Neighbours  Diversity_adjustment  List_Len      ndcg  precision  \
12     26-36          36                   0.0        10  0.125611   0.209639   
15     26-36          50                   0.0        10  0.125334   0.208758   
13     26-36          36                   0.2        10  0.124506   0.207506   
16     26-36          50                   0.2        10  0.124466   0.206518   

      recall       hit  Intralist_Diversity_calculated  \
12  0.212739  0.808954                        0.777920   
15  0.211764  0.806067                        0.774754   
13  0.210501  0.809257                        0.777927   
16  0.209449  0.806835                        0.774754   

    Shannon_diversity_pressent  CV_iter  
12                   10.141583      3.0  
15                    9.899420      3.0  
13                   10.141357      3.0  
16                    9.896421      3.0  
Age Range 36-46
   Age_range  Neighbours  Diversity_adjustment  List_Len     

In [38]:
#Looking for age groups with sliding window


#Test with sliding window
#Evaluate Recomenders
age_ranges=[3,5,7] 

neighbours_set=[6,12,24,50]

diversities=[0,0.2,0.4,0.6,0.8]

train, test=single_split(ratings)

results=None
set_results=True


for age_range in age_ranges:
    print("Testing in age range:",age_range)
    for neighbours in neighbours_set:
        print("Traning recommender with",neighbours,"neighbours") #, save_nbrs=100
        predictor = user_knn.UserUser(neighbours,min_nbrs=neighbours,center=False,feedback='implicit',use_ratings=False)
        Unseen_item_selector = UnratedItemCandidateSelector()
        recommender = TopN(predictor, Unseen_item_selector)    
        predictor.fit(train)
        Unseen_item_selector.fit(train)
        
        recomendations_all=generate_recommendations(test.user.unique(),recommender,n=100)
        
        for diversity in diversities:  
        #younger_older_diversities=[(low_diversity, high_diversity) , (high_diversity, low_diversity)]
            for central_age in range(10+age_range,64-age_range,3):
                
                
                
                age_range_start= central_age-age_range
                age_range_end = central_age+age_range
                age_range_str=str(age_range_start)+'-'+str(age_range_end)
                
                users_in_age_range=user_age_index_df[(user_age_index_df['user_age']>=(age_range_start)) & 
                                        (user_age_index_df['user_age']<=(age_range_end))]                
                
                info= "age range "+age_range_str+" with "+str(neighbours)+" neighbours and " + str(diversity) + " diversity, " 
                print('Processing',info)
                
                truth_in_age_range=test.merge(users_in_age_range[['user']], how='inner', on='user')
                
                recs_in_age_range = recomendations_all.merge(users_in_age_range[['user']], how='inner', on='user')
                
                recs_in_age_range= adjust_divesities(users_in_age_range.user.unique(), diversity, 
                                                      recs_in_age_range, n=100)
                
                
                
                results_i=evaluate_recomendations(recs_in_age_range, truth_in_age_range, k=50)
                results_i=results_i[["ndcg","precision","recall","hit"]].mean()
                results_i=convert_to_DF(results_i)
                
                print('Final Evaluation for 50 recomendations in', info)
                print(results_i)
                
                #results_i=crossvalidate_ages(younger_users_recomender,older_users_recomender,
                #                   Unseen_item_selector, ratings,
                #                    younger_users, older_users,
                #                   younger_diversity, older_diversity,
                #                    info=info, n=100)
                
                results_i['Neighbours']=[neighbours]*len(results_i)
                results_i['Diversity']=[diversity]*len(results_i)
                results_i['Age_range']=[age_range_str]*len(results_i)
                
                if set_results:
                    results=results_i
                    set_results=False
                else:
                    results=results.append(results_i)
                #break
            #break
        #break
    #break
    sifix="_at_age_range_"+str(age_range)
    results.to_parquet("Data/Experiment_Data/Results_Sliding_Window"+sifix+".parquet")

    
    
    
results.head()                





Testing in age range: 5
Traning recommender with 6 neighbours
Processing age range 10-20 with 6 neighbours and 0 diversity, 
Final Evaluation for 50 recomendations in age range 10-20 with 6 neighbours and 0 diversity, 
       ndcg  precision    recall       hit
0  0.203897   0.115717  0.213307  0.975336
Processing age range 13-23 with 6 neighbours and 0 diversity, 
Final Evaluation for 50 recomendations in age range 13-23 with 6 neighbours and 0 diversity, 
       ndcg  precision    recall       hit
0  0.196866   0.122136  0.198031  0.973772
Processing age range 16-26 with 6 neighbours and 0 diversity, 
Final Evaluation for 50 recomendations in age range 16-26 with 6 neighbours and 0 diversity, 
       ndcg  precision    recall       hit
0  0.187057   0.122378  0.187503  0.972694
Processing age range 19-29 with 6 neighbours and 0 diversity, 
Final Evaluation for 50 recomendations in age range 19-29 with 6 neighbours and 0 diversity, 
       ndcg  precision    recall       hit
0  0.1784

Final Evaluation for 50 recomendations in age range 19-29 with 6 neighbours and 0.4 diversity, 
       ndcg  precision    recall       hit
0  0.172728   0.118854  0.169225  0.970465
Processing age range 22-32 with 6 neighbours and 0.4 diversity, 
Final Evaluation for 50 recomendations in age range 22-32 with 6 neighbours and 0.4 diversity, 
      ndcg  precision    recall       hit
0  0.16635   0.120104  0.162532  0.972623
Processing age range 25-35 with 6 neighbours and 0.4 diversity, 
Final Evaluation for 50 recomendations in age range 25-35 with 6 neighbours and 0.4 diversity, 
      ndcg  precision    recall       hit
0  0.15573    0.11898  0.154602  0.972174
Processing age range 28-38 with 6 neighbours and 0.4 diversity, 
Final Evaluation for 50 recomendations in age range 28-38 with 6 neighbours and 0.4 diversity, 
       ndcg  precision    recall       hit
0  0.148388    0.11541  0.148491  0.969142
Processing age range 31-41 with 6 neighbours and 0.4 diversity, 
Final Evaluation

Final Evaluation for 50 recomendations in age range 31-41 with 6 neighbours and 0.8 diversity, 
       ndcg  precision    recall       hit
0  0.140527    0.10793  0.139712  0.968404
Processing age range 34-44 with 6 neighbours and 0.8 diversity, 
Final Evaluation for 50 recomendations in age range 34-44 with 6 neighbours and 0.8 diversity, 
       ndcg  precision    recall       hit
0  0.128458   0.103777  0.135236  0.953995
Processing age range 37-47 with 6 neighbours and 0.8 diversity, 
Final Evaluation for 50 recomendations in age range 37-47 with 6 neighbours and 0.8 diversity, 
       ndcg  precision    recall       hit
0  0.118849   0.092903  0.133094  0.919355
Processing age range 40-50 with 6 neighbours and 0.8 diversity, 
Final Evaluation for 50 recomendations in age range 40-50 with 6 neighbours and 0.8 diversity, 
       ndcg  precision    recall       hit
0  0.127046   0.091928  0.144118  0.910314
Processing age range 43-53 with 6 neighbours and 0.8 diversity, 
Final Evalua

Final Evaluation for 50 recomendations in age range 43-53 with 12 neighbours and 0.2 diversity, 
       ndcg  precision    recall      hit
0  0.139803   0.099509  0.171521  0.93865
Processing age range 46-56 with 12 neighbours and 0.2 diversity, 
Final Evaluation for 50 recomendations in age range 46-56 with 12 neighbours and 0.2 diversity, 
       ndcg  precision    recall       hit
0  0.134739   0.091736  0.167169  0.933884
Processing age range 49-59 with 12 neighbours and 0.2 diversity, 
Final Evaluation for 50 recomendations in age range 49-59 with 12 neighbours and 0.2 diversity, 
       ndcg  precision   recall       hit
0  0.137118   0.085676  0.15374  0.891892
Processing age range 52-62 with 12 neighbours and 0.2 diversity, 
Final Evaluation for 50 recomendations in age range 52-62 with 12 neighbours and 0.2 diversity, 
       ndcg  precision    recall       hit
0  0.158494   0.102909  0.154946  0.909091
Processing age range 10-20 with 12 neighbours and 0.4 diversity, 
Final Ev

Final Evaluation for 50 recomendations in age range 52-62 with 12 neighbours and 0.6 diversity, 
       ndcg  precision    recall       hit
0  0.150972   0.096364  0.147212  0.890909
Processing age range 10-20 with 12 neighbours and 0.8 diversity, 
Final Evaluation for 50 recomendations in age range 10-20 with 12 neighbours and 0.8 diversity, 
       ndcg  precision    recall       hit
0  0.205591   0.111278  0.199221  0.966368
Processing age range 13-23 with 12 neighbours and 0.8 diversity, 
Final Evaluation for 50 recomendations in age range 13-23 with 12 neighbours and 0.8 diversity, 
       ndcg  precision    recall      hit
0  0.199529   0.119895  0.187323  0.96805
Processing age range 16-26 with 12 neighbours and 0.8 diversity, 
Final Evaluation for 50 recomendations in age range 16-26 with 12 neighbours and 0.8 diversity, 
       ndcg  precision    recall       hit
0  0.193226   0.121812  0.180593  0.968698
Processing age range 19-29 with 12 neighbours and 0.8 diversity, 
Final 

Final Evaluation for 50 recomendations in age range 19-29 with 24 neighbours and 0.2 diversity, 
       ndcg  precision    recall       hit
0  0.201334   0.131718  0.188062  0.973747
Processing age range 22-32 with 24 neighbours and 0.2 diversity, 
Final Evaluation for 50 recomendations in age range 22-32 with 24 neighbours and 0.2 diversity, 
     ndcg  precision   recall       hit
0  0.1941   0.133407  0.18112  0.976323
Processing age range 25-35 with 24 neighbours and 0.2 diversity, 
Final Evaluation for 50 recomendations in age range 25-35 with 24 neighbours and 0.2 diversity, 
       ndcg  precision    recall       hit
0  0.182116   0.131745  0.171371  0.977391
Processing age range 28-38 with 24 neighbours and 0.2 diversity, 
Final Evaluation for 50 recomendations in age range 28-38 with 24 neighbours and 0.2 diversity, 
       ndcg  precision    recall       hit
0  0.170824   0.128505  0.163365  0.970106
Processing age range 31-41 with 24 neighbours and 0.2 diversity, 
Final Eval

Final Evaluation for 50 recomendations in age range 28-38 with 24 neighbours and 0.6 diversity, 
       ndcg  precision    recall       hit
0  0.166782   0.125284  0.158596  0.970106
Processing age range 31-41 with 24 neighbours and 0.6 diversity, 
Final Evaluation for 50 recomendations in age range 31-41 with 24 neighbours and 0.6 diversity, 
      ndcg  precision    recall       hit
0  0.15896   0.119242  0.154469  0.971564
Processing age range 34-44 with 24 neighbours and 0.6 diversity, 
Final Evaluation for 50 recomendations in age range 34-44 with 24 neighbours and 0.6 diversity, 
       ndcg  precision    recall       hit
0  0.149739   0.115061  0.149843  0.958838
Processing age range 37-47 with 24 neighbours and 0.6 diversity, 
Final Evaluation for 50 recomendations in age range 37-47 with 24 neighbours and 0.6 diversity, 
       ndcg  precision    recall       hit
0  0.139837   0.101032  0.144008  0.935484
Processing age range 40-50 with 24 neighbours and 0.6 diversity, 
Final 

Final Evaluation for 50 recomendations in age range 37-47 with 50 neighbours and 0 diversity, 
      ndcg  precision   recall       hit
0  0.14193   0.101032  0.14469  0.929032
Processing age range 40-50 with 50 neighbours and 0 diversity, 
Final Evaluation for 50 recomendations in age range 40-50 with 50 neighbours and 0 diversity, 
       ndcg  precision    recall      hit
0  0.139259   0.096861  0.147816  0.90583
Processing age range 43-53 with 50 neighbours and 0 diversity, 
Final Evaluation for 50 recomendations in age range 43-53 with 50 neighbours and 0 diversity, 
       ndcg  precision    recall       hit
0  0.135679   0.088834  0.144664  0.895706
Processing age range 46-56 with 50 neighbours and 0 diversity, 
Final Evaluation for 50 recomendations in age range 46-56 with 50 neighbours and 0 diversity, 
      ndcg  precision    recall       hit
0  0.12926   0.079669  0.136843  0.884298
Processing age range 49-59 with 50 neighbours and 0 diversity, 
Final Evaluation for 50 reco

Final Evaluation for 50 recomendations in age range 49-59 with 50 neighbours and 0.4 diversity, 
       ndcg  precision    recall       hit
0  0.121477   0.072162  0.121557  0.878378
Processing age range 52-62 with 50 neighbours and 0.4 diversity, 
Final Evaluation for 50 recomendations in age range 52-62 with 50 neighbours and 0.4 diversity, 
       ndcg  precision    recall       hit
0  0.138034   0.086909  0.134831  0.872727
Processing age range 10-20 with 50 neighbours and 0.6 diversity, 
Final Evaluation for 50 recomendations in age range 10-20 with 50 neighbours and 0.6 diversity, 
       ndcg  precision    recall       hit
0  0.219386   0.116274  0.206733  0.968575
Processing age range 13-23 with 50 neighbours and 0.6 diversity, 
Final Evaluation for 50 recomendations in age range 13-23 with 50 neighbours and 0.6 diversity, 
       ndcg  precision    recall      hit
0  0.211917   0.124275  0.193945  0.97042
Processing age range 16-26 with 50 neighbours and 0.6 diversity, 
Final 

Unnamed: 0,ndcg,precision,recall,hit,Neighbours,Diversity,Age_range
0,0.203897,0.115717,0.213307,0.975336,6,0.0,10-20
0,0.196866,0.122136,0.198031,0.973772,6,0.0,13-23
0,0.187057,0.122378,0.187503,0.972694,6,0.0,16-26
0,0.178472,0.123055,0.177837,0.973449,6,0.0,19-29
0,0.171679,0.123759,0.169178,0.973733,6,0.0,22-32


In [79]:
#Generate Results

age_range=5
sifix="_at_age_range_"+str(age_range)

results=pd.read_parquet("Data/Experiment_Data/Results_Cross_validation_final2.parquet")

add=None
set_add=True

for age_r in results.Age_range.unique():
    print("Age Range",age_r)
    results_age_r=results[results['Age_range']==age_r].sort_values(by='ndcg',ascending=False).head(4)
    print(results_age_r)
    
    if set_add:
        add=results_age_r
        set_add=False
    else:
        add=add.append(results_age_r)
add.to_csv("Data/Experiment_Data/Results_CV_2_summarized.csv")

Age Range 49-59
       ndcg  precision    recall       hit  Neighbours  Diversity_adjustment  \
0  0.166460   0.093846  0.160486  0.923077          12                   0.2   
0  0.166424   0.094154  0.161676  0.923077          12                   0.0   
0  0.160021   0.094769  0.162784  0.907692           6                   0.0   
0  0.158668   0.088000  0.146684  0.907692          12                   0.4   

   Shannon_diversity_pressent Age_range  CV_iter  
0                    9.703549     49-59        2  
0                    9.703978     49-59        2  
0                   10.053383     49-59        2  
0                    9.683972     49-59        2  
Age Range 49-55
       ndcg  precision    recall       hit  Neighbours  Diversity_adjustment  \
0  0.167653   0.098846  0.159472  0.923077          12                   0.0   
0  0.165956   0.095000  0.153926  0.923077          12                   0.2   
0  0.162281   0.093462  0.150064  0.903846          12                  