In [1]:
from surprise import Dataset,Reader 
from surprise import KNNBasic,KNNWithMeans,KNNWithZScore,KNNBaseline
from surprise import SVD,SVDpp,NMF,SlopeOne
from surprise import NormalPredictor,BaselineOnly
from surprise.model_selection import KFold, cross_validate
import os
import numpy as np
import pandas as pd
import time

In [2]:
# Dataset 1: comma-separated "user,item,rating" lines, ratings on a 1-5 scale.
# The first line of the file is skipped (skip_lines=1).
print("Loading Dataset1...")
file_path = os.path.expanduser('./ratings_1.csv')
reader = Reader(
    line_format='user item rating',
    sep=',',
    rating_scale=[1, 5],
    skip_lines=1,
)
data1 = Dataset.load_from_file(file_path, reader=reader)
print("Done.")

Loading Dataset1...
Done.


In [3]:
# Dataset 2: comma-separated "user,item,rating" lines, ratings on a 1-10 scale.
# The first line of the file is skipped (skip_lines=1).
print("Loading Dataset2...")
file_path = os.path.expanduser('./ratings_2.csv')
reader2 = Reader(
    line_format='user item rating',
    sep=',',
    rating_scale=[1, 10],
    skip_lines=1,
)
data2 = Dataset.load_from_file(file_path, reader=reader2)
print("Done.")

Loading Dataset2...
Done.


In [6]:
def Recommendation_System(data, n_splits=5, n_jobs=4, verbose=True):
    """Cross-validate a fixed set of Surprise algorithms and rank them by RMSE.

    Each algorithm is instantiated with its default arguments and evaluated
    with ``cross_validate`` on the same ``KFold`` object (``random_state=0``).

    Parameters
    ----------
    data : surprise Dataset
        Ratings dataset, e.g. the result of ``Dataset.load_from_file``.
    n_splits : int, default 5
        Number of cross-validation folds.
    n_jobs : int, default 4
        Passed through to ``cross_validate``.
    verbose : bool, default True
        Passed through to ``cross_validate``; when true the per-fold report
        of every algorithm is printed.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Algorithm'`` (class name) and ``'RMSE'`` (mean test RMSE
        over the folds, as a float rounded to 4 decimals), sorted by RMSE in
        descending order, i.e. the worst algorithm comes first. The index
        keeps each algorithm's position in the evaluation order.
    """
    # Algorithms to compare; the reported name is taken from the class itself
    # so names and classes cannot get out of sync.
    algorithms = [
        NormalPredictor, BaselineOnly,
        KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline,
        SVD, SVDpp, NMF, SlopeOne,
    ]

    # One fold definition shared by every algorithm.
    kf = KFold(n_splits=n_splits, random_state=0)

    rows = []
    for algo_cls in algorithms:
        result = cross_validate(algo_cls(), data, measures=['RMSE'], cv=kf,
                                verbose=verbose, n_jobs=n_jobs)
        print()
        # Keep the score numeric: sorting formatted strings would compare
        # them lexicographically ('10.5000' < '9.2500').
        mean_rmse = round(float(np.mean(result['test_rmse'])), 4)
        rows.append((algo_cls.__name__, mean_rmse))

    # Build the frame in one go (DataFrame.append no longer exists in pandas 2).
    table_df = pd.DataFrame(rows, columns=['Algorithm', 'RMSE'])

    # Highest (worst) RMSE first, best algorithm on the last row.
    return table_df.sort_values(by='RMSE', ascending=False)

In [5]:
# Cross-validate every algorithm on dataset 1; the returned RMSE table is the cell output.
Recommendation_System(data1)

Evaluating RMSE of algorithm NormalPredictor on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.5033  1.4976  1.5040  1.5076  1.5026  1.5030  0.0032  
Fit time          0.50    0.43    0.43    0.40    0.28    0.41    0.07    
Test time         0.60    0.50    0.41    0.32    0.29    0.42    0.12    

Evaluating RMSE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9176  0.9201  0.9174  0.9209  0.9225  0.9197  0.0020  
Fit time          0.11    0.13    0.10    0.14    0.10    0.12    0.01    
Test time         0.25    0.26    0.33    0.34    0.27    0.29    0.04    

Evaluating RMSE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9468  0.9517  0.9505  0.9530  0.9560  0.9516  0.0030  
Fit time          1.28    1.36    1.36    1.32    1.72    1.41    0.16    


Unnamed: 0,Algorithm,RMSE
0,NormalPredictor,1.503
2,KNNBasic,0.9516
8,NMF,0.9381
3,KNNWithMeans,0.9324
4,KNNWithZScore,0.9314
9,SlopeOne,0.923
1,BaselineOnly,0.9197
5,KNNBaseline,0.9085
6,SVD,0.9079
7,SVDpp,0.8931


In [7]:
# Cross-validate every algorithm on dataset 2; the returned RMSE table is the cell output.
Recommendation_System(data2)

Evaluating RMSE of algorithm NormalPredictor on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    3.2551  3.2039  3.2323  3.1712  3.2369  3.2199  0.0293  
Fit time          0.02    0.02    0.01    0.02    0.01    0.02    0.01    
Test time         0.04    0.03    0.05    0.03    0.02    0.03    0.01    

Evaluating RMSE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.9661  2.0052  1.9827  1.9678  1.9668  1.9777  0.0150  
Fit time          0.02    0.02    0.00    0.02    0.02    0.01    0.01    
Test time         0.02    0.02    0.03    0.02    0.02    0.02    0.01    

Evaluating RMSE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.8605  1.9067  1.8770  1.8616  1.8684  1.8748  0.0170  
Fit time          0.12    0.11    0.12    0.10    0.09    0.11    0.01    


Unnamed: 0,Algorithm,RMSE
0,NormalPredictor,3.2199
8,NMF,2.0038
9,SlopeOne,1.9823
1,BaselineOnly,1.9777
4,KNNWithZScore,1.9255
3,KNNWithMeans,1.9068
7,SVDpp,1.8916
2,KNNBasic,1.8748
6,SVD,1.8738
5,KNNBaseline,1.8674
