In [None]:
# Standard library
import random

# Third-party: numerics, data handling, plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Surprise recommender library
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise import KNNBasic
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split

In [None]:
# A. Load the ratings from "ratings_small.csv".
# Each line is parsed as comma-separated fields in the order
# user, item, rating, timestamp; the first line of the file is skipped
# (presumably the CSV header -- verify against the file).
reader = Reader(
    line_format="user item rating timestamp",
    sep=',',
    skip_lines=1,
)
dataset = Dataset.load_from_file("ratings_small.csv", reader=reader)

In [None]:
# Parameters and shared state for the experiments below.

# Cross-validation shuffles the ratings into folds and SVD initialises its
# factors randomly. Following the Surprise FAQ on reproducible experiments,
# seed both Python's and NumPy's global RNGs so that a Restart & Run All
# yields the same numbers every time.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Holders for the mean cross-validated errors of each model
# (PMF, user-based CF, item-based CF); filled in by later cells.
PMF_RMSE = []
PMF_MAE = []
UB_RMSE = []
UB_MAE = []
IB_RMSE = []
IB_MAE = []

# PMF: SVD without bias terms, evaluated with 5-fold cross-validation.
# random_state pins the factor initialisation independently of the global RNG.
algorithm = SVD(biased=False, random_state=SEED)
CV_PMF = cross_validate(algorithm, dataset, measures=['RMSE', 'MAE'], cv=5, verbose=True)

In [None]:
# User-based collaborative filtering: KNNBasic computing similarities
# between users. No similarity name is given, so the library default applies.
algorithm = KNNBasic(sim_options=dict(user_based=True))
CV_UB_CF = cross_validate(
    algorithm,
    dataset,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

In [None]:
# Item-based collaborative filtering: KNNBasic computing similarities
# between items. No similarity name is given, so the library default applies.
algorithm = KNNBasic(sim_options=dict(user_based=False))
CV_IB_CF = cross_validate(
    algorithm,
    dataset,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

In [None]:
# Mean PMF error over the cross-validation folds.
# Assign a fresh one-element list instead of appending to the list created in
# an earlier cell, so re-running this cell does not pile up duplicate entries.
PMF_RMSE = [CV_PMF['test_rmse'].mean()]
PMF_MAE = [CV_PMF['test_mae'].mean()]
print(PMF_RMSE)
print(PMF_MAE)

In [None]:
# Mean user-based CF error over the cross-validation folds.
# Assign a fresh one-element list instead of appending to the list created in
# an earlier cell, so re-running this cell does not pile up duplicate entries.
UB_RMSE = [CV_UB_CF['test_rmse'].mean()]
UB_MAE = [CV_UB_CF['test_mae'].mean()]
print(UB_RMSE)
print(UB_MAE)

In [None]:
# Mean item-based CF error over the cross-validation folds.
# Assign a fresh one-element list instead of appending to the list created in
# an earlier cell, so re-running this cell does not pile up duplicate entries.
IB_RMSE = [CV_IB_CF['test_rmse'].mean()]
IB_MAE = [CV_IB_CF['test_mae'].mean()]
print(IB_RMSE)
print(IB_MAE)

In [None]:
# Compare similarity measures for KNNBasic, first user-based, then item-based.
# NOTE(review): the third variant uses 'pearson_baseline', which Surprise treats
# as a different measure from plain 'pearson'; the plot titles in this notebook
# say "Pearson" -- confirm which one is intended.


def _knn_cv(sim_options):
    """Build a KNNBasic model with ``sim_options`` and cross-validate it.

    Runs 5-fold cross-validation on ``dataset`` measuring RMSE and MAE, with
    verbose output, and returns the pair ``(model, cv_results)``.
    """
    model = KNNBasic(sim_options=sim_options)
    cv_results = cross_validate(model, dataset, measures=['RMSE', 'MAE'], cv=5, verbose=True)
    return model, cv_results


# User-based variants (the "msd" run passes no name and relies on the default).
algo_UB_cosine, CV_UB_cosine = _knn_cv({'name': 'cosine', 'user_based': True})
algo_UB_msd, CV_UB_msd = _knn_cv({'user_based': True})
algo_UB_pearson, CV_UB_pearson = _knn_cv({'name': 'pearson_baseline', 'user_based': True})

# Item-based variants, in the same order.
algo_IB_cosine, CV_IB_cosine = _knn_cv({'name': 'cosine', 'user_based': False})
algo_IB_msd, CV_IB_msd = _knn_cv({'user_based': False})
algo_IB_pearson, CV_IB_pearson = _knn_cv({'name': 'pearson_baseline', 'user_based': False})

In [None]:
# Mean fold errors of the user-based runs, ordered cosine, msd, pearson,
# ready to be plotted.
UB_RMSE_PLOT = [
    cv_result['test_rmse'].mean()
    for cv_result in (CV_UB_cosine, CV_UB_msd, CV_UB_pearson)
]
UB_MAE_PLOT = [
    cv_result['test_mae'].mean()
    for cv_result in (CV_UB_cosine, CV_UB_msd, CV_UB_pearson)
]

In [None]:
# Plot mean RMSE / MAE of user-based CF for each similarity measure.
# UB_RMSE_PLOT and UB_MAE_PLOT are filled in the order cosine, MSD,
# pearson_baseline, so label the x positions accordingly instead of leaving
# bare 0/1/2 ticks.
similarity_labels = ['cosine', 'MSD', 'pearson_baseline']
x_positions = list(range(len(similarity_labels)))

plt.title("User Based CF: Cosine, MSD, Pearson")
plt.plot(x_positions, UB_RMSE_PLOT, label='RMSE')
plt.plot(x_positions, UB_MAE_PLOT, label='MAE')
plt.xticks(x_positions, similarity_labels)
plt.xlabel("Similarity measure")
plt.ylabel("Mean cross-validated error")
plt.legend()
plt.show()

In [None]:
# Mean fold errors of the item-based runs, ordered cosine, MSD, pearson_baseline.
IB_RMSE_PLOT = [
    cv_result['test_rmse'].mean()
    for cv_result in (CV_IB_cosine, CV_IB_msd, CV_IB_pearson)
]
IB_MAE_PLOT = [
    cv_result['test_mae'].mean()
    for cv_result in (CV_IB_cosine, CV_IB_msd, CV_IB_pearson)
]

# Label the x positions with the similarity measure each point belongs to,
# instead of leaving bare 0/1/2 ticks.
similarity_labels = ['cosine', 'MSD', 'pearson_baseline']
x_positions = list(range(len(similarity_labels)))

plt.title("Item Based CF: Cosine, MSD, Pearson")
plt.plot(x_positions, IB_RMSE_PLOT, label='RMSE')
plt.plot(x_positions, IB_MAE_PLOT, label='MAE')
plt.xticks(x_positions, similarity_labels)
plt.xlabel("Similarity measure")
plt.ylabel("Mean cross-validated error")
plt.legend()
plt.show()

In [None]:
# Sweep the neighbourhood size K for user-based KNNBasic and record the mean
# 5-fold RMSE for each K.
# NOTE: this runs a full 5-fold cross-validation for each of the 100 values
# of K, so the cell is slow.
Range = range(1, 101)
User = []

for k in Range:
    algo = KNNBasic(k=k, sim_options={'user_based': True}, verbose=False)
    CV_User = cross_validate(algo, dataset, measures=['rmse', 'mae'], cv=5, verbose=False)
    User.append(CV_User['test_rmse'].mean())

# Plot against the actual K values: plotting User on its own would use the
# list index (0..99) as x, shifting every point one K to the left.
plt.plot(Range, User)
plt.title("User Based CF: RMSE vs. number of neighbours")
plt.xlabel("K (number of neighbours)")
plt.ylabel("Mean cross-validated RMSE")
plt.show()

In [None]:
# Sweep the neighbourhood size K for item-based KNNBasic and record the mean
# 5-fold RMSE for each K.
# NOTE: this runs a full 5-fold cross-validation for each of the 100 values
# of K, so the cell is slow.
Range = range(1, 101)
Item = []

for k in Range:
    algo = KNNBasic(k=k, sim_options={'user_based': False}, verbose=False)
    CV_Item = cross_validate(algo, dataset, measures=['rmse', 'mae'], cv=5, verbose=False)
    Item.append(CV_Item['test_rmse'].mean())

# Plot against the actual K values: plotting Item on its own would use the
# list index (0..99) as x, shifting every point one K to the left.
plt.plot(Range, Item)
plt.title("Item Based CF: RMSE vs. number of neighbours")
plt.xlabel("K (number of neighbours)")
plt.ylabel("Mean cross-validated RMSE")
plt.show()

In [None]:
# Best K for user-based CF: the K whose mean RMSE is lowest.
# User.index(...) is a 0-based position in the results list, while K was swept
# over Range (starting at 1), so map the position back to the K it belongs to.
Range[User.index(min(User))]

In [None]:
# Best K for item-based CF: the K whose mean RMSE is lowest.
# Item.index(...) is a 0-based position in the results list, while K was swept
# over Range (starting at 1), so map the position back to the K it belongs to.
Range[Item.index(min(Item))]