In [1]:
import pandas as pd
import numpy as np
import surprise  #conda install -c conda-forge scikit-surprise
from surprise.model_selection import cross_validate
from surprise import KNNWithMeans
from surprise.model_selection import GridSearchCV

In [2]:
# Read the ratings CSV into a DataFrame and display it as the cell output.
ratings_csv = './movies_data/ratings_small.csv'
df = pd.read_csv(ratings_csv)
df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
...,...,...,...,...
99999,671,6268,2.5,1065579370
100000,671,6269,4.0,1065149201
100001,671,6365,4.0,1070940363
100002,671,6385,2.5,1070979663


In [3]:
class ProbabilisticMatrixFactorization(surprise.AlgoBase):
    """Matrix-factorization rating predictor trained by stochastic gradient descent.

    Two factor matrices are randomly initialised -- ``P`` with one row per
    user and ``Q`` with one row per item -- and SGD then adjusts them so that
    ``dot(P[u], Q[i])`` approximates every observed rating ``r_ui``.
    """

    def __init__(self, learning_rate, num_epochs, num_factors):
        """Store the training hyper-parameters.

        Args:
            learning_rate: SGD step size.
            num_epochs: number of full passes over the training ratings.
            num_factors: number of latent factors per user and per item.
        """
        # Surprise's custom-algorithm guide requires running the base
        # initialiser before anything else.
        surprise.AlgoBase.__init__(self)
        self.alpha = learning_rate  # learning rate for Stochastic Gradient Descent
        self.num_epochs = num_epochs
        self.num_factors = num_factors

    def fit(self, train):
        """Learn the user and item factor matrices from a trainset.

        Args:
            train: trainset providing ``n_users``, ``n_items`` and
                ``all_ratings()`` (yielding ``(u, i, r_ui)`` triples).

        Returns:
            self, following the Surprise ``fit`` convention.
        """
        # Let the base class do its own bookkeeping first, as Surprise's
        # custom-algorithm guide requires.
        surprise.AlgoBase.fit(self, train)

        # Randomly initialize user/item factors from a Gaussian.
        P = np.random.normal(0, .1, (train.n_users, self.num_factors))
        Q = np.random.normal(0, .1, (train.n_items, self.num_factors))

        for _ in range(self.num_epochs):
            for u, i, r_ui in train.all_ratings():
                residual = r_ui - np.dot(P[u], Q[i])
                # Snapshot P[u] with an explicit copy: plain indexing returns a
                # view, which would already hold the updated values by the time
                # Q[i] is updated, defeating the simultaneous update.
                p_u_old = P[u].copy()
                P[u] += self.alpha * residual * Q[i]
                Q[i] += self.alpha * residual * p_u_old

        self.P = P
        self.Q = Q
        self.trainset = train
        return self

    def estimate(self, u, i):
        """Return the estimated rating of user ``u`` for item ``i``.

        Prerequisite: the algorithm must already be fit to a training set.
        Falls back to the trainset's global mean rating when the user or the
        item is unknown to the trainset, or when the factor product is NaN.
        """
        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
            # Unknown user or item: return the general average.
            return self.trainset.global_mean

        prediction = np.dot(self.P[u], self.Q[i])
        # A NaN product (presumably from diverged SGD factors) cannot be used
        # as a rating, so fall back to the global mean as well.
        if np.isnan(prediction):
            return self.trainset.global_mean
        return prediction


In [4]:
# Derive the rating scale from the data instead of hard-coding (1, 5): the
# ratings shown above include half-star values (e.g. 2.5), and Surprise clips
# every prediction to this range, so bounds that do not match the data would
# distort the error metrics.
rating_bounds = (df['rating'].min(), df['rating'].max())
reader = surprise.Reader(rating_scale=rating_bounds)

# The columns must correspond to user id, item id and ratings (in that order).
data = surprise.Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)

In [5]:
### PMF

# Hyper-parameters for the custom SGD matrix-factorization model.
pmf_params = dict(learning_rate=0.05, num_epochs=5, num_factors=10)
algo = ProbabilisticMatrixFactorization(**pmf_params)

# 5-fold cross-validation; per-fold RMSE and MAE are printed and the
# result dictionary is shown as the cell output.
cross_validate(algo, data, cv=5, measures=['RMSE', 'MAE'], verbose=True)

Evaluating RMSE, MAE of algorithm ProbabilisticMatrixFactorization on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0428  1.0461  1.0581  1.0565  1.0638  1.0535  0.0078  
MAE (testset)     0.8026  0.8068  0.8158  0.8212  0.8220  0.8137  0.0077  
Fit time          5.33    5.48    5.30    5.44    7.22    5.75    0.74    
Test time         0.28    0.28    0.34    0.36    0.44    0.34    0.06    


{'test_rmse': array([1.04281857, 1.04608723, 1.05814672, 1.05648553, 1.06381485]),
 'test_mae': array([0.8026114 , 0.80679683, 0.81581718, 0.82115478, 0.82197408]),
 'fit_time': (5.3286967277526855,
  5.484933853149414,
  5.2974159717559814,
  5.435463190078735,
  7.217126846313477),
 'test_time': (0.2812514305114746,
  0.28131604194641113,
  0.34381628036499023,
  0.35878920555114746,
  0.4376866817474365)}

In [6]:
### User-Based

# Cosine similarity, computed between users (user_based=True).
sim_options = dict(name="cosine", user_based=True)

algo = KNNWithMeans(sim_options=sim_options)

# 5-fold cross-validation reporting RMSE and MAE.
cross_validate(algo, data, cv=5, measures=['RMSE', 'MAE'], verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9332  0.9163  0.9213  0.9128  0.9239  0.9215  0.0070  
MAE (testset)     0.7135  0.7021  0.7077  0.6989  0.7085  0.7061  0.0051  
Fit time          0.80    0.74    0.60    0.60    0.60    0.67    0.09    
Test time         2.97    2.10    2.07    1.97    2.17    2.26    0.36    


{'test_rmse': array([0.93323248, 0.91630107, 0.92129391, 0.91279166, 0.92393773]),
 'test_mae': array([0.71349757, 0.70205738, 0.70773058, 0.69890107, 0.70846828]),
 'fit_time': (0.8031322956085205,
  0.7379615306854248,
  0.5996215343475342,
  0.6026029586791992,
  0.5995936393737793),
 'test_time': (2.970935583114624,
  2.10058856010437,
  2.0724825859069824,
  1.9662559032440186,
  2.1657323837280273)}

In [7]:
### Item-Based

# Cosine similarity, computed between items (user_based=False).
sim_options = dict(name="cosine", user_based=False)

algo = KNNWithMeans(sim_options=sim_options)

# 5-fold cross-validation reporting RMSE and MAE.
cross_validate(algo, data, cv=5, measures=['RMSE', 'MAE'], verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9271  0.9249  0.9272  0.9304  0.9199  0.9259  0.0035  
MAE (testset)     0.7102  0.7077  0.7098  0.7135  0.7067  0.7096  0.0023  
Fit time          6.94    8.08    6.92    7.40    7.80    7.43    0.46    
Test time         7.75    8.24    9.22    8.14    8.52    8.37    0.49    


{'test_rmse': array([0.92712731, 0.92490657, 0.92716766, 0.93036798, 0.91987055]),
 'test_mae': array([0.71020661, 0.70765375, 0.70975095, 0.71346496, 0.70668652]),
 'fit_time': (6.939143419265747,
  8.078932046890259,
  6.916399002075195,
  7.400690078735352,
  7.7957398891448975),
 'test_time': (7.7496278285980225,
  8.239897966384888,
  9.219529867172241,
  8.142663478851318,
  8.518662929534912)}

In [8]:
### User-Based

# Mean-squared-difference similarity, computed between users (user_based=True).
sim_options = dict(name="msd", user_based=True)

algo = KNNWithMeans(sim_options=sim_options)

# 5-fold cross-validation reporting RMSE and MAE.
cross_validate(algo, data, cv=5, measures=['RMSE', 'MAE'], verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9219  0.9165  0.9256  0.9143  0.9164  0.9189  0.0042  
MAE (testset)     0.7046  0.7019  0.7086  0.7016  0.7017  0.7037  0.0027  
Fit time          0.29    0.31    0.28    0.29    0.29    0.29    0.01    
Test time         2.19    2.11    1.96    2.03    1.98    2.05    0.09    


{'test_rmse': array([0.92191771, 0.91645108, 0.92562028, 0.91425571, 0.91643365]),
 'test_mae': array([0.70459889, 0.7019086 , 0.7085921 , 0.70156913, 0.70173802]),
 'fit_time': (0.2877674102783203,
  0.30681586265563965,
  0.2847564220428467,
  0.29378199577331543,
  0.2937812805175781),
 'test_time': (2.18581223487854,
  2.105600595474243,
  1.9552016258239746,
  2.025386333465576,
  1.9782638549804688)}

In [9]:
### Item-Based

# Mean-squared-difference similarity, computed between items (user_based=False).
sim_options = dict(name="msd", user_based=False)

algo = KNNWithMeans(sim_options=sim_options)

# 5-fold cross-validation reporting RMSE and MAE.
cross_validate(algo, data, cv=5, measures=['RMSE', 'MAE'], verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9165  0.9154  0.9056  0.9104  0.9209  0.9138  0.0052  
MAE (testset)     0.7041  0.6999  0.6939  0.6972  0.7060  0.7002  0.0044  
Fit time          3.77    4.76    5.61    4.26    8.80    5.44    1.79    
Test time         8.79    11.75   9.45    9.11    18.58   11.54   3.67    


{'test_rmse': array([0.91648205, 0.91537556, 0.90562623, 0.91042528, 0.92085527]),
 'test_mae': array([0.70405203, 0.69994802, 0.69388235, 0.6972187 , 0.70604375]),
 'fit_time': (3.770289659500122,
  4.761760711669922,
  5.605912923812866,
  4.2624266147613525,
  8.801414012908936),
 'test_time': (8.789413213729858,
  11.753437042236328,
  9.45014238357544,
  9.111244440078735,
  18.575417041778564)}

In [10]:
### User-Based

# Pearson similarity, computed between users (user_based=True).
sim_options = dict(name="pearson", user_based=True)

algo = KNNWithMeans(sim_options=sim_options)

# 5-fold cross-validation reporting RMSE and MAE.
cross_validate(algo, data, cv=5, measures=['RMSE', 'MAE'], verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9239  0.9261  0.9215  0.9256  0.9219  0.9238  0.0019  
MAE (testset)     0.7031  0.7045  0.7041  0.7059  0.7035  0.7042  0.0010  
Fit time          2.39    2.30    2.21    2.10    1.60    2.12    0.28    
Test time         5.39    4.70    4.46    3.39    3.50    4.29    0.75    


{'test_rmse': array([0.92385916, 0.926131  , 0.92152792, 0.92559451, 0.92193802]),
 'test_mae': array([0.70305281, 0.70447276, 0.70412397, 0.7059199 , 0.70353394]),
 'fit_time': (2.3893539905548096,
  2.2991151809692383,
  2.214890480041504,
  2.0985829830169678,
  1.6012582778930664),
 'test_time': (5.388336658477783,
  4.702509164810181,
  4.457857847213745,
  3.3870420455932617,
  3.5023467540740967)}

In [11]:
### Item-Based

# Pearson similarity, computed between items (user_based=False).
sim_options = dict(name="pearson", user_based=False)

algo = KNNWithMeans(sim_options=sim_options)

# 5-fold cross-validation reporting RMSE and MAE.
cross_validate(algo, data, cv=5, measures=['RMSE', 'MAE'], verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9252  0.9202  0.9375  0.9297  0.9341  0.9293  0.0062  
MAE (testset)     0.7054  0.7020  0.7132  0.7092  0.7111  0.7082  0.0040  
Fit time          34.89   14.33   19.55   15.51   17.95   20.45   7.45    
Test time         8.93    9.19    9.09    9.36    11.90   9.70    1.11    


{'test_rmse': array([0.92520854, 0.9201604 , 0.93749028, 0.92969165, 0.93408833]),
 'test_mae': array([0.70543523, 0.7020408 , 0.71322033, 0.7091939 , 0.71113079]),
 'fit_time': (34.88888955116272,
  14.32613229751587,
  19.551083087921143,
  15.50792121887207,
  17.95426321029663),
 'test_time': (8.93075966835022,
  9.192431211471558,
  9.092809915542603,
  9.361933469772339,
  11.900685548782349)}

## 3f

In [12]:
# User-based grid: try every similarity measure against k = 30, 35, ..., 50.
sim_options2 = dict(
    name=["cosine", "msd", "pearson"],
    user_based=[True],
)

param_grid2 = dict(sim_options=sim_options2, k=list(range(30, 55, 5)))

# Score each combination by RMSE under 5-fold cross-validation.
gs2 = GridSearchCV(KNNWithMeans, param_grid=param_grid2, measures=["rmse"], cv=5)
gs2.fit(data)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing th

In [13]:
# Print the best parameter combination, then its RMSE score.
for summary in (gs2.best_params, gs2.best_score):
    print(summary["rmse"])

{'sim_options': {'name': 'msd', 'user_based': True}, 'k': 40}
0.9184465966007231


In [14]:
# Item-based grid restricted to MSD similarity, over larger neighbourhoods.
sim_options = dict(
    name=["msd"],
    user_based=[False],
)

param_grid = dict(sim_options=sim_options, k=[70, 80, 90, 100, 150, 200, 300])

# Score each combination by RMSE under 5-fold cross-validation.
gs = GridSearchCV(KNNWithMeans, param_grid=param_grid, measures=["rmse"], cv=5)
gs.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

In [15]:
# Print the best parameter combination, then its RMSE score.
for summary in (gs.best_params, gs.best_score):
    print(summary["rmse"])

{'sim_options': {'name': 'msd', 'user_based': False}, 'k': 150}
0.9109897482795202


In [18]:
# Chunk by Nafis
# Item-based grid: every similarity measure against the larger k values.
sim_options = dict(
    name=["cosine", "msd", "pearson"],
    user_based=[False],
)

param_grid = dict(sim_options=sim_options, k=[70, 80, 90, 100, 150, 200, 300])

# Score each combination by RMSE under 5-fold cross-validation.
gs = GridSearchCV(KNNWithMeans, param_grid=param_grid, measures=["rmse"], cv=5)
gs.fit(data)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing th

In [19]:
# Print the best parameter combination, then its RMSE score.
for summary in (gs.best_params, gs.best_score):
    print(summary["rmse"])

{'sim_options': {'name': 'msd', 'user_based': False}, 'k': 150}
0.9100915924803182


In [16]:
### User-Based

# MSD similarity computed between users (user_based=True).
sim_options = dict(name="msd", user_based=True)

# Cross-validate one model per neighbourhood size; results are only printed.
for k in (40, 45, 50, 100, 150, 200):
    algo = KNNWithMeans(k=k, sim_options=sim_options)
    cross_validate(algo, data, cv=5, measures=['RMSE'], verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9187  0.9215  0.9144  0.9140  0.9232  0.9184  0.0037  
Fit time          0.29    0.32    0.29    0.31    0.29    0.30    0.01    
Test time         2.02    1.88    1.91    2.13    2.05    2.00    0.09    
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing simi

In [17]:
### Item-Based

# MSD similarity computed between items (user_based=False).
sim_options = dict(name="msd", user_based=False)

# Cross-validate one model per neighbourhood size; results are only printed.
for k in (40, 45, 50, 100, 150, 200):
    algo = KNNWithMeans(k=k, sim_options=sim_options)
    cross_validate(algo, data, cv=5, measures=['RMSE'], verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9219  0.9087  0.9169  0.9155  0.9109  0.9148  0.0046  
Fit time          4.50    5.02    5.96    4.57    6.16    5.24    0.69    
Test time         11.63   11.93   13.31   12.80   16.39   13.21   1.70    
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing simi

In [22]:
## Finding optimum neighbours
### User-Based

# MSD similarity computed between users (user_based=True).
sim_options = dict(name="msd", user_based=True)

# Sweep k = 10, 15, ..., 90 and print the 5-fold RMSE table for each value.
for k in range(10, 95, 5):
    print("k = ", k)
    algo = KNNWithMeans(k=k, sim_options=sim_options)
    cross_validate(algo, data, cv=5, measures=['RMSE'], verbose=True)
    print("\n")

k =  10
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9259  0.9317  0.9359  0.9284  0.9357  0.9315  0.0040  
Fit time          0.51    0.65    0.55    0.57    0.54    0.56    0.05    
Test time         2.07    2.09    2.07    2.05    2.30    2.12    0.09    


k =  15
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
D

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9215  0.9216  0.9295  0.9117  0.9124  0.9193  0.0066  
Fit time          0.29    0.30    0.29    0.30    0.30    0.30    0.01    
Test time         2.17    2.24    2.13    2.19    2.28    2.20    0.05    


k =  70
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 s

In [23]:
## Finding optimum neighbours
### Item-Based

# MSD similarity computed between items (user_based=False).
sim_options = dict(name="msd", user_based=False)

# Sweep k = 130, 135, ..., 200 and print the 5-fold RMSE table for each value.
for k in range(130, 205, 5):
    print(k)
    algo = KNNWithMeans(k=k, sim_options=sim_options)
    cross_validate(algo, data, cv=5, measures=['RMSE'], verbose=True)
    print("\n")

130
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9064  0.9188  0.9077  0.9135  0.9083  0.9109  0.0046  
Fit time          7.01    7.64    3.47    3.53    3.72    5.07    1.85    
Test time         17.54   15.60   10.49   10.28   10.32   12.85   3.10    


135
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done comp

Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9159  0.9117  0.9073  0.9108  0.9046  0.9101  0.0039  
Fit time          3.29    6.78    6.63    3.54    3.42    4.73    1.61    
Test time         16.70   22.37   21.98   11.93   11.85   16.97   4.60    


190
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2 

In [24]:
## Finding optimum neighbours
### User-Based

# MSD similarity computed between users (user_based=True).
sim_options = dict(name="msd", user_based=True)

# Sweep k = 5, 10, ..., 200 and print the 5-fold RMSE table for each value.
for k in range(5, 205, 5):
    print("k = ", k)
    algo = KNNWithMeans(k=k, sim_options=sim_options)
    cross_validate(algo, data, cv=5, measures=['RMSE'], verbose=True)
    print("\n")

k =  5
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9578  0.9547  0.9614  0.9577  0.9561  0.9575  0.0023  
Fit time          0.28    0.30    0.36    0.34    0.34    0.32    0.03    
Test time         1.53    1.30    1.42    1.55    1.42    1.45    0.09    


k =  10
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Do

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9267  0.9202  0.9116  0.9145  0.9254  0.9197  0.0059  
Fit time          0.26    0.29    0.30    0.30    0.30    0.29    0.02    
Test time         2.02    2.20    2.04    2.07    1.97    2.06    0.08    


k =  65
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 s

Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9187  0.9201  0.9149  0.9308  0.9172  0.9203  0.0055  
Fit time          0.54    0.58    0.66    0.58    0.54    0.58    0.05    
Test time         3.20    4.03    3.40    3.03    3.19    3.37    0.35    


k =  120
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9234  0.9154  0.9181  0.9249  0.9148  0.

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9250  0.9252  0.9254  0.9137  0.9172  0.9213  0.0049  
Fit time          0.27    0.35    0.31    0.33    0.31    0.31    0.03    
Test time         2.26    2.47    2.65    2.65    2.82    2.57    0.19    


k =  180
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done com

In [25]:
## Finding optimum neighbours
### Item-Based

# MSD similarity computed between items (user_based=False).
sim_options = dict(name="msd", user_based=False)

# Sweep k = 5, 10, ..., 200 and print the 5-fold RMSE table for each value.
for k in range(5, 205, 5):
    print("k = ", k)
    algo = KNNWithMeans(k=k, sim_options=sim_options)
    cross_validate(algo, data, cv=5, measures=['RMSE'], verbose=True)
    print("\n")

k =  5
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9841  0.9775  0.9729  0.9722  0.9744  0.9762  0.0043  
Fit time          3.36    3.52    3.58    3.39    3.55    3.48    0.09    
Test time         5.64    5.69    5.74    5.82    5.72    5.72    0.06    


k =  10
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Do

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9162  0.9150  0.9093  0.9004  0.9194  0.9121  0.0067  
Fit time          3.53    4.01    3.70    3.80    3.72    3.75    0.16    
Test time         9.54    9.47    9.49    9.59    10.42   9.70    0.36    


k =  65
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 s

Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9064  0.9108  0.9123  0.9221  0.9020  0.9107  0.0067  
Fit time          3.65    3.34    3.32    3.39    3.52    3.44    0.13    
Test time         10.13   9.45    9.49    9.74    9.53    9.67    0.25    


k =  120
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9061  0.9077  0.9177  0.9086  0.9099  0.

Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9208  0.9130  0.9052  0.9005  0.9143  0.9108  0.0072  
Fit time          3.09    3.19    3.22    3.56    3.60    3.33    0.21    
Test time         10.58   10.72   12.74   12.47   12.93   11.89   1.02    


k =  180
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the