<a href="https://colab.research.google.com/github/SamoaChen/Frequentist-ML/blob/main/Non_Negative_Matrix_Factorization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scikit-surprise

Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/97/37/5d334adaf5ddd65da99fc65f6507e0e4599d092ba048f4302fe8775619e8/scikit-surprise-1.1.1.tar.gz (11.8MB)
[K     |████████████████████████████████| 11.8MB 3.9MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp36-cp36m-linux_x86_64.whl size=1670898 sha256=82160ba1a212e2cb30e25ee6abba7a7ce46c3ada21594d949ad797adbdb10d15
  Stored in directory: /root/.cache/pip/wheels/78/9c/3d/41b419c9d2aff5b6e2b4c0fc8d25c538202834058f9ed110d0
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.1


In [None]:
#IMPORT PACKAGES 
import numpy as np
import pandas as pd
from surprise import NMF
from surprise import Dataset
from surprise import accuracy
import matplotlib.pyplot as plt
from surprise.model_selection import GridSearchCV
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split

In [None]:
#IMPLEMENTATION OF NON-NEGATIVE MATRIX FACTORIZATION

# Seed NumPy's global RNG so that a fresh "Restart & Run All" reproduces the
# hold-out split, the CV folds and the NMF initialisations (approach taken
# from the Surprise FAQ on reproducible experiments).
np.random.seed(0)

#import built in data
data = Dataset.load_builtin('ml-100k')

#separating training and testing sets
# Hold out 20% of the raw ratings BEFORE any model selection and shrink `data`
# to the remaining 80%.  Cross-validating on the full dataset would let the
# hyper-parameter search see the ratings that are later used as `testset`,
# which makes the final test RMSE optimistically biased.
raw_ratings = data.raw_ratings
shuffled = np.random.permutation(len(raw_ratings))
n_test = int(round(0.2 * len(raw_ratings)))
test_raw_ratings = [raw_ratings[i] for i in shuffled[:n_test]]
data.raw_ratings = [raw_ratings[i] for i in shuffled[n_test:]]  # `data` is now train-only
trainset = data.build_full_trainset()
testset = data.construct_testset(test_raw_ratings)

# cross validate to find the best factor dimension
# candidate numbers of latent factors: 1, 2, ..., 20
latent_vect = np.arange(1, 21)
# space for storing cv values (mean test RMSE per candidate)
vals = np.zeros(latent_vect.shape)

#find rmse for each latent dimension
for index, val in enumerate(latent_vect):
  algo = NMF(n_factors=int(val))
  result = cross_validate(algo, data, measures=['RMSE'], cv=5, verbose=False)
  vals[index] = result['test_rmse'].mean()

#locate dimension that minimize rmse
# argmin yields a single index even when several candidates tie, so the
# lookup below always produces exactly one value
minpos = int(np.argmin(vals))
best_factor_num = int(latent_vect[minpos])


In [None]:
# pull out the entry of latent_vect selected by minpos as a plain Python number, then report it
best_dim = latent_vect[minpos].item()
print("factor dimension that minimize rmse: ", best_dim)


factor dimension that minimize rmse:  14.0


In [None]:
# cross validate to find the best regularization strength
# (the same value is used for the user factors, reg_pu, and item factors, reg_qi)

# Seed NumPy's global RNG so re-running this cell reproduces the folds and the
# NMF initialisations (see the Surprise FAQ on reproducible experiments).
np.random.seed(0)

reg_vect = np.linspace(0.01,0.1,num=20)
# space for storing cv values: one slot per regularization candidate
vals = np.zeros(reg_vect.shape)

# reuse the factor dimension picked by the previous search; np.ravel copes with
# it being stored either as a scalar or as a one-element array
n_factors = int(np.ravel(best_factor_num)[0])

#find rmse for each regularization
for index,val in enumerate(reg_vect):
  # keep the candidate as a float: int() would truncate every value in
  # [0.01, 0.1] to 0, i.e. every run would be fitted without regularization
  reg = float(val)
  algo = NMF(n_factors=n_factors, reg_pu=reg, reg_qi=reg)
  result = cross_validate(algo, data, measures=['RMSE'], cv=5, verbose=False)
  vals[index] = result['test_rmse'].mean()

#locate regularization that minimize rmse
# argmin yields a single index even on ties, so .item() below cannot fail
minpos = int(np.argmin(vals))
best_reg = reg_vect[minpos].item()
print("Regularization value that minimize rmse: ", best_reg)

Regularization value that minimize rmse:  0.04315789473684211


In [None]:
#prediction for test model
# hyper-parameters are the values reported by the two cross-validation
# searches; random_state pins the factor initialisation so the score is
# repeatable
algo = NMF(n_factors=14, reg_pu=0.04315789473684211, reg_qi=0.04315789473684211, random_state=0)
algo.fit(trainset)
predictions = algo.test(testset)

#compute RMSE for test
# accuracy.rmse prints "RMSE: ..." itself, so keep the returned value instead
# of wrapping the call in print(), which showed the number twice
test_rmse = accuracy.rmse(predictions)

RMSE: 0.9741
0.9741034434685671


In [None]:
#-------------GRID SEARCH WITH THE PACKAGE

# Seed NumPy's global RNG so re-running this cell reproduces the folds and the
# NMF initialisations (see the Surprise FAQ on reproducible experiments).
np.random.seed(0)

# candidate values per hyper-parameter; the factor list gets its own name so
# the ndarray `latent_vect` from the earlier search is not replaced by a list
factor_grid = [1, 5, 10, 20, 25, 30]
reg_vect1 = [0.01, 0.05, 0.1, 0.15, 0.2]
reg_vect2 = [0.01, 0.05, 0.1, 0.15, 0.2]
n_epochs = [10, 25, 50, 75, 100]

#GRID PARAMETERS
param_grid = {'n_factors': factor_grid , 'reg_pu': reg_vect1, 'reg_qi': reg_vect2, 'n_epochs': n_epochs}

# 6 * 5 * 5 * 5 = 750 parameter combinations, each evaluated with 3-fold CV;
# RMSE is the only metric requested (listing it twice added nothing)
gs = GridSearchCV(NMF, param_grid, measures=['rmse'], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

0.9546506706155349
{'n_factors': 30, 'reg_pu': 0.1, 'reg_qi': 0.2, 'n_epochs': 100}


In [None]:
#prediction for test model
# hyper-parameters are the best combination reported by the grid search;
# random_state pins the factor initialisation so the score is repeatable
algo = NMF(n_factors=30, reg_pu=0.1, reg_qi=0.2, n_epochs=100, random_state=0)
algo.fit(trainset)
predictions = algo.test(testset)

#compute RMSE for test
# accuracy.rmse prints "RMSE: ..." itself, so keep the returned value instead
# of wrapping the call in print(), which showed the number twice
test_rmse = accuracy.rmse(predictions)

RMSE: 0.9258
0.9258000478138178
