# Import Library

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

from Modules.dataEngineering import dataEngineering
dataEngineering = dataEngineering()

# Load ตาราง Ratings

In [2]:
def human_format(num):
    num = float('{:.3g}'.format(num))
    magnitude = 0
    while abs(num) >= 1000:
        magnitude += 1
        num /= 1000.0
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'K', 'M', 'B', 'T'][magnitude])

In [3]:
df_ratings        = dataEngineering.loadRatings()['data']

display(df_ratings.head(5) ,f'Rating Count: {human_format(df_ratings.shape[0])}')

Unnamed: 0,UserID,MovieID,userRating
0,3,296,5.0
1,3,1217,5.0
2,3,1653,5.0
3,3,4308,3.0
4,3,5952,4.0


'Rating Count: 2.25M'

# Load ค่า Parameterที่เหมาะสมแต่ละ Algorithm

In [4]:
import json
def import_parameter(file):
    path = f'.\Parameter\{file}.json' 
    with open(path) as f:
        read = json.load(f)
    return (read)

In [5]:
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD, KNNWithMeans, KNNBasic, KNNWithZScore

option_SVD = import_parameter('option_SVD')

option_KNNBasic = import_parameter('option_KNNBasic')

option_KNNwithMeans = import_parameter('option_KNNwithMeans')

option_KNNWithZScore = import_parameter('option_KNNWithZScore')

# Define the metrics you want to use for evaluation
metrics = ['rmse', 'mae']

# จำนวน Fold
fold = 5

In [6]:
# Create a Reader object
reader = Reader(rating_scale=(1, 5))

# Load the dataset using the Reader
data = Dataset.load_from_df(df_ratings[['UserID', 'MovieID', 'userRating']], reader)

# Train Model

In [7]:
# Define the models
model_SVD = SVD(
    n_epochs=option_SVD['n_epochs'], lr_all=option_SVD['lr_all'], reg_all=option_SVD['reg_all'],
)

model_KNNwithMeans = KNNWithMeans( k=option_KNNwithMeans['k'], 
    sim_options=option_KNNwithMeans['sim_options'], bsl_options=option_KNNwithMeans['bsl_options']
)

model_KNNBasic = KNNBasic(k=option_KNNBasic['k'], 
    sim_options=option_KNNBasic['sim_options'], bsl_options=option_KNNBasic['bsl_options'],
)

model_KNNWithZScore = KNNWithZScore(k=option_KNNWithZScore['k'], 
    sim_options=option_KNNWithZScore['sim_options'], bsl_options=option_KNNWithZScore['bsl_options'],
)

# ทำ Cross-Validation ในแต่ละ Algorithm

In [8]:
# Perform cross-validation for SVD
try:
    results_SVD = cross_validate(model_SVD, data, measures=metrics, cv=fold, verbose=True)
except (MemoryError):
    results_SVD = {'test_rmse': [0]*fold, 'test_mae': [0]*fold,
                            'fit_time': [0]*fold, 'test_time': [0]*fold}
    print("ใช้ Memory เกิน")


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8303  0.8322  0.8329  0.8313  0.8307  0.8315  0.0010  
MAE (testset)     0.6273  0.6292  0.6290  0.6282  0.6274  0.6282  0.0008  
Fit time          13.92   15.32   14.50   14.70   13.66   14.42   0.59    
Test time         5.74    5.96    6.17    5.70    5.19    5.75    0.33    


In [9]:
# Perform cross-validation for KNN with Means
try:
    results_KNNwithMeans = cross_validate(model_KNNwithMeans, data, measures=metrics, cv=fold, verbose=True)
except (MemoryError):
    results_KNNwithMeans = {'test_rmse': [0]*fold, 'test_mae': [0]*fold,
                            'fit_time': [0]*fold, 'test_time': [0]*fold}
    print("ใช้ Memory เกิน")
    

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.7648  0.7680  0.7655  0.7681  0.7667  0.7666  0.0013  
MAE (testset)     0.5672  0.5694  0.5677  0.5690  0.5683  0.5683  0.0008  
Fit time          145.17  163.82  154.04  172.83  139.17  155.01  12.20   
Test time         221.72  226.59  241.23  233.25  224.77  229.51  6.97    


In [10]:
# Perform cross-validation for KNN Basic
try:
    results_KNNBasic = cross_validate(model_KNNBasic, data, measures=metrics, cv=fold, verbose=True)
except (MemoryError):
    results_KNNBasic = {'test_rmse': [0]*fold, 'test_mae': [0]*fold,
                            'fit_time': [0]*fold, 'test_time': [0]*fold}
    print("ใช้ Memory เกิน")


Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8072  0.8036  0.8070  0.8065  0.8051  0.8059  0.0014  
MAE (testset)     0.6106  0.6078  0.6104  0.6101  0.6095  0.6097  0.0010  
Fit time          170.96  170.15  166.28  163.86  151.11  164.47  7.16    
Test time         205.76  220.98  223.32  221.00  215.07  217.23  6.35    


In [11]:
# Perform cross-validation for KNN with Z-Score
try:
    results_KNNWithZScore = cross_validate(model_KNNWithZScore, data, measures=metrics, cv=fold, verbose=True)
except (MemoryError):
    results_KNNWithZScore = {'test_rmse': [0]*fold, 'test_mae': [0]*fold,
                            'fit_time': [0]*fold, 'test_time': [0]*fold}
    print("ใช้ Memory เกิน")


Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithZScore on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.7661  0.7672  0.7667  0.7648  0.7642  0.7658  0.0011  
MAE (testset)     0.5667  0.5680  0.5675  0.5669  0.5660  0.5670  0.0007  
Fit time          155.13  154.88  151.28  149.34  151.90  152.51  2.21    
Test time         233.13  234.20  229.35  230.83  231.49  231.80  1.71    


# จัดข้อมูลที่ได้มาก่อนที่จะเก็บข้อมูล

In [12]:
import numpy as np

def Avg(lis):
    return np.average(lis)

def Std(lis):
    return np.std(lis)

In [25]:
def change2DataFrame(results, name):
    print(name+':')
    columns = ['_','RMSE', 'MAE', 'Fit time (s)', 'Test time (s)']
    dic = { 
        '_' :['Fold %d'%x for x in range(1,fold+1)],
        'RMSE' :results['test_rmse'],
        'MAE' :results['test_mae'],
        'Fit time (s)':(results['fit_time']),
        'Test time (s)':(results['test_time'])
    }
        
    df1 = pd.DataFrame(dic, index=range(1,fold+1), columns=columns)
    new_dict = {
        '_' :['Mean', 'Std'],
        'RMSE' :[df1['RMSE'].mean(), df1['RMSE'].std()],
        'MAE' :[df1['MAE'].mean(), df1['MAE'].std()],
        'Fit time (s)':[Avg(df1['Fit time (s)']), Std(df1['Fit time (s)'])],
        'Test time (s)':[Avg(df1['Test time (s)']), Std(df1['Test time (s)'])]
    }
    df2 = pd.DataFrame(new_dict, index=[fold, fold+1], columns=columns)
    df_results = pd.concat([df1, df2], axis=0)
    return df_results


In [26]:
df_Basic = change2DataFrame(results_KNNBasic, 'KNNBasic Results')
df_Basic

KNNBasic Results:


Unnamed: 0,_,RMSE,MAE,Fit time (s),Test time (s)
1,Fold 1,0.807163,0.610559,170.964809,205.763658
2,Fold 2,0.803582,0.607812,170.150641,220.978497
3,Fold 3,0.807008,0.61038,166.284713,223.32333
4,Fold 4,0.806518,0.610076,163.856534,221.000066
5,Fold 5,0.805099,0.609481,151.112382,215.074721
5,Mean,0.805874,0.609662,164.473816,217.228054
6,Std,0.001518,0.001112,7.163067,6.348472


In [27]:
df_ZScore = change2DataFrame(results_KNNWithZScore, 'KNNWithZScore Results')
df_ZScore

KNNWithZScore Results:


Unnamed: 0,_,RMSE,MAE,Fit time (s),Test time (s)
1,Fold 1,0.766086,0.566709,155.131683,233.126535
2,Fold 2,0.767187,0.567975,154.878307,234.199525
3,Fold 3,0.766746,0.567496,151.282826,229.35393
4,Fold 4,0.764807,0.566881,149.343292,230.829907
5,Fold 5,0.764173,0.566005,151.902384,231.486083
5,Mean,0.7658,0.567013,152.507698,231.799196
6,Std,0.001278,0.000756,2.208405,1.705303


In [28]:
df_Mean = change2DataFrame(results_KNNwithMeans, 'KNNwithMeans Results')
df_Mean

KNNwithMeans Results:


Unnamed: 0,_,RMSE,MAE,Fit time (s),Test time (s)
1,Fold 1,0.764798,0.567197,145.172007,221.719015
2,Fold 2,0.767979,0.569356,163.818988,226.593367
3,Fold 3,0.765451,0.567736,154.043603,241.231625
4,Fold 4,0.768135,0.569013,172.829233,233.248669
5,Fold 5,0.766683,0.568269,139.174078,224.768638
5,Mean,0.766609,0.568314,155.007582,229.512263
6,Std,0.001486,0.000889,12.195243,6.972242


In [29]:
df_SVD = change2DataFrame(results_SVD, 'SVD Results')
df_SVD

SVD Results:


Unnamed: 0,_,RMSE,MAE,Fit time (s),Test time (s)
1,Fold 1,0.83031,0.627296,13.919046,5.737431
2,Fold 2,0.83221,0.629185,15.320657,5.961449
3,Fold 3,0.832939,0.629038,14.504607,6.17224
4,Fold 4,0.83126,0.62821,14.701105,5.695428
5,Fold 5,0.830735,0.627351,13.661029,5.190391
5,Mean,0.831491,0.628216,14.421289,5.751388
6,Std,0.001076,0.000896,0.587255,0.328416


Save Result to xlsx

In [30]:
import pandas as pd

path = 'D:\\Coding\\Machine_Learning\\Recommendation_System\\Results_Comparing\\'
file_name = path + f'Count_Rating_{human_format(df_ratings.shape[0])}' + '.xlsx'

# Export DataFrames to Excel using ExcelWriter 
with pd.ExcelWriter(file_name) as excel_writer:
    df_Basic.to_excel(excel_writer, sheet_name='KNN Basic', index=False)
    df_ZScore.to_excel(excel_writer, sheet_name='KNN With Z-Score', index=False)
    df_Mean.to_excel(excel_writer, sheet_name='KNN with Means', index=False)
    df_SVD.to_excel(excel_writer, sheet_name='SVD', index=False)