# Import Library

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

from Modules.dataEngineering import dataEngineering
dataEngineering = dataEngineering()


# Load ตาราง Ratings

In [19]:
def human_format(num):
    num = float('{:.3g}'.format(num))
    magnitude = 0
    while abs(num) >= 1000:
        magnitude += 1
        num /= 1000.0
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'K', 'M', 'B', 'T'][magnitude])

In [20]:
df_ratings        = dataEngineering.loadRatings()['data']

display(df_ratings.head(5) ,f'Rating Count: {human_format(df_ratings.shape[0])}')

Unnamed: 0,UserID,MovieID,userRating
0,3,296,5.0
1,3,1217,5.0
2,3,1653,5.0
3,3,4308,3.0
4,3,5952,4.0


'Rating Count: 2.25M'

# Load ค่า Parameterที่เหมาะสมแต่ละ Algorithm

In [3]:
import json
def import_parameter(file):
    path = f'.\Parameter\{file}.json' 
    with open(path) as f:
        read = json.load(f)
    return (read)

In [4]:
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD, KNNWithMeans, KNNBasic, KNNWithZScore

option_SVD = import_parameter('option_SVD')

option_KNNBasic = import_parameter('option_KNNBasic')

option_KNNwithMeans = import_parameter('option_KNNwithMeans')

option_KNNWithZScore = import_parameter('option_KNNWithZScore')

# Define the metrics you want to use for evaluation
metrics = ['rmse', 'mae']

# จำนวน Fold
fold = 5

In [5]:
# Create a Reader object
reader = Reader(rating_scale=(1, 5))

# Load the dataset using the Reader
data = Dataset.load_from_df(df_ratings[['UserID', 'MovieID', 'userRating']], reader)

# Train Model

In [6]:
# Define the models
model_SVD = SVD(
    n_epochs=option_SVD['n_epochs'], lr_all=option_SVD['lr_all'], reg_all=option_SVD['reg_all'],
)

model_KNNwithMeans = KNNWithMeans( k=option_KNNwithMeans['k'], 
    sim_options=option_KNNwithMeans['sim_options'], bsl_options=option_KNNwithMeans['bsl_options']
)

model_KNNBasic = KNNBasic(k=option_KNNBasic['k'], 
    sim_options=option_KNNBasic['sim_options'], bsl_options=option_KNNBasic['bsl_options'],
)

model_KNNWithZScore = KNNWithZScore(k=option_KNNWithZScore['k'], 
    sim_options=option_KNNWithZScore['sim_options'], bsl_options=option_KNNWithZScore['bsl_options'],
)



# ทำ Cross-Validation ในแต่ละ Algorithm

In [7]:
# Perform cross-validation for SVD
try:
    results_SVD = cross_validate(model_SVD, data, measures=metrics, cv=fold, verbose=True)
except (MemoryError):
    results_SVD = {'test_rmse': [0]*fold, 'test_mae': [0]*fold,
                            'fit_time': [0]*fold, 'test_time': [0]*fold}
    print("ใช้ Memory เกิน")


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8293  0.8315  0.8315  0.8337  0.8311  0.8314  0.0014  
MAE (testset)     0.6267  0.6286  0.6286  0.6300  0.6277  0.6283  0.0011  
Fit time          14.51   17.09   17.08   17.76   17.04   16.69   1.13    
Test time         7.21    6.28    7.66    7.65    5.71    6.90    0.78    


In [8]:
# Perform cross-validation for KNN with Means
try:
    results_KNNwithMeans = cross_validate(model_KNNwithMeans, data, measures=metrics, cv=fold, verbose=True)
except (MemoryError):
    results_KNNwithMeans = {'test_rmse': [0]*fold, 'test_mae': [0]*fold,
                            'fit_time': [0]*fold, 'test_time': [0]*fold}
    print("ใช้ Memory เกิน")
    

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.7679  0.7668  0.7674  0.7652  0.7661  0.7667  0.0010  
MAE (testset)     0.5692  0.5678  0.5687  0.5675  0.5682  0.5683  0.0006  
Fit time          154.02  169.81  160.57  177.00  164.08  165.09  7.85    
Test time         231.54  263.19  245.56  254.50  252.84  249.53  10.60   


In [9]:
# Perform cross-validation for KNN Basic
try:
    results_KNNBasic = cross_validate(model_KNNBasic, data, measures=metrics, cv=fold, verbose=True)
except (MemoryError):
    results_KNNBasic = {'test_rmse': [0]*fold, 'test_mae': [0]*fold,
                            'fit_time': [0]*fold, 'test_time': [0]*fold}
    print("ใช้ Memory เกิน")


Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8058  0.8055  0.8078  0.8050  0.8050  0.8058  0.0010  
MAE (testset)     0.6094  0.6094  0.6113  0.6088  0.6093  0.6097  0.0009  
Fit time          166.91  148.27  170.94  154.37  151.98  158.49  8.83    
Test time         228.00  212.76  208.64  206.47  202.10  211.59  8.89    


In [10]:
# Perform cross-validation for KNN with Z-Score
try:
    results_KNNWithZScore = cross_validate(model_KNNWithZScore, data, measures=metrics, cv=fold, verbose=True)
except (MemoryError):
    results_KNNWithZScore = {'test_rmse': [0]*fold, 'test_mae': [0]*fold,
                            'fit_time': [0]*fold, 'test_time': [0]*fold}
    print("ใช้ Memory เกิน")


Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithZScore on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.7629  0.7652  0.7659  0.7670  0.7687  0.7659  0.0019  
MAE (testset)     0.5649  0.5666  0.5674  0.5677  0.5687  0.5671  0.0013  
Fit time          152.91  143.87  151.16  151.29  150.67  149.98  3.15    
Test time         241.10  232.40  231.57  244.48  231.39  236.19  5.50    


# จัดข้อมูลที่ได้มาก่อนที่จะเก็บข้อมูล

In [11]:
import numpy as np

def Avg(lis):
    return np.average(lis)

def Std(lis):
    return np.std(lis)

In [12]:
def change2DataFrame(results, name):
    print(name+':')
    dic = { 
        '_' :['Fold %d'%x for x in range(1,fold+1)],
        'RMSE' :results['test_rmse'],
        'MAE' :results['test_mae'],
        'Fit time':(results['fit_time']),
        'Test time':(results['test_time'])
    }
        
    df1 = pd.DataFrame(dic,index=range(1,fold+1), columns=['_','RMSE', 'MAE', 'Fit time', 'Test time'])
    new_dict = {
        '_' :['Mean', 'Std'],
        'RMSE' :[df1['RMSE'].mean(), df1['RMSE'].std()],
        'MAE' :[df1['MAE'].mean(), df1['MAE'].std()],
        'Fit time':[Avg(df1['Fit time']), Std(df1['Fit time'])],
        'Test time':[Avg(df1['Test time']), Std(df1['Test time'])]
    }
    df2 = pd.DataFrame(new_dict, index=[fold, fold+1], columns=['_','RMSE', 'MAE', 'Fit time', 'Test time'])
    df_results = pd.concat([df1, df2], axis=0)
    return df_results


In [13]:
df_Basic = change2DataFrame(results_KNNBasic, 'KNNBasic Results')
df_Basic

KNNBasic Results:


Unnamed: 0,_,RMSE,MAE,Fit time,Test time
1,Fold 1,0.805831,0.609442,166.907892,227.99634
2,Fold 2,0.80554,0.609402,148.274812,212.760964
3,Fold 3,0.807751,0.611305,170.93999,208.644501
4,Fold 4,0.805012,0.608838,154.371531,206.468421
5,Fold 5,0.805007,0.609289,151.977901,202.099468
5,Mean,0.805828,0.609655,158.494425,211.593939
6,Std,0.001132,0.000953,8.82703,8.893996


In [14]:
df_ZScore = change2DataFrame(results_KNNWithZScore, 'KNNWithZScore Results')
df_ZScore

KNNWithZScore Results:


Unnamed: 0,_,RMSE,MAE,Fit time,Test time
1,Fold 1,0.762853,0.564853,152.907341,241.099952
2,Fold 2,0.765163,0.566641,143.865896,232.401138
3,Fold 3,0.76591,0.567409,151.161038,231.572343
4,Fold 4,0.767015,0.567665,151.291492,244.47935
5,Fold 5,0.768669,0.56871,150.66712,231.393008
5,Mean,0.765922,0.567056,149.978577,236.189158
6,Std,0.002164,0.001437,3.147652,5.504731


In [15]:
df_Mean = change2DataFrame(results_KNNwithMeans, 'KNNwithMeans Results')
df_Mean

KNNwithMeans Results:


Unnamed: 0,_,RMSE,MAE,Fit time,Test time
1,Fold 1,0.767896,0.569187,154.023278,231.538822
2,Fold 2,0.766772,0.567846,169.805185,263.194165
3,Fold 3,0.767382,0.568715,160.572536,245.556587
4,Fold 4,0.765163,0.567521,176.996122,254.497053
5,Fold 5,0.766076,0.568189,164.077556,252.838882
5,Mean,0.766658,0.568291,165.094935,249.525102
6,Std,0.001078,0.000668,7.847209,10.600001


In [16]:
df_SVD = change2DataFrame(results_SVD, 'SVD Results')
df_SVD

SVD Results:


Unnamed: 0,_,RMSE,MAE,Fit time,Test time
1,Fold 1,0.829296,0.626716,14.507652,7.212544
2,Fold 2,0.831457,0.628642,17.085799,6.279472
3,Fold 3,0.831473,0.628623,17.078686,7.662142
4,Fold 4,0.833707,0.629979,17.764328,7.649544
5,Fold 5,0.831121,0.627729,17.036282,5.710098
5,Mean,0.831411,0.628338,16.694549,6.90276
6,Std,0.001568,0.001211,1.126443,0.779903


Save Result to xlsx

In [18]:
import pandas as pd

path = 'D:\\Coding\\Machine_Learning\\Recommendation_System\\Results_Comparing\\'
file_name = path + f'Count_Rating_{human_format(df_ratings.shape[0])}' + '.xlsx'

# Export DataFrames to Excel using ExcelWriter 
with pd.ExcelWriter(file_name) as excel_writer:
    df_Basic.to_excel(excel_writer, sheet_name='KNN Basic', index=False)
    df_ZScore.to_excel(excel_writer, sheet_name='KNN With Z-Score', index=False)
    df_Mean.to_excel(excel_writer, sheet_name='KNN with Means', index=False)
    df_SVD.to_excel(excel_writer, sheet_name='SVD', index=False)