## 3. Collaborative Filtering (협업 필터링: 사용자 리뷰 기반)

In [1]:
import pandas as pd
from surprise import KNNBasic, SVD, SVDpp, NMF, Reader, Dataset
from surprise.model_selection import cross_validate

In [2]:
data = pd.read_excel('Lpoint_R0.xlsx')
data.head()

Unnamed: 0,cust,affiliate,rating
0,M000136117,A01,0.25
1,M000136117,D01,0.25
2,M000419293,A01,0.36
3,M000419293,A02,0.02
4,M000419293,C01,0.05


In [3]:
reader = Reader(rating_scale=(0.5,5))

In [4]:
dataset = Dataset.load_from_df(data[['cust', 'affiliate', 'rating']], reader = reader)

In [5]:
# KNN을 사용
model = KNNBasic()
cross_validate(model, dataset, measures = ['rmse', 'mse'], cv = 5, n_jobs = 4, verbose = True)

Evaluating RMSE, MSE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.3753  0.3904  0.3289  0.3319  0.3306  0.3514  0.0261  
MSE (testset)     0.1409  0.1524  0.1082  0.1101  0.1093  0.1242  0.0187  
Fit time          0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    


{'test_rmse': array([0.37534429, 0.39042712, 0.32889931, 0.33188012, 0.33056308]),
 'test_mse': array([0.14088333, 0.15243333, 0.10817476, 0.11014441, 0.10927195]),
 'fit_time': (0.0, 0.0, 0.0, 0.0, 0.0),
 'test_time': (0.0, 0.0009989738464355469, 0.0, 0.0, 0.0)}

In [6]:
# SVD
model = SVD()
cross_validate(model, dataset, measures = ['rmse', 'mse'], cv = 5, n_jobs = 4, verbose = True)

Evaluating RMSE, MSE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.4129  0.2156  0.3747  0.3654  0.3300  0.3397  0.0674  
MSE (testset)     0.1704  0.0465  0.1404  0.1335  0.1089  0.1200  0.0417  
Fit time          0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    


{'test_rmse': array([0.41285591, 0.21556128, 0.37474436, 0.36537652, 0.3300303 ]),
 'test_mse': array([0.17045   , 0.04646667, 0.14043333, 0.1335    , 0.10892   ]),
 'fit_time': (0.004986763000488281,
  0.003988504409790039,
  0.004986763000488281,
  0.003988504409790039,
  0.0009930133819580078),
 'test_time': (0.0, 0.0009982585906982422, 0.0, 0.0009982585906982422, 0.0)}

In [7]:
# NMF
model = NMF()
cross_validate(model, dataset, measures = ['rmse', 'mse'], cv = 5, n_jobs = 4, verbose = True)

# 0 제외

Evaluating RMSE, MSE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.2633  0.3353  0.4280  0.3888  0.2866  0.3404  0.0614  
MSE (testset)     0.0693  0.1124  0.1832  0.1512  0.0822  0.1197  0.0425  
Fit time          0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    


{'test_rmse': array([0.26331224, 0.33528595, 0.42796028, 0.38884444, 0.28663566]),
 'test_mse': array([0.06933333, 0.11241667, 0.18315   , 0.1512    , 0.08216   ]),
 'fit_time': (0.002990245819091797,
  0.0029909610748291016,
  0.001993417739868164,
  0.001993417739868164,
  0.001994609832763672),
 'test_time': (0.0, 0.0, 0.0, 0.0009975433349609375, 0.0)}

In [8]:
# 출력하는데 오래 걸림
model = SVDpp()
cross_validate(model, dataset, measures = ['rmse', 'mse'], cv = 5, n_jobs = 4, verbose = True)

Evaluating RMSE, MSE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.4190  0.3449  0.2648  0.2621  0.4081  0.3398  0.0673  
MSE (testset)     0.1756  0.1189  0.0701  0.0687  0.1666  0.1200  0.0456  
Fit time          0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    


{'test_rmse': array([0.41898862, 0.34486712, 0.26482699, 0.26206869, 0.40811763]),
 'test_mse': array([0.17555146, 0.11893333, 0.07013333, 0.06868   , 0.16656   ]),
 'fit_time': (0.003991127014160156,
  0.0049855709075927734,
  0.003987789154052734,
  0.004978656768798828,
  0.0019943714141845703),
 'test_time': (0.0, 0.0, 0.0, 0.0, 0.0)}