# Surprise 다양한 조건 비교

In [2]:
# 집단과 사용자의 평가경향을 함께 고려한 CF 알고리즘
from surprise import KNNWithMeans

# Dataset 관련 모듈을 가져온다.
from surprise import Dataset

# 정확도 측정 관련 모듈
from surprise import accuracy

# train/test set 분리 모듈
from surprise.model_selection import train_test_split

data = Dataset.load_builtin(name=u'ml-100k')

trainset, testset = train_test_split(data, test_size=.25)

result = []

for neighbor_size in (10, 20, 30, 40, 50, 60):
    algo = KNNWithMeans(k=neighbor_size, sim_options={'name': 'pearson_baseline', 'user_based': True})
    
    algo.fit(trainset)
    predictions = algo.test(testset)
    result.append([neighbor_size, accuracy.rmse(predictions)])
    
result

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 0.9516
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 0.9381
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 0.9370
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 0.9367
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 0.9369
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 0.9371


[[10, 0.9515641602346732],
 [20, 0.9381455253568152],
 [30, 0.9370215858407612],
 [40, 0.9366678094902965],
 [50, 0.9368826555397544],
 [60, 0.937098154516528]]

In [3]:
# KNN 다양한 파라미터 비교
# Grid Search를 위한 모듈
from surprise.model_selection import GridSearchCV

param_grid = {
    'k':[5,10,15,25],
    'sim_options': {'name': ['pearson_baseline', 'cosine'],
                    'user_based': [True, False]}
}

gs = GridSearchCV(KNNWithMeans, param_grid, measures=['rmse'], cv=4)

gs.fit(data)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similar

In [5]:
# 최적의 RMSE 
print(gs.best_score['rmse'])

# 최적 RMSE의 parameter 출력
print(gs.best_params['rmse'])

0.9259678091954351
{'k': 25, 'sim_options': {'name': 'pearson_baseline', 'user_based': False}}


In [6]:
# SVD(MF) 다양한 파라미터 비교
from surprise import SVD
from surprise.model_selection import GridSearchCV

param_grid = {
    'n_epochs': [70, 80, 90], # like iteration
    'lr_all': [0.005, 0.006, 0.007], # learning rate alpha
    'reg_all': [0.05, 0.07, 0.1] # 정규화 계수
}

gs = GridSearchCV(algo_class=SVD,
                  param_grid = param_grid,
                  measures=['rmse'],
                  cv=4)

gs.fit(data)

print(gs.best_score['rmse'])

print(gs.best_params['rmse'])

0.9118685032057504
{'n_epochs': 90, 'lr_all': 0.006, 'reg_all': 0.1}
