# 3. Collaborative Filtering (협업 필터링: 사용자 리뷰 기반)

In [3]:
# Display the installed surprise version so the results below can be tied to it.
import surprise
surprise.__version__

'1.1.1'

In [5]:
import pandas as pd
from surprise import Reader, Dataset, SVD
from surprise.model_selection import KFold, cross_validate

In [6]:
# Load the ratings table from the CSV next to the notebook and preview
# its first five rows.
ratings = pd.read_csv(filepath_or_buffer='ratings_small.csv')
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [7]:
# Lowest rating present in the data; it is the lower end of the rating scale
# given to the Reader.
ratings['rating'].min()

0.5

In [8]:
# Highest rating present in the data; it is the upper end of the rating scale
# given to the Reader.
ratings['rating'].max()

5.0

In [9]:
# Declare the rating scale for surprise: (lowest, highest) possible rating.
# 0.5 and 5 match the minimum and maximum observed in ratings['rating'].
reader = Reader(rating_scale=(0.5,5))

In [10]:
# Wrap the ratings in a surprise Dataset. Only the three columns
# (user id, item id, rating) are passed, in that order; timestamp is dropped.
data = Dataset.load_from_df(
    df=ratings[['userId', 'movieId', 'rating']],
    reader=reader,
)
data

<surprise.dataset.DatasetAutoFolds at 0x2b77154baf0>

In [11]:
# SVD recommender with all hyperparameters left at their defaults;
# random_state=0 pins the seed used by this model.
svd = SVD(random_state=0)

In [12]:
# Evaluate SVD with 5-fold cross-validation, reporting RMSE and MAE per fold.
#
# Passing a bare integer (cv=5) makes surprise build its own KFold without a
# seed, so the fold assignment, and therefore every reported score, changes
# from run to run even though the SVD model itself is seeded. Supplying an
# explicitly seeded KFold makes the whole evaluation repeatable.
cv_folds = KFold(n_splits=5, random_state=0, shuffle=True)
cv_results = cross_validate(
    svd,
    data,
    measures=['RMSE', 'MAE'],
    cv=cv_folds,
    verbose=True,  # prints the per-fold table
)
# Keep the raw results dict as the cell's displayed value, as before.
cv_results

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8976  0.9065  0.8965  0.8924  0.8903  0.8967  0.0056  
MAE (testset)     0.6897  0.6972  0.6902  0.6885  0.6848  0.6901  0.0040  
Fit time          3.50    3.52    3.54    3.54    3.57    3.54    0.02    
Test time         0.18    0.10    0.17    0.10    0.10    0.13    0.04    


{'test_rmse': array([0.8976476 , 0.90647334, 0.89645013, 0.89243155, 0.89026482]),
 'test_mae': array([0.6897171 , 0.69716336, 0.69022894, 0.6885295 , 0.68475385]),
 'fit_time': (3.504631280899048,
  3.516030788421631,
  3.5449976921081543,
  3.543001651763916,
  3.5729970932006836),
 'test_time': (0.17600083351135254,
  0.09596920013427734,
  0.1660022735595703,
  0.10000061988830566,
  0.0950002670288086)}

In [13]:
# Build a training set from the full dataset (no held-out fold) and refit
# the model on it before making individual predictions.
trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2b770d5bdc0>

In [16]:
# Estimated rating of user 1 for movie 302; no true rating is supplied,
# so r_ui stays None in the result.
svd.predict(uid=1, iid=302)

Prediction(uid=1, iid=302, r_ui=None, est=2.7142061734434044, details={'was_impossible': False})

In [17]:
# Estimated rating of user 1 for movie 1029, with the known true rating (3)
# attached as r_ui so it appears next to the estimate in the result.
svd.predict(uid=1, iid=1029, r_ui=3)

Prediction(uid=1, iid=1029, r_ui=3, est=2.8814455446761933, details={'was_impossible': False})

In [18]:
# Estimated rating of a different user (100) for the same movie (1029).
svd.predict(uid=100, iid=1029)

Prediction(uid=100, iid=1029, r_ui=None, est=3.7705476478414846, details={'was_impossible': False})