In [1]:
import pandas as pd

file_path = '/content/data/ml-latest-small/ratings.csv'

ratings_data = pd.read_csv(file_path, low_memory=False)

ratings_data.head(10)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
5,1,70,3.0,964982400
6,1,101,5.0,964980868
7,1,110,4.0,964982176
8,1,151,5.0,964984041
9,1,157,5.0,964984100


In [2]:
!pip install scikit-surprise


Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3162719 sha256=6c40d0db1e54ddec9104071c9bf318179119ffdec56621a777e8627866473e74
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [6]:
from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate


reader = Reader()
data = Dataset.load_from_df(ratings_data[['userId', 'movieId', 'rating']], reader)

# Use the famous SVD algorithm.
algo = SVD()

# Run 5-fold cross-validation and print results.
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)



Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8791  0.8741  0.8785  0.8725  0.8675  0.8743  0.0043  
MAE (testset)     0.6760  0.6715  0.6720  0.6713  0.6683  0.6718  0.0024  
Fit time          1.69    1.53    1.51    1.53    2.07    1.66    0.21    
Test time         0.12    0.28    0.13    0.13    0.38    0.21    0.11    


{'test_rmse': array([0.87914623, 0.87411536, 0.87845472, 0.87247621, 0.86747855]),
 'test_mae': array([0.67595812, 0.6715401 , 0.67202349, 0.67133175, 0.66830089]),
 'fit_time': (1.6872930526733398,
  1.5287840366363525,
  1.514061689376831,
  1.5289065837860107,
  2.0653889179229736),
 'test_time': (0.11866950988769531,
  0.27769994735717773,
  0.13214921951293945,
  0.12741947174072266,
  0.3815042972564697)}

In [11]:
from surprise import SVDpp


model_svdpp = SVDpp()


cv_results_svdpp = cross_validate(model_svdpp, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)


print("Cross-validation results for SVD++:")
for metric in ['test_rmse', 'test_mae']:
    scores = cv_results_svdpp[metric]
    print(f"{metric}: {scores.mean()} ± {scores.std()}")





Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8699  0.8578  0.8590  0.8633  0.8582  0.8616  0.0046  
MAE (testset)     0.6680  0.6591  0.6586  0.6609  0.6581  0.6609  0.0037  
Fit time          99.52   113.90  123.12  104.62  115.04  111.24  8.29    
Test time         12.27   14.78   16.83   14.40   13.90   14.44   1.47    
Cross-validation results for SVD++:
test_rmse: 0.861633789560994 ± 0.004578701925161244
test_mae: 0.6609332449413367 ± 0.003661708421015071


In [12]:
from surprise import NMF

model_nmf = NMF()


cv_results_nmf = cross_validate(model_nmf, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)


print("\nCross-validation results for NMF:")
for metric in ['test_rmse', 'test_mae']:
    scores = cv_results_nmf[metric]
    print(f"{metric}: {scores.mean()} ± {scores.std()}")

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9118  0.9164  0.9279  0.9154  0.9216  0.9186  0.0056  
MAE (testset)     0.7034  0.6991  0.7102  0.7013  0.7050  0.7038  0.0038  
Fit time          2.89    3.69    2.91    2.90    3.16    3.11    0.31    
Test time         0.10    0.11    0.38    0.13    0.18    0.18    0.11    

Cross-validation results for NMF:
test_rmse: 0.9186284107934819 ± 0.005609851928504646
test_mae: 0.7038194819294253 ± 0.00375050975628017


Прогнозуємо рекомендації фільмів для конкретного юзера, з використанням моделей NMF та SVD++:

In [26]:
from surprise.model_selection import train_test_split

def get_top_n_recommendations(user_id, model, n=10):

    trainset, _ = train_test_split(data, test_size=0.25, random_state=42)

    model.fit(trainset)

    all_movie_ids = data.df['movieId'].unique()

    user_ratings = data.df[data.df['userId'] == user_id]['movieId']
    movies_to_predict = [movie_id for movie_id in all_movie_ids if movie_id not in user_ratings]

    predictions = [model.predict(user_id, movie_id) for movie_id in movies_to_predict]

    top_n_predictions = sorted(predictions, key=lambda x: x.est, reverse=True)[:n]

    recommended_movie_ids = [pred.iid for pred in top_n_predictions]

    return recommended_movie_ids

# Приклад використання для моделі NMF
user_id = 1
nmf_model = NMF()
recommended_movies_nmf = get_top_n_recommendations(user_id, nmf_model)
print("Recommended movies for user", user_id, "using NMF:", recommended_movies_nmf)

# Приклад використання для моделі SVD++
user_id = 1
svdpp_model = SVDpp()
recommended_movies_svdpp = get_top_n_recommendations(user_id, svdpp_model)
print("Recommended movies for user", user_id, "using SVD++:", recommended_movies_svdpp)


Recommended movies for user 1 using NMF: [260, 296, 457, 593, 923, 940, 1023, 1089, 1136, 1196]
Recommended movies for user 1 using SVD++: [260, 296, 356, 527, 593, 608, 1089, 1136, 1196, 1197]


спробуємо ще спрогнозувати рекомендації фільмів, з використанням моделі SVD:

In [27]:
user_id = 1
svd_model = SVD()
recommended_movies_svd = get_top_n_recommendations(user_id, svd_model)
print("Recommended movies for user", user_id, "using SVD:", recommended_movies_svd)


Recommended movies for user 1 using SVD: [260, 296, 527, 608, 1089, 1197, 1210, 1213, 1270, 2329]
