In [2]:
import pandas as pd
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate
import os

In [3]:
path = os.getcwd() + '/ml1m/'

In [4]:
data = pd.read_csv(f'{path}preprocessed/ratings.txt', sep='\t')
data.head


<bound method NDFrame.head of          uid   pid  rating  timestamp
0          0   872       5  978300760
1          0   537       3  978302109
2          0   679       3  978301968
3          0  2606       4  978300275
4          0  1790       5  978824291
...      ...   ...     ...        ...
940958  6039   810       1  956716541
940959  6039   813       5  956704887
940960  6039   477       5  956704746
940961  6039   815       4  956715648
940962  6039   816       4  956715569

[940963 rows x 4 columns]>

In [5]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(data[['uid', 'pid', 'rating']], reader)

In [6]:
algo = SVD()
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8713  0.8734  0.8739  0.8724  0.8755  0.8733  0.0014  
MAE (testset)     0.6842  0.6868  0.6861  0.6850  0.6875  0.6859  0.0012  
Fit time          7.80    7.96    7.92    7.89    7.94    7.90    0.05    
Test time         1.70    1.85    1.42    1.82    1.61    1.68    0.16    


{'test_rmse': array([0.87129792, 0.87340064, 0.87386588, 0.87241409, 0.87548825]),
 'test_mae': array([0.68422412, 0.68677415, 0.68614374, 0.6850384 , 0.68746227]),
 'fit_time': (7.803056955337524,
  7.961819887161255,
  7.924058198928833,
  7.892982006072998,
  7.935588121414185),
 'test_time': (1.698958158493042,
  1.8501250743865967,
  1.416917085647583,
  1.8243739604949951,
  1.614806890487671)}

In [7]:
trainset = data.build_full_trainset()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x12cc569b0>

In [8]:
movie_ids = data.df['pid'].unique()

In [9]:
def get_top_recommendations(user_id, num_recommendations=10):
    predictions = []
    for movie_id in movie_ids:
        predictions.append((movie_id, algo.predict(user_id, movie_id).est))

    predictions.sort(key=lambda x: x[1], reverse=True)

    top_recommendations = [x[0] for x in predictions[:num_recommendations]]
    return top_recommendations

In [10]:
top_movies = get_top_recommendations(46, num_recommendations=10)
top_movies

[665, 2402, 2236, 2533, 840, 2626, 842, 2329, 714, 859]