In [None]:
import pandas as pd
from surprise import Dataset, Reader, SVD, SVDpp, NMF
from surprise.model_selection import cross_validate, GridSearchCV, KFold

# Load the ratings table; the userId, movieId and rating columns are used below
ratings = pd.read_csv('/mnt/data/ratings.csv')

# Convert data to a format supported by the Surprise library
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# One seed drives both the fold assignment and the factor initialisation so
# that a Restart & Run All reproduces the same numbers.
random_seed = 42

# A KFold built with an integer seed yields the same splits on every call to
# split(), so the grid search and all three algorithms are scored on identical
# folds. Passing cv=5 instead would create a fresh, unseeded split each time.
folds = KFold(n_splits=5, random_state=random_seed, shuffle=True)

# Selection of the best parameters for SVD
param_grid = {
    'n_factors': [20, 50, 100],
    'lr_all': [0.002, 0.005, 0.01],
    'reg_all': [0.02, 0.1, 0.4],
    # Single-valued entry: nothing is tuned here, it only pins the RNG used to
    # initialise the factors so the selected combination is reproducible.
    'random_state': [random_seed],
}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=folds)
gs.fit(data)

best_svd_params = gs.best_params['rmse']
print("Найкращі параметри для SVD:", best_svd_params)

# Model building and evaluation
# NOTE(review): the tuned SVD is scored below on the same folds that selected
# its hyperparameters, so its figures are likely optimistic compared with the
# untuned SVD++ and NMF baselines. Hold out a separate split if an unbiased
# comparison is required.
algorithms = {
    # best_svd_params carries n_factors, lr_all, reg_all and random_state
    'SVD': SVD(**best_svd_params),
    'SVD++': SVDpp(random_state=random_seed),
    'NMF': NMF(random_state=random_seed),
}

results = {}
for name, algo in algorithms.items():
    print(f"Evaluating {name}...")
    results[name] = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=folds, verbose=True)

# Comparing the results: mean and standard deviation across the folds
for name, result in results.items():
    print(f"\n{name} performance:")
    for metric in ['test_rmse', 'test_mae']:
        print(f"{metric}: {result[metric].mean():.4f} ± {result[metric].std():.4f}")


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Load data
ratings = pd.read_csv('/mnt/data/ratings.csv')

# Options
num_users = ratings['userId'].nunique()
num_movies = ratings['movieId'].nunique()
latent_factors = 10  # Number of latent factors
learning_rate = 0.01
regularization = 0.1
epochs = 20
random_seed = 42  # Seeds the factor initialisation and the train/test split

# Initialize factor matrices with small Gaussian noise from a seeded generator
rng = np.random.default_rng(random_seed)
user_factors = rng.normal(scale=1.0 / latent_factors, size=(num_users, latent_factors))
movie_factors = rng.normal(scale=1.0 / latent_factors, size=(num_movies, latent_factors))

# Splitting data into training and test samples.
# Explicit copies make both parts independent frames, so adding columns below
# cannot trigger SettingWithCopyWarning or write into a shared buffer.
train_data, test_data = (
    part.copy()
    for part in train_test_split(ratings, test_size=0.2, random_state=random_seed)
)

# Mapping IDs to contiguous row indexes. The maps are built from the full
# table, so every ID in either split has an entry.
user_id_map = {user_id: idx for idx, user_id in enumerate(ratings['userId'].unique())}
movie_id_map = {movie_id: idx for idx, movie_id in enumerate(ratings['movieId'].unique())}

train_data['user_index'] = train_data['userId'].map(user_id_map)
train_data['movie_index'] = train_data['movieId'].map(movie_id_map)
test_data['user_index'] = test_data['userId'].map(user_id_map)
test_data['movie_index'] = test_data['movieId'].map(movie_id_map)

# Pull the columns out once as NumPy arrays; iterating a DataFrame row by row
# with iterrows() builds a Series per row and dominates the run time.
train_users = train_data['user_index'].to_numpy()
train_movies = train_data['movie_index'].to_numpy()
train_ratings = train_data['rating'].to_numpy()
test_users = test_data['user_index'].to_numpy()
test_movies = test_data['movie_index'].to_numpy()
test_ratings = test_data['rating'].to_numpy()


def factorization_rmse(user_idx, movie_idx, true_ratings):
    """Return the RMSE of the current factor matrices on the given ratings.

    Reads the module-level ``user_factors`` and ``movie_factors`` at call
    time, so it always reflects the latest training state.

    Args:
        user_idx: Integer array of row positions into ``user_factors``.
        movie_idx: Integer array of row positions into ``movie_factors``.
        true_ratings: Observed ratings aligned with the two index arrays.

    Returns:
        Root-mean-square error between the observed ratings and the row-wise
        dot products of the selected user and movie factor vectors.
    """
    predictions = (user_factors[user_idx] * movie_factors[movie_idx]).sum(axis=1)
    return np.sqrt(np.mean((true_ratings - predictions) ** 2))


# Gradient descent
for epoch in range(epochs):
    for user_idx, movie_idx, rating in zip(train_users, train_movies, train_ratings):
        # Snapshot the user vector: the in-place update below overwrites it,
        # and the movie update must use the value from before this step.
        user_vec = user_factors[user_idx].copy()
        movie_vec = movie_factors[movie_idx]

        # Forecasted rating and its error
        error = rating - np.dot(user_vec, movie_vec)

        # Update both factor vectors from the pre-step values
        user_factors[user_idx] += learning_rate * (error * movie_vec - regularization * user_vec)
        movie_factors[movie_idx] += learning_rate * (error * user_vec - regularization * movie_vec)

    # RMSE on the training set and on the held-out test sample
    train_rmse = factorization_rmse(train_users, train_movies, train_ratings)
    test_rmse = factorization_rmse(test_users, test_movies, test_ratings)

    print(f'Epoch {epoch + 1}: Train RMSE = {train_rmse:.4f}, Test RMSE = {test_rmse:.4f}')

# Final factors, copied so later in-place training cannot silently change them
user_factors_final = user_factors.copy()
movie_factors_final = movie_factors.copy()
