In [None]:
!pip install scikit-surprise
!pip install scikit-optimize
!pip install pandas-profiling



In [None]:
import random

import numpy as np
import pandas as pd
from surprise import SVD, SVDpp, NMF
from surprise import Dataset
from surprise.model_selection import cross_validate

# Seed Python's and NumPy's global RNGs before anything stochastic runs, so
# that a fresh "Restart & Run All" reproduces the same scores. The
# cross-validation and the algorithms below are presumably randomised (the
# recorded fold scores differ from fold to fold).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

data = Dataset.load_builtin('ml-100k')
alg_svd = SVD()
alg_svdpp = SVDpp()
alg_nmf = NMF()
cv = 5
measures = ['RMSE', 'MAE']

# Run cross-validation for the SVD algorithm and average every reported
# column (test_rmse, test_mae, fit_time, test_time) over the folds.
result_svd = cross_validate(alg_svd, data, measures=measures, cv=cv, verbose=True)
SVD_result = pd.DataFrame.from_dict(result_svd).mean(axis=0)

# Run cross-validation for the SVD++ algorithm
result_svdpp = cross_validate(alg_svdpp, data, measures=measures, cv=cv, verbose=True)
SVDPP_result = pd.DataFrame.from_dict(result_svdpp).mean(axis=0)

# Run cross-validation for the NMF algorithm
result_nmf = cross_validate(alg_nmf, data, measures=measures, cv=cv, verbose=True)
NMF_result = pd.DataFrame.from_dict(result_nmf).mean(axis=0)

# Build the summary DataFrame for comparing the results: one column per
# algorithm, one row per averaged metric. Constructing it in a single call
# avoids mutating an empty frame column by column.
result = pd.DataFrame({
    'SVD': SVD_result,
    'SVDpp': SVDPP_result,
    'NMF': NMF_result,
})
print(result)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9262  0.9376  0.9348  0.9351  0.9400  0.9347  0.0047  
MAE (testset)     0.7314  0.7402  0.7347  0.7376  0.7431  0.7374  0.0041  
Fit time          2.53    1.42    1.44    1.42    1.43    1.65    0.44    
Test time         0.18    0.15    0.23    0.13    0.11    0.16    0.04    
Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9171  0.9169  0.9230  0.9143  0.9224  0.9187  0.0034  
MAE (testset)     0.7192  0.7209  0.7256  0.7191  0.7213  0.7212  0.0024  
Fit time          27.13   26.56   26.81   26.43   26.33   26.65   0.29    
Test time         4.31    4.77    4.21    5.10    4.23    4.52    0.35    
Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (tests

In [None]:
import numpy as np
from scipy.io import loadmat


def load_movie_list(filename):
    """Read movie titles from a text file with one entry per line.

    Each non-blank line is stripped and split at its first space; the leading
    token (presumably the movie id) is discarded and the remainder is kept as
    the title. Blank lines, e.g. a trailing empty line at the end of the file,
    are skipped instead of raising.

    Args:
        filename: Path of the text file, decoded as ISO-8859-1.

    Returns:
        list[str]: The titles in file order.

    Raises:
        IndexError: If a non-blank line contains no space separator.
    """
    movie_names = []
    with open(filename, encoding='ISO-8859-1') as file:
        for line in file:
            entry = line.strip()
            if not entry:
                # Nothing to parse on an empty line.
                continue
            movie_names.append(entry.split(' ', 1)[1])
    return movie_names

def average_ratings(Y, R):
    """Compute the mean rating of every row (movie) over its rated entries.

    Args:
        Y: 2-D array of ratings; rows are averaged along axis 1.
        R: 2-D indicator array of the same shape as ``Y``; a non-zero entry
            marks a rating that is present.

    Returns:
        numpy.ndarray: 1-D float array with one mean per row. Rows without any
        rating yield ``nan`` (no division-by-zero warning is emitted).
    """
    rated = np.asarray(R) != 0
    # Only entries flagged in R contribute, so stray values in unrated cells
    # of Y cannot skew the mean.
    totals = np.where(rated, Y, 0).sum(axis=1, dtype=float)
    counts = rated.sum(axis=1)
    means = np.full(counts.shape, np.nan)
    # Divide only where at least one rating exists; other rows keep nan.
    np.divide(totals, counts, out=means, where=counts > 0)
    return means

def collaborative_filtering(Y, R):
    """Predict missing ratings with user-based collaborative filtering.

    The similarity of two different users is the cosine similarity of their
    rating vectors restricted to the movies both of them rated. The prediction
    for an unrated (movie, user) cell is the similarity-weighted average of
    the ratings that other users gave to that movie.

    Args:
        Y: Array of shape (num_movies, num_users) holding the ratings.
        R: Array of the same shape; a non-zero entry marks an existing rating.

    Returns:
        numpy.ndarray: Float array of shape (num_movies, num_users). Cells that
        already have a rating, and cells for which no prediction can be formed
        (nobody rated the movie, or every rater has zero similarity to the
        user), are 0. The result never contains NaN from a zero denominator.
    """
    rated = np.asarray(R) != 0
    rated_f = rated.astype(float)
    # Zero out unrated cells and work in float so that integer inputs cannot
    # overflow inside the products below.
    ratings = np.where(rated, Y, 0).astype(float)
    squared = ratings ** 2

    # dot[i, j] = sum of Y[m, i] * Y[m, j] over the movies m rated by both.
    dot = ratings.T @ ratings
    # norms[i, j] = Euclidean norm of user i's ratings on the movies that
    # user j also rated; norms.T is the same quantity with the roles swapped.
    norms = np.sqrt(squared.T @ rated_f)
    denom = norms * norms.T

    user_similarity = np.zeros_like(dot)
    # Pairs without co-rated movies (or with zero norm) keep similarity 0.
    np.divide(dot, denom, out=user_similarity, where=denom > 0)
    # A user is never used as their own neighbour.
    np.fill_diagonal(user_similarity, 0.0)

    # weighted[m, u]     = sum over raters k of movie m of sim[u, k] * Y[m, k]
    # weight_total[m, u] = sum over raters k of movie m of |sim[u, k]|
    weighted = ratings @ user_similarity.T
    weight_total = rated_f @ np.abs(user_similarity).T

    predicted_ratings = np.zeros(ratings.shape)
    # Fill only unrated cells that have a non-zero total weight; this is the
    # guard against the 0/0 division.
    np.divide(
        weighted,
        weight_total,
        out=predicted_ratings,
        where=(~rated) & (weight_total > 0),
    )
    return predicted_ratings

def make_recommendations(predicted_ratings, movie_names, user_id, num_recommendations):
    """Return the highest-scoring movies for one user.

    Args:
        predicted_ratings: Array of shape (num_movies, num_users), i.e. one
            column per user.
        movie_names: Sequence of titles indexed by movie row.
        user_id: 1-based user number; column ``user_id - 1`` is used.
        num_recommendations: Maximum number of movies to return.

    Returns:
        list[tuple[int, str]]: ``(movie_index + 1, title)`` pairs ordered from
        the highest to the lowest predicted rating.
    """
    # Select the user's COLUMN: rows of the matrix are movies, so indexing a
    # row here would rank users instead of movies.
    user_scores = np.asarray(predicted_ratings, dtype=float)[:, user_id - 1]
    # argsort places NaN last, so after reversing it would come first; map NaN
    # to -inf so undefined predictions are ranked at the bottom.
    sortable = np.where(np.isnan(user_scores), -np.inf, user_scores)
    sorted_indices = np.argsort(sortable)[::-1]
    top_indices = sorted_indices[:num_recommendations]
    return [(int(idx) + 1, movie_names[idx]) for idx in top_indices]

# --- Load the data ---
# NOTE(review): both paths point into Google Drive; this presumably relies on
# the drive already being mounted at /content/drive — confirm before running.
movie_ids_file = '/content/drive/MyDrive/Colab Notebooks/movie_ids.txt'
movie_names = load_movie_list(movie_ids_file)
movies_file = '/content/drive/MyDrive/Colab Notebooks/movies.mat'
data = loadmat(movies_file)
Y = data['Y']
R = data['R']

# --- Compute the average ratings ---
avg_ratings = average_ratings(Y, R)

# --- Collaborative filtering ---
predicted_ratings = collaborative_filtering(Y, R)

# --- Recommendations for a single user ---
user_id, num_recommendations = 1, 5
recommendations = make_recommendations(
    predicted_ratings=predicted_ratings,
    movie_names=movie_names,
    user_id=user_id,
    num_recommendations=num_recommendations,
)

# --- Print the recommendations ---
# The first element of each tuple is the index + 1 returned by
# make_recommendations, so the "Position" label shows that index rather than
# a 1..N rank.
for position, movie_name in recommendations:
    print(f"Position {position}: {movie_name}")

Mounted at /content/drive


  predicted_ratings[i, j] = np.dot(weights, ratings) / np.sum(np.abs(weights))


Position 104: Theodore Rex (1995)
Position 717: Juror, The (1996)
Position 190: Henry V (1989)
Position 837: Meet John Doe (1941)
Position 334: U Turn (1997)
