# Imports

In [36]:
import numpy as np
import pandas as pd


In [37]:
from sklearn.neural_network import BernoulliRBM
from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import root_mean_squared_error, r2_score

# Data preparation

In [38]:
df = pd.read_csv("data.csv")
df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
...,...,...,...,...
99999,671,6268,2.5,1065579370
100000,671,6269,4.0,1065149201
100001,671,6365,4.0,1070940363
100002,671,6385,2.5,1070979663


In [39]:
table = pd.pivot_table(df.drop("timestamp", axis=1), "rating", "userId", "movieId")
# table

In [40]:
scaler = MinMaxScaler((0, 1))
t_table = scaler.fit_transform(table.fillna(0))  # Ставим по-умолчанию минимальную оценку фильму

# Training

In [41]:
rbm = BernoulliRBM(
    n_components=32,        # Количество скрытых нейронов
    learning_rate=0.01,     # Скорость обучения
    n_iter=100,            # Количество итераций
    verbose=True,           # Вывод процесса обучения
    batch_size=32,
    random_state=42
)
rbm.fit(t_table)

[BernoulliRBM] Iteration 1, pseudo-likelihood = -5711.48, time = 0.80s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -5308.28, time = 0.95s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -4921.81, time = 0.92s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -4612.79, time = 0.92s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -4414.66, time = 0.90s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -3921.35, time = 0.88s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -3516.10, time = 0.89s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -3215.26, time = 0.90s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -2998.36, time = 0.92s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -2842.87, time = 0.91s
[BernoulliRBM] Iteration 11, pseudo-likelihood = -2719.25, time = 0.91s
[BernoulliRBM] Iteration 12, pseudo-likelihood = -2635.19, time = 0.93s
[BernoulliRBM] Iteration 13, pseudo-likelihood = -2569.64, time = 0.93s
[BernoulliRBM] Iteration 14, pseudo-likelihood = -2532.25, time = 0.95s
[

# Предсказание

In [None]:
def predict_ratings(user_id, N=50):
    user_vector = t_table[user_id].reshape(1, -1)

    predicted_ratings = np.zeros_like(user_vector)
    for _ in range(N):
        """
        Сглаживаем предсказания через усреднение
        """
        y_pred = rbm.gibbs(user_vector)
        y_pred = scaler.inverse_transform(y_pred)
        predicted_ratings += y_pred.flatten()
    predicted_ratings = predicted_ratings.flatten() / N

    print(len(predicted_ratings), len([i for i in range(1, len(t_table) + 1)]))

    recommendations = pd.DataFrame(
        {
            "movieId": table.columns,
            "predicted_rating": predicted_ratings,
        }
    )

    recommendations = recommendations.sort_values(
        "predicted_rating", ascending=False
    ).set_index("movieId")

    return recommendations

In [None]:
table.iloc[[1]].dropna(axis=1)

movieId,10,17,39,47,50,52,62,110,144,150,...,586,587,588,589,590,592,593,616,661,720
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,4.0,5.0,5.0,4.0,4.0,3.0,3.0,4.0,3.0,5.0,...,3.0,3.0,3.0,5.0,5.0,5.0,3.0,3.0,4.0,4.0


In [103]:
# Предсказание и оценка
user_id = 1
user_idx = user_id - 1

# Получаем предсказания
recommendations = predict_ratings(user_idx)

# Получаем реальные оценки пользователя
real_ratings = table.iloc[user_idx].dropna()

# Выбираем предсказанные оценки для фильмов с известными рейтингами
predicted_for_real = recommendations.loc[real_ratings.index, 'predicted_rating']

# Вычисляем метрики
mse = root_mean_squared_error(real_ratings, predicted_for_real)
r2 = r2_score(real_ratings, predicted_for_real)

print("\nМетрики качества:")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"R-squared (R²) Score: {r2:.4f}")

# Выводим топ-10 рекомендаций и реальные оценки
print("\nТоп-10 рекомендаций:")
print(recommendations.head(10))
print("\nРеальные оценки пользователя:")
print(table.iloc[[user_idx]].dropna(axis=1))

9066 671

Метрики качества:
Mean Squared Error (MSE): 2.5282
R-squared (R²) Score: -7.5507

Топ-10 рекомендаций:
         predicted_rating
movieId                  
954                   5.0
473                   5.0
536                   5.0
526                   5.0
523                   5.0
2063                  5.0
233                   5.0
428                   5.0
267                   5.0
322                   5.0

Реальные оценки пользователя:
movieId  31    1029  1061  1129  1172  1263  1287  1293  1339  1343  1371  \
userId                                                                      
1         2.5   3.0   3.0   2.0   4.0   2.0   2.0   2.0   3.5   2.0   2.5   

movieId  1405  1953  2105  2150  2193  2294  2455  2968  3671  
userId                                                         
1         1.0   4.0   4.0   3.0   2.0   2.0   2.5   1.0   3.0  
