# In [None]:
# ==========================================
# ЧАСТЬ 2: Гибридная система и NCF
# ==========================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from surprise import SVD, Reader, Dataset
import warnings

# Silence every warning category for the whole session and fix the NumPy and
# TensorFlow global seeds so repeated runs draw the same random numbers.
# NOTE(review): the blanket 'ignore' filter also hides deprecation and
# numerical warnings raised further down.
warnings.filterwarnings(action='ignore')
np.random.seed(seed=42)
tf.random.set_seed(seed=42)

# 1. DATA LOADING
# The CSV files are expected under ../data/; change the paths if they live
# somewhere else.
# NOTE: a FileNotFoundError from either read_csv call triggers the fallback,
# so a missing books.csv alone also replaces real ratings with synthetic ones.
# NOTE: `books` is not assigned on the fallback path.
try:
    ratings = pd.read_csv('../data/ratings.csv')
    books = pd.read_csv('../data/books.csv')
except FileNotFoundError:
    print("Файлы не найдены. Создаем синтетические данные для демонстрации...")
    # Synthetic stand-in so the rest of the script can still run:
    # 100000 random (user, book, rating) triples with user ids in [1, 999],
    # book ids in [1, 1999] and ratings in [1, 5]; repeated (user, book)
    # pairs are dropped, keeping the first occurrence.
    users = np.random.randint(low=1, high=1000, size=100000)
    items = np.random.randint(low=1, high=2000, size=100000)
    rates = np.random.randint(low=1, high=6, size=100000)
    ratings = pd.DataFrame(
        {'user_id': users, 'book_id': items, 'rating': rates}
    ).drop_duplicates(subset=['user_id', 'book_id'])

# 2. PREPARATION AND SPLIT
# Hold out 20% of the ratings as the test set, stratified on user_id.
# NOTE(review): scikit-learn's stratified split needs at least two rows per
# user; confirm this holds for the real ratings file.
train_df, test_df = train_test_split(
    ratings,
    test_size=0.2,
    random_state=42,
    stratify=ratings['user_id'],
)
print(f"Train: {len(train_df)}, Test: {len(test_df)}")

# Raw ID -> contiguous index lookups for the network's embedding layers.
# They are built from the full ratings table, so IDs from both the train and
# the test split are covered.
user_ids = ratings['user_id'].unique()
book_ids = ratings['book_id'].unique()
user2idx = dict(zip(user_ids, range(len(user_ids))))
book2idx = dict(zip(book_ids, range(len(book_ids))))

# 3. NEURAL NETWORK (NCF)
def get_ncf_model(num_users, num_items, emb_size=50):
    """Build and compile the neural collaborative filtering network.

    The model takes two inputs of shape ``(1,)`` (a user index and an item
    index), embeds each into ``emb_size`` dimensions, concatenates the two
    flattened embeddings and passes them through Dense(128, relu) ->
    Dropout(0.2) -> Dense(64, relu) -> Dense(1, linear).

    Args:
        num_users: Size of the user embedding table.
        num_items: Size of the item embedding table.
        emb_size: Width of each embedding vector.

    Returns:
        A Keras ``Model`` compiled with the ``'adam'`` optimizer and MSE loss.
    """
    user_in = Input(shape=(1,))
    item_in = Input(shape=(1,))

    user_vec = Embedding(num_users, emb_size)(user_in)
    item_vec = Embedding(num_items, emb_size)(item_in)

    # Embedding outputs carry an extra length-1 axis; flatten before joining.
    flattened = [Flatten()(user_vec), Flatten()(item_vec)]
    hidden = Concatenate()(flattened)

    hidden = Dense(128, activation='relu')(hidden)
    hidden = Dropout(0.2)(hidden)
    hidden = Dense(64, activation='relu')(hidden)
    # Linear output: the network regresses the rating value directly.
    rating = Dense(1, activation='linear')(hidden)

    network = Model(inputs=[user_in, item_in], outputs=rating)
    network.compile(optimizer='adam', loss='mse')
    return network

# Prepare the network inputs: each split becomes a [user indices, book indices]
# pair of arrays. IDs absent from a lookup would be sent to index 0 by fillna.
X_train = [
    train_df[column].map(lookup).fillna(0).values
    for column, lookup in (('user_id', user2idx), ('book_id', book2idx))
]
y_train = train_df['rating'].values

X_test = [
    test_df[column].map(lookup).fillna(0).values
    for column, lookup in (('user_id', user2idx), ('book_id', book2idx))
]
y_test = test_df['rating'].values

# Training
# NOTE: the test split doubles as the validation set during fitting.
model_ncf = get_ncf_model(num_users=len(user2idx), num_items=len(book2idx))
print("Обучение NCF...")
history = model_ncf.fit(
    x=X_train,
    y=y_train,
    batch_size=2048,
    epochs=3,
    validation_data=(X_test, y_test),
    verbose=1,
)

# 4. WRAPPERS (adapter classes sharing a predict(uid, bids) interface)
class PopularityRec:
    """Non-personalised baseline that scores a book by its mean rating.

    Despite the name, the score is the average rating of the book in ``df``,
    not the number of ratings it received.
    """

    def __init__(self, df):
        """Store per-book mean ratings and the overall mean rating of ``df``."""
        per_book_mean = df.groupby('book_id')['rating'].mean()
        self.pop = per_book_mean.to_dict()
        self.mean = df['rating'].mean()

    def predict(self, uid, bids):
        """Return one score per book in ``bids``; ``uid`` is ignored.

        Books that were not present in the fitting data get the overall mean.
        """
        scores = []
        for book in bids:
            scores.append(self.pop.get(book, self.mean))
        return scores

class NCFRec:
    """Adapter exposing a trained NCF Keras model via ``predict(uid, bids)``.

    Raw user/book IDs are translated to embedding indices through the
    module-level ``user2idx`` and ``book2idx`` dictionaries.
    """

    def __init__(self, model):
        """Keep a reference to the fitted Keras model."""
        self.model = model

    def predict(self, uid, bids):
        """Return a flat array with one predicted rating per book in ``bids``.

        Args:
            uid: Raw user ID.
            bids: Sized collection of raw book IDs.

        Returns:
            1-D NumPy array of length ``len(bids)``; empty when ``bids`` is
            empty.
        """
        # Keras' predict rejects zero-length input, so answer directly.
        # len() rather than truthiness so NumPy arrays are accepted too.
        if len(bids) == 0:
            return np.zeros(0, dtype=np.float32)

        # NOTE(review): IDs missing from the lookup tables silently fall back
        # to index 0, i.e. they borrow the embedding of whichever user/book
        # owns that index. Confirm this is the intended cold-start behaviour.
        u_idx = user2idx.get(uid, 0)
        b_idxs = np.array([book2idx.get(b, 0) for b in bids])
        # Repeat the user index so both inputs have one entry per book.
        u_idxs = np.full(len(b_idxs), u_idx)
        return self.model.predict([u_idxs, b_idxs], verbose=0).flatten()

class SVDRec:
    """Adapter around a Surprise ``SVD`` model with a ``predict(uid, bids)`` API."""

    def __init__(self, df):
        """Fit SVD (50 factors, random_state=42) on every row of ``df``.

        ``df`` must provide the columns ``user_id``, ``book_id`` and
        ``rating``; ratings are declared to Surprise as lying on a 1-5 scale.
        """
        scale = Reader(rating_scale=(1, 5))
        triples = df[['user_id', 'book_id', 'rating']]
        dataset = Dataset.load_from_df(triples, scale)
        self.algo = SVD(n_factors=50, random_state=42)
        # No hold-out here: the whole frame becomes the training set.
        self.algo.fit(dataset.build_full_trainset())

    def predict(self, uid, bids):
        """Return the estimated rating of user ``uid`` for each book in ``bids``."""
        estimates = []
        for book in bids:
            estimates.append(self.algo.predict(uid, book).est)
        return estimates

# Initialisation: the popularity and SVD baselines are fitted on the training
# split; the NCF wrapper reuses the network trained above.
pop_model = PopularityRec(df=train_df)
svd_model = SVDRec(df=train_df)
ncf_model = NCFRec(model=model_ncf)

# 5. HYBRID SYSTEM
class HybridRec:
    """Blend several recommenders into one score via a weighted sum."""

    def __init__(self, models, weights):
        """Store the component models and their blending weights.

        Args:
            models: Mapping from a model name to an object that exposes
                ``predict(uid, bids)``.
            weights: Mapping from the same names to numeric weights.
        """
        self.models = models
        self.weights = weights

    def predict(self, uid, bids):
        """Return the weighted sum of every component's scores for ``bids``.

        The weights are applied as given; they are not normalised here.
        """
        blended = np.zeros(len(bids))
        for label, component in self.models.items():
            weighted = np.array(component.predict(uid, bids)) * self.weights[label]
            # Accumulate in place into the float64 result buffer.
            np.add(blended, weighted, out=blended)
        return blended

# Blend: SVD and NCF contribute 40% each, the popularity baseline 20%.
hybrid = HybridRec(
    models=dict(SVD=svd_model, NCF=ncf_model, Pop=pop_model),
    weights=dict(SVD=0.4, NCF=0.4, Pop=0.2),
)

# 6. EVALUATION LOOP
def evaluate_model(model, test_data, name="Model", n_users=200, k=10,
                   seed=42, candidate_pool=None):
    """Print and return the mean Precision@k of ``model`` on the test data.

    The ``n_users`` users with the most rows in ``test_data`` are evaluated.
    For each one the candidate set is every book the user rated in
    ``test_data`` plus 50 books drawn with replacement from the item pool;
    the model ranks the candidates and the top ``k`` are compared with the
    books the user rated 4 or higher. Users without such a book are skipped.

    Args:
        model: Object exposing ``predict(uid, bids)`` that returns one score
            per candidate.
        test_data: DataFrame with ``user_id``, ``book_id`` and ``rating``.
        name: Label used in the printed messages.
        n_users: Number of most active users to evaluate.
        k: Length of the ranked list that is scored.
        seed: Seed of the private random generator used for the noise
            candidates. With a fixed seed, calls on the same ``test_data``
            and pool draw the same noise, so different models are ranked on
            identical candidate sets. Pass ``None`` for fresh randomness.
        candidate_pool: Item IDs to draw noise from; defaults to the
            module-level ``book_ids``.

    Returns:
        Mean Precision@k over the evaluated users as a float, or ``nan`` when
        no user had a positively rated book.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")

    pool = book_ids if candidate_pool is None else candidate_pool
    # Private generator: sampling no longer depends on (or advances) the
    # global NumPy random state, which previously gave each model different
    # noise candidates.
    rng = np.random.default_rng(seed)

    precisions = []
    # Most active users in the test split
    users = test_data['user_id'].value_counts().head(n_users).index.tolist()

    print(f"Оценка {name}...")
    for u in users:
        user_data = test_data[test_data['user_id'] == u]
        true_pos = set(user_data[user_data['rating'] >= 4]['book_id'])
        if not true_pos:
            continue

        # Candidates: everything the user rated in the test split plus noise;
        # the set removes duplicates introduced by sampling with replacement.
        candidates = list(user_data['book_id']) + list(rng.choice(pool, 50))
        candidates = list(set(candidates))

        scores = model.predict(u, candidates)
        top_k = [candidates[i] for i in np.argsort(scores)[::-1][:k]]

        hits = len(set(top_k) & true_pos)
        precisions.append(hits / k)

    # Avoid np.mean([]) (NaN plus a RuntimeWarning) when nobody qualified.
    score = float(np.mean(precisions)) if precisions else float('nan')
    print(f"{name} Precision@{k}: {score:.4f}")
    return score

# Run the evaluation for every model on the same test split
evaluate_model(pop_model, test_df, name="Popularity")
evaluate_model(svd_model, test_df, name="SVD")
evaluate_model(ncf_model, test_df, name="NCF")
evaluate_model(hybrid, test_df, name="HYBRID SYSTEM")

# Result visualisation (example)
# plt.bar ... (the plotting code from the previous answer can be added here)