In [1]:

# # ============================================================================
# # MLP 
# # ============================================================================

import pandas as pd
import numpy as np
import pickle
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import time



In [2]:
import os
# Автопоиск файлов в проекте
def find(f, r='..'):
    for p, d, files in os.walk(r):
        d[:] = [x for x in d if x not in ['.git', '__pycache__', '.ipynb_checkpoints']]
        if f in files: return os.path.join(p, f)
    raise FileNotFoundError(f"'{f}' не найден!")

In [3]:

# ============================================================================
# [1] ЗАГРУЗКА ДАННЫХ 
# ============================================================================

print("\n[1] ЗАГРУЗКА ДАННЫХ...")

# Feature engineering данные (17 числовых признаков)
train_df = pd.read_csv(find('train_features_full.csv'))
test_df = pd.read_csv(find('test_features_full.csv'))

print(f" Train: {len(train_df):,} × {train_df.shape[1]}")
print(f" Test: {len(test_df):,} × {test_df.shape[1]}")

# Preprocessing embeddings 
print("\n Загрузка embeddings...")
with open(find('train_embeddings.pkl'), 'rb') as f:
    train_embeddings_pkl = pickle.load(f)

with open(find('test_embeddings.pkl'), 'rb') as f:
    test_embeddings_pkl = pickle.load(f)

# Извлекаем book embeddings (768-dim)
train_book_emb = train_embeddings_pkl['book_emb']
test_book_emb = test_embeddings_pkl['book_emb']

print(f" Train book embeddings: {train_book_emb.shape}")
print(f" Test book embeddings: {test_book_emb.shape}")

# Baseline
with open(find('baseline_artifacts.pkl'), 'rb') as f:
    baseline_artifacts = pickle.load(f)

baseline_rmse = baseline_artifacts['best_baseline_rmse']
print(f" Baseline RMSE: {baseline_rmse:.4f}")


[1] ЗАГРУЗКА ДАННЫХ...
 Train: 874,496 × 21
 Test: 107,260 × 21

 Загрузка embeddings...
 Train book embeddings: (874496, 768)
 Test book embeddings: (107260, 768)
 Baseline RMSE: 0.8104


In [4]:

# ============================================================================
# [2] СОЗДАНИЕ ПРИЗНАКОВ 
# ============================================================================

print("\n[2] СОЗДАНИЕ ПРИЗНАКОВ...")

# 17 числовых признаков
numeric_features = [
    # Interaction (6)
    'tag_overlap_count', 'tag_overlap_ratio', 'tag_jaccard',
    'history_similarity', 'embedding_cosine_sim', 'embedding_euclidean_dist',
    # User (4)
    'avg_user_rating', 'ratings_count', 'tag_vocab_size', 'activity_score',
    # Book (3)
    'book_avg_rating', 'book_ratings_count', 'book_popularity',
    # Preprocessing (4)
    'language_code_encoded', 'year_normalized', 'publication_era', 'average_rating'
]

# Энкодим segment
segment_mapping = {'new': 0, 'inactive': 1, 'active': 2, 'very_active': 3}
train_df['segment_encoded'] = train_df['segment'].map(segment_mapping).fillna(1)
test_df['segment_encoded'] = test_df['segment'].map(segment_mapping).fillna(1)

# Базовый набор: 17 + 1 = 18
base_features = numeric_features + ['segment_encoded']

# Создаем DataFrame с embeddings
emb_columns = [f'book_emb_{i}' for i in range(768)]
train_emb_df = pd.DataFrame(train_book_emb, columns=emb_columns, index=train_df.index)
test_emb_df = pd.DataFrame(test_book_emb, columns=emb_columns, index=test_df.index)

# Полный набор: 18 + 768 = 786
all_features = base_features + emb_columns

print(f" Базовых признаков: {len(base_features)}")
print(f" Embedding признаков: {len(emb_columns)}")
print(f" ВСЕГО: {len(all_features)}")

# Объединяем базовые + embeddings
X_train_full = pd.concat([
    train_df[base_features].reset_index(drop=True),
    train_emb_df.reset_index(drop=True)
], axis=1).values

X_test_full = pd.concat([
    test_df[base_features].reset_index(drop=True),
    test_emb_df.reset_index(drop=True)
], axis=1).values

y_train_full = train_df['rating'].values
y_test = test_df['rating'].values

print(f"\n X_train: {X_train_full.shape}")
print(f" X_test: {X_test_full.shape}")

# ============================================================================
# [3] ОБУЧЕНИЕ MLP
# ============================================================================

print("\n[3] ОБУЧЕНИЕ MLP...")

# Split train/val
X_train_nn, X_val_nn, y_train_nn, y_val_nn = train_test_split(
    X_train_full, y_train_full,
    test_size=0.15,
    random_state=42,
    stratify=y_train_full
)

print(f" NN train: {X_train_nn.shape}")
print(f" NN val: {X_val_nn.shape}")

# Нормализация
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_val_nn_scaled = scaler_nn.transform(X_val_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_full)

print(" Нормализация завершена")

# Обучение
print("\n Архитектура: Input(786) → Dense(128) → Dense(64) → Output(1)")

start_time = time.time()

mlp = MLPRegressor(
    hidden_layer_sizes=(128, 64),
    activation='relu',
    solver='adam',
    learning_rate_init=1e-3,
    batch_size=256,
    max_iter=30,
    random_state=42,
    verbose=True,
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=5
)

mlp.fit(X_train_nn_scaled, y_train_nn)

training_time = time.time() - start_time
print(f"\n Обучение завершено за {training_time/60:.1f} минут")



[2] СОЗДАНИЕ ПРИЗНАКОВ...
 Базовых признаков: 18
 Embedding признаков: 768
 ВСЕГО: 786

 X_train: (874496, 786)
 X_test: (107260, 786)

[3] ОБУЧЕНИЕ MLP...
 NN train: (743321, 786)
 NN val: (131175, 786)
 Нормализация завершена

 Архитектура: Input(786) → Dense(128) → Dense(64) → Output(1)
Iteration 1, loss = 0.33763621
Validation score: 0.374129
Iteration 2, loss = 0.29842484
Validation score: 0.381988
Iteration 3, loss = 0.29125489
Validation score: 0.401923
Iteration 4, loss = 0.28682646
Validation score: 0.405178
Iteration 5, loss = 0.28406278
Validation score: 0.405419
Iteration 6, loss = 0.28163529
Validation score: 0.407705
Iteration 7, loss = 0.27911023
Validation score: 0.411542
Iteration 8, loss = 0.27705440
Validation score: 0.414508
Iteration 9, loss = 0.27529693
Validation score: 0.419745
Iteration 10, loss = 0.27354051
Validation score: 0.414633
Iteration 11, loss = 0.27200140
Validation score: 0.417496
Iteration 12, loss = 0.27102742
Validation score: 0.424777
Iteration

In [5]:
# ============================================================================
# [4] ОЦЕНКА
# ============================================================================

# === Базовые метрики (RMSE, MAE) ===

print("\n" + "="*80)
print("БАЗОВЫЕ МЕТРИКИ")
print("="*80)

# Валидация
mlp_pred_val = mlp.predict(X_val_nn_scaled)
mlp_rmse_val = np.sqrt(mean_squared_error(y_val_nn, np.clip(mlp_pred_val, 1, 5)))
mlp_mae_val = mean_absolute_error(y_val_nn, np.clip(mlp_pred_val, 1, 5))

print(f"\nВалидация (15% train):")
print(f"  RMSE: {mlp_rmse_val:.4f}")
print(f"  MAE:  {mlp_mae_val:.4f}")

# Test
mlp_pred_test = mlp.predict(X_test_nn_scaled)
mlp_rmse_test = np.sqrt(mean_squared_error(y_test, np.clip(mlp_pred_test, 1, 5)))
mlp_mae_test = mean_absolute_error(y_test, np.clip(mlp_pred_test, 1, 5))
mlp_improvement = (baseline_rmse - mlp_rmse_test) / baseline_rmse * 100

print(f"\n Test:")
print(f"  RMSE:        {mlp_rmse_test:.4f}")
print(f"  MAE:         {mlp_mae_test:.4f}")
print(f"  Improvement: {mlp_improvement:+.2f}% vs baseline")

# По buckets
low_mask = y_test <= 3
high_mask = y_test >= 4

mlp_rmse_low = np.sqrt(mean_squared_error(y_test[low_mask], np.clip(mlp_pred_test[low_mask], 1, 5)))
mlp_rmse_high = np.sqrt(mean_squared_error(y_test[high_mask], np.clip(mlp_pred_test[high_mask], 1, 5)))

print(f"\n RMSE по rating buckets:")
print(f"  RMSE (rating ≤3): {mlp_rmse_low:.4f}")
print(f"  RMSE (rating ≥4): {mlp_rmse_high:.4f}")


# === RANKING METRICS ===

print("\n" + "="*80)
print("RANKING METRICS: Precision@K, Recall@K, nDCG@K")
print("="*80)

from collections import defaultdict

# Функции для расчёта
def precision_at_k(y_true, y_pred, k=3, threshold=4.0):
    """Precision@K: доля релевантных книг в топ-K"""
    k_actual = min(k, len(y_pred))
    if k_actual == 0:
        return 0.0
    
    top_k_idx = np.argsort(y_pred)[::-1][:k_actual]
    relevant_in_top_k = sum(y_true[i] >= threshold for i in top_k_idx)
    return relevant_in_top_k / k_actual


def recall_at_k(y_true, y_pred, k=3, threshold=4.0):
    """Recall@K: какую долю релевантных книг нашли"""
    total_relevant = sum(y_true >= threshold)
    
    if total_relevant == 0:
        return 0.0
    
    k_actual = min(k, len(y_pred))
    top_k_idx = np.argsort(y_pred)[::-1][:k_actual]
    relevant_in_top_k = sum(y_true[i] >= threshold for i in top_k_idx)
    return relevant_in_top_k / total_relevant


def ndcg_at_k(y_true, y_pred, k=3):
    """nDCG@K: качество ранжирования"""
    k_actual = min(k, len(y_pred))
    if k_actual == 0:
        return 0.0
    
    top_k_idx = np.argsort(y_pred)[::-1][:k_actual]
    
    dcg = sum((2**y_true[i] - 1) / np.log2(pos + 2) 
              for pos, i in enumerate(top_k_idx))
    
    ideal_idx = np.argsort(y_true)[::-1][:k_actual]
    idcg = sum((2**y_true[i] - 1) / np.log2(pos + 2) 
               for pos, i in enumerate(ideal_idx))
    
    if idcg == 0:
        return 0.0
    
    return dcg / idcg


# Загружаем test_dataset для user_id
print("\n Загрузка test_dataset.csv...")
test_dataset = pd.read_csv(find('test_dataset.csv'))
print(f" Загружено: {len(test_dataset):,} записей")

# Группируем по пользователям
test_with_pred = test_dataset[['user_id']].copy()
test_with_pred['y_true'] = y_test
test_with_pred['y_pred_mlp'] = np.clip(mlp_pred_test, 1, 5)

# Загружаем предсказания других моделей для сравнения
print("\n Загрузка предсказаний других моделей...")
predictions_dict = np.load(find('ml_predictions_full.npy'), allow_pickle=True).item()

test_with_pred['y_pred_catboost'] = predictions_dict['catboost']
test_with_pred['y_pred_ensemble'] = predictions_dict['catboost'] * 0.15 + \
                                    predictions_dict['lightgbm'] * 0.35 + \
                                    predictions_dict['knn_features'] * 0.50

# Baseline predictions
print("\n Расчёт baseline predictions...")
with open(find('baseline_artifacts.pkl'), 'rb') as f:
    baseline_artifacts = pickle.load(f)

global_mean = baseline_artifacts['global_mean']
user_bias_dict = baseline_artifacts['user_bias_dict']
book_bias_dict = baseline_artifacts['book_bias_dict']

def predict_baseline(user_id, book_id):
    pred = global_mean
    pred += user_bias_dict.get(user_id, 0)
    pred += book_bias_dict.get(book_id, 0)
    return np.clip(pred, 1, 5)

from tqdm import tqdm

baseline_pred_test = np.array([
    predict_baseline(row['user_id'], row['book_id'])
    for _, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Baseline", disable=True)
])

test_with_pred['y_pred_baseline'] = baseline_pred_test

# Группируем по user_id
user_groups = test_with_pred.groupby('user_id')

print(f" Всего пользователей: {len(user_groups):,}")


# Расчёт метрик для всех моделей
k_values = [1, 2, 3]

models_to_compare = {
    'Baseline (User+Book)': 'y_pred_baseline',
    'CatBoost': 'y_pred_catboost',
    'Optimized Ensemble': 'y_pred_ensemble',
    'MLP (786 features)': 'y_pred_mlp'
}

results = {
    model_name: {k: {"precision": [], "recall": [], "ndcg": []} for k in k_values}
    for model_name in models_to_compare.keys()
}

print("\n Расчёт ranking metrics...")

for user_id, group in tqdm(user_groups, desc="Processing users", total=len(user_groups)):
    y_true_user = group['y_true'].values
    
    if len(y_true_user) < 1:
        continue
    
    for model_name, col_name in models_to_compare.items():
        y_pred_user = group[col_name].values
        
        for k in k_values:
            results[model_name][k]["precision"].append(
                precision_at_k(y_true_user, y_pred_user, k)
            )
            results[model_name][k]["recall"].append(
                recall_at_k(y_true_user, y_pred_user, k)
            )
            results[model_name][k]["ndcg"].append(
                ndcg_at_k(y_true_user, y_pred_user, k)
            )


# === ВЫВОД РЕЗУЛЬТАТОВ ===

print("\n" + "="*80)
print("RANKING METRICS: РЕЗУЛЬТАТЫ")
print("="*80)

for k in k_values:
    print(f"\n K={k}:")
    print(f"{'Model':<30} {'Precision@{}'.format(k):<12} {'Recall@{}'.format(k):<12} {'nDCG@{}'.format(k):<12}")
    print("-" * 70)
    
    for model_name in models_to_compare.keys():
        p = np.mean(results[model_name][k]['precision'])
        r = np.mean(results[model_name][k]['recall'])
        n = np.mean(results[model_name][k]['ndcg'])
        
        

# === СРАВНЕНИЕ MLP vs ENSEMBLE (лучшая) ===

print("\n" + "="*80)
print("СРАВНЕНИЕ: MLP vs Optimized Ensemble (лучшая по RMSE)")
print("="*80)

mlp_key = 'MLP (786 features)'
best_key = 'Optimized Ensemble'

for k in k_values:
    mlp_p = np.mean(results[mlp_key][k]['precision'])
    best_p = np.mean(results[best_key][k]['precision'])
    
    mlp_r = np.mean(results[mlp_key][k]['recall'])
    best_r = np.mean(results[best_key][k]['recall'])
    
    mlp_n = np.mean(results[mlp_key][k]['ndcg'])
    best_n = np.mean(results[best_key][k]['ndcg'])
    
    imp_p = ((mlp_p - best_p) / best_p * 100) if best_p > 0 else 0
    imp_r = ((mlp_r - best_r) / best_r * 100) if best_r > 0 else 0
    imp_n = ((mlp_n - best_n) / best_n * 100) if best_n > 0 else 0
    
    print(f"\n K={k}:")
    print(f"  Precision: {best_p:.4f} (Ensemble) vs {mlp_p:.4f} (MLP)  ({imp_p:+.2f}%)")
    print(f"  Recall:    {best_r:.4f} (Ensemble) vs {mlp_r:.4f} (MLP)  ({imp_r:+.2f}%)")
    print(f"  nDCG:      {best_n:.4f} (Ensemble) vs {mlp_n:.4f} (MLP)  ({imp_n:+.2f}%)")


# === СРАВНЕНИЕ MLP vs BASELINE ===

print("\n" + "="*80)
print("СРАВНЕНИЕ: MLP vs Baseline")
print("="*80)

baseline_key = 'Baseline (User+Book)'

for k in k_values:
    mlp_p = np.mean(results[mlp_key][k]['precision'])
    baseline_p = np.mean(results[baseline_key][k]['precision'])
    
    mlp_r = np.mean(results[mlp_key][k]['recall'])
    baseline_r = np.mean(results[baseline_key][k]['recall'])
    
    mlp_n = np.mean(results[mlp_key][k]['ndcg'])
    baseline_n = np.mean(results[baseline_key][k]['ndcg'])
    
    imp_p = ((mlp_p - baseline_p) / baseline_p * 100) if baseline_p > 0 else 0
    imp_r = ((mlp_r - baseline_r) / baseline_r * 100) if baseline_r > 0 else 0
    imp_n = ((mlp_n - baseline_n) / baseline_n * 100) if baseline_n > 0 else 0
    
    print(f"\n K={k}:")
    print(f"  Precision: {baseline_p:.4f} → {mlp_p:.4f}  ({imp_p:+.2f}%)")
    print(f"  Recall:    {baseline_r:.4f} → {mlp_r:.4f}  ({imp_r:+.2f}%)")
    print(f"  nDCG:      {baseline_n:.4f} → {mlp_n:.4f}  ({imp_n:+.2f}%)")


# === ТАБЛИЦА ДЛЯ ОТЧЁТА ===

print("\n" + "="*80)
print(" СВОДНАЯ ТАБЛИЦА: RMSE + RANKING METRICS")
print("="*80)

summary_data = []
for model_name in models_to_compare.keys():
    # RMSE
    if model_name == 'Baseline (User+Book)':
        rmse = baseline_rmse
    elif model_name == 'MLP (786 features)':
        rmse = mlp_rmse_test
    elif model_name == 'CatBoost':
        rmse = np.sqrt(mean_squared_error(y_test, np.clip(predictions_dict['catboost'], 1, 5)))
    elif model_name == 'Optimized Ensemble':
        rmse = np.sqrt(mean_squared_error(y_test, np.clip(test_with_pred['y_pred_ensemble'].values, 1, 5)))
    
    # Ranking metrics (K=1)
    p1 = np.mean(results[model_name][1]['precision'])
    r1 = np.mean(results[model_name][1]['recall'])
    n1 = np.mean(results[model_name][1]['ndcg'])
    
    summary_data.append({
        'Model': model_name,
        'RMSE': rmse,
        'Precision@1': p1,
        'Recall@1': r1,
        'nDCG@1': n1
    })

summary_df = pd.DataFrame(summary_data)
summary_df = summary_df.sort_values('RMSE')

print("\n" + summary_df.to_string(index=False))

print("\n" + "="*80)
print(" ОЦЕНКА ЗАВЕРШЕНА")
print("="*80)




БАЗОВЫЕ МЕТРИКИ

Валидация (15% train):
  RMSE: 0.7414
  MAE:  0.5676

 Test:
  RMSE:        0.8288
  MAE:         0.6331
  Improvement: -2.27% vs baseline

 RMSE по rating buckets:
  RMSE (rating ≤3): 1.1754
  RMSE (rating ≥4): 0.5745

RANKING METRICS: Precision@K, Recall@K, nDCG@K

 Загрузка test_dataset.csv...
 Загружено: 107,260 записей

 Загрузка предсказаний других моделей...

 Расчёт baseline predictions...
 Всего пользователей: 35,659

 Расчёт ranking metrics...


Processing users: 100%|██████████| 35659/35659 [00:26<00:00, 1370.93it/s]



RANKING METRICS: РЕЗУЛЬТАТЫ

 K=1:
Model                          Precision@1  Recall@1     nDCG@1      
----------------------------------------------------------------------

 K=2:
Model                          Precision@2  Recall@2     nDCG@2      
----------------------------------------------------------------------

 K=3:
Model                          Precision@3  Recall@3     nDCG@3      
----------------------------------------------------------------------

СРАВНЕНИЕ: MLP vs Optimized Ensemble (лучшая по RMSE)

 K=1:
  Precision: 0.8802 (Ensemble) vs 0.8828 (MLP)  (+0.29%)
  Recall:    0.4730 (Ensemble) vs 0.4755 (MLP)  (+0.52%)
  nDCG:      0.9517 (Ensemble) vs 0.9502 (MLP)  (-0.16%)

 K=2:
  Precision: 0.7802 (Ensemble) vs 0.7794 (MLP)  (-0.10%)
  Recall:    0.7602 (Ensemble) vs 0.7597 (MLP)  (-0.07%)
  nDCG:      0.9661 (Ensemble) vs 0.9653 (MLP)  (-0.09%)

 K=3:
  Precision: 0.6610 (Ensemble) vs 0.6610 (MLP)  (-0.01%)
  Recall:    0.9123 (Ensemble) vs 0.9123 (MLP)  (-0.

In [6]:

# ============================================================================
# [5] СРАВНЕНИЕ С ДРУГИМИ МОДЕЛЯМИ
# ============================================================================

print("\n[5] СРАВНЕНИЕ...")

# Загружаем готовые предсказания
predictions_dict = np.load(find('ml_predictions_full.npy'), allow_pickle=True).item()

catboost_pred = predictions_dict['catboost']
lgb_pred = predictions_dict['lightgbm']
knn_pred = predictions_dict['knn_features']

comparison = {
    'Model': [
        '[BASELINE]',
        'CatBoost (786 features)',
        'LightGBM (786 features)',
        'KNN (786 features)',
        'MLP (786 features)'
    ],
    'RMSE': [
        baseline_rmse,
        np.sqrt(mean_squared_error(y_test, np.clip(catboost_pred, 1, 5))),
        np.sqrt(mean_squared_error(y_test, np.clip(lgb_pred, 1, 5))),
        np.sqrt(mean_squared_error(y_test, np.clip(knn_pred, 1, 5))),
        mlp_rmse_test
    ]
}

df_comp = pd.DataFrame(comparison)
df_comp['Improvement'] = (baseline_rmse - df_comp['RMSE']) / baseline_rmse * 100

print("\n" + "="*70)
print(f"{'Model':<30} {'RMSE':<10} {'Improvement':<15}")
print("="*70)

for _, row in df_comp.iterrows():
    if 'BASELINE' in row['Model']:
        print(f"{row['Model']:<30} {row['RMSE']:<10.4f} {'-':<15}")
        print("-"*70)
    else:
        print(f"{row['Model']:<30} {row['RMSE']:<10.4f} {row['Improvement']:+.2f}%")

print("="*70)

# Позиция MLP
better_models = sum(df_comp.iloc[1:4]['RMSE'] < mlp_rmse_test)
mlp_rank = better_models + 1
print(f"\n MLP: {mlp_rank} место из 4 моделей")

# ============================================================================
# [6] STACKING
# ============================================================================

print("\n[6] STACKING С MLP...")

from sklearn.linear_model import Ridge

X_meta = np.column_stack([
    catboost_pred,
    lgb_pred,
    knn_pred,
    mlp_pred_test
])

X_meta_train, X_meta_val, y_meta_train, y_meta_val = train_test_split(
    X_meta, y_test,
    test_size=0.5,
    random_state=42
)

meta_model = Ridge(alpha=1.0)
meta_model.fit(X_meta_train, y_meta_train)

stacking_pred = meta_model.predict(X_meta_val)

stacking_rmse = np.sqrt(mean_squared_error(y_meta_val, np.clip(stacking_pred, 1, 5)))
stacking_mae = mean_absolute_error(y_meta_val, np.clip(stacking_pred, 1, 5))
stacking_improvement = (baseline_rmse - stacking_rmse) / baseline_rmse * 100

print(f"\n Коэффициенты Ridge:")
print(f"   CatBoost: {meta_model.coef_[0]:+.4f}")
print(f"   LightGBM: {meta_model.coef_[1]:+.4f}")
print(f"   KNN: {meta_model.coef_[2]:+.4f}")
print(f"   MLP: {meta_model.coef_[3]:+.4f}")

print(f"\n Stacking:")
print(f"   RMSE: {stacking_rmse:.4f}")
print(f"   MAE: {stacking_mae:.4f}")
print(f"   Улучшение: {stacking_improvement:+.2f}%")

# ============================================================================
# [7] СОХРАНЕНИЕ
# ============================================================================

print("\n[7] СОХРАНЕНИЕ...")

with open   (find('mlp_model_786.pkl'), 'wb') as f:
    pickle.dump({'model': mlp, 'scaler': scaler_nn}, f)

np.save(find('mlp_predictions_786.npy'), {
    'test': mlp_pred_test,
    'validation': mlp_pred_val,
    'y_test': y_test,
    'rmse': mlp_rmse_test
})

with open('stacking_with_mlp_786.pkl', 'wb') as f:
    pickle.dump({'model': meta_model, 'rmse': stacking_rmse}, f)

print(" Сохранено: mlp_model_786.pkl")
print(" Сохранено: mlp_predictions_786.npy")
print(" Сохранено: stacking_with_mlp_786.pkl")

print("\n" + "="*80)
print("ГОТОВО!")
print("="*80)
print(f"\n MLP (786 признаков): {mlp_rmse_test:.4f} ({mlp_improvement:+.2f}%)")
print(f" Stacking: {stacking_rmse:.4f} ({stacking_improvement:+.2f}%)")
print("="*80)



[5] СРАВНЕНИЕ...

Model                          RMSE       Improvement    
[BASELINE]                     0.8104     -              
----------------------------------------------------------------------
CatBoost (786 features)        0.7731     +4.60%
LightGBM (786 features)        0.7869     +2.90%
KNN (786 features)             0.7819     +3.51%
MLP (786 features)             0.8288     -2.27%

 MLP: 4 место из 4 моделей

[6] STACKING С MLP...

 Коэффициенты Ridge:
   CatBoost: -0.0883
   LightGBM: +0.3859
   KNN: +0.3126
   MLP: +0.3590

 Stacking:
   RMSE: 0.7178
   MAE: 0.5724
   Улучшение: +11.44%

[7] СОХРАНЕНИЕ...
 Сохранено: mlp_model_786.pkl
 Сохранено: mlp_predictions_786.npy
 Сохранено: stacking_with_mlp_786.pkl

ГОТОВО!

 MLP (786 признаков): 0.8288 (-2.27%)
 Stacking: 0.7178 (+11.44%)


In [7]:
# ============================================================================
# [5] СРАВНЕНИЕ С ДРУГИМИ МОДЕЛЯМИ + RANKING METRICS
# ============================================================================

print("\n[5] СРАВНЕНИЕ...")

# Загружаем готовые предсказания
predictions_dict = np.load(find('ml_predictions_full.npy'), allow_pickle=True).item()

catboost_pred = predictions_dict['catboost']
lgb_pred = predictions_dict['lightgbm']
knn_pred = predictions_dict['knn_features']

# === БАЗОВЫЕ МЕТРИКИ (RMSE) ===

print("\n" + "="*80)
print("СРАВНЕНИЕ: RMSE")
print("="*80)

comparison = {
    'Model': [
        '[BASELINE]',
        'CatBoost (786 features)',
        'LightGBM (786 features)',
        'KNN (786 features)',
        'MLP (786 features)'
    ],
    'RMSE': [
        baseline_rmse,
        np.sqrt(mean_squared_error(y_test, np.clip(catboost_pred, 1, 5))),
        np.sqrt(mean_squared_error(y_test, np.clip(lgb_pred, 1, 5))),
        np.sqrt(mean_squared_error(y_test, np.clip(knn_pred, 1, 5))),
        mlp_rmse_test
    ]
}

df_comp = pd.DataFrame(comparison)
df_comp['Improvement'] = (baseline_rmse - df_comp['RMSE']) / baseline_rmse * 100

print(f"\n{'Model':<30} {'RMSE':<10} {'Improvement':<15}")
print("-"*60)

for _, row in df_comp.iterrows():
    if 'BASELINE' in row['Model']:
        print(f"{row['Model']:<30} {row['RMSE']:<10.4f} {'-':<15}")
    else:
        marker = "MLP" in row['Model'] and "*" or " "
        print(f"{marker} {row['Model']:<28} {row['RMSE']:<10.4f} {row['Improvement']:+.2f}%")

# Позиция MLP
better_models = sum(df_comp.iloc[1:4]['RMSE'] < mlp_rmse_test)
mlp_rank = better_models + 1
print(f"\n MLP: {mlp_rank} место из 4 ML-моделей по RMSE")


# === RANKING METRICS ===

print("\n" + "="*80)
print("СРАВНЕНИЕ: RANKING METRICS")
print("="*80)

from collections import defaultdict

# Функции для ranking metrics
def precision_at_k(y_true, y_pred, k=3, threshold=4.0):
    """Precision@K"""
    k_actual = min(k, len(y_pred))
    if k_actual == 0:
        return 0.0
    top_k_idx = np.argsort(y_pred)[::-1][:k_actual]
    relevant_in_top_k = sum(y_true[i] >= threshold for i in top_k_idx)
    return relevant_in_top_k / k_actual

def recall_at_k(y_true, y_pred, k=3, threshold=4.0):
    """Recall@K"""
    total_relevant = sum(y_true >= threshold)
    if total_relevant == 0:
        return 0.0
    k_actual = min(k, len(y_pred))
    top_k_idx = np.argsort(y_pred)[::-1][:k_actual]
    relevant_in_top_k = sum(y_true[i] >= threshold for i in top_k_idx)
    return relevant_in_top_k / total_relevant

def ndcg_at_k(y_true, y_pred, k=3):
    """nDCG@K"""
    k_actual = min(k, len(y_pred))
    if k_actual == 0:
        return 0.0
    
    top_k_idx = np.argsort(y_pred)[::-1][:k_actual]
    dcg = sum((2**y_true[i] - 1) / np.log2(pos + 2) 
              for pos, i in enumerate(top_k_idx))
    
    ideal_idx = np.argsort(y_true)[::-1][:k_actual]
    idcg = sum((2**y_true[i] - 1) / np.log2(pos + 2) 
               for pos, i in enumerate(ideal_idx))
    
    return dcg / idcg if idcg > 0 else 0.0


# Загружаем test_dataset для user_id
print("\n Загрузка test_dataset.csv...")
test_dataset = pd.read_csv(find('test_dataset.csv'))

# Baseline predictions
print(" Расчёт baseline predictions...")
with open(find('baseline_artifacts.pkl'), 'rb') as f:
    baseline_artifacts = pickle.load(f)

global_mean = baseline_artifacts['global_mean']
user_bias_dict = baseline_artifacts['user_bias_dict']
book_bias_dict = baseline_artifacts['book_bias_dict']

def predict_baseline(user_id, book_id):
    pred = global_mean + user_bias_dict.get(user_id, 0) + book_bias_dict.get(book_id, 0)
    return np.clip(pred, 1, 5)

from tqdm import tqdm

baseline_pred_test = np.array([
    predict_baseline(row['user_id'], row['book_id'])
    for _, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Baseline", leave=False)
])

# Создаём датафрейм с предсказаниями всех моделей
test_with_pred = test_dataset[['user_id']].copy()
test_with_pred['y_true'] = y_test
test_with_pred['baseline'] = baseline_pred_test
test_with_pred['catboost'] = catboost_pred
test_with_pred['lightgbm'] = lgb_pred
test_with_pred['knn'] = knn_pred
test_with_pred['mlp'] = np.clip(mlp_pred_test, 1, 5)

# Группируем по user_id
user_groups = test_with_pred.groupby('user_id')

print(f" Пользователей: {len(user_groups):,}")


# Расчёт ranking metrics
k_values = [1, 2, 3]

models = {
    'Baseline': 'baseline',
    'CatBoost': 'catboost',
    'LightGBM': 'lightgbm',
    'KNN': 'knn',
    'MLP': 'mlp'
}

results = {
    model: {k: {"precision": [], "recall": [], "ndcg": []} for k in k_values}
    for model in models.keys()
}

print("\n Расчёт ranking metrics для всех моделей...")

for user_id, group in tqdm(user_groups, desc="Processing", total=len(user_groups)):
    y_true_user = group['y_true'].values
    
    if len(y_true_user) < 1:
        continue
    
    for model_name, col_name in models.items():
        y_pred_user = group[col_name].values
        
        for k in k_values:
            results[model_name][k]["precision"].append(precision_at_k(y_true_user, y_pred_user, k))
            results[model_name][k]["recall"].append(recall_at_k(y_true_user, y_pred_user, k))
            results[model_name][k]["ndcg"].append(ndcg_at_k(y_true_user, y_pred_user, k))


# Вывод результатов
print("\n" + "="*80)
print("RANKING METRICS: Все модели")
print("="*80)

for k in k_values:
    print(f"\n K={k}:")
    print(f"{'Model':<15} {'Precision@{}'.format(k):<13} {'Recall@{}'.format(k):<13} {'nDCG@{}'.format(k):<13}")
    print("-" * 60)
    
    for model_name in models.keys():
        p = np.mean(results[model_name][k]['precision'])
        r = np.mean(results[model_name][k]['recall'])
        n = np.mean(results[model_name][k]['ndcg'])
        
        


# Сравнение MLP с лучшей моделью
best_model_name = min(
    [m for m in models.keys() if m != 'Baseline'],
    key=lambda m: df_comp[df_comp['Model'].str.contains(m.split()[0])]['RMSE'].values[0]
)

print(f"\n" + "="*80)
print(f"СРАВНЕНИЕ: MLP vs {best_model_name} (лучшая по RMSE)")
print("="*80)

for k in k_values:
    mlp_p = np.mean(results['MLP'][k]['precision'])
    best_p = np.mean(results[best_model_name][k]['precision'])
    
    mlp_r = np.mean(results['MLP'][k]['recall'])
    best_r = np.mean(results[best_model_name][k]['recall'])
    
    mlp_n = np.mean(results['MLP'][k]['ndcg'])
    best_n = np.mean(results[best_model_name][k]['ndcg'])
    
    imp_p = ((mlp_p - best_p) / best_p * 100) if best_p > 0 else 0
    imp_r = ((mlp_r - best_r) / best_r * 100) if best_r > 0 else 0
    imp_n = ((mlp_n - best_n) / best_n * 100) if best_n > 0 else 0
    
    print(f"\n K={k}:")
    print(f"  Precision: {best_p:.4f} ({best_model_name}) vs {mlp_p:.4f} (MLP)  [{imp_p:+.2f}%]")
    print(f"  Recall:    {best_r:.4f} ({best_model_name}) vs {mlp_r:.4f} (MLP)  [{imp_r:+.2f}%]")
    print(f"  nDCG:      {best_n:.4f} ({best_model_name}) vs {mlp_n:.4f} (MLP)  [{imp_n:+.2f}%]")


# ============================================================================
# [6] STACKING + RANKING METRICS
# ============================================================================

print("\n" + "="*80)
print("[6] STACKING С MLP...")
print("="*80)

from sklearn.linear_model import Ridge

X_meta = np.column_stack([
    catboost_pred,
    lgb_pred,
    knn_pred,
    mlp_pred_test
])

X_meta_train, X_meta_val, y_meta_train, y_meta_val = train_test_split(
    X_meta, y_test,
    test_size=0.5,
    random_state=42
)

meta_model = Ridge(alpha=1.0)
meta_model.fit(X_meta_train, y_meta_train)

stacking_pred = meta_model.predict(X_meta_val)

stacking_rmse = np.sqrt(mean_squared_error(y_meta_val, np.clip(stacking_pred, 1, 5)))
stacking_mae = mean_absolute_error(y_meta_val, np.clip(stacking_pred, 1, 5))
stacking_improvement = (baseline_rmse - stacking_rmse) / baseline_rmse * 100

print(f"\n Коэффициенты Ridge:")
print(f"  CatBoost: {meta_model.coef_[0]:+.4f}")
print(f"  LightGBM: {meta_model.coef_[1]:+.4f}")
print(f"  KNN:      {meta_model.coef_[2]:+.4f}")
print(f"  MLP:      {meta_model.coef_[3]:+.4f}")

print(f"\n Stacking результаты:")
print(f"  RMSE:        {stacking_rmse:.4f}")
print(f"  MAE:         {stacking_mae:.4f}")
print(f"  Improvement: {stacking_improvement:+.2f}% vs baseline")


# === RANKING METRICS ДЛЯ STACKING ===

print("\n" + "="*80)
print("RANKING METRICS: Stacking с MLP")
print("="*80)

# Индексы для meta_val
meta_val_indices = X_meta_train.shape[0] + np.arange(X_meta_val.shape[0])

# Создаём датафрейм для stacking
test_with_stacking = test_dataset.iloc[meta_val_indices].copy()
test_with_stacking['y_true'] = y_meta_val
test_with_stacking['stacking'] = np.clip(stacking_pred, 1, 5)

# Добавляем предсказания других моделей для сравнения
test_with_stacking['catboost'] = catboost_pred[meta_val_indices]
test_with_stacking['mlp'] = np.clip(mlp_pred_test[meta_val_indices], 1, 5)

# Группируем
user_groups_stacking = test_with_stacking.groupby('user_id')

print(f" Пользователей в stacking validation: {len(user_groups_stacking):,}")

# Расчёт метрик для stacking
models_stacking = {
    'CatBoost (лучшая одиночная)': 'catboost',
    'MLP': 'mlp',
    'Stacking (CatBoost+LightGBM+KNN+MLP)': 'stacking'
}

results_stacking = {
    model: {k: {"precision": [], "recall": [], "ndcg": []} for k in k_values}
    for model in models_stacking.keys()
}

print("\n Расчёт ranking metrics для stacking...")

for user_id, group in tqdm(user_groups_stacking, desc="Processing", total=len(user_groups_stacking)):
    y_true_user = group['y_true'].values
    
    if len(y_true_user) < 1:
        continue
    
    for model_name, col_name in models_stacking.items():
        y_pred_user = group[col_name].values
        
        for k in k_values:
            results_stacking[model_name][k]["precision"].append(precision_at_k(y_true_user, y_pred_user, k))
            results_stacking[model_name][k]["recall"].append(recall_at_k(y_true_user, y_pred_user, k))
            results_stacking[model_name][k]["ndcg"].append(ndcg_at_k(y_true_user, y_pred_user, k))


# Вывод результатов
print("\n" + "="*80)
print("RANKING METRICS: Сравнение Stacking")
print("="*80)

for k in k_values:
    print(f"\n K={k}:")
    print(f"{'Model':<40} {'Precision@{}'.format(k):<13} {'Recall@{}'.format(k):<13} {'nDCG@{}'.format(k):<13}")
    print("-" * 85)
    
    for model_name in models_stacking.keys():
        p = np.mean(results_stacking[model_name][k]['precision'])
        r = np.mean(results_stacking[model_name][k]['recall'])
        n = np.mean(results_stacking[model_name][k]['ndcg'])
        
       


# Улучшение Stacking vs CatBoost
print(f"\n" + "="*80)
print("УЛУЧШЕНИЕ: Stacking vs CatBoost (лучшая одиночная)")
print("="*80)

for k in k_values:
    catboost_p = np.mean(results_stacking['CatBoost (лучшая одиночная)'][k]['precision'])
    stacking_p = np.mean(results_stacking['Stacking (CatBoost+LightGBM+KNN+MLP)'][k]['precision'])
    
    catboost_r = np.mean(results_stacking['CatBoost (лучшая одиночная)'][k]['recall'])
    stacking_r = np.mean(results_stacking['Stacking (CatBoost+LightGBM+KNN+MLP)'][k]['recall'])
    
    catboost_n = np.mean(results_stacking['CatBoost (лучшая одиночная)'][k]['ndcg'])
    stacking_n = np.mean(results_stacking['Stacking (CatBoost+LightGBM+KNN+MLP)'][k]['ndcg'])
    
    imp_p = ((stacking_p - catboost_p) / catboost_p * 100) if catboost_p > 0 else 0
    imp_r = ((stacking_r - catboost_r) / catboost_r * 100) if catboost_r > 0 else 0
    imp_n = ((stacking_n - catboost_n) / catboost_n * 100) if catboost_n > 0 else 0
    
    print(f"\n K={k}:")
    print(f"  Precision: {catboost_p:.4f} → {stacking_p:.4f}  [{imp_p:+.2f}%]")
    print(f"  Recall:    {catboost_r:.4f} → {stacking_r:.4f}  [{imp_r:+.2f}%]")
    print(f"  nDCG:      {catboost_n:.4f} → {stacking_n:.4f}  [{imp_n:+.2f}%]")


# ============================================================================
# [7] СОХРАНЕНИЕ
# ============================================================================

print("\n" + "="*80)
print("[7] СОХРАНЕНИЕ...")
print("="*80)

# Модель MLP
with open(find('mlp_model_786.pkl'), 'wb') as f:
    pickle.dump({'model': mlp, 'scaler': scaler_nn}, f)

print(" Сохранено: mlp_model_786.pkl")

# Предсказания MLP
np.save(find('mlp_predictions_786.npy'), {
    'test': mlp_pred_test,
    'validation': mlp_pred_val,
    'y_test': y_test,
    'rmse': mlp_rmse_test,
    'ranking_results': {
        k: {
            'precision': float(np.mean(results['MLP'][k]['precision'])),
            'recall': float(np.mean(results['MLP'][k]['recall'])),
            'ndcg': float(np.mean(results['MLP'][k]['ndcg']))
        }
        for k in k_values
    }
})

print(" Сохранено: mlp_predictions_786.npy")

# Stacking model
with open(find('stacking_with_mlp_786.pkl'), 'wb') as f:
    pickle.dump({
        'model': meta_model,
        'rmse': stacking_rmse,
        'mae': stacking_mae,
        'improvement': stacking_improvement,
        'coefficients': {
            'catboost': float(meta_model.coef_[0]),
            'lightgbm': float(meta_model.coef_[1]),
            'knn': float(meta_model.coef_[2]),
            'mlp': float(meta_model.coef_[3])
        },
        'ranking_results': {
            k: {
                'precision': float(np.mean(results_stacking['Stacking (CatBoost+LightGBM+KNN+MLP)'][k]['precision'])),
                'recall': float(np.mean(results_stacking['Stacking (CatBoost+LightGBM+KNN+MLP)'][k]['recall'])),
                'ndcg': float(np.mean(results_stacking['Stacking (CatBoost+LightGBM+KNN+MLP)'][k]['ndcg']))
            }
            for k in k_values
        }
    }, f)

print(" Сохранено: stacking_with_mlp_786.pkl")

# Сводная таблица для отчёта
summary_df = pd.DataFrame({
    'Model': ['MLP', 'Stacking (с MLP)'],
    'RMSE': [mlp_rmse_test, stacking_rmse],
    'MAE': [mlp_mae_test, stacking_mae],
    'Precision@1': [
        np.mean(results['MLP'][1]['precision']),
        np.mean(results_stacking['Stacking (CatBoost+LightGBM+KNN+MLP)'][1]['precision'])
    ],
    'Recall@1': [
        np.mean(results['MLP'][1]['recall']),
        np.mean(results_stacking['Stacking (CatBoost+LightGBM+KNN+MLP)'][1]['recall'])
    ],
    'nDCG@1': [
        np.mean(results['MLP'][1]['ndcg']),
        np.mean(results_stacking['Stacking (CatBoost+LightGBM+KNN+MLP)'][1]['ndcg'])
    ]
})

summary_df.to_csv('mlp_summary_results.csv', index=False)
print(" Сохранено: mlp_summary_results.csv")



[5] СРАВНЕНИЕ...

СРАВНЕНИЕ: RMSE

Model                          RMSE       Improvement    
------------------------------------------------------------
[BASELINE]                     0.8104     -              
  CatBoost (786 features)      0.7731     +4.60%
  LightGBM (786 features)      0.7869     +2.90%
  KNN (786 features)           0.7819     +3.51%
* MLP (786 features)           0.8288     -2.27%

 MLP: 4 место из 4 ML-моделей по RMSE

СРАВНЕНИЕ: RANKING METRICS

 Загрузка test_dataset.csv...
 Расчёт baseline predictions...


                                                                    

 Пользователей: 35,659

 Расчёт ranking metrics для всех моделей...


Processing: 100%|██████████| 35659/35659 [00:15<00:00, 2236.86it/s]



RANKING METRICS: Все модели

 K=1:
Model           Precision@1   Recall@1      nDCG@1       
------------------------------------------------------------

 K=2:
Model           Precision@2   Recall@2      nDCG@2       
------------------------------------------------------------

 K=3:
Model           Precision@3   Recall@3      nDCG@3       
------------------------------------------------------------

СРАВНЕНИЕ: MLP vs CatBoost (лучшая по RMSE)

 K=1:
  Precision: 0.8868 (CatBoost) vs 0.8828 (MLP)  [-0.45%]
  Recall:    0.4786 (CatBoost) vs 0.4755 (MLP)  [-0.64%]
  nDCG:      0.9587 (CatBoost) vs 0.9502 (MLP)  [-0.88%]

 K=2:
  Precision: 0.7831 (CatBoost) vs 0.7794 (MLP)  [-0.47%]
  Recall:    0.7635 (CatBoost) vs 0.7597 (MLP)  [-0.51%]
  nDCG:      0.9704 (CatBoost) vs 0.9653 (MLP)  [-0.53%]

 K=3:
  Precision: 0.6610 (CatBoost) vs 0.6610 (MLP)  [-0.01%]
  Recall:    0.9124 (CatBoost) vs 0.9123 (MLP)  [-0.01%]
  nDCG:      0.9865 (CatBoost) vs 0.9839 (MLP)  [-0.26%]

[6] STACKING 

Processing: 100%|██████████| 17843/17843 [00:05<00:00, 3539.09it/s]



RANKING METRICS: Сравнение Stacking

 K=1:
Model                                    Precision@1   Recall@1      nDCG@1       
-------------------------------------------------------------------------------------

 K=2:
Model                                    Precision@2   Recall@2      nDCG@2       
-------------------------------------------------------------------------------------

 K=3:
Model                                    Precision@3   Recall@3      nDCG@3       
-------------------------------------------------------------------------------------

УЛУЧШЕНИЕ: Stacking vs CatBoost (лучшая одиночная)

 K=1:
  Precision: 0.6613 → 0.8781  [+32.79%]
  Recall:    0.3202 → 0.4760  [+48.67%]
  nDCG:      0.6816 → 0.8923  [+30.93%]

 K=2:
  Precision: 0.6596 → 0.7843  [+18.90%]
  Recall:    0.6386 → 0.7942  [+24.38%]
  nDCG:      0.7725 → 0.9279  [+20.11%]

 K=3:
  Precision: 0.6603 → 0.6608  [+0.08%]
  Recall:    0.9598 → 0.9605  [+0.07%]
  nDCG:      0.8922 → 0.9650  [+8.16%]

[7] 