In [None]:
!pip install sqlalchemy psycopg2-binary nest_asyncio asyncpg tenacity scikit-learn matplotlib seaborn pandas numpy scipy tabulate tqdm

In [None]:
import os
import shutil
from datetime import datetime

# Timestamped backup folder, so an earlier backup is never overwritten
backup_dir = "backup_results_" + datetime.now().strftime('%Y%m%d_%H%M%S')
os.makedirs(backup_dir, exist_ok=True)

# Cache files that must be removed to force a fresh evaluation run
cache_files = [
    'evaluation_df_cache.pkl',
    'performance_results_cache.pkl'
]

# Everything worth preserving from the previous run: the caches plus the result files
files_to_backup = cache_files + [
    'evaluation_results/results_summary_metrics.csv',
    'evaluation_results/results_distribution_metrics.csv',
    'evaluation_results/results_statistical_tests.json'
]

print("üì¶ Membackup hasil lama...")
for file in files_to_backup:
    if not os.path.exists(file):
        continue  # nothing to back up for this entry
    shutil.copy(file, backup_dir)
    print(f"  ‚úÖ {file} ‚Üí {backup_dir}/")

# Remove the caches (missing files are silently skipped)
print("\nüóëÔ∏è Menghapus cache...")
for cache_file in cache_files:
    if not os.path.exists(cache_file):
        continue
    os.remove(cache_file)
    print(f"  ‚úÖ Deleted: {cache_file}")

print("\n‚úÖ Siap untuk re-run! Jalankan Cell 10.")

In [None]:
# ===== CELL 3: SETUP DAN IMPORT LIBRARIES =====
import nest_asyncio, asyncio
from asyncio import Semaphore
from tenacity import retry, stop_after_attempt, wait_exponential

# Terapkan patch asyncio untuk mengizinkan event loop berjalan di dalam notebook
nest_asyncio.apply()

from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker
import contextlib
import logging

# Basic logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- ASYNCHRONOUS DATABASE CONNECTION ---
# The connection URL is taken from the DATABASE_URL environment variable so real
# credentials do not have to live in the notebook. The literal below is only the
# backward-compatible fallback for the local development database.
import os

DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql+asyncpg://user:rekompari@localhost:5432/pariwisata",
)

engine = create_async_engine(
    DATABASE_URL,
    pool_size=10,      # size of the connection pool
    max_overflow=5,    # extra connections allowed beyond the pool size
    pool_timeout=30,   # seconds to wait for a free connection
    pool_recycle=1800  # recycle connections every 30 minutes
)

# Session factory producing AsyncSession objects bound to the engine above
AsyncSessionLocal = sessionmaker(
    bind=engine,
    expire_on_commit=False,
    class_=AsyncSession
)

@contextlib.asynccontextmanager
async def get_db():
    """Yield an async database session; roll back on error and always close it."""
    async with AsyncSessionLocal() as db_session:
        try:
            yield db_session
        except Exception:
            # Undo any partial work before propagating the error to the caller
            await db_session.rollback()
            raise
        finally:
            await db_session.close()

# --- CONNECTION-SAFETY HELPERS ---

# Semaphore capping the number of concurrent DB operations (prevents overload)
db_semaphore = Semaphore(5)

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=10))
async def safe_db_operation(func, *args, **kwargs):
    """
    Run an async database operation under the shared semaphore, with retries.

    Args:
        func: coroutine function to call.
        *args, **kwargs: forwarded unchanged to ``func``.

    Returns:
        Whatever ``await func(*args, **kwargs)`` returns.

    Raises:
        Any exception raised by ``func`` is logged and re-raised inside the body;
        the ``retry`` decorator (3 attempts, exponential wait) then decides
        whether another attempt is made.
    """
    async with db_semaphore:
        try:
            return await func(*args, **kwargs)
        except Exception as e:
            # This handler runs on EVERY failed attempt, so the message must describe
            # a single failure rather than claim that all retries are exhausted.
            logger.error(f"Operasi database gagal pada percobaan ini (akan dicoba ulang hingga maksimal 3 percobaan): {e}")
            raise

print("‚úÖ Database engine dan helper functions siap.")

In [None]:
# ===== CELL 4: IMPORT MODULES =====
import sys
# Hati-hati dengan path ini, sesuaikan dengan struktur folder Anda
# Ini mengizinkan notebook untuk mengimpor file .py dari folder backend Anda
sys.path.append('../pariwisata-recommender/backend')

# Import model-model dari file .py Anda
# Meskipun kita akan mendefinisikan ulang beberapa logika, kita masih butuh ini untuk perbandingan
from app.services.base_recommender import BaseRecommender
from app.services.collaborative_recommender import CollaborativeRecommender
from app.services.content_based_recommender import ContentBasedRecommender
from app.services.hybrid_recommender import HybridRecommender
from app.services.mab_optimizer import MABOptimizer

# Import library standar untuk analisis data dan numerik
import pandas as pd
import numpy as np
import time
import random
from collections import Counter

# Import library untuk logging dan progress bar
import logging
from tqdm.notebook import tqdm

# Import library untuk machine learning dan evaluasi
from sklearn.model_selection import train_test_split
from scipy import stats # <--- TAMBAHAN BARU (untuk t-test)

# Import library untuk visualisasi
import matplotlib.pyplot as plt
import seaborn as sns

# --- Display configuration ---
# Seaborn look-and-feel used by the plots in this notebook
sns.set(palette="muted", style="whitegrid")
# Pandas display limits for wide frames
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', 50)

# Module logger at INFO verbosity
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

print("‚úÖ Semua modul berhasil di-import.")

In [None]:
# ===== CELL 6: LOAD DAN SPLIT DATA (VERSI TEMPORAL) =====
from sqlalchemy import select
from app.models.rating import Rating # Pastikan model Rating diimpor

async def load_ratings_df():
    """
    Load every rating from the database into a typed DataFrame.

    Rows are fetched ordered by ``Rating.created_at`` (required by the temporal split).

    Returns:
        DataFrame with columns ``user_id`` (int), ``destination_id`` (int),
        ``rating`` (float) and ``created_at`` (datetime). An empty table yields
        an empty frame with the same columns instead of failing.

    Raises:
        Any exception raised while querying or converting is logged and re-raised.
    """
    logger.info("üì¶ Memuat data ratings dari database...")
    columns = ['user_id', 'destination_id', 'rating', 'created_at']
    try:
        async with get_db() as db:
            # Ordering by created_at is essential for the temporal split
            query = select(Rating).order_by(Rating.created_at)
            res = await db.execute(query)
            rows = res.scalars().all()

        if not rows:
            # No rows is not the same as "column missing": report it as such
            logger.warning("Tidak ada data rating di database; mengembalikan DataFrame kosong.")
            data = []
        elif hasattr(rows[0], 'created_at'):
            data = [{'user_id': r.user_id,
                     'destination_id': r.destination_id,
                     'rating': float(r.rating),
                     'created_at': r.created_at
                    } for r in rows]
            logger.info("Berhasil memuat data dengan timestamp 'created_at'.")
        else:
            # Fallback when the model/DB has no 'created_at' attribute
            logger.warning("Kolom 'created_at' tidak ditemukan!")
            logger.warning("Menggunakan timestamp acak sebagai fallback. Ini TIDAK ideal untuk evaluasi temporal.")
            # Seeded generator and a single reference time keep the simulated
            # timestamps reproducible across runs.
            now = pd.Timestamp.now()
            day_offsets = np.random.default_rng(42).integers(1, 365, size=len(rows))
            data = [{'user_id': r.user_id,
                     'destination_id': r.destination_id,
                     'rating': float(r.rating),
                     'created_at': now - pd.to_timedelta(int(days), 'd')
                    } for r, days in zip(rows, day_offsets)]

        # Explicit columns keep the frame well-formed even when `data` is empty
        df = pd.DataFrame(data, columns=columns)

        # Enforce dtypes
        df['created_at'] = pd.to_datetime(df['created_at'])
        df['user_id'] = df['user_id'].astype(int)
        df['destination_id'] = df['destination_id'].astype(int)
        df['rating'] = df['rating'].astype(float)

        return df
    except Exception as e:
        logger.error(f"Error saat memuat ratings: {str(e)}")
        raise

# --- FUNGSI SPLIT DATA TEMPORAL (LEBIH ROBUST) ---
def create_temporal_split(df, test_size=0.2, min_ratings=5):
    """
    Split ratings temporally per user (stratified temporal split).

    Only users with at least ``min_ratings`` ratings are kept. For each such user
    the ratings are sorted by ``created_at``; the oldest ``1 - test_size`` share
    goes to train and the rest to test (at least one rating always lands in test).

    Args:
        df: ratings frame with ``user_id``, ``destination_id``, ``rating``, ``created_at``.
        test_size: fraction of each user's ratings reserved for testing.
        min_ratings: minimum number of ratings a user needs to be included.

    Returns:
        ``(train_df, test_df, ground_truth, eligible_users)`` where ``ground_truth``
        maps user_id -> list of test destination ids rated >= 4.0 and
        ``eligible_users`` lists the users whose ground truth is non-empty.
        When no user qualifies, both frames are empty (same columns as ``df``).
    """
    print(f"\n‚úÇÔ∏è Membuat stratified temporal train/test split...")

    user_rating_counts = df.groupby('user_id').size()
    # Keep only users with enough ratings to be split
    valid_users = user_rating_counts[user_rating_counts >= min_ratings].index
    df_filtered = df[df['user_id'].isin(valid_users)].copy()

    print(f"   Total users: {df['user_id'].nunique():,}")
    print(f"   Users dengan ‚â•{min_ratings} ratings (valid untuk evaluasi): {len(valid_users):,}")

    train_data = []
    test_data = []

    # Ground truth: only items the user liked (rating >= 4.0) in the test part
    ground_truth_cache_global = {}

    # Group once instead of re-scanning the whole frame for every user
    ratings_by_user = df_filtered.groupby('user_id')

    for user_id in tqdm(valid_users, desc="Memisahkan data per user"):
        user_ratings = ratings_by_user.get_group(user_id).sort_values('created_at', ascending=True)

        # Split point
        split_idx = int(len(user_ratings) * (1 - test_size))
        # At least one rating in train ...
        split_idx = max(1, split_idx)
        # ... but the test part must never be empty, which takes precedence
        if split_idx >= len(user_ratings):
            split_idx = len(user_ratings) - 1

        train_chunk = user_ratings.iloc[:split_idx]
        test_chunk = user_ratings.iloc[split_idx:]

        train_data.append(train_chunk)
        test_data.append(test_chunk)

        ground_truth_cache_global[user_id] = test_chunk[test_chunk['rating'] >= 4.0]['destination_id'].tolist()

    if train_data:
        train_df = pd.concat(train_data, ignore_index=True)
        test_df = pd.concat(test_data, ignore_index=True)
    else:
        # pd.concat([]) raises; return empty frames with the input's columns instead
        train_df = df_filtered.iloc[0:0].reset_index(drop=True)
        test_df = train_df.copy()

    print(f"\n‚úÖ Split selesai:")
    print(f"   Train: {len(train_df):,} ratings ({train_df['user_id'].nunique():,} users)")
    print(f"   Test:  {len(test_df):,} ratings ({test_df['user_id'].nunique():,} users)")

    # Users that actually have at least one liked item in their test part
    eligible_users_global = [uid for uid, items in ground_truth_cache_global.items() if len(items) > 0]
    print(f"   Eligible users (punya item 'disukai' di test set): {len(eligible_users_global):,}")

    return train_df, test_df, ground_truth_cache_global, eligible_users_global

# --- EKSEKUSI LOAD DAN SPLIT ---
try:
    # 1. Load data
    ratings_df = await safe_db_operation(load_ratings_df)
    print(f"Total ratings dimuat: {len(ratings_df)}")
    print(f"Unique users: {ratings_df['user_id'].nunique()}")
    print(f"Unique destinations: {ratings_df['destination_id'].nunique()}")

    # 2. Eksekusi split
    # Kita hanya perlu train_df untuk melatih model, dan ground_truth/eligible_users untuk evaluasi
    train_df, test_df, ground_truth_cache, eligible_users = create_temporal_split(
        ratings_df, 
        test_size=0.2, 
        min_ratings=5 # Butuh minimal 5 rating agar split 80/20 masuk akal
    )

    print("\nVariabel global 'train_df', 'test_df', 'ground_truth_cache', 'eligible_users' telah dibuat.")

except Exception as e:
    logger.error(f"Gagal pada CELL 6: {e}")
    # Buat DataFrame kosong agar sel berikutnya tidak error
    train_df, test_df = pd.DataFrame(), pd.DataFrame()
    ground_truth_cache, eligible_users = {}, []
    print("Gagal memuat atau memisahkan data. Membuat DataFrame kosong.")

In [None]:
# ===== CELL 8: EVALUATION METRICS =====

def precision_at_k(recs_ids, ground_truth_ids, k=10):
    """
    Compute Precision@k: the share of the top-k recommendations that are relevant.

    The denominator is the number of items actually recommended (at most k), so a
    list shorter than k is not penalised for the missing slots.

    Args:
        recs_ids: ranked list of recommended item ids.
        ground_truth_ids: collection of relevant item ids.
        k: cut-off rank; values <= 0 yield 0.0.

    Returns:
        Precision in [0, 1]; 0.0 when there are no recommendations.
    """
    # k <= 0 (not just k == 0): a negative k used to produce a negative denominator
    if not recs_ids or k <= 0:
        return 0.0

    top_k = recs_ids[:k]  # non-empty here, since recs_ids is non-empty and k >= 1
    hits = len(set(top_k) & set(ground_truth_ids))
    return hits / len(top_k)

def recall_at_k(recs_ids, ground_truth_ids, k=10):
    """
    Compute Recall@k: the share of relevant items found in the top-k recommendations.

    Args:
        recs_ids: ranked list of recommended item ids.
        ground_truth_ids: collection of relevant item ids (duplicates are ignored).
        k: cut-off rank; values <= 0 yield 0.0.

    Returns:
        Recall in [0, 1]; 0.0 when either input is empty.
    """
    if not recs_ids or not ground_truth_ids or k <= 0:
        return 0.0  # no recall without recommendations or ground truth

    # De-duplicate: hits are counted on sets, so the denominator must be too,
    # otherwise a repeated ground-truth id makes a recall of 1.0 unreachable.
    relevant = set(ground_truth_ids)
    hits = len(relevant.intersection(recs_ids[:k]))
    return hits / len(relevant)

def ndcg_at_k(recs_ids, ground_truth_ids, k=10):
    """
    Compute Normalized Discounted Cumulative Gain (NDCG)@k with binary relevance.

    Measures ranking quality: relevant items placed higher contribute more.

    Args:
        recs_ids: ranked list of recommended item ids.
        ground_truth_ids: collection of relevant item ids (duplicates are ignored).
        k: cut-off rank; values <= 0 yield 0.0.

    Returns:
        NDCG in [0, 1]; 0.0 when either input is empty.
    """
    if not recs_ids or not ground_truth_ids or k <= 0:
        return 0.0

    relevant = set(ground_truth_ids)  # set: O(1) membership and no duplicate inflation

    # --- DCG: gain of 1 per relevant item, discounted by log2 of its position ---
    dcg = 0.0
    credited = set()
    for position, did in enumerate(recs_ids[:k]):
        # Credit each relevant item once, so a repeated recommendation cannot push NDCG above 1
        if did in relevant and did not in credited:
            credited.add(did)
            dcg += 1 / np.log2(position + 2)  # +2 because positions start at 0

    # --- IDCG: the DCG of a perfect ranking (all relevant items first) ---
    ideal_hits = min(len(relevant), k)
    idcg = sum(1 / np.log2(i + 2) for i in range(ideal_hits))

    if idcg == 0:
        return 0.0

    return dcg / idcg

# --- METRIK DIVERSITY (DARI CELL 11.5 LAMA) ---
def intra_list_diversity(recommendations, item_categories):
    """
    Compute Intra-List Diversity (ILD) of ONE recommendation list by category.

    ILD = (number of item pairs with different categories) / (total item pairs).
    Lists with fewer than two items have no pairs and score 0.0.
    """
    if not recommendations or len(recommendations) <= 1:
        return 0.0

    # Items without a known category get a per-item placeholder, so each of them
    # counts as its own distinct category.
    categories = [
        item_categories.get(item_id, f"unknown_{item_id}")
        for item_id in recommendations
    ]

    n = len(categories)
    total_pairs = n * (n - 1) / 2

    # Count unordered pairs (i < j) whose categories differ
    different_pairs = sum(
        1
        for i in range(n)
        for j in range(i + 1, n)
        if categories[i] != categories[j]
    )

    return different_pairs / total_pairs

print("‚úÖ Metrik evaluasi (Precision@k, Recall@k, NDCG@k, Intra-List Diversity) siap.")

In [None]:
# ===== CELL 9: IMPLEMENTASI DAN INISIALISASI MODEL =====

# Kita butuh `text` untuk query SQL mentah
from sqlalchemy import text 
from app.models.destinations import Destination
from app.models.category import Category

# --- 1. IMPLEMENTASI COLLABORATIVE FILTERING (CF) ---
#    (Menggunakan logika NMF yang sama dari tesis Anda, tapi dilatih di train_df)

from sklearn.decomposition import NMF
from scipy.sparse import csr_matrix

class ProperCollaborativeRecommender:
    """Clean collaborative-filtering (NMF) implementation used for evaluation."""

    def __init__(self):
        # Core algorithm settings; the original note says these are copied from the
        # production .py file so that the methodology stays the same.
        self.nmf_model = NMF(n_components=50, random_state=42, max_iter=500)
        self.user_item_matrix = None
        self.user_factors = None
        self.item_factors = None
        self.user_encoder = {}  # user_id -> matrix row index
        self.item_encoder = {}  # item_id -> matrix column index
        self.user_decoder = {}  # matrix row index -> user_id
        self.item_decoder = {}  # matrix column index -> item_id
        self.is_trained = False

    async def train(self, ratings_df: pd.DataFrame):
        """
        Fit the NMF model on the given ratings (intended to be the training split).

        Args:
            ratings_df: frame with ``user_id``, ``destination_id`` and ``rating``.
                If a (user, destination) pair occurs more than once, only the last
                occurrence in frame order is used.
        """
        logger.info("ü§ñ Training ProperCollaborativeRecommender...")

        # csr_matrix SUMS duplicate (row, col) entries, which would turn two ratings
        # of the same destination into one out-of-scale value. Keep one per pair.
        interactions = ratings_df.drop_duplicates(
            subset=['user_id', 'destination_id'], keep='last'
        )

        # 1. Build the id <-> index encoders
        unique_users = interactions['user_id'].unique()
        unique_items = interactions['destination_id'].unique()

        self.user_encoder = {user_id: idx for idx, user_id in enumerate(unique_users)}
        self.user_decoder = {idx: user_id for user_id, idx in self.user_encoder.items()}
        self.item_encoder = {item_id: idx for idx, item_id in enumerate(unique_items)}
        self.item_decoder = {idx: item_id for item_id, idx in self.item_encoder.items()}

        # 2. Build the sparse user-item matrix
        rows = [self.user_encoder[uid] for uid in interactions['user_id']]
        cols = [self.item_encoder[did] for did in interactions['destination_id']]
        data = interactions['rating'].values

        self.user_item_matrix = csr_matrix(
            (data, (rows, cols)),
            shape=(len(unique_users), len(unique_items))
        )

        # 3. Fit NMF
        logger.info(f"Melatih NMF dengan matrix shape: {self.user_item_matrix.shape}...")
        self.user_factors = self.nmf_model.fit_transform(self.user_item_matrix)
        self.item_factors = self.nmf_model.components_

        self.is_trained = True
        logger.info("‚úÖ ProperCollaborativeRecommender (NMF) berhasil di-train.")

    async def predict(self, user_id, num_recommendations=10):
        """
        Return up to ``num_recommendations`` unrated items for ``user_id``.

        Returns:
            List of ``{'destination_id', 'score'}`` dicts sorted by descending
            score; scores are scaled to [0, 1] relative to the best candidate.
            Empty for unknown users (cold start) or when nothing is left to
            recommend.

        Raises:
            Exception: if the model has not been trained.
        """
        if not self.is_trained:
            raise Exception("CF model belum di-train.")

        # Cold start: the user does not appear in the training data
        if user_id not in self.user_encoder:
            logger.warning(f"CF Cold Start: User {user_id} tidak ada di train_df.")
            return []

        user_idx = self.user_encoder[user_id]

        # Reconstructed ratings for this user: P_user . Q_items
        user_scores = self.user_factors[user_idx].dot(self.item_factors)

        # Only items the user has NOT rated in training are candidates. Masking with
        # a low score is not enough: with few items left they would re-enter the top-k.
        rated_item_indices = self.user_item_matrix[user_idx].indices
        candidate_mask = np.ones(user_scores.shape[0], dtype=bool)
        candidate_mask[rated_item_indices] = False
        candidate_indices = np.flatnonzero(candidate_mask)

        # Clamp k: argpartition raises when k exceeds the number of candidates
        k = min(int(num_recommendations), candidate_indices.size)
        if k <= 0:
            return []

        candidate_scores = user_scores[candidate_indices]
        # argpartition selects the k best without a full sort; then sort just those k
        top_k = np.argpartition(candidate_scores, -k)[-k:]
        top_k_sorted = top_k[np.argsort(candidate_scores[top_k])][::-1]

        # Normaliser computed once (it was recomputed for every item before)
        max_score = candidate_scores.max()

        recommendations = []
        for pos in top_k_sorted:
            score = candidate_scores[pos]
            # Scale to [0, 1] for the hybrid combination
            normalized_score = max(0, min(1, score / (max_score + 1e-9)))
            recommendations.append({
                'destination_id': self.item_decoder[candidate_indices[pos]],
                'score': normalized_score
            })

        return recommendations


# --- 2. IMPLEMENTASI CONTENT-BASED (CB) ---

async def get_destination_categories_from_db():
    """
    Fetch one category name per destination from the database.

    Returns a dict mapping destination id -> category name ("Wisata Lainnya"
    when the category is NULL/empty), or None if the query fails.
    """
    logger.info("Memuat kategori destinasi dari DB...")
    try:
        async with get_db() as db:
            # DISTINCT ON keeps exactly ONE row per destination; the ORDER BY
            # makes that the category with the smallest c.id.
            query = text("""
            SELECT DISTINCT ON (d.id) 
                d.id as destination_id, 
                c.name as category_name
            FROM destinations d
            LEFT JOIN destination_categories dc ON d.id = dc.destination_id
            LEFT JOIN categories c ON c.id = dc.category_id
            ORDER BY d.id, c.id ASC
            """)

            result = await db.execute(query)

            # Destinations without a category fall back to the generic bucket
            destination_categories = {
                row[0]: (row[1] if row[1] else "Wisata Lainnya")
                for row in result.fetchall()
            }

            logger.info(f"Berhasil memuat {len(destination_categories)} kategori destinasi.")
            return destination_categories

    except Exception as e:
        logger.warning(f"Tidak dapat mengakses kategori dari database: {e}. Menggunakan simulasi.")
        return None

class ProperContentBasedRecommender:
    """
    Clean content-based (category + popularity) recommender used for evaluation.

    NOTE: ``train`` and ``predict`` read the notebook-level globals ``train_df``
    (and ``train`` also ``ratings_df``); those must exist before they are called.
    """

    def __init__(self):
        self.destinations_data = {}    # dest_id -> {'category', 'popularity'}
        self.category_popularity = {}  # category -> Series of rating counts per dest_id
        self.all_categories_map = {}   # dest_id -> category
        self.categories_list = ['Wisata Alam', 'Wisata Sejarah', 'Wisata Kuliner', 'Wisata Buatan', 'Wisata Keluarga', 'Wisata Lainnya']
        self.is_trained = False

    async def train(self):
        """
        Build per-destination and per-category popularity tables.

        Categories come from the database when available; destinations without a
        category get one assigned by ``dest_id % len(categories_list)``.
        Popularity is counted on the global ``train_df`` only; the destination
        universe comes from the global ``ratings_df``.
        """
        logger.info("Training ProperContentBasedRecommender...")

        # Start from a clean slate so that re-training never mixes in stale entries
        self.destinations_data = {}
        self.category_popularity = {}

        db_categories = await safe_db_operation(get_destination_categories_from_db)
        if db_categories:
            self.all_categories_map = db_categories
            # sorted(): set order varies between interpreter runs, which made the
            # modulo-based fallback category non-reproducible.
            self.categories_list = sorted(set(db_categories.values()))

        all_destinations = ratings_df['destination_id'].unique()
        # One pass over train_df instead of one filtered scan per destination
        train_popularity = train_df['destination_id'].value_counts()

        for dest_id in all_destinations:
            category = self.all_categories_map.get(dest_id)
            if category is None:
                category = self.categories_list[dest_id % len(self.categories_list)]
                self.all_categories_map[dest_id] = category
            popularity = int(train_popularity.get(dest_id, 0))
            self.destinations_data[dest_id] = {'category': category, 'popularity': popularity}

        for category in self.categories_list:
            cat_dest_ids = [did for did, data in self.destinations_data.items() if data['category'] == category]
            if cat_dest_ids:
                pop_series = train_df[train_df['destination_id'].isin(cat_dest_ids)]['destination_id'].value_counts()
                self.category_popularity[category] = pop_series

        self.is_trained = True
        logger.info("‚úÖ ProperContentBasedRecommender berhasil di-train.")

    async def predict(self, user_id, num_recommendations=10):
        """
        Recommend destinations from the categories the user rated most often.

        Users without training ratings (cold start) get the most popular items
        of every category with a flat score of 0.5. Otherwise unrated items are
        listed category by category (most-rated category first, most popular item
        first) and scored ``1 / rank``.

        Returns:
            List of ``{'destination_id', 'score'}`` dicts, at most
            ``num_recommendations`` long.

        Raises:
            Exception: if the model has not been trained.
        """
        if not self.is_trained:
            raise Exception("CB model belum di-train.")
        user_ratings = train_df[train_df['user_id'] == user_id]
        rated_items = set(user_ratings['destination_id'].tolist())

        if user_ratings.empty:  # cold start
            # Without any category popularity there is nothing to suggest
            # (and the division below would raise ZeroDivisionError).
            if not self.category_popularity:
                return []
            num_per_cat = max(1, num_recommendations // len(self.category_popularity))
            popular = []
            for pop_series in self.category_popularity.values():
                if pop_series is not None and not pop_series.empty:
                    popular.extend(pop_series.head(num_per_cat).index.tolist())
            final_recs = list(dict.fromkeys(popular))  # de-duplicate, keep order
            return [{'destination_id': dest_id, 'score': 0.5} for dest_id in final_recs[:num_recommendations]]

        user_categories = [self.destinations_data[did]['category'] for did in user_ratings['destination_id'] if did in self.destinations_data]
        if not user_categories:
            return []

        category_counts = Counter(user_categories)
        preferred_categories = sorted(category_counts.items(), key=lambda x: x[1], reverse=True)

        recommendations = []
        seen = set(rated_items)  # O(1) membership for rated and already-picked items
        for category, _ in preferred_categories:
            pop_series = self.category_popularity.get(category)
            if pop_series is None:
                continue
            for dest_id in pop_series.index:
                if dest_id not in seen:
                    seen.add(dest_id)
                    recommendations.append(dest_id)
            if len(recommendations) >= num_recommendations:
                break  # later categories cannot affect the first num_recommendations items

        return [{'destination_id': dest_id, 'score': 1.0 / (i + 1)} for i, dest_id in enumerate(recommendations[:num_recommendations])]

    def get_categories(self):
        """Return the dest_id -> category mapping."""
        return self.all_categories_map

# --- 3. IMPLEMENTASI CONTEXT-AWARE ---

class ContextAwareComponent:
    """
    Simulates a rich context (time, weather, season) per user and applies
    category-based score boosts for that context.
    """

    def __init__(self):
        self.weather_conditions = ["cerah", "berawan", "hujan_ringan", "hujan_lebat"]
        self.seasons = ["kemarau", "hujan"]
        self.kemarau_months = [5, 6, 7, 8, 9, 10]  # May - October (dry season)
        self.hujan_months = [11, 12, 1, 2, 3, 4]   # November - April (rainy season)
        logger.info("ContextAwareComponent (rich context) diinisialisasi.")

    def _get_season(self, month):
        """Return "kemarau" for months listed in ``kemarau_months``, otherwise "hujan"."""
        return "kemarau" if month in self.kemarau_months else "hujan"

    def get_context(self, user_id):
        """
        Simulate a context deterministically: the same ``user_id`` always yields
        the same context.

        Returns:
            dict with keys ``time_of_day``, ``is_weekend``, ``hour``, ``weather``, ``season``.
        """
        # A private generator seeded with the user id gives the same sequence as
        # seeding the module-level generator, but without resetting the global
        # `random` state as a side effect of every call.
        # numpy integers are converted because newer Python versions reject them as seeds.
        seed = int(user_id) if isinstance(user_id, np.integer) else user_id
        rng = random.Random(seed)

        # --- Time ---
        hour = rng.randint(8, 22)
        is_weekend = rng.choice([True, False])
        time_of_day = 'night'
        if 8 <= hour < 11:
            time_of_day = 'morning'
        elif 11 <= hour < 15:
            time_of_day = 'afternoon'
        elif 15 <= hour < 18:
            time_of_day = 'evening'

        # --- Season & weather ---
        random_month = rng.randint(1, 12)
        season = self._get_season(random_month)

        if season == "hujan":
            # Rainy-season weights
            weather = rng.choices(self.weather_conditions, weights=[0.2, 0.3, 0.3, 0.2])[0]
        else:
            # Dry-season weights
            weather = rng.choices(self.weather_conditions, weights=[0.6, 0.3, 0.08, 0.02])[0]

        return {
            'time_of_day': time_of_day,
            'is_weekend': is_weekend,
            'hour': hour,
            'weather': weather,
            'season': season
        }

    def get_contextual_boost(self, recommendations, context, item_categories):
        """
        Return copies of ``recommendations`` with context-dependent score boosts.

        Args:
            recommendations: list of ``{'destination_id', 'score'}`` dicts (not mutated).
            context: dict as produced by ``get_context``.
            item_categories: dest_id -> category name; unknown ids are treated
                as "Wisata Lainnya".
        """
        boosted_recs = []
        for rec in recommendations:
            category = item_categories.get(rec['destination_id'], "Wisata Lainnya")
            boost = 0.0

            # 1. Time: culinary spots in the evening
            if context['time_of_day'] == 'evening' and category == 'Wisata Kuliner':
                boost += 0.15

            # 2. Weather: nature when sunny, 'Wisata Buatan' when raining
            if context['weather'] == 'cerah' and category == 'Wisata Alam':
                boost += 0.1

            if context['weather'].startswith('hujan') and category == 'Wisata Buatan':
                boost += 0.1

            # 3. Season + weekend: family destinations on dry-season weekends
            if context['season'] == 'kemarau' and context['is_weekend'] and category == 'Wisata Keluarga':
                boost += 0.1

            new_rec = rec.copy()  # copy so the caller's dict is left untouched
            new_rec['score'] += boost
            boosted_recs.append(new_rec)

        return boosted_recs

# --- 4. IMPLEMENTASI MMR RERANKER ---

class MMRReranker:
    """Maximal Marginal Relevance re-ranker using category equality as similarity."""

    def __init__(self, item_categories_map):
        self.item_categories = item_categories_map  # dest_id -> category
        self.category_cache = {}                    # memoised category lookups

    def _get_category(self, item_id):
        """Return the item's category; unknown items get a per-item placeholder."""
        if item_id not in self.category_cache:
            self.category_cache[item_id] = self.item_categories.get(item_id, f"unknown_{item_id}")
        return self.category_cache[item_id]

    def _get_similarity(self, item_id1, item_id2):
        """Binary similarity: 1.0 when both items share a category, otherwise 0.0."""
        return 1.0 if self._get_category(item_id1) == self._get_category(item_id2) else 0.0

    def rerank(self, recommendations, lambda_val=0.5, k=10):
        """
        Greedily pick up to ``k`` items maximising
        ``lambda_val * relevance - (1 - lambda_val) * max_similarity_to_selected``.

        Args:
            recommendations: list of ``{'destination_id', 'score'}`` dicts.
            lambda_val: relevance/diversity trade-off (1.0 = pure relevance).
            k: maximum length of the returned list.

        Returns:
            List of destination ids in re-ranked order.
        """
        if not recommendations:
            return []
        original_recs = {rec['destination_id']: rec['score'] for rec in recommendations}
        candidates = list(original_recs.keys())
        reranked_list = []

        while len(reranked_list) < k and candidates:
            best_item = None
            best_mmr_score = -float('inf')
            for item_id in candidates:
                relevance = original_recs[item_id]
                if not reranked_list:
                    similarity = 0.0
                else:
                    similarity = max(self._get_similarity(item_id, selected_id) for selected_id in reranked_list)
                mmr_score = (lambda_val * relevance) - ((1 - lambda_val) * similarity)
                if mmr_score > best_mmr_score:
                    best_mmr_score = mmr_score
                    best_item = item_id
            # `is None`, not truthiness: destination id 0 is a valid selection
            if best_item is None:
                break
            reranked_list.append(best_item)
            candidates.remove(best_item)
        return reranked_list

# --- 5. IMPLEMENTASI MULTI-ARMED BANDIT (MAB) ---

class SimpleMAB:
    """UCB1 multi-armed bandit that chooses a lambda value from an evenly spaced grid."""

    def __init__(self, n_arms=11):
        # Default of 11 arms gives the lambda grid [0.0, 0.1, ..., 1.0]
        self.n_arms = n_arms
        self.arms = np.linspace(0, 1, n_arms)
        self.counts = np.zeros(n_arms, dtype=int)          # pulls per arm
        self.avg_rewards = np.zeros(n_arms, dtype=float)   # running mean reward per arm
        self.total_pulls = 0

    def select_arm(self):
        """
        Choose an arm with UCB1 and record the pull.

        Both ``total_pulls`` and the chosen arm's count are incremented here
        (not in ``update``). Returns ``(arm_index, lambda_value)``.
        """
        self.total_pulls += 1

        # Initial exploration: every arm is tried once, lowest index first
        untried = np.flatnonzero(self.counts == 0)
        if untried.size > 0:
            arm = int(untried[0])
            self.counts[arm] += 1
            return arm, self.arms[arm]

        # UCB1: mean reward plus an exploration bonus that shrinks with the pull count
        # (the 1e-9 guards the logarithm against a zero argument)
        bonus = np.sqrt(2 * np.log(self.total_pulls + 1e-9) / self.counts)
        selected_arm = np.argmax(self.avg_rewards + bonus)

        self.counts[selected_arm] += 1
        return selected_arm, self.arms[selected_arm]

    def update(self, arm_index, reward):
        """
        Fold ``reward`` into the running mean of ``arm_index``.

        Uses the count already incremented by ``select_arm``:
        R_n = R_{n-1} + (x_n - R_{n-1}) / n. Does nothing if the arm was never pulled.
        """
        pulls = self.counts[arm_index]
        if pulls == 0:
            return

        previous_mean = self.avg_rewards[arm_index]
        self.avg_rewards[arm_index] = previous_mean + (reward - previous_mean) / pulls

# --- 6. IMPLEMENTASI HYBRID RECOMMENDER (ORKESTRATOR) ---

class ProperHybridRecommender:
    """Orchestrator that serves all five evaluation strategies.

    It wires together a collaborative model, a content-based model, a context
    component, an MMR re-ranker and a multi-armed bandit, and exposes them
    through a single ``predict`` coroutine.
    """

    def __init__(self, cf_model, cb_model, context_comp, mmr_reranker, mab):
        self.cf = cf_model
        self.cb = cb_model
        self.context = context_comp
        self.mmr = mmr_reranker
        self.mab = mab
        # Hybrid blend weights (the original note says they mirror the
        # production .py file).
        self.cf_weight = 0.5
        self.cb_weight = 0.5

    async def _combine_scores(self, cf_recs, cb_recs):
        """Blend CF and CB score lists into one list sorted by descending score.

        Each input is a list of dicts with ``destination_id`` and ``score``;
        scores of the same destination are summed after weighting.
        """
        totals = {}
        weighted_sources = ((cf_recs, self.cf_weight), (cb_recs, self.cb_weight))
        for source, weight in weighted_sources:
            for entry in source:
                dest = entry['destination_id']
                totals[dest] = totals.get(dest, 0) + entry['score'] * weight

        ranked = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
        return [{'destination_id': dest, 'score': total} for dest, total in ranked]

    async def predict(self, user_id, strategy='hybrid_mab_mmr', k=10):
        """Run one of the five strategies for ``user_id``.

        Returns:
            A list of destination IDs for ``'cf'``, ``'cb'`` and ``'hybrid'``;
            whatever ``mmr.rerank`` returns for ``'hybrid_mmr_static'``; and a
            ``(rerank_result, arm_index)`` tuple for ``'hybrid_mab_mmr'``.

        Raises:
            ValueError: if ``strategy`` is not one of the five known names
            (raised only after the hybrid candidate list has been built).
        """
        # Baselines 1 and 2: a single model answers directly.
        if strategy in ('cf', 'cb'):
            single_model = self.cf if strategy == 'cf' else self.cb
            ranked = await single_model.predict(user_id, num_recommendations=k)
            return [entry['destination_id'] for entry in ranked]

        # Shared candidate pipeline for every hybrid variant:
        # 50 candidates per model -> weighted blend -> context boost -> sort.
        cf_candidates = await self.cf.predict(user_id, num_recommendations=50)
        cb_candidates = await self.cb.predict(user_id, num_recommendations=50)
        blended = await self._combine_scores(cf_candidates, cb_candidates)
        boosted = self.context.get_contextual_boost(
            blended,
            self.context.get_context(user_id),
            self.cb.get_categories(),
        )
        candidates = sorted(boosted, key=lambda entry: entry['score'], reverse=True)

        # Baseline 3: plain hybrid, top-k by boosted score.
        if strategy == 'hybrid':
            return [entry['destination_id'] for entry in candidates[:k]]

        # Baseline 4: hybrid re-ranked by MMR with a fixed lambda.
        if strategy == 'hybrid_mmr_static':
            return self.mmr.rerank(candidates, lambda_val=0.5, k=k)

        # Proposed model: the bandit chooses lambda; the arm index is returned
        # alongside the list so the caller can feed a reward back later.
        if strategy == 'hybrid_mab_mmr':
            arm_index, dynamic_lambda = self.mab.select_arm()
            reranked_ids = self.mmr.rerank(candidates, lambda_val=dynamic_lambda, k=k)
            return reranked_ids, arm_index

        raise ValueError(f"Strategi tidak dikenal: {strategy}")

# --- 7. INISIALISASI SEMUA KOMPONEN ---

async def initialize_all_models():
    """Build and train every model component, then publish them as globals.

    Components are assembled in local variables first and only assigned to the
    notebook-level names once everything succeeded. If any step fails, all five
    globals are reset to ``None`` so that later cells, which guard on
    ``... is None``, cannot pick up a half-initialised or stale engine from a
    previous run.

    Globals written:
        collab_model_engine, cb_model_engine, mab_engine,
        hybrid_model_engine, mmr_reranker

    Returns:
        bool: ``True`` when all components were created, ``False`` otherwise
        (the failure is logged together with its traceback).
    """
    global collab_model_engine, cb_model_engine, mab_engine, hybrid_model_engine, mmr_reranker

    try:
        # 1. Train the collaborative-filtering model on the training split.
        cf_engine = ProperCollaborativeRecommender()
        await cf_engine.train(train_df)

        # 2. Train the content-based model.
        cb_engine = ProperContentBasedRecommender()
        await cb_engine.train()

        # 3. The category map from the CB model is required by the MMR re-ranker.
        item_categories_map = cb_engine.get_categories()
        if not item_categories_map:
            raise RuntimeError("Gagal mendapatkan map kategori dari CB model.")

        # 4. Remaining components.
        context_comp = ContextAwareComponent()
        reranker = MMRReranker(item_categories_map)
        bandit = SimpleMAB(n_arms=11)

        # 5. Hybrid orchestrator tying everything together.
        hybrid_engine = ProperHybridRecommender(
            cf_model=cf_engine,
            cb_model=cb_engine,
            context_comp=context_comp,
            mmr_reranker=reranker,
            mab=bandit
        )

    except Exception as e:
        # logger.exception keeps the traceback, which logger.error discarded.
        logger.exception(f"Gagal menginisialisasi model: {e}")
        collab_model_engine = None
        cb_model_engine = None
        mab_engine = None
        hybrid_model_engine = None
        mmr_reranker = None
        return False

    # Publish atomically: either every global is fresh or none is.
    collab_model_engine = cf_engine
    cb_model_engine = cb_engine
    mmr_reranker = reranker
    mab_engine = bandit
    hybrid_model_engine = hybrid_engine

    logger.info("‚úÖ‚úÖ‚úÖ Semua komponen model berhasil diinisialisasi.")
    return True

# --- EKSEKUSI INISIALISASI ---
# Variabel global akan dibuat:
# - collab_model_engine (Model CF)
# - cb_model_engine (Model CB)
# - mab_engine (Model MAB)
# - hybrid_model_engine (Orkestrator utama)

if await initialize_all_models():
    print("‚úÖ Engine model (hybrid_model_engine, dll.) siap digunakan.")
    
    # Tes cepat
    if eligible_users:
        test_user = eligible_users[0]
        print(f"Menjalankan tes prediksi untuk user {test_user}...")
        recs = await hybrid_model_engine.predict(test_user, strategy='hybrid_mab_mmr', k=5)
        print(f"Hasil tes MAB-MMR (recs, arm): {recs}")
        recs_cf = await hybrid_model_engine.predict(test_user, strategy='cf', k=5)
        print(f"Hasil tes CF: {recs_cf}")
    else:
        print("Tidak ada user yang eligible untuk tes cepat.")
else:
    print("‚ùå Gagal menginisialisasi engine model. Cek error di atas.")

In [None]:
# ===== CELL 10: EKSEKUSI EVALUASI BATCH =====
import pickle

# File name of the pickle used to cache the evaluation results (evaluation_df)
EVAL_CACHE_FILE = 'evaluation_df_cache.pkl'

async def run_evaluation_for_user(user_id, model_engine):
    """Run all five strategies for one user and return one result record.

    Designed to be scheduled concurrently for many users. The strategies are
    awaited in a fixed order: cf, cb, hybrid, hybrid_mmr_static, hybrid_mab_mmr.

    Args:
        user_id: Identifier passed through to ``model_engine.predict``.
        model_engine: Object exposing an async ``predict(user_id, strategy, k)``.

    Returns:
        dict: ``user_id``, one ``recommendations_<strategy>`` entry per
        strategy, and ``mab_arm_index`` (the arm chosen by the bandit). If any
        call raises, the error is logged and a record with empty lists and
        ``mab_arm_index=None`` is returned so the batch is not interrupted.
    """
    # The four strategies whose predict() result is stored as-is.
    list_strategies = ('cf', 'cb', 'hybrid', 'hybrid_mmr_static')

    try:
        record = {'user_id': user_id}
        for strategy in list_strategies:
            record[f'recommendations_{strategy}'] = await model_engine.predict(
                user_id, strategy=strategy, k=10
            )

        # The proposed model returns (list_of_ids, arm_index).
        mab_recs, arm_index = await model_engine.predict(
            user_id, strategy='hybrid_mab_mmr', k=10
        )
        record['recommendations_hybrid_mab_mmr'] = mab_recs
        record['mab_arm_index'] = arm_index  # kept so a reward can be credited later
        return record

    except Exception as e:
        logger.error(f"Gagal mengevaluasi pengguna {user_id}: {e}")
        # Empty placeholder record with the same keys, in the same order.
        placeholder = {'user_id': user_id}
        for strategy in list_strategies + ('hybrid_mab_mmr',):
            placeholder[f'recommendations_{strategy}'] = []
        placeholder['mab_arm_index'] = None
        return placeholder

# --- MAIN EXECUTION ---
try:
    # 1. Coba muat dari cache terlebih dahulu
    evaluation_df = pd.read_pickle(EVAL_CACHE_FILE)
    logger.info(f"‚úÖ Berhasil memuat 'evaluation_df' dari cache: {EVAL_CACHE_FILE}")
    print(f"‚úÖ Berhasil memuat 'evaluation_df' dari cache: {EVAL_CACHE_FILE}")
    print(f"   Total users di cache: {len(evaluation_df)}")
    
    # Cek apakah cache valid (memiliki kolom baru)
    if 'mab_arm_index' not in evaluation_df.columns:
         print("‚ö†Ô∏è Cache tidak valid (kolom 'mab_arm_index' hilang). Menjalankan ulang evaluasi.")
         raise FileNotFoundError # Paksa eksekusi ulang

except FileNotFoundError:
    logger.warning(f"Cache '{EVAL_CACHE_FILE}' tidak ditemukan. Memulai evaluasi penuh...")
    print(f"Cache '{EVAL_CACHE_FILE}' tidak ditemukan. Memulai evaluasi penuh...")
    
    # Ambil daftar pengguna yang valid dari CELL 6
    eval_users_list = eligible_users
    
    # Periksa apakah prasyarat ada
    if not eval_users_list:
        print("‚ùå Tidak ada 'eligible_users' untuk dievaluasi. Hentikan.")
        evaluation_df = pd.DataFrame() # Buat df kosong
    elif 'hybrid_model_engine' not in globals() or hybrid_model_engine is None:
        print("‚ùå 'hybrid_model_engine' tidak ditemukan. Jalankan CELL 9 dulu.")
        evaluation_df = pd.DataFrame() # Buat df kosong
    else:
        # Tentukan ukuran batch (seberapa banyak user dievaluasi bersamaan)
        batch_size = 20
        num_batches = (len(eval_users_list) + batch_size - 1) // batch_size
        all_results = []
        
        print(f"Memulai evaluasi untuk {len(eval_users_list)} pengguna dalam {num_batches} batch...")
        
        for i in tqdm(range(num_batches), desc="Mengevaluasi batch pengguna"):
            start_idx = i * batch_size
            end_idx = min((i + 1) * batch_size, len(eval_users_list))
            user_batch = eval_users_list[start_idx:end_idx]
            
            # Buat daftar 'tasks' untuk dijalankan secara konkuren
            tasks = [
                run_evaluation_for_user(user_id, hybrid_model_engine) 
                for user_id in user_batch
            ]
            
            # Jalankan semua tasks di batch ini secara bersamaan
            batch_results = await asyncio.gather(*tasks)
            all_results.extend(batch_results)
        
        # 2. Konversi hasil ke DataFrame
        evaluation_df = pd.DataFrame(all_results)
        
        # 3. Simpan ke cache untuk penggunaan di masa depan
        try:
            evaluation_df.to_pickle(EVAL_CACHE_FILE)
            print(f"\n‚úÖ Evaluasi selesai. Hasil disimpan ke cache: {EVAL_CACHE_FILE}")
        except Exception as e:
            print(f"\n‚ö†Ô∏è Peringatan: Evaluasi selesai, tapi GAGAL menyimpan ke cache: {e}")

# --- Tampilkan Hasil ---
if not evaluation_df.empty:
    print(f"\nTotal users dievaluasi: {len(evaluation_df)}")
    print("Contoh hasil 'evaluation_df':")
    # Tampilkan 5 baris pertama
    display(evaluation_df.head())
else:
    print("‚ö†Ô∏è 'evaluation_df' kosong. Tidak ada hasil untuk ditampilkan.")

In [None]:
# ===== CELL 11: ANALISIS METRIK PERFORMA & UJI SIGNIFIKANSI =====
import pickle
from scipy import stats # Pastikan diimpor dari CELL 4

# File name of the pickle used to cache the performance results
PERF_CACHE_FILE = 'performance_results_cache.pkl'

# Strategy keys to evaluate. Each key is also the suffix of the
# 'recommendations_<key>' column read from evaluation_df below.
MODEL_NAMES = [
    'cf', 
    'cb', 
    'hybrid', 
    'hybrid_mmr_static', 
    'hybrid_mab_mmr'
]

def calculate_reward(ndcg, diversity, ndcg_weight=0.5, diversity_weight=0.5):
    """Compute the bandit reward as a weighted sum of accuracy and diversity.

    Both metrics are clamped to [0, 1] first. A NaN metric counts as 0; with
    the plain ``max(0, min(1, x))`` idiom a NaN would silently become 1 (the
    maximum reward), because every comparison against NaN is false.

    Args:
        ndcg: NDCG score of the recommendation list.
        diversity: Diversity score of the recommendation list.
        ndcg_weight: Weight applied to the clamped NDCG.
        diversity_weight: Weight applied to the clamped diversity.

    Returns:
        float: ``ndcg_weight * ndcg + diversity_weight * diversity``. The
        result lies in [0, 1] when the weights are non-negative and sum to 1,
        as the defaults do.
    """
    import math

    def _clamp_unit(value):
        # Map NaN to 0.0, then clip into the unit interval.
        value = float(value)
        if math.isnan(value):
            return 0.0
        return max(0.0, min(1.0, value))

    return (ndcg_weight * _clamp_unit(ndcg)) + (diversity_weight * _clamp_unit(diversity))

async def calculate_all_metrics():
    """Compute per-user metrics for every model and feed rewards to the bandit.

    Reads the notebook globals ``evaluation_df``, ``ground_truth_cache``,
    ``cb_model_engine`` and ``mab_engine``. For each user that has ground
    truth, Precision@10, Recall@10, NDCG@10 and intra-list diversity are
    computed for all entries of ``MODEL_NAMES``; for ``'hybrid_mab_mmr'`` the
    resulting reward is additionally passed to ``mab_engine.update``.

    Returns:
        tuple: ``(performance_summary, all_individual_scores)`` where the first
        maps model name -> dict of means/standard deviations/user count and the
        second maps model name -> metric name -> list of per-user scores
        (needed for the paired t-test). ``(None, None)`` when a prerequisite
        is missing.
    """
    logger.info("Memulai kalkulasi metrik performa...")
    
    # 1. Prerequisites
    if 'cb_model_engine' not in globals() or cb_model_engine is None:
        print("‚ùå 'cb_model_engine' tidak ditemukan. Jalankan CELL 9.")
        return None, None
        
    # Category map, needed by the diversity metric
    item_categories_map = cb_model_engine.get_categories()
    if not item_categories_map:
        print("‚ùå Peta kategori kosong. Tidak bisa menghitung diversity.")
        return None, None
        
    if evaluation_df.empty:
        print("‚ùå 'evaluation_df' kosong. Jalankan CELL 10.")
        return None, None

    # 2. Container for ALL individual scores (required for the t-test)
    all_individual_scores = {
        model: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []} 
        for model in MODEL_NAMES
    }

    # 3. One pass over the users in evaluation_df
    for _, row in tqdm(evaluation_df.iterrows(), total=len(evaluation_df), desc="Menghitung Metrik Pengguna"):
        user_id = row['user_id']
        
        # Ground truth for this user; users without any are skipped entirely
        gt = ground_truth_cache.get(user_id, [])
        if not gt:
            continue

        for model_key in MODEL_NAMES:
            col_name = f'recommendations_{model_key}'
            recs = row[col_name]
            
            p_k = precision_at_k(recs, gt, k=10)
            r_k = recall_at_k(recs, gt, k=10)
            n_k = ndcg_at_k(recs, gt, k=10)
            d_k = intra_list_diversity(recs, item_categories_map)
            
            all_individual_scores[model_key]['precision'].append(p_k)
            all_individual_scores[model_key]['recall'].append(r_k)
            all_individual_scores[model_key]['ndcg'].append(n_k)
            all_individual_scores[model_key]['diversity'].append(d_k)
            
            # 4. MAB only: turn the metrics into a reward and update the bandit
            if model_key == 'hybrid_mab_mmr':
                arm_index = row['mab_arm_index']
                # A failed user is stored with mab_arm_index=None. Once such a
                # row exists pandas may hold the column as float64, so the
                # value can arrive here as NaN or as a float like 3.0.
                # pd.notna rejects both None and NaN; int() restores a value
                # that is valid as an array index.
                if pd.notna(arm_index):
                    reward = calculate_reward(n_k, d_k)
                    mab_engine.update(int(arm_index), reward)

    logger.info("Kalkulasi metrik individu selesai.")
    
    # 5. Aggregate the individual scores into per-model summaries
    performance_summary = {}
    print("\n" + "="*50)
    print("üìä HASIL PERFORMA RATA-RATA MODEL üìä")
    print("="*50)
    
    for model_name, metrics in all_individual_scores.items():
        if not metrics['precision']:  # no user contributed a score
            logger.warning(f"Tidak ada data metrik untuk model: {model_name}")
            continue
            
        summary = {
            'Precision@10': np.mean(metrics['precision']),
            'Recall@10': np.mean(metrics['recall']),
            'NDCG@10': np.mean(metrics['ndcg']),
            'Diversity': np.mean(metrics['diversity']),
            'Precision_Std': np.std(metrics['precision']),
            'Recall_Std': np.std(metrics['recall']),
            'NDCG_Std': np.std(metrics['ndcg']),
            'Diversity_Std': np.std(metrics['diversity']),
            'Users': len(metrics['precision'])
        }
        performance_summary[model_name] = summary
        
        print(f"\n--- Model: {model_name.upper()} ---")
        print(f"  Precision@10: {summary['Precision@10']:.4f} (¬±{summary['Precision_Std']:.4f})")
        print(f"  Recall@10:    {summary['Recall@10']:.4f} (¬±{summary['Recall_Std']:.4f})")
        print(f"  NDCG@10:      {summary['NDCG@10']:.4f} (¬±{summary['NDCG_Std']:.4f})")
        print(f"  Diversity:    {summary['Diversity']:.4f} (¬±{summary['Diversity_Std']:.4f})")
        print(f"  (n_users = {summary['Users']})")

    return performance_summary, all_individual_scores

def run_significance_tests(individual_scores, proposed_model='hybrid_mab_mmr', baselines=None):
    """Run a paired t-test of the proposed model against each baseline.

    Args:
        individual_scores: model name -> metric name -> list of per-user scores.
        proposed_model: Key of the model under test.
        baselines: Keys to compare against; defaults to the four baselines.
            An entry equal to ``proposed_model`` is ignored.

    Returns:
        dict: baseline -> metric -> ``{'t_stat': ..., 'p_value': ...}``.
        A metric with fewer than two paired observations is reported on
        stdout and left out of the result.
    """
    if baselines is None:
        baselines = ['cf', 'cb', 'hybrid', 'hybrid_mmr_static']

    separator = "=" * 50
    print("\n" + separator)
    print(f"üî¨ UJI SIGNIFIKANSI STATISTIK (PAIRED T-TEST) üî¨")
    print(f"   Model Utama: {proposed_model}")
    print(separator)

    test_results = {}
    opponents = (name for name in baselines if name != proposed_model)

    for baseline in opponents:
        print(f"\n--- Perbandingan: [{proposed_model.upper()}] vs [{baseline.upper()}] ---")
        outcome_by_metric = {}
        test_results[baseline] = outcome_by_metric

        for metric in ('precision', 'recall', 'ndcg', 'diversity'):
            ours = individual_scores[proposed_model][metric]
            theirs = individual_scores[baseline][metric]

            # Pair the samples by truncating both to the shorter length.
            n_pairs = min(len(ours), len(theirs))
            if n_pairs < 2:
                print(f"  METRIC {metric.upper()}: Tidak cukup data (n={n_pairs})")
                continue
            ours, theirs = ours[:n_pairs], theirs[:n_pairs]

            # H0: both models have the same mean; H1: the means differ.
            t_stat, p_value = stats.ttest_rel(ours, theirs)

            print(f"\n  Metric: {metric.upper()}")
            print(f"    {proposed_model} (Mean): {np.mean(ours):.4f}")
            print(f"    {baseline} (Mean): {np.mean(theirs):.4f}")
            print(f"    P-Value: {p_value:.6f}")

            # 5% significance level; the sign of t gives the direction.
            if not (p_value < 0.05):
                verdict = "    HASIL: ‚ö†Ô∏è Tidak signifikan. Perbedaan tidak terbukti."
            elif t_stat > 0:
                verdict = "    HASIL: ‚úÖ Signifikan! Model Anda LEBIH BAIK."
            else:
                verdict = "    HASIL: ‚ùå Signifikan! Model Anda LEBIH BURUK."
            print(verdict)

            outcome_by_metric[metric] = {'t_stat': t_stat, 'p_value': p_value}

    return test_results

# --- MAIN EXECUTION CELL 11 ---
try:
    # 1. Coba muat dari cache dulu
    with open(PERF_CACHE_FILE, 'rb') as f:
        cached_data = pickle.load(f)
        performance_summary = cached_data['summary']
        all_individual_scores = cached_data['individual']
    print(f"‚úÖ Berhasil memuat HASIL PERFORMA dari cache: {PERF_CACHE_FILE}")
    
    # Tampilkan summary dari cache
    print("\n" + "="*50)
    print("üìä HASIL PERFORMA RATA-RATA (DARI CACHE) üìä")
    print("="*50)
    for model_name, summary in performance_summary.items():
        print(f"\n--- Model: {model_name.upper()} ---")
        print(f"  Precision@10: {summary['Precision@10']:.4f} (¬±{summary['Precision_Std']:.4f})")
        print(f"  Recall@10:    {summary['Recall@10']:.4f} (¬±{summary['Recall_Std']:.4f})")
        print(f"  NDCG@10:      {summary['NDCG@10']:.4f} (¬±{summary['NDCG_Std']:.4f})")
        print(f"  Diversity:    {summary['Diversity']:.4f} (¬±{summary['Diversity_Std']:.4f})")

except FileNotFoundError:
    logger.warning(f"Cache '{PERF_CACHE_FILE}' tidak ditemukan. Menjalankan kalkulasi penuh...")
    print(f"Cache '{PERF_CACHE_FILE}' tidak ditemukan. Menjalankan kalkulasi penuh...")
    
    # 2. Jalankan kalkulasi penuh
    performance_summary, all_individual_scores = await calculate_all_metrics()
    
    # 3. Simpan hasil ke cache
    if performance_summary:
        try:
            with open(PERF_CACHE_FILE, 'wb') as f:
                pickle.dump({'summary': performance_summary, 'individual': all_individual_scores}, f)
            print(f"\n‚úÖ Hasil performa disimpan ke cache: {PERF_CACHE_FILE}")
        except Exception as e:
            print(f"\n‚ö†Ô∏è Gagal menyimpan hasil performa ke cache: {e}")

# 4. JALANKAN UJI SIGNIFIKANSI STATISTIK
# Ini akan selalu berjalan (tidak perlu di-cache) karena cepat
if 'all_individual_scores' in globals() and all_individual_scores:
    statistical_test_results = run_significance_tests(all_individual_scores)
    
    # 5. Tampilkan status MAB setelah di-update
    print("\n" + "="*50)
    print("ü§ñ STATUS MAB SETELAH UPDATE ü§ñ")
    print("="*50)
    print("Lambda (Arm) | Jumlah Dipilih (Pulls) | Rata-rata Reward")
    print("-----------------------------------------------------")
    mab_counts = mab_engine.counts
    mab_rewards = mab_engine.avg_rewards
    mab_arms = mab_engine.arms
    
    for i in range(len(mab_arms)):
        print(f"  Œª = {mab_arms[i]:.1f}     | {mab_counts[i]:<20} | {mab_rewards[i]:.4f}")
    
    print(f"\nTotal pulls: {mab_engine.total_pulls}")
    best_arm_index = np.argmax(mab_rewards)
    print(f"üèÜ Lambda terbaik (berdasarkan reward): {mab_arms[best_arm_index]:.1f} (Reward: {mab_rewards[best_arm_index]:.4f})")

else:
    print("‚ùå Tidak ada 'all_individual_scores'. Tidak bisa menjalankan Uji Signifikansi atau menampilkan MAB.")

# Buat DataFrame dari summary untuk visualisasi
performance_df = pd.DataFrame(performance_summary).T.reset_index().rename(columns={'index': 'Model'})

In [None]:
# ===== CELL 11.5: ANALISIS KONVERGENSI MAB =====

import matplotlib.pyplot as plt

# Replay of the bandit's learning curve from the stored evaluation data.
#
# For every evaluated user (in evaluation_df order) the reward is rebuilt from
# that user's NDCG and diversity for the 'hybrid_mab_mmr' model, and a running
# per-arm mean is maintained. This replaces the earlier approximation, which
# plotted the bandit's *final* per-arm averages and sliced them by arm index,
# so it showed no evolution over time at all.
#
# Assumes all_individual_scores was produced from this same evaluation_df and
# ground_truth_cache, so the k-th stored score belongs to the k-th user that
# has ground truth - TODO confirm when the scores were loaded from a cache.
user_rewards = []
cumulative_best_lambda = []

print("üìà Analisis Konvergensi MAB:")
print("="*50)

mab_scores = all_individual_scores['hybrid_mab_mmr']
replay_reward_sums = {}   # arm index -> sum of rewards seen so far
replay_pull_counts = {}   # arm index -> number of rewards seen so far
score_position = 0        # cursor into the per-user score lists

for _, row in evaluation_df.iterrows():
    # Users without ground truth were skipped when the metrics were computed,
    # so they have no entry in the score lists either.
    if not ground_truth_cache.get(row['user_id'], []):
        continue
    if score_position >= len(mab_scores['ndcg']):
        break

    ndcg_score = mab_scores['ndcg'][score_position]
    diversity_score = mab_scores['diversity'][score_position]
    score_position += 1

    # A failed evaluation stores None, which pandas may surface as NaN; valid
    # indices may arrive as floats, hence the int() conversion.
    arm_idx = row['mab_arm_index']
    if pd.isna(arm_idx):
        continue
    arm_idx = int(arm_idx)

    reward = calculate_reward(ndcg_score, diversity_score)
    replay_reward_sums[arm_idx] = replay_reward_sums.get(arm_idx, 0.0) + reward
    replay_pull_counts[arm_idx] = replay_pull_counts.get(arm_idx, 0) + 1

    # Arm with the best empirical mean among the arms seen so far.
    best_arm_so_far = max(
        replay_reward_sums,
        key=lambda arm: replay_reward_sums[arm] / replay_pull_counts[arm],
    )

    user_rewards.append(reward)
    cumulative_best_lambda.append(mab_engine.arms[best_arm_so_far])

# Plot the learning curve
if user_rewards:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 5))
    
    # Plot 1: cumulative average reward
    cumulative_avg = np.cumsum(user_rewards) / np.arange(1, len(user_rewards) + 1)
    ax1.plot(cumulative_avg, linewidth=2)
    ax1.set_xlabel('Jumlah Iterasi (User)', fontsize=12)
    ax1.set_ylabel('Rata-rata Reward Kumulatif', fontsize=12)
    ax1.set_title('Konvergensi Reward MAB', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3)
    
    # Plot 2: evolution of the best lambda
    ax2.plot(cumulative_best_lambda, linewidth=2, color='orange')
    ax2.set_xlabel('Jumlah Iterasi (User)', fontsize=12)
    ax2.set_ylabel('Lambda Terbaik (Cumulative)', fontsize=12)
    ax2.set_title('Evolusi Pemilihan Lambda Optimal', fontsize=14, fontweight='bold')
    ax2.set_ylim([-0.1, 1.1])
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('mab_convergence_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()
    
    print(f"‚úÖ Grafik konvergensi disimpan: mab_convergence_analysis.png")
    print(f"   Final average reward: {cumulative_avg[-1]:.4f}")
    print(f"   Converged lambda: {cumulative_best_lambda[-1]:.2f}")

In [None]:
# ===== CELL 12: VISUALISASI HASIL PERFORMA (VERSI BERSIH TANPA WARNING) =====

# Make sure the inputs produced by CELL 11 are available
if 'performance_df' not in globals() or performance_df.empty:
    print("‚ùå 'performance_df' tidak ditemukan. Jalankan CELL 11 terlebih dahulu.")
elif 'all_individual_scores' not in globals() or not all_individual_scores:
    print("‚ùå 'all_individual_scores' tidak ditemukan. Jalankan CELL 11 terlebih dahulu.")
else:
    print("‚úÖ Memulai pembuatan visualisasi (versi bersih)...")
    
    # --- 1. BAR CHARTS: MEAN OF EACH METRIC PER MODEL ---
    
    model_order = ['cf', 'cb', 'hybrid', 'hybrid_mmr_static', 'hybrid_mab_mmr']
    palette = {
        'cf': 'gray', 
        'cb': 'lightblue', 
        'hybrid': 'blue', 
        'hybrid_mmr_static': 'orange', 
        'hybrid_mab_mmr': 'green'  # highlight the proposed model
    }
    
    fig, axes = plt.subplots(2, 2, figsize=(18, 12))
    fig.suptitle('Perbandingan Rata-rata Metrik Model', fontsize=20, fontweight='bold')
    
    # (column in performance_df, subplot title)
    metrics_to_plot = [
        ('Precision@10', 'Precision@10'), 
        ('Recall@10', 'Recall@10'), 
        ('NDCG@10', 'NDCG@10'), 
        ('Diversity', 'Diversity')
    ]
    
    for ax, (metric_col, title) in zip(axes.flatten(), metrics_to_plot):
        sns.barplot(
            data=performance_df, 
            x='Model', 
            y=metric_col, 
            ax=ax, 
            order=model_order,
            palette=palette,
            hue='Model',
            legend=False
        )
        # Write the value above each bar
        for p in ax.patches:
            ax.annotate(f'{p.get_height():.4f}', (p.get_x() + p.get_width() / 2., p.get_height()), 
                        ha='center', va='center', xytext=(0, 9), textcoords='offset points')
        
        ax.set_title(title, fontsize=16)
        ax.set_xlabel('Model', fontsize=12)
        ax.set_ylabel('Score', fontsize=12)
        ax.tick_params(axis='x', rotation=15)
        # Head-room so the annotations are not clipped
        ax.set_ylim(top=ax.get_ylim()[1] * 1.15)
        
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.savefig('performance_comparison_bar.png', dpi=300, bbox_inches='tight')
    plt.show()

    # --- 2. BOX PLOTS: DISTRIBUTION OF THE PER-USER SCORES ---
    # Flatten model -> metric -> [scores] into long format for seaborn
    plot_data = []
    for model_name, metrics in all_individual_scores.items():
        for metric_name, scores in metrics.items():
            for score in scores:
                plot_data.append({'Model': model_name, 'Metric': metric_name, 'Score': score})
    individual_df = pd.DataFrame(plot_data)

    fig, axes = plt.subplots(2, 2, figsize=(18, 12))
    fig.suptitle('Distribusi Skor Metrik Model', fontsize=20, fontweight='bold')

    for ax, metric_name in zip(axes.flatten(), ['precision', 'recall', 'ndcg', 'diversity']):
        sns.boxplot(
            data=individual_df[individual_df['Metric'] == metric_name],
            x='Model',
            y='Score',
            ax=ax,
            order=model_order,
            palette=palette,
            hue='Model',
            legend=False
        )
        ax.set_title(f'Distribusi {metric_name.upper()}', fontsize=16)
        ax.set_xlabel('Model', fontsize=12)
        ax.set_ylabel('Score', fontsize=12)
        ax.tick_params(axis='x', rotation=15)

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.savefig('performance_distribution_boxplot.png', dpi=300, bbox_inches='tight')
    plt.show()


    # --- 3. PARETO FRONTIER (ACCURACY vs DIVERSITY TRADE-OFF) ---
    plt.figure(figsize=(12, 8))
    # Only models that actually have a summary row; .loc with a missing label
    # would raise KeyError.
    available_models = set(performance_df['Model'])
    present_models = [name for name in model_order if name in available_models]
    pareto_data = performance_df.set_index('Model').loc[present_models]
    
    sns.scatterplot(
        data=pareto_data, x='Diversity', y='NDCG@10', hue='Model', 
        palette=palette, s=200, style='Model', markers=True, edgecolor='black'
    )
    
    for model_name in pareto_data.index:
        plt.text(x=pareto_data.loc[model_name, 'Diversity'] + 0.005,
                 y=pareto_data.loc[model_name, 'NDCG@10'], s=model_name,
                 fontdict=dict(color='black', size=10))
    
    # A model is on the frontier when no other model is better on BOTH axes.
    # Scanning from the most diverse model downwards, a point is non-dominated
    # exactly when its NDCG beats every point already seen (all of which are
    # at least as diverse). Scanning in ascending order, as before, would
    # discard genuine trade-off points such as (low diversity, high NDCG).
    pareto_sorted = pareto_data.sort_values(by=['Diversity', 'NDCG@10'], ascending=[False, False])
    pareto_points = []
    current_max_ndcg = -float('inf')
    
    for model_name, row in pareto_sorted.iterrows():
        if row['NDCG@10'] > current_max_ndcg:
            pareto_points.append((row['Diversity'], row['NDCG@10']))
            current_max_ndcg = row['NDCG@10']
    
    # Draw the frontier left to right
    pareto_points.sort()
    
    if len(pareto_points) > 1:
        pareto_x, pareto_y = zip(*pareto_points)
        plt.plot(pareto_x, pareto_y, 'k--', alpha=0.5, label='Pareto Frontier (Optimal Trade-off)')
    
    plt.title('Trade-off: Akurasi (NDCG) vs. Keragaman (Diversity)', fontsize=16, fontweight='bold')
    plt.xlabel('Keragaman (Intra-List Diversity)', fontsize=14)
    plt.ylabel('Akurasi Peringkat (NDCG@10)', fontsize=14)
    plt.legend(loc='lower right')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig('pareto_frontier_tradeoff.png', dpi=300, bbox_inches='tight')
    plt.show()

    # --- 4. HOW OFTEN THE BANDIT PICKED EACH LAMBDA (ARM) ---
    if 'mab_engine' in globals():
        plt.figure(figsize=(12, 6))
        
        mab_counts = mab_engine.counts
        mab_arms_float = mab_engine.arms 
        
        # 'palette' together with 'hue' (instead of 'color') avoids the seaborn warning
        sns.barplot(
            x=mab_arms_float, 
            y=mab_counts, 
            palette='dark:blue',
            hue=mab_arms_float, 
            dodge=False, 
            legend=False
        )
        
        plt.xticks(ticks=range(len(mab_arms_float)), labels=[f"{arm:.1f}" for arm in mab_arms_float])
        
        plt.title('Distribusi Pemilihan Lengan (Lambda) oleh MAB', fontsize=16, fontweight='bold')
        plt.xlabel('Nilai Lambda (Œª) untuk MMR', fontsize=14)
        plt.ylabel('Jumlah Pemilihan (Pulls)', fontsize=14)
        plt.savefig('mab_lambda_distribution.png', dpi=300, bbox_inches='tight')
        plt.show()
    else:
        print("‚ö†Ô∏è 'mab_engine' tidak ditemukan. Tidak bisa mem-plot distribusi lambda.")

In [None]:
# ===== CELL 13: ANALISIS DISTRIBUSI, COVERAGE, DAN LONG TAIL =====

# --- 1. Fungsi Helper ---

def calculate_gini_coefficient(all_recommendations):
    """Gini coefficient of how often each item appears in the recommendations.

    Only items that occur at least once in ``all_recommendations`` take part
    in the calculation. 0 means every such item was recommended equally often;
    values approaching 1 mean the recommendations are concentrated on few items.

    Args:
        all_recommendations: Flat list of recommended item IDs (with repeats).

    Returns:
        float: The Gini coefficient, or 0.0 for empty input.
    """
    if not all_recommendations:
        return 0.0

    # Per-item frequencies in ascending order.
    frequencies = np.array(sorted(Counter(all_recommendations).values()))
    n = len(frequencies)
    if n == 0:
        return 0.0

    # G = sum_i (2i - n - 1) * x_i / (n * sum_i x_i), with i = 1..n over sorted x.
    rank_weights = 2 * np.arange(1, n + 1) - n - 1
    numerator = np.sum(rank_weights * frequencies)
    denominator = n * np.sum(frequencies)
    return float(numerator / denominator)

def calculate_catalog_coverage(all_recommendations, all_items):
    """Fraction of the catalog that appears in the recommendations.

    coverage = |unique recommended items that are in the catalog|
               / |unique catalog items|

    Both sides use set semantics: duplicates in ``all_items`` do not inflate
    the denominator, and recommended IDs that are not part of the catalog do
    not count as covered, so the result always lies in [0, 1].

    Args:
        all_recommendations: Iterable of recommended item IDs (may repeat).
        all_items: Iterable of catalog item IDs.

    Returns:
        float: Coverage ratio; 0.0 when the catalog is empty or ``None``.
    """
    if all_items is None:
        return 0.0

    catalog = set(all_items)
    if not catalog:
        return 0.0

    covered = catalog.intersection(all_recommendations)
    return len(covered) / len(catalog)

def plot_long_tail_distribution(model_recommendations_map, item_popularity):
    """
    Draw a long-tail plot comparing item popularity with how often each model
    recommended the items, and save it to 'long_tail_distribution.png'.

    Both curves share one x-axis: the popularity rank of the item
    (0 = most popular). The popularity curve uses the left, log-scaled y-axis;
    the per-model recommendation frequencies use the right, linear y-axis.

    Args:
        model_recommendations_map: dict {'model_name': [all_recs]} where each
            value is a flat list of recommended item identifiers. Models with
            an empty list are skipped.
        item_popularity: pd.Series (index=destination_id, values=rating count).
            The Series name is not used.

    Returns:
        None. The figure is saved to disk and shown.
    """
    plt.figure(figsize=(15, 8))

    # 1. Item popularity (gray dashed line), most popular item first
    popularity_ranked = item_popularity.sort_values(ascending=False)
    ranked_items = list(popularity_ranked.index)
    known_items = set(ranked_items)

    # Items that were recommended but are missing from `item_popularity` have
    # no popularity rank; treat them as popularity 0 and place them after the
    # tail (in order of first appearance) so every model uses the same positions.
    unranked_items = list(dict.fromkeys(
        item
        for recs in model_recommendations_map.values() if recs
        for item in recs
        if item not in known_items
    ))
    rank_of_item = {item: rank for rank, item in enumerate(ranked_items + unranked_items)}

    ax1 = plt.gca()  # current axis
    ax1.plot(np.arange(len(popularity_ranked)), popularity_ranked.values,
             color='gray', linestyle='--', label='Popularitas Item (Long Tail)', alpha=0.7)
    ax1.set_yscale('log')  # log scale for popularity
    ax1.set_xlabel('Item (Diurutkan berdasarkan Popularitas)', fontsize=12)
    ax1.set_ylabel('Popularitas (Jumlah Rating) - Skala Log', fontsize=12, color='gray')
    ax1.tick_params(axis='y', labelcolor='gray')

    # 2. Recommendation frequency per model on a second y-axis
    ax2 = ax1.twinx()

    # Same model -> color mapping as used by the plots in CELL 12
    palette = {
        'cf': 'gray', 'cb': 'lightblue', 'hybrid': 'blue',
        'hybrid_mmr_static': 'orange', 'hybrid_mab_mmr': 'green'
    }

    for model_name, all_recs in model_recommendations_map.items():
        if not all_recs:
            continue

        # (rank, frequency) pairs for every item this model recommended,
        # ordered by popularity rank. Using the rank (not the raw item id) as
        # x keeps this curve aligned with the popularity curve on ax1.
        rank_and_count = sorted(
            (rank_of_item[item], count) for item, count in Counter(all_recs).items()
        )
        ranks, rec_frequencies = zip(*rank_and_count)

        ax2.plot(
            ranks,
            rec_frequencies,
            label=f'Rekomendasi {model_name}',
            color=palette.get(model_name, 'red'),
            alpha=0.8
        )

    ax2.set_ylabel('Frekuensi Rekomendasi (Jumlah Direkomendasikan)', fontsize=12, color='black')
    ax2.tick_params(axis='y', labelcolor='black')

    plt.title('Analisis Long Tail: Popularitas Item vs Frekuensi Rekomendasi', fontsize=16, fontweight='bold')
    # Merge the legend entries of both axes into a single legend
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc='upper right')

    plt.grid(True, which="both", ls="--", alpha=0.5)
    plt.savefig('long_tail_distribution.png', dpi=300, bbox_inches='tight')
    plt.show()

# --- MAIN EXECUTION CELL 13 ---

# The analysis needs a non-empty `evaluation_df` in the notebook namespace.
if 'evaluation_df' in globals() and not evaluation_df.empty:
    # 1. Flatten each model's per-row recommendation lists into one long list
    all_recommendations_map = {}
    for model_key in MODEL_NAMES:
        flattened_recs = []
        for rec_list in evaluation_df[f'recommendations_{model_key}'].dropna():
            flattened_recs.extend(rec_list)
        all_recommendations_map[model_key] = flattened_recs

    # 2. Catalog = every distinct destination_id found in `ratings_df`
    all_destination_ids = set(ratings_df['destination_id'].unique())

    # 3. Gini coefficient and catalog coverage per model
    section_rule = "="*50
    distribution_stats = {}
    print("\n" + section_rule)
    print("üìä ANALISIS DISTRIBUSI DAN COVERAGE üìä")
    print(section_rule)

    for model_name, all_recs in all_recommendations_map.items():
        if all_recs:
            gini = calculate_gini_coefficient(all_recs)
            coverage = calculate_catalog_coverage(all_recs, all_destination_ids)
            unique_recommended = len(set(all_recs))

            distribution_stats[model_name] = {
                'Gini Coefficient (Lower is Better)': gini,
                'Catalog Coverage (Higher is Better)': coverage,
                'Total Recommendations': len(all_recs),
                'Unique Items Recommended': unique_recommended
            }

            print(f"\n--- Model: {model_name.upper()} ---")
            print(f"  Gini Coefficient:  {gini:.4f}")
            print(f"  Catalog Coverage:  {coverage:.4f} ({unique_recommended} / {len(all_destination_ids)} items)")
        else:
            # Model produced no recommendations at all: report it and move on
            print(f"--- Model: {model_name.upper()} ---")
            print("  (Tidak ada rekomendasi)")

    # One row per model, one column per metric
    distribution_df = pd.DataFrame(distribution_stats).T
    display(distribution_df)

    # 4. Long-tail plot
    print("\n" + section_rule)
    print("üìà MEMBUAT PLOT LONG TAIL üìà")
    print(section_rule)

    # Item popularity = number of rows per destination_id in `train_df`
    item_popularity = train_df['destination_id'].value_counts().rename('popularity')

    plot_long_tail_distribution(all_recommendations_map, item_popularity)
else:
    print("‚ùå 'evaluation_df' tidak ditemukan. Jalankan CELL 10 terlebih dahulu.")

In [None]:
# ===== CELL 14: EXPORT FINAL RESULTS =====
import json
import os

print("\n" + "="*50)
print("üì¶ MENYIMPAN HASIL AKHIR KE FILE üì¶")
print("="*50)

EXPORT_DIR = "evaluation_results"
os.makedirs(EXPORT_DIR, exist_ok=True)
print(f"Hasil akan disimpan di folder: {EXPORT_DIR}/")


def _write_excel_best_effort(frame, excel_path, **to_excel_kwargs):
    """Write `frame` to `excel_path`; return True on success, False on failure.

    The Excel copy is a convenience duplicate of the CSV. Writing it needs an
    optional Excel engine, so a failure here is reported on its own instead of
    being presented as a failure of the whole export.
    """
    try:
        frame.to_excel(excel_path, **to_excel_kwargs)
    except Exception as e:
        print(f"   ! File Excel tidak tersimpan ({excel_path}): {e}")
        return False
    return True


def _json_default(value):
    """Fallback for json.dumps: convert objects exposing tolist() (e.g. NumPy
    scalars and arrays) into plain Python values."""
    if hasattr(value, 'tolist'):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# --- 1. Save the performance metric summary (performance_df) ---
if 'performance_df' in globals() and not performance_df.empty:
    try:
        csv_path = os.path.join(EXPORT_DIR, "results_summary_metrics.csv")
        excel_path = os.path.join(EXPORT_DIR, "results_summary_metrics.xlsx")

        performance_df.to_csv(csv_path, index=False)

        print(f"‚úÖ 1. Ringkasan Metrik Rata-rata disimpan ke:")
        print(f"   - {csv_path}")
        if _write_excel_best_effort(performance_df, excel_path, index=False):
            print(f"   - {excel_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Gagal menyimpan 'performance_df': {e}")
else:
    print("‚ö†Ô∏è 1. 'performance_df' tidak ditemukan. File tidak disimpan.")

# --- 2. Save the distribution metrics (distribution_df) ---
if 'distribution_df' in globals() and not distribution_df.empty:
    try:
        csv_path = os.path.join(EXPORT_DIR, "results_distribution_metrics.csv")
        excel_path = os.path.join(EXPORT_DIR, "results_distribution_metrics.xlsx")

        distribution_df.to_csv(csv_path)  # keep the index (model names)

        print(f"\n‚úÖ 2. Metrik Distribusi (Gini, Coverage) disimpan ke:")
        print(f"   - {csv_path}")
        if _write_excel_best_effort(distribution_df, excel_path):  # keep the index
            print(f"   - {excel_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Gagal menyimpan 'distribution_df': {e}")
else:
    print("‚ö†Ô∏è 2. 'distribution_df' tidak ditemukan. File tidak disimpan.")

# --- 3. Save the significance test results (statistical_test_results) ---
if 'statistical_test_results' in globals() and statistical_test_results:
    try:
        json_path = os.path.join(EXPORT_DIR, "results_statistical_tests.json")

        # Serialize fully BEFORE opening the file: if serialization fails, the
        # previously exported JSON is left untouched instead of being truncated.
        json_payload = json.dumps(statistical_test_results, indent=4, default=_json_default)
        with open(json_path, 'w') as f:
            f.write(json_payload)

        print(f"\n‚úÖ 3. Hasil Uji Signifikansi (T-Test) disimpan ke:")
        print(f"   - {json_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Gagal menyimpan 'statistical_test_results': {e}")
else:
    print("‚ö†Ô∏è 3. 'statistical_test_results' tidak ditemukan. File tidak disimpan.")

# --- 4. Save all individual scores (individual_df) ---
# ('individual_df' is created in CELL 12 for plotting)
if 'individual_df' in globals() and not individual_df.empty:
    try:
        # gzip-compressed because this file can become very large
        csv_path = os.path.join(EXPORT_DIR, "results_individual_scores.csv.gz")

        individual_df.to_csv(csv_path, index=False, compression='gzip')

        print(f"\n‚úÖ 4. Semua Skor Individu (mentah) disimpan ke:")
        print(f"   - {csv_path} (terkompresi)")
    except Exception as e:
        print(f"‚ö†Ô∏è Gagal menyimpan 'individual_df': {e}")
else:
    print("‚ö†Ô∏è 4. 'individual_df' tidak ditemukan. File tidak disimpan.")

# --- 5. Show the main cache files (for reference) ---
print("\n" + "="*50)
print("üìÅ LOKASI FILE CACHE UTAMA (DATA MENTAH) üìÅ")
print("="*50)
# The cache-file constants are defined in another cell; guard them like the
# other notebook variables above so this summary cannot end in a NameError.
for cache_label, cache_var in (
    ("Rekomendasi mentah per pengguna: ", 'EVAL_CACHE_FILE'),
    ("Hasil performa mentah (cache):   ", 'PERF_CACHE_FILE'),
):
    if cache_var in globals():
        print(f"{cache_label}./{globals()[cache_var]}")
    else:
        print(f"{cache_label}(variabel '{cache_var}' tidak ditemukan)")

print("\n\nüéâ === EVALUASI SELESAI === üéâ")

In [None]:
# ===== CELL 15.5: SENSITIVITY ANALYSIS - REWARD FUNCTION =====
from tabulate import tabulate

banner_rule = "="*60
print(banner_rule)
print("üî¨ SENSITIVITY ANALYSIS: REWARD FUNCTION WEIGHTING üî¨")
print(banner_rule)
print("\n‚ÑπÔ∏è Analisis ini menggunakan data yang SUDAH dievaluasi.")
print("   Kita akan RE-CALCULATE reward dengan bobot berbeda untuk melihat")
print("   lambda mana yang akan dipilih MAB jika reward function berbeda.\n")

# Reward weightings to test: (label, NDCG weight, diversity weight)
sensitivity_configs = [
    {'name': label, 'ndcg_w': ndcg_weight, 'div_w': diversity_weight}
    for label, ndcg_weight, diversity_weight in (
        ('70-30 (NDCG Priority)', 0.7, 0.3),
        ('60-40 (Moderate NDCG)', 0.6, 0.4),
        ('50-50 (Balanced)', 0.5, 0.5),
        ('40-60 (Moderate Div)', 0.4, 0.6),
        ('30-70 (Diversity Priority)', 0.3, 0.7),
    )
]

sensitivity_results = []
all_mab_eval_data = pd.DataFrame()  # stays empty unless the preparation below succeeds

try:
    # 1. Build the raw-data frame (depends on the outputs of CELL 10 & 11).
    # Done once, outside the simulation loop, so a missing input surfaces here.
    mab_scores = all_individual_scores['hybrid_mab_mmr']
    ndcg_scores = mab_scores['ndcg']
    diversity_scores = mab_scores['diversity']
    # Arm index chosen per evaluated row; rows without an arm are dropped
    arm_indices = evaluation_df['mab_arm_index'].dropna().astype(int)

    # Truncate all three inputs to the shortest one.
    # NOTE(review): this pairs scores and arm indices by position — confirm that
    # the score lists follow the same row order as `evaluation_df`.
    min_len = min(map(len, (ndcg_scores, diversity_scores, arm_indices)))

    all_mab_eval_data = pd.DataFrame({
        'ndcg': ndcg_scores[:min_len],
        'diversity': diversity_scores[:min_len],
        'mab_arm_index': arm_indices[:min_len]
    })

    if all_mab_eval_data.empty:
        raise ValueError("Data evaluasi MAB kosong.")

except Exception as e:
    print("‚ùå ERROR: Gagal mempersiapkan data untuk simulasi. Jalankan CELL 10 & 11 dulu.")
    print(f"   Detail Error: {e}")

# --- Run the simulation ONLY if the data was prepared successfully ---
if not all_mab_eval_data.empty:

    for config in sensitivity_configs:
        print(f"\n--- Testing: {config['name']} ---")

        # Fresh bandit for every weighting so the simulations do not influence each other
        test_mab = SimpleMAB(n_arms=11)

        # Replay the recorded arm choices, re-scoring each one with the new weights
        for _, row in tqdm(all_mab_eval_data.iterrows(), total=len(all_mab_eval_data), desc=f"Simulating {config['name']}"):

            arm_index_raw = row['mab_arm_index']
            if pd.isna(arm_index_raw):
                continue

            # iterrows() may hand the index back as a float; the bandit needs an int
            try:
                arm_index = int(arm_index_raw)
            except (ValueError, TypeError, OverflowError):
                print(f"Skipping invalid arm_index: {arm_index_raw}")
                continue

            # Ignore arms that do not exist in this bandit
            if not 0 <= arm_index < test_mab.n_arms:
                continue

            # Reward under the weighting being tested
            reward_new = calculate_reward(row['ndcg'], row['diversity'], config['ndcg_w'], config['div_w'])

            # Increment the pull counters by hand (the replay does not go
            # through 'select_arm'), then apply the reward EXACTLY ONCE.
            # The previous version called update() twice per row, so every
            # observation was applied to the arm's reward estimate twice.
            test_mab.total_pulls += 1
            test_mab.counts[arm_index] += 1
            test_mab.update(arm_index, reward_new)

        # Analyse the simulation result (uses .avg_rewards, not .rewards)
        best_arm_idx = np.argmax(test_mab.avg_rewards)
        best_lambda = test_mab.arms[best_arm_idx]
        best_reward = test_mab.avg_rewards[best_arm_idx]

        avg_reward = np.mean(test_mab.avg_rewards)
        std_reward = np.std(test_mab.avg_rewards)

        result = {
            'Config': config['name'],
            'NDCG_Weight': config['ndcg_w'],
            'Diversity_Weight': config['div_w'],
            'Best_Lambda': best_lambda,
            'Best_Reward': best_reward,
            'Avg_Reward': avg_reward,
            'Std_Reward': std_reward
        }
        sensitivity_results.append(result)

        print(f"  Best Lambda: {best_lambda:.1f}")
        print(f"  Best Reward: {best_reward:.4f}")
        print(f"  Avg Reward: {avg_reward:.4f} (¬±{std_reward:.4f})")

    # --- Summary table ---
    print("\n" + "="*60)
    print("üìä SUMMARY: SENSITIVITY ANALYSIS")
    print("="*60)

    sensitivity_df = pd.DataFrame(sensitivity_results)

    # tabulate formats the numbers itself, so the 4-decimal precision has to be
    # requested through `floatfmt`; pandas display options do not apply to it.
    print(tabulate(sensitivity_df, headers='keys', tablefmt='psql', showindex=True, floatfmt=".4f"))