<a href="https://colab.research.google.com/github/FaruqAlfa/Stock-Mid-Price-Prediction-Based-on-LOB-Data-Using-the-Transformer-Model/blob/main/Transformer_Preprocessing_data_and_Modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset and output
# paths used further down in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Kode Preprocessing**

## **Preprocessing menggunakan MinMaxScaler**

In [None]:
# Install packages jika belum ada
# !pip install pandas numpy matplotlib seaborn scikit-learn scipy torch

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from scipy import stats
import torch
from datetime import datetime
import os
import pickle
import json

# 1. Membaca dan Membersihkan Data Hilang dan Anomali
def load_and_clean_data(file_path):
    """Read a LOB CSV file, fill gaps and clamp price outliers.

    Steps, in order:
      1. Read the CSV once and report rows that are entirely NaN.
      2. Drop those rows and parse the ``timestamp`` column as datetime.
      3. If any value is missing, linearly interpolate the numeric columns.
      4. Clamp every column whose name contains ``'price'`` to the range
         ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` computed on that column.

    Args:
        file_path: CSV source accepted by ``pandas.read_csv``. The file must
            contain a ``timestamp`` column.

    Returns:
        pandas.DataFrame: the cleaned data.
    """
    print("Tahap 1: Membaca dan membersihkan data...")

    df_raw = pd.read_csv(file_path)
    print(f"Data awal: {df_raw.shape[0]} baris, {df_raw.shape[1]} kolom")
    print("5 baris pertama:")
    print(df_raw.head())

    # Report rows in which every column is NaN
    nan_rows = df_raw[df_raw.isnull().all(axis=1)]
    print(f"Jumlah baris yang seluruhnya NaN: {len(nan_rows)}")
    if len(nan_rows) > 0:
        print("Contoh baris yang seluruhnya NaN:")
        print(nan_rows.head())

    # skip_blank_lines=True is already the read_csv default, so re-reading
    # the file would only parse the same frame a second time; reuse df_raw.
    print(f"Dataset awal memiliki {df_raw.shape[0]} baris dan {df_raw.shape[1]} kolom.")

    # Drop fully-empty rows; copy so later column assignments never touch df_raw
    df = df_raw.dropna(how='all').copy()
    print(f"Setelah menghapus baris kosong: {df.shape[0]} baris")

    # Convert timestamp to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    # Check for missing values per column
    missing_values = df.isnull().sum()
    missing_cols = missing_values[missing_values > 0]
    if len(missing_cols) > 0:
        print(f"Jumlah nilai yang hilang di setiap kolom:\n{missing_cols}")
        # Interpolate numeric columns only: linear interpolation is undefined
        # for text columns and pandas rejects/deprecates it on object dtype.
        numeric_cols = df.select_dtypes(include='number').columns
        df[numeric_cols] = df[numeric_cols].interpolate(method='linear')
        print("Nilai yang hilang telah diinterpolasi.")
    else:
        print("Tidak ada nilai yang hilang dalam dataset.")

    # Detect and clamp outliers in the price columns (IQR rule)
    price_columns = [col for col in df.columns if 'price' in col]
    anomaly_count = 0
    for col in price_columns:
        q1 = df[col].quantile(0.25)
        q3 = df[col].quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        n_outliers = int(((df[col] < lower_bound) | (df[col] > upper_bound)).sum())
        anomaly_count += n_outliers
        if n_outliers > 0:
            print(f"Ditemukan {n_outliers} anomali pada kolom {col}")
            # clip() handles the int -> float upcast when the bounds are
            # fractional, unlike element-wise .loc assignment.
            df[col] = df[col].clip(lower=lower_bound, upper=upper_bound)

    if anomaly_count == 0:
        print("Tidak ditemukan anomali dalam kolom harga.")

    print(f"Dataset setelah dibersihkan memiliki {df.shape[0]} baris.")
    return df

# 2. Sinkronisasi Waktu
def synchronize_time(df, interval='1min'):
    """Resample the data onto a regular time grid.

    The frame is indexed by ``timestamp`` and resampled at ``interval``.
    Columns whose name contains ``'volume'`` are summed per bin; every other
    column is averaged. Bins left with any NaN are dropped.

    The output column order is: price columns, then the remaining volume
    columns, then everything else (each group in input order).

    Args:
        df: DataFrame with a ``timestamp`` column.
        interval: pandas offset alias for the bin width.

    Returns:
        pandas.DataFrame: resampled data with ``timestamp`` back as a column.
    """
    print("\nTahap 2: Melakukan sinkronisasi waktu...")
    print(f"Data sebelum resampling: {df.shape[0]} baris")

    indexed = df.set_index('timestamp')

    price_like = [name for name in indexed.columns if 'price' in name]
    volume_like = [name for name in indexed.columns if 'volume' in name]
    leftovers = [name for name in indexed.columns
                 if name not in price_like and name not in volume_like]

    # The aggregation dict's key order decides the output column order.
    ordered = price_like + [name for name in volume_like if name not in price_like] + leftovers
    how = {name: ('sum' if 'volume' in name else 'mean') for name in ordered}

    # Empty bins get NaN for the averaged columns and are removed here.
    df_resampled = indexed.resample(interval).agg(how).dropna().reset_index()

    print(f"Dataset setelah sinkronisasi waktu memiliki {df_resampled.shape[0]} baris.")
    return df_resampled

# 3 & 4. Hitung Mid-Price
def calculate_mid_price(df):
    """Add per-level and combined mid-price columns to ``df``.

    For every ``*bid_price*`` column, the level is taken as the text after the
    last underscore; if ``offer_price_<level>`` exists, a
    ``mid_price_<level>`` column holding the bid/offer average is added.
    ``mid_price`` is the row-wise mean of those level columns, or the constant
    0 when no bid/offer pair was found.

    Note: the columns are added to ``df`` in place; the same object is returned.

    Args:
        df: DataFrame holding the bid/offer price columns.

    Returns:
        pandas.DataFrame: ``df`` itself, with the mid-price columns added.
    """
    print("\nTahap 3 & 4: Menghitung mid-price...")

    # Locate the available bid and offer columns
    bid_cols = [name for name in df.columns if 'bid_price' in name]
    offer_cols = [name for name in df.columns if 'offer_price' in name]
    known_offers = set(offer_cols)

    print(f"Ditemukan {len(bid_cols)} kolom bid dan {len(offer_cols)} kolom offer")

    # One mid-price column per level that has both sides of the book
    mid_price_cols = []
    for bid_col in bid_cols:
        level = bid_col.rsplit('_', 1)[-1]
        offer_col = f'offer_price_{level}'
        if offer_col not in known_offers:
            continue
        mid_col = f'mid_price_{level}'
        df[mid_col] = (df[bid_col] + df[offer_col]) / 2
        mid_price_cols.append(mid_col)

    if not mid_price_cols:
        print("Warning: Tidak ditemukan pasangan bid-offer price untuk menghitung mid-price")
        # No price pair available: fall back to a dummy mid_price
        df['mid_price'] = 0
        return df

    # Combined mid-price across all levels
    df['mid_price'] = df[mid_price_cols].mean(axis=1)
    print(f"Mid-price telah ditambahkan untuk {len(mid_price_cols)} level dan mid-price gabungan.")
    return df

# 5. Normalisasi dan Standarisasi
def normalize_standardize_data(df, method='standardize'):
    """Scale every non-timestamp column of ``df``.

    ``method='normalize'`` uses ``MinMaxScaler``; any other value uses
    ``StandardScaler``. The scaler is fit on all rows of ``df`` that are
    passed in.

    Args:
        df: DataFrame with a ``timestamp`` column; all other columns are scaled.
        method: ``'normalize'`` for min-max scaling, anything else for
            standardisation.

    Returns:
        tuple: ``(df_scaled, scaler)`` where ``df_scaled`` has ``timestamp`` as
        its first column followed by the scaled columns in their input order,
        and ``scaler`` is the fitted scaler object.
    """
    print("\nTahap 5: Melakukan normalisasi dan standarisasi...")
    print(f"Data sebelum scaling: {df.shape[0]} baris, {df.shape[1]} kolom")

    stamps = df['timestamp'].copy()
    feature_frame = df.drop('timestamp', axis=1)

    use_minmax = method == 'normalize'
    scaler = MinMaxScaler() if use_minmax else StandardScaler()
    scaled_values = scaler.fit_transform(feature_frame)
    if use_minmax:
        print("Data telah dinormalisasi ke rentang [0, 1].")
    else:
        print("Data telah distandarisasi dengan mean=0, std=1.")

    # Rebuild a frame with a fresh RangeIndex and timestamp in first position
    df_scaled = pd.DataFrame(scaled_values, columns=feature_frame.columns)
    df_scaled.insert(0, 'timestamp', stamps.values)

    print(f"Data setelah scaling: {df_scaled.shape[0]} baris, {df_scaled.shape[1]} kolom")
    return df_scaled, scaler

# 6. Sliding Window - DIPERBAIKI
def create_sliding_window(df, window_size=10):
    """Turn the scaled table into (window, next-step target) samples.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of every column except
    ``timestamp`` and ``mid_price`` as input, and the ``mid_price`` of row
    ``i + window_size`` as target. The returned timestamp is that of the
    target row.

    Args:
        df: DataFrame with ``timestamp`` and ``mid_price`` columns.
        window_size: number of consecutive rows per input window.

    Returns:
        tuple: ``(X, y, window_timestamps)`` as numpy arrays; ``X`` has shape
        ``(len(df) - window_size, window_size, n_features)``.

    Raises:
        ValueError: if ``df`` has ``window_size`` rows or fewer.
    """
    print("\nTahap 6: Membuat time-series sliding window...")
    print(f"Data input: {df.shape[0]} baris")
    print(f"Window size: {window_size}")

    n_rows = len(df)
    # At least one row must remain after the window to serve as target
    if n_rows <= window_size:
        raise ValueError(f"Dataset terlalu kecil. Membutuhkan minimal {window_size + 1} baris, tetapi hanya memiliki {len(df)} baris.")

    timestamps = df['timestamp'].values

    # Inputs: every column except the timestamp and the target itself
    feature_cols = [name for name in df.columns if name not in ['timestamp', 'mid_price']]
    features = df[feature_cols].values
    targets = df['mid_price'].values

    print(f"Jumlah fitur: {len(feature_cols)}")
    print(f"Nama fitur: {feature_cols[:5]}...")  # show the first 5 features only

    starts = range(n_rows - window_size)
    X = np.array([features[start:start + window_size] for start in starts])
    y = np.array([targets[start + window_size] for start in starts])
    window_timestamps = np.array([timestamps[start + window_size] for start in starts])

    print(f"Sliding window berhasil dibuat:")
    print(f"- Jumlah sampel: {len(X)}")
    print(f"- Shape X: {X.shape} (samples, timesteps, features)")
    print(f"- Shape y: {y.shape}")
    print(f"- Timestamps: {len(window_timestamps)}")

    return X, y, window_timestamps

# 7. Split Data - DIPERBAIKI
def split_data(X, y, timestamps, train_ratio=0.7, val_ratio=0.15):
    """Split the samples chronologically into train / validation / test.

    The first ``train_size`` samples go to train, the next ``val_size`` to
    validation and the remainder to test; no shuffling is done. With fewer
    than 10 samples the ratios are overridden to 0.6 / 0.2.

    Args:
        X, y, timestamps: sliceable sequences of equal length.
        train_ratio: fraction of samples used for training.
        val_ratio: fraction of samples used for validation.

    Returns:
        tuple: ``(X_train, y_train, ts_train, X_val, y_val, ts_val,
        X_test, y_test, ts_test)``.
    """
    print("\nTahap 7: Membagi data...")
    n_samples = len(X)
    print(f"Total sampel: {n_samples}")

    if n_samples < 10:
        print("Warning: Dataset sangat kecil, menyesuaikan rasio pembagian...")
        train_ratio, val_ratio = 0.6, 0.2

    # Keep at least one sample in train and validation
    train_size = max(1, int(n_samples * train_ratio))
    val_size = max(1, min(int(n_samples * val_ratio), n_samples - train_size - 1))
    test_size = n_samples - train_size - val_size

    print(f"Pembagian data: Train={train_size}, Val={val_size}, Test={test_size}")

    val_end = train_size + val_size
    spans = (slice(None, train_size), slice(train_size, val_end), slice(val_end, None))

    pieces = []
    for span in spans:
        pieces.extend((X[span], y[span], timestamps[span]))
    return tuple(pieces)

# 8. Format for Transformer
def format_for_transformer(X_train, y_train, X_val, y_val, X_test, y_test):
    """Convert the splits to float tensors and add a positional encoding.

    A sinusoidal table of shape ``(seq_len, feature_dim)``, with both sizes
    taken from ``X_train.shape[1:3]``, is added in place to every sample of
    the three input tensors. The targets are only converted.

    Args:
        X_train, X_val, X_test: arrays indexed as (sample, timestep, feature).
        y_train, y_val, y_test: target arrays.

    Returns:
        tuple: ``(X_train, y_train, X_val, y_val, X_test, y_test)`` as
        ``torch.FloatTensor`` objects.
    """
    print("\nTahap 8: Format untuk Transformer...")
    X_train_tensor, y_train_tensor = torch.FloatTensor(X_train), torch.FloatTensor(y_train)
    X_val_tensor, y_val_tensor = torch.FloatTensor(X_val), torch.FloatTensor(y_val)
    X_test_tensor, y_test_tensor = torch.FloatTensor(X_test), torch.FloatTensor(y_test)

    seq_len, feature_dim = X_train.shape[1], X_train.shape[2]

    # Sinusoidal table: sine on even feature indices, cosine on odd ones
    steps = np.arange(seq_len)[:, np.newaxis]
    rates = np.exp(np.arange(0, feature_dim, 2) * -(np.log(10000.0) / feature_dim))
    table = np.zeros((seq_len, feature_dim))
    table[:, 0::2] = np.sin(steps * rates)
    if feature_dim > 1:
        # For an odd feature_dim there is one cosine slot fewer than sine slots
        table[:, 1::2] = np.cos(steps * rates[:feature_dim // 2])
    pos_encoding = torch.FloatTensor(table)

    # In-place add, broadcast over the leading sample axis
    for batch in (X_train_tensor, X_val_tensor, X_test_tensor):
        if batch.size(0) > 0:
            batch += pos_encoding

    print("Data siap untuk model Transformer.")
    return X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor

# Utility
def print_full_dataframe(df, info=None):
    """Print ``df`` with all columns visible and at most 20 rows.

    The display options are changed only for the duration of the print and
    are then restored to whatever the caller had configured, even if printing
    raises.

    Args:
        df: object to print (normally a DataFrame).
        info: optional title printed above the data.
    """
    if info:
        print(f"\n=== {info} ===")
    # max_rows is capped at 20 for readability; max_columns=None shows every column
    with pd.option_context('display.max_rows', 20, 'display.max_columns', None):
        print(df)

# Fungsi untuk menyimpan hasil preprocessing dalam format CSV - DIPERBAIKI
def save_preprocessed_data_as_csv(results, output_dir, include_timestamps=True):
    """
    Write the preprocessing results to ``output_dir``.

    Files written:
      * ``preprocessed_data.csv`` - ``results['df_preprocessed']`` as is.
      * ``train_data.csv`` / ``val_data.csv`` / ``test_data.csv`` - one row per
        sample: optional ``timestamp``, ``target``, then the window flattened
        into columns named ``t<timestep>_f<feature>`` (timestep-major).
        An empty split is skipped with a warning.
      * ``metadata.json`` - shapes, sample counts, scaling method, column
        names and creation date.
      * ``scaler.pkl`` - the pickled scaler object.

    Args:
        results: dict with the keys ``df_original``, ``df_preprocessed``,
            ``scaler``, ``X_*`` / ``y_*`` for train, val and test, and
            optionally ``ts_*``. The X/y entries may be tensors or arrays.
        output_dir: target directory; created if missing.
        include_timestamps: whether to write the ``timestamp`` column in the
            split files.

    Returns:
        dict: mapping such as ``'train_path'`` / ``'metadata_path'`` to the
        written file paths.
    """
    print(f"\nMenyimpan hasil preprocessing sebagai CSV di {output_dir}...")

    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_dir, exist_ok=True)

    # 1. Full preprocessed table
    df_preprocessed = results['df_preprocessed']
    preprocessed_path = os.path.join(output_dir, 'preprocessed_data.csv')
    df_preprocessed.to_csv(preprocessed_path, index=False)
    print(f"Data hasil preprocessing tersimpan di '{preprocessed_path}'")

    # 2. One flat CSV per split
    datasets = [
        ('train', results['X_train'], results['y_train'], results.get('ts_train')),
        ('val', results['X_val'], results['y_val'], results.get('ts_val')),
        ('test', results['X_test'], results['y_test'], results.get('ts_test'))
    ]

    file_paths = {}

    for split_name, X, y, timestamps in datasets:
        if len(X) == 0:
            print(f"Warning: {split_name} dataset kosong, melewati...")
            continue

        # Tensors are moved to CPU numpy arrays; arrays are used as they are
        if torch.is_tensor(X):
            X_np = X.cpu().numpy()
            y_np = y.cpu().numpy()
        else:
            X_np = X
            y_np = y

        data_dict = {}
        if timestamps is not None and include_timestamps:
            data_dict['timestamp'] = timestamps
        data_dict['target'] = y_np

        # Flatten (sample, timestep, feature) into one column per (t, f) pair
        n_timesteps, n_features = X_np.shape[1], X_np.shape[2]
        data_dict.update({
            f't{t}_f{f}': X_np[:, t, f]
            for t in range(n_timesteps)
            for f in range(n_features)
        })

        df_split = pd.DataFrame(data_dict)
        file_path = os.path.join(output_dir, f'{split_name}_data.csv')
        df_split.to_csv(file_path, index=False)
        file_paths[f'{split_name}_path'] = file_path
        print(f"Data {split_name} tersimpan di '{file_path}' ({len(df_split)} baris)")

    # 3. Metadata
    X_sample = results['X_train']
    if torch.is_tensor(X_sample):
        X_sample = X_sample.cpu().numpy()
    has_train = len(X_sample) > 0

    metadata = {
        'original_data_shape': results['df_original'].shape,
        'preprocessed_data_shape': df_preprocessed.shape,
        'window_size': X_sample.shape[1] if has_train else 0,
        'n_features': X_sample.shape[2] if has_train else 0,
        'n_samples_train': len(results['X_train']),
        'n_samples_val': len(results['X_val']),
        'n_samples_test': len(results['X_test']),
        'scaling_method': 'standardize' if isinstance(results['scaler'], StandardScaler) else 'normalize',
        # NOTE(review): this lists every column of df_preprocessed, so it can
        # be longer than 'n_features' - confirm what consumers expect.
        'feature_names': list(df_preprocessed.columns),
        'timestamp_included': include_timestamps,
        'creation_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    }

    metadata_path = os.path.join(output_dir, 'metadata.json')
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=4)
    print(f"Metadata tersimpan di '{metadata_path}'")

    # Scaler, so the same transform can be reapplied or inverted later
    scaler_path = os.path.join(output_dir, 'scaler.pkl')
    with open(scaler_path, 'wb') as f:
        pickle.dump(results['scaler'], f)
    print(f"Scaler tersimpan di '{scaler_path}'")

    file_paths.update({
        'preprocessed_path': preprocessed_path,
        'metadata_path': metadata_path,
        'scaler_path': scaler_path
    })

    return file_paths

# Pipeline - DIPERBAIKI
def preprocess_lob_data(file_path, window_size=60, scaling_method='standardize', time_interval='1min', show_full_data=False):
    """Run the whole LOB preprocessing pipeline on one CSV file.

    Stages: load/clean -> time resampling -> mid-price -> scaling ->
    sliding windows -> chronological split -> tensor formatting.

    Args:
        file_path: CSV file to process.
        window_size: rows per input window.
        scaling_method: forwarded to ``normalize_standardize_data``.
        time_interval: resampling interval forwarded to ``synchronize_time``.
        show_full_data: when true, print the intermediate tables.

    Returns:
        dict: the cleaned frame (``df_original``), the scaled frame
        (``df_preprocessed``), the fitted ``scaler``, the six split tensors
        (``X_*`` / ``y_*``) and the three timestamp arrays (``ts_*``).
    """
    def _show(frame, title):
        # Intermediate dumps are opt-in
        if show_full_data:
            print_full_dataframe(frame, title)

    print("==== MULAI PREPROCESSING DATASET LOB ====")

    # Stage 1: load and clean
    df_clean = load_and_clean_data(file_path)
    _show(df_clean, "Data setelah dibersihkan")

    # Stage 2: synchronise time
    df_sync = synchronize_time(df_clean, interval=time_interval)
    _show(df_sync, "Data setelah sinkronisasi waktu")

    # Stages 3-4: mid-price
    df_mid = calculate_mid_price(df_sync)
    if show_full_data:
        mid_price_cols = [name for name in df_mid.columns if 'mid_price' in name]
        print_full_dataframe(df_mid[['timestamp'] + mid_price_cols], "Kolom mid-price")

    # Stage 5: scaling
    df_scaled, scaler = normalize_standardize_data(df_mid, method=scaling_method)
    _show(df_scaled, "Data setelah scaling")

    # Stage 6: sliding windows
    X, y, timestamps = create_sliding_window(df_scaled, window_size=window_size)

    # Stage 7: chronological split
    parts = split_data(X, y, timestamps)
    X_train, y_train, ts_train, X_val, y_val, ts_val, X_test, y_test, ts_test = parts

    # Stage 8: tensors for the Transformer
    tensors = format_for_transformer(X_train, y_train, X_val, y_val, X_test, y_test)
    X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor = tensors

    print("\n==== RINGKASAN HASIL PREPROCESSING ====")
    print(f"Data asli: {df_clean.shape[0]} baris")
    print(f"Setelah sinkronisasi: {df_sync.shape[0]} baris")
    print(f"Setelah sliding window: {len(X)} sampel")
    print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")
    print("==== PREPROCESSING SELESAI ====")

    summary = {
        'df_original': df_clean,
        'df_preprocessed': df_scaled,
        'scaler': scaler,
    }
    summary.update({
        'X_train': X_train_tensor,
        'y_train': y_train_tensor,
        'X_val': X_val_tensor,
        'y_val': y_val_tensor,
        'X_test': X_test_tensor,
        'y_test': y_test_tensor,
    })
    summary.update({
        'ts_train': ts_train,
        'ts_val': ts_val,
        'ts_test': ts_test,
    })
    return summary

# Pemanggilan jika dijalankan sebagai script utama
if __name__ == "__main__":
    # Paths below assume Google Drive is mounted at /content/drive (Colab):
    # from google.colab import drive
    # drive.mount('/content/drive')
    # file_path = '/content/drive/MyDrive/SKRIPSI/Dataset_1/lob_data_BRPT.csv'

    file_path = '/content/drive/MyDrive/SKRIPSI/Dataset/lob_data_PTRO.csv'

    # This section and its output folder are the MinMax-scaler variant, so the
    # pipeline must run with scaling_method='normalize' (MinMaxScaler);
    # 'standardize' would store StandardScaler output under the minmax folder.
    results = preprocess_lob_data(
        file_path=file_path,
        window_size=10,  # small window because the dataset is small
        scaling_method='normalize',
        time_interval='1min',
        show_full_data=True  # set to False to suppress the intermediate tables
    )

    # Save the preprocessing results as CSV
    output_dir = '/content/drive/MyDrive/SKRIPSI/dataset_setelah_preprocessing/minMaxScaller/preprocessed_data_minmax_scaller_PTRO' # adjust the path as needed
    csv_files = save_preprocessed_data_as_csv(results, output_dir)

    # List the files that were written
    print("\nFile CSV yang telah disimpan:")
    for file_type, path in csv_files.items():
        print(f"- {file_type}: {path}")

    print("\nPreprocessing dan penyimpanan selesai.")

==== MULAI PREPROCESSING DATASET LOB ====
Tahap 1: Membaca dan membersihkan data...
Data awal: 1315 baris, 45 kolom
5 baris pertama:
             timestamp  last_price  percentage_change  high_price  low_price  \
0  2025-05-19 13:00:00        3170              -0.94        3240       3160   
1  2025-05-19 13:01:00        3180              -0.62        3240       3160   
2  2025-05-19 13:02:00        3180              -0.62        3240       3160   
3  2025-05-19 13:03:00        3180              -0.62        3240       3160   
4  2025-05-19 13:04:00        3180              -0.62        3240       3160   

   bid_price_1  bid_volume_1  offer_price_1  offer_volume_1  bid_price_2  ...  \
0         3170        536200           3180          129500         3160  ...   
1         3170        549100           3180          125200         3160  ...   
2         3170        540900           3180           96500         3160  ...   
3         3170        557700           3180           76100   

# **Modelling**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import time
import math
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import copy
import os
import json
import pickle
import re
import warnings
# NOTE(review): this suppresses every warning for the whole process,
# including deprecation notices from the libraries imported above.
warnings.filterwarnings('ignore')

# Select the compute device: CUDA when available, otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# ================================
# 1. SIMPLIFIED DATA LOADER
# ================================

def load_preprocessed_csv_data(data_dir, debug=True):
    """
    Load the train/val/test CSV splits, metadata and scaler from ``data_dir``.

    Each split file is turned into a 3-D array indexed as
    (sample, timestep, feature). Columns whose name contains
    ``t<digits>_f<digits>`` are placed by their timestep/feature numbers;
    if no such column exists, every non-target, non-timestamp, non-index
    column is used as one feature of a single timestep.

    Args:
        data_dir: directory holding ``metadata.json``, the split CSV files
            and optionally ``scaler.pkl``.
        debug: when true, print the detected timesteps/features per split.

    Returns:
        dict: ``X_train``, ``y_train``, ``X_val``, ``y_val``, ``X_test``,
        ``y_test`` as float tensors, plus ``scaler`` (``None`` if no scaler
        file exists), ``n_timesteps`` and ``n_features`` (both taken from the
        training split).

    Raises:
        FileNotFoundError: if the metadata file or any split file is missing.
        ValueError: if a split has no usable target column.
    """
    print(f"Loading preprocessed data from: {data_dir}")

    # Load metadata (only its keys are reported)
    metadata_path = os.path.join(data_dir, 'metadata.json')
    if not os.path.exists(metadata_path):
        raise FileNotFoundError(f"Metadata file not found at {metadata_path}")

    with open(metadata_path, 'r') as f:
        metadata = json.load(f)

    print(f"Metadata loaded: {list(metadata.keys())}")

    # Load scaler
    scaler_path = os.path.join(data_dir, 'scaler.pkl')
    scaler = None
    if os.path.exists(scaler_path):
        # SECURITY: pickle.load can execute arbitrary code; only point
        # data_dir at directories whose contents you trust.
        with open(scaler_path, 'rb') as f:
            scaler = pickle.load(f)
        print("Scaler loaded successfully")

    # Load data files with flexible naming
    datasets = {}
    file_mappings = {
        'train': ['train_data.csv'],
        'val': ['val_data.csv', 'validation_data.csv'],
        'test': ['test_data.csv']
    }

    for split, possible_names in file_mappings.items():
        for file_name in possible_names:
            file_path = os.path.join(data_dir, file_name)
            if os.path.exists(file_path):
                datasets[split] = pd.read_csv(file_path)
                print(f"{split.capitalize()} data: {len(datasets[split])} samples from {file_name}")
                break
        else:
            # No candidate file name existed for this split
            raise FileNotFoundError(f"{split.capitalize()} data not found. Tried: {possible_names}")

    # Compiled once and shared by all three splits
    feature_pattern = re.compile(r't(\d+)_f(\d+)')

    def extract_features_targets(df, debug_name=""):
        """Return ``(X, y, n_timesteps, n_features)`` for one split frame."""
        # Find target column
        target_col = None
        if 'target' in df.columns:
            target_col = 'target'
        else:
            target_candidates = [col for col in df.columns
                                 if any(x in col.lower() for x in ['mid_price', 'target', 'price'])]
            if target_candidates:
                target_col = target_candidates[0]
                print(f"Using '{target_col}' as target for {debug_name}")

        if target_col is None:
            raise ValueError(f"No target column found in {debug_name} data")

        y = df[target_col].values

        # Extract features (look for time-feature pattern)
        feature_cols = []
        timesteps = set()
        features = set()

        for col in df.columns:
            match = feature_pattern.search(col)
            if match:
                t, f = int(match.group(1)), int(match.group(2))
                timesteps.add(t)
                features.add(f)
                feature_cols.append((col, t, f))

        if not feature_cols:
            # Fallback: use all non-target columns as features of timestep 0.
            # Feature indices are numbered over the kept columns only, so they
            # line up with `features` and every kept column lands in X.
            exclude_cols = {'target', 'timestamp', 'index', target_col}
            kept_cols = [col for col in df.columns
                         if col not in exclude_cols
                         and not any(x in col.lower() for x in ['timestamp', 'index'])]
            feature_cols = [(col, 0, i) for i, col in enumerate(kept_cols)]
            print(f"Warning: No time-feature pattern found, using {len(feature_cols)} columns as features")
            timesteps = {0}
            features = set(range(len(feature_cols)))

        # Determine dimensions
        n_timesteps = len(timesteps)
        n_features = len(features)
        n_samples = len(df)

        if debug:
            print(f"{debug_name} - Timesteps: {sorted(timesteps)}")
            print(f"{debug_name} - Features: {sorted(features)}")
            print(f"{debug_name} - Shape will be: ({n_samples}, {n_timesteps}, {n_features})")

        # Map the (possibly non-contiguous) numbers to array positions once,
        # instead of sorting and searching for every column.
        t_position = {t: i for i, t in enumerate(sorted(timesteps))}
        f_position = {f: i for i, f in enumerate(sorted(features))}

        # Create and fill the 3D array; (t, f) pairs without a column stay 0
        X = np.zeros((n_samples, n_timesteps, n_features))
        for col_name, t, f in feature_cols:
            col_data = df[col_name].values
            # Replace inf values with 0 before storing
            col_data = np.where(np.isinf(col_data), 0, col_data)
            X[:, t_position[t], f_position[f]] = col_data

        # Handle NaN and inf values
        X = np.nan_to_num(X, nan=0.0, posinf=1e6, neginf=-1e6)

        # Clip extreme values to prevent gradient issues
        X = np.clip(X, -1e6, 1e6)
        y = np.clip(y, -1e6, 1e6)

        # Clean NaN/inf in targets
        if np.any(np.isnan(y)) or np.any(np.isinf(y)):
            print(f"Warning: Cleaning NaN/inf in {debug_name} targets")
            y = np.nan_to_num(y, nan=0.0, posinf=1e6, neginf=-1e6)

        return X, y, n_timesteps, n_features

    # Process all datasets
    X_train, y_train, n_timesteps, n_features = extract_features_targets(datasets['train'], "train")
    X_val, y_val, _, _ = extract_features_targets(datasets['val'], "validation")
    X_test, y_test, _, _ = extract_features_targets(datasets['test'], "test")

    # Data validation
    print(f"\nData validation:")
    print(f"Train: X{X_train.shape}, y{y_train.shape}")
    print(f"Val:   X{X_val.shape}, y{y_val.shape}")
    print(f"Test:  X{X_test.shape}, y{y_test.shape}")

    # Convert to tensors
    data = {
        'X_train': torch.FloatTensor(X_train),
        'y_train': torch.FloatTensor(y_train),
        'X_val': torch.FloatTensor(X_val),
        'y_val': torch.FloatTensor(y_val),
        'X_test': torch.FloatTensor(X_test),
        'y_test': torch.FloatTensor(y_test),
        'scaler': scaler,
        'n_timesteps': n_timesteps,
        'n_features': n_features
    }

    return data

# ================================
# 2. SIMPLIFIED TRANSFORMER MODEL
# ================================

class SimplePositionalEncoding(nn.Module):
    """
    Fixed sinusoidal positional encoding followed by dropout.

    A table of shape ``(1, max_len, d_model)`` is precomputed (sine on even
    channel indices, cosine on odd ones) and registered as the buffer ``pe``.
    ``forward`` adds its first ``x.size(1)`` rows to the input.
    """
    def __init__(self, d_model, max_len=1000, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        positions = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        rates = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = positions * rates

        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(angles)
        # An odd d_model has one cosine channel fewer than sine channels,
        # so only the first d_model // 2 rates are used for the cosine part.
        table[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        # Buffer: follows .to(device) and the state dict, but is not trained
        self.register_buffer('pe', table.unsqueeze(0))

    def forward(self, x):
        # Dimension 1 of x is the sequence axis; add the matching table rows
        return self.dropout(x + self.pe[:, :x.size(1), :])

class SimpleLOBTransformer(nn.Module):
    """
    Transformer-encoder regressor for windows of LOB features.

    Pipeline: LayerNorm over the input features -> linear projection to
    ``d_model`` -> sinusoidal positional encoding -> ``n_layers`` encoder
    layers (batch-first, GELU) -> mean over the sequence axis -> 3-layer MLP
    head producing one value per sample.

    Args:
        input_dim: number of features per timestep.
        d_model: encoder width; must be divisible by ``n_heads``.
        n_heads: attention heads per encoder layer.
        n_layers: number of encoder layers.
        d_ff: feed-forward width inside each encoder layer.
        dropout: dropout probability used throughout.
        max_seq_len: longest sequence the positional table supports.
    """
    def __init__(self, input_dim, d_model=128, n_heads=8, n_layers=4, d_ff=512,
                 dropout=0.1, max_seq_len=100):
        super().__init__()

        self.input_dim = input_dim
        self.d_model = d_model

        # Input processing
        self.input_norm = nn.LayerNorm(input_dim)
        self.input_projection = nn.Linear(input_dim, d_model)

        # Positional encoding
        self.pos_encoding = SimplePositionalEncoding(d_model, max_seq_len, dropout)

        # Standard transformer encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_ff,
            dropout=dropout,
            batch_first=True,
            activation='gelu'
        )

        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)

        # Output layers
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.output_head = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, d_model // 4),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 4, 1)
        )

        # Initialize weights (applied to every submodule)
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-uniform weights / zero bias for Linear; unit scale for LayerNorm."""
        if isinstance(module, nn.Linear):
            torch.nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.LayerNorm):
            torch.nn.init.ones_(module.weight)
            torch.nn.init.zeros_(module.bias)

    def forward(self, x, mask=None):
        """Predict one value per sample.

        Args:
            x: input of shape [batch, seq_len, input_dim].
            mask: optional key-padding mask of shape [batch, seq_len], passed
                to the encoder as ``src_key_padding_mask`` (non-zero / True
                marks a padded position).

        Returns:
            Tensor of shape [batch].
        """
        # Input processing
        x = self.input_norm(x)
        x = self.input_projection(x)

        # Add positional encoding
        x = self.pos_encoding(x)

        # Transformer encoding
        x = self.transformer_encoder(x, src_key_padding_mask=mask)

        if mask is None:
            # Global average pooling over the whole sequence
            x = x.transpose(1, 2)  # [batch, features, seq_len]
            x = self.global_pool(x).squeeze(-1)  # [batch, features]
        else:
            # Average only over real timesteps; a plain mean would also mix in
            # the encoder outputs at padded positions.
            padded = mask.to(torch.bool).unsqueeze(-1)  # [batch, seq_len, 1]
            kept = (~padded).to(x.dtype)
            x = x.masked_fill(padded, 0.0).sum(dim=1) / kept.sum(dim=1).clamp(min=1.0)

        # Output prediction
        output = self.output_head(x).squeeze(-1)

        return output

# ================================
# 3. SIMPLIFIED TRAINER
# ================================

class SimpleTrainer:
    """
    Training and evaluation loop for a regression model.

    Uses MSE loss, AdamW, a ReduceLROnPlateau scheduler driven by the
    validation loss, gradient-norm clipping and early stopping. After
    training, the weights with the lowest validation loss are restored and
    every split is evaluated.

    ``config`` must provide the keys ``learning_rate``, ``weight_decay``,
    ``num_epochs`` and ``patience``.
    """
    def __init__(self, model, train_loader, val_loader, test_loader, config):
        self.model = model.to(device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.config = config

        # Loss function
        self.criterion = nn.MSELoss()

        # Optimizer
        self.optimizer = optim.AdamW(
            model.parameters(),
            lr=config['learning_rate'],
            weight_decay=config['weight_decay']
        )

        # Learning rate scheduler: halve the LR after 5 epochs without a
        # better validation loss. The deprecated ``verbose`` flag is not
        # passed; ``train`` reports learning-rate changes itself.
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer,
            mode='min',
            factor=0.5,
            patience=5
        )

        # Tracking
        self.train_losses = []
        self.val_losses = []
        self.best_val_loss = float('inf')
        self.best_model_state = None
        self.patience_counter = 0

    @staticmethod
    def _mean_loss(total_loss, n_batches):
        """
        Average loss over the batches that were actually processed.

        Returns NaN when no batch was processed, so that "nothing evaluated"
        is never mistaken for a perfect loss of 0.
        """
        if n_batches == 0:
            return float('nan')
        return total_loss / n_batches

    def train_epoch(self):
        """
        Run one optimisation pass over ``self.train_loader``.

        Batches whose inputs, targets, outputs or loss contain NaN are
        skipped, as are batches that raise ``RuntimeError`` during the
        backward/step phase.

        Returns:
            (avg_loss, predictions, targets) over the batches that were used.
        """
        self.model.train()
        total_loss = 0
        all_preds = []
        all_targets = []
        valid_batches = 0

        for batch_idx, (X, y) in enumerate(self.train_loader):
            X, y = X.to(device), y.to(device)

            # Skip problematic batches
            if torch.isnan(X).any() or torch.isnan(y).any():
                continue

            self.optimizer.zero_grad()

            # Forward pass
            outputs = self.model(X)

            # Skip if output contains NaN
            if torch.isnan(outputs).any():
                continue

            loss = self.criterion(outputs, y)

            # Skip if loss is NaN
            if torch.isnan(loss):
                continue

            # Backward pass
            try:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
                self.optimizer.step()

                total_loss += loss.item()
                all_preds.extend(outputs.detach().cpu().numpy())
                all_targets.extend(y.detach().cpu().numpy())
                valid_batches += 1

            except RuntimeError as e:
                print(f"Skipping batch {batch_idx} due to error: {e}")
                continue

        avg_loss = self._mean_loss(total_loss, valid_batches)
        return avg_loss, all_preds, all_targets

    def validate(self):
        """Evaluate the model on ``self.val_loader`` (see ``evaluate_loader``)."""
        return self.evaluate_loader(self.val_loader)

    def train(self):
        """
        Train for up to ``config['num_epochs']`` epochs with early stopping.

        Stops once the validation loss has failed to improve for
        ``config['patience']`` consecutive epochs, restores the best weights
        seen and returns the result of ``final_evaluation``.
        """
        print("Starting training...")
        start_time = time.time()

        for epoch in range(self.config['num_epochs']):
            # Training
            train_loss, train_preds, train_targets = self.train_epoch()
            self.train_losses.append(train_loss)

            # Validation
            val_loss, val_preds, val_targets = self.validate()
            self.val_losses.append(val_loss)

            # Learning rate scheduling; report changes explicitly because the
            # scheduler itself is created without the ``verbose`` flag.
            lr_before = [group['lr'] for group in self.optimizer.param_groups]
            self.scheduler.step(val_loss)
            lr_after = [group['lr'] for group in self.optimizer.param_groups]
            if lr_after != lr_before:
                print(f"Learning rate changed: {lr_before} -> {lr_after}")

            # Calculate R^2 for monitoring
            train_r2 = r2_score(train_targets, train_preds) if len(train_targets) > 0 else 0
            val_r2 = r2_score(val_targets, val_preds) if len(val_targets) > 0 else 0

            # Early stopping check (a NaN validation loss never counts as
            # an improvement because the comparison is False)
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                self.best_model_state = copy.deepcopy(self.model.state_dict())
                self.patience_counter = 0
            else:
                self.patience_counter += 1

            # Print progress
            print(f"Epoch {epoch+1:3d}/{self.config['num_epochs']} | "
                  f"Train Loss: {train_loss:.6f} | Train R¬≤: {train_r2:.4f} | "
                  f"Val Loss: {val_loss:.6f} | Val R¬≤: {val_r2:.4f}")

            # Early stopping
            if self.patience_counter >= self.config['patience']:
                print(f"Early stopping at epoch {epoch+1}")
                break

        # Load best model
        if self.best_model_state is not None:
            self.model.load_state_dict(self.best_model_state)

        training_time = time.time() - start_time
        print(f"Training completed in {training_time/60:.2f} minutes")

        return self.final_evaluation()

    def evaluate_loader(self, loader):
        """
        Evaluate the model on ``loader`` without gradient tracking.

        Batches whose inputs, targets, outputs or loss contain NaN are
        skipped entirely, so the returned predictions, targets and loss
        always describe the same set of batches.

        Returns:
            (avg_loss, predictions, targets); ``avg_loss`` is NaN when no
            batch could be evaluated.
        """
        self.model.eval()
        total_loss = 0
        all_preds = []
        all_targets = []
        valid_batches = 0

        with torch.no_grad():
            for X, y in loader:
                X, y = X.to(device), y.to(device)

                if torch.isnan(X).any() or torch.isnan(y).any():
                    continue

                outputs = self.model(X)

                if torch.isnan(outputs).any():
                    continue

                loss = self.criterion(outputs, y)

                if torch.isnan(loss):
                    continue

                total_loss += loss.item()
                all_preds.extend(outputs.cpu().numpy())
                all_targets.extend(y.cpu().numpy())
                valid_batches += 1

        avg_loss = self._mean_loss(total_loss, valid_batches)
        return avg_loss, all_preds, all_targets

    def calculate_core_metrics(self, y_true, y_pred):
        """
        Compute MSE, RMSE, MAE, R^2 and the tolerance-based accuracy.

        Returns:
            dict with keys ``'targets'``, ``'predictions'`` (both as numpy
            arrays) and ``'metrics'`` (a dict with ``MSE``, ``RMSE``,
            ``MAE``, ``R2`` and ``Accuracy``). When either input is empty the
            same structure is returned with every metric set to NaN.
        """
        if len(y_true) == 0 or len(y_pred) == 0:
            # Keep the same shape as the normal result so that callers which
            # index ['metrics'] do not fail on an empty split.
            nan = float('nan')
            return {
                'targets': np.array(y_true),
                'predictions': np.array(y_pred),
                'metrics': {
                    'MSE': nan,
                    'RMSE': nan,
                    'MAE': nan,
                    'R2': nan,
                    'Accuracy': nan
                }
            }

        y_true, y_pred = np.array(y_true), np.array(y_pred)

        mse = mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_true, y_pred)
        r2 = r2_score(y_true, y_pred)
        acc = regression_accuracy(y_true, y_pred, tolerance=0.02)

        return {
            'targets': y_true,
            'predictions': y_pred,
            'metrics': {
                'MSE': mse,
                'RMSE': rmse,
                'MAE': mae,
                'R2': r2,
                'Accuracy': acc
            }
        }


    def final_evaluation(self):
        """
        Evaluate the train, validation and test loaders and print one summary
        line per split.

        Returns:
            dict keyed by ``'train'``, ``'val'`` and ``'test'``; each value
            holds ``'loss'``, ``'predictions'``, ``'targets'`` and
            ``'metrics'``.
        """
        print("\n" + "="*60)
        print("FINAL EVALUATION - CORE METRICS")
        print("="*60)

        results = {}
        for name, loader in [('train', self.train_loader), ('val', self.val_loader), ('test', self.test_loader)]:
            loss, preds, targets = self.evaluate_loader(loader)
            metrics_result = self.calculate_core_metrics(targets, preds)
            metrics = metrics_result['metrics']

            results[name] = {
                'loss': loss,
                'predictions': preds,
                'targets': targets,
                'metrics': metrics
            }

            print(f"{name.capitalize():5} | MSE: {metrics['MSE']:.6f} | RMSE: {metrics['RMSE']:.6f} | "
                  f"MAE: {metrics['MAE']:.6f} | R¬≤: {metrics['R2']:.4f} | Accuracy: {metrics['Accuracy']:.2%}")

        return results

def print_formatted_metrics(results):
    """
    Print the evaluation metrics of every split as an aligned table.

    ``results`` must contain the keys ``'train'``, ``'val'`` and ``'test'``;
    each entry needs a ``'metrics'`` dict with ``MSE``, ``RMSE``, ``MAE``,
    ``R2`` and ``Accuracy``. Accuracy is stored as a fraction and shown as a
    percentage.
    """
    rule = "=" * 66
    print("\n" + rule)
    print("FINAL METRICS SUMMARY")
    print(rule)
    print(f"{'Dataset':<10} | {'MSE':>8} | {'RMSE':>8} | {'MAE':>8} | {'R¬≤':>8} | {'Accuracy':>9}")
    print("-" * 66)

    row_template = "{:<10} | {:8.5f} | {:8.4f} | {:8.4f} | {:8.4f} | {:8.2f}%"
    for split in ('train', 'val', 'test'):
        split_metrics = results[split]['metrics']
        values = [split_metrics[key] for key in ('MSE', 'RMSE', 'MAE', 'R2')]
        accuracy_pct = split_metrics['Accuracy'] * 100
        print(row_template.format(split.capitalize(), *values, accuracy_pct))

    print(rule)

# ================================
# 4. SIMPLE VISUALIZATION
# ================================

# def create_simple_visualizations(trainer, results, save_dir=None):
#     """
#     Create simple visualizations focusing on core metrics
#     """
#     # 1. Training curves
#     fig, axes = plt.subplots(1, 2, figsize=(12, 5))
#     fig.suptitle('Training Progress', fontsize=14, fontweight='bold')

#     epochs = range(1, len(trainer.train_losses) + 1)

#     # Loss curves
#     axes[0].plot(epochs, trainer.train_losses, 'b-', label='Train Loss', linewidth=2)
#     axes[0].plot(epochs, trainer.val_losses, 'r-', label='Val Loss', linewidth=2)
#     axes[0].set_title('Loss Curves')
#     axes[0].set_xlabel('Epoch')
#     axes[0].set_ylabel('Loss')
#     axes[0].legend()
#     axes[0].grid(True, alpha=0.3)

#     # Core metrics comparison
#     splits = ['Train', 'Val', 'Test']
#     r2_scores = [results[s.lower()]['metrics']['R2'] for s in splits]

#     bars = axes[1].bar(splits, r2_scores, color=['blue', 'orange', 'red'], alpha=0.7)
#     axes[1].set_title('R¬≤ Scores')
#     axes[1].set_ylabel('R¬≤ Score')
#     axes[1].set_ylim(0, 1)

#     for bar, score in zip(bars, r2_scores):
#         axes[1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
#                     f'{score:.3f}', ha='center', va='bottom', fontweight='bold')

#     plt.tight_layout()
#     if save_dir:
#         plt.savefig(os.path.join(save_dir, 'training_results.png'), dpi=300, bbox_inches='tight')
#     # plt.show()
#     plt.close()

#     # 2. Predictions scatter plot
#     fig, axes = plt.subplots(1, 3, figsize=(15, 5))
#     fig.suptitle('Predictions vs Actual', fontsize=14, fontweight='bold')

#     for i, (split, data) in enumerate(results.items()):
#         targets = np.array(data['targets'])
#         predictions = np.array(data['predictions'])

#         if len(targets) == 0:
#             continue

#         axes[i].scatter(targets, predictions, alpha=0.6, s=20)
#         min_val, max_val = min(targets.min(), predictions.min()), max(targets.max(), predictions.max())
#         axes[i].plot([min_val, max_val], [min_val, max_val], 'r--', linewidth=2)
#         axes[i].set_xlabel('Actual')
#         axes[i].set_ylabel('Predicted')
#         axes[i].set_title(f'{split.capitalize()} (R¬≤={data["metrics"]["R2"]:.3f})')
#         axes[i].grid(True, alpha=0.3)

#     plt.tight_layout()
#     if save_dir:
#         plt.savefig(os.path.join(save_dir, 'predictions_scatter.png'), dpi=300, bbox_inches='tight')
#     # plt.show()
#     plt.close()

def _save_loss_plot(trainer, save_dir):
    """Write the train/validation loss curves to ``loss_plot.png``."""
    try:
        loss_fig = plt.figure(figsize=(6, 4), dpi=100)
        plt.plot(trainer.train_losses, label='Train Loss', linewidth=2)
        plt.plot(trainer.val_losses, label='Val Loss', linewidth=2)
        plt.title('Training & Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.grid(True, alpha=0.3)
        plt.legend()
        loss_path = os.path.join(save_dir, 'loss_plot.png')
        loss_fig.savefig(loss_path, bbox_inches='tight')
        print(f"[‚úî] Saved loss plot to: {loss_path}")
        plt.close(loss_fig)
    except Exception as e:
        print(f"[‚ùå] Failed to save loss plot: {e}")
        plt.close()


def _save_r2_plot(results, save_dir):
    """Write a bar chart of the per-split R^2 scores to ``r2_scores.png``."""
    try:
        bar_fig = plt.figure(figsize=(5, 4), dpi=100)
        split_labels = ['Train', 'Val', 'Test']
        r2_scores = [results[label.lower()]['metrics']['R2'] for label in split_labels]
        plt.bar(['Train', 'Val', 'Test'], r2_scores, color=['blue', 'orange', 'red'], alpha=0.7)
        # Annotate every bar with its value just above the bar top
        for x_pos, score in enumerate(r2_scores):
            plt.text(x_pos, score + 0.01, f'{score:.3f}', ha='center', fontweight='bold')
        plt.title('R¬≤ Scores')
        # Extend the lower limit below zero when any score is negative
        plt.ylim(min(0, min(r2_scores)), 1)
        plt.ylabel('R¬≤ Score')
        bar_path = os.path.join(save_dir, 'r2_scores.png')
        bar_fig.savefig(bar_path, bbox_inches='tight')
        print(f"[‚úî] Saved R¬≤ plot to: {bar_path}")
        plt.close(bar_fig)
    except Exception as e:
        print(f"[‚ùå] Failed to save R¬≤ plot: {e}")
        plt.close()


def _save_scatter_plot(results, save_dir):
    """Write predicted-vs-actual scatter panels to ``predictions_scatter.png``."""
    try:
        fig, axes = plt.subplots(1, 3, figsize=(15, 5))
        fig.suptitle('Predictions vs Actual', fontsize=14, fontweight='bold')

        for panel_idx, (split, data) in enumerate(results.items()):
            panel = axes[panel_idx]
            targets = np.array(data['targets'])
            predictions = np.array(data['predictions'])

            if len(targets) == 0 or len(predictions) == 0:
                # Nothing to draw for this split: label the panel and hide its axes
                panel.set_title(f"{split.capitalize()} (No Data)")
                panel.axis('off')
            else:
                panel.scatter(targets, predictions, alpha=0.6, s=20)
                # Diagonal reference line spanning the joint value range
                low = min(targets.min(), predictions.min())
                high = max(targets.max(), predictions.max())
                panel.plot([low, high], [low, high], 'r--', linewidth=2)
                panel.set_xlabel('Actual')
                panel.set_ylabel('Predicted')
                panel.set_title(f'{split.capitalize()} (R¬≤={data["metrics"]["R2"]:.3f})')
                panel.grid(True, alpha=0.3)

        plt.tight_layout()
        scatter_path = os.path.join(save_dir, 'predictions_scatter.png')
        plt.savefig(scatter_path, dpi=300, bbox_inches='tight')
        print(f"[‚úî] Saved prediction scatter plot to: {scatter_path}")
        plt.close()
    except Exception as e:
        print(f"[‚ùå] Failed to create/save scatter plot: {e}")
        plt.close()


def create_simple_visualizations(trainer, results, save_dir=None):
    """
    Save the training diagnostics as PNG files without displaying them.

    Three images are written into ``save_dir`` (created if missing):
    ``loss_plot.png``, ``r2_scores.png`` and ``predictions_scatter.png``.
    Each figure is produced independently; a failure in one is reported on
    stdout and does not prevent the others. When ``save_dir`` is ``None``
    nothing is plotted.

    Args:
        trainer: object exposing ``train_losses`` and ``val_losses``.
        results: mapping with ``'train'``, ``'val'`` and ``'test'`` entries,
            each holding ``'targets'``, ``'predictions'`` and
            ``'metrics'['R2']``.
        save_dir: output directory, or ``None`` to skip.
    """
    if save_dir is None:
        print("[‚ùó] Save directory not provided. Skipping visualizations.")
        return

    os.makedirs(save_dir, exist_ok=True)
    plt.ioff()  # Turn off interactive mode

    _save_loss_plot(trainer, save_dir)
    _save_r2_plot(results, save_dir)
    _save_scatter_plot(results, save_dir)


# ================================
# 5. CONFIGURATION
# ================================

def get_simple_config(data_info, risk_level='medium'):
    """
    Pick a hyper-parameter preset for the given data size.

    Args:
        data_info: mapping that may contain ``n_train_samples`` (default
            1000), ``n_features`` (default 10) and ``n_timesteps``
            (default 10).
        risk_level: ``'low'``, ``'medium'``, ``'high'`` or ``'auto'``. With
            ``'auto'`` the preset is chosen from the ratio
            samples / (features * timesteps): below 5 -> ``'high'``,
            below 15 -> ``'medium'``, otherwise ``'low'``.

    Returns:
        A new dict with the preset's hyper-parameters. ``batch_size`` is
        capped at ``max(4, n_train_samples // 10)``.

    Raises:
        KeyError: if ``risk_level`` is not one of the names above.
    """
    sample_count = data_info.get('n_train_samples', 1000)
    feature_count = data_info.get('n_features', 10)
    timestep_count = data_info.get('n_timesteps', 10)

    ratio = sample_count / (feature_count * timestep_count)

    print(f"Data analysis:")
    print(f"  Samples: {sample_count}")
    print(f"  Features per timestep: {feature_count}")
    print(f"  Timesteps: {timestep_count}")
    print(f"  Sample/Feature ratio: {ratio:.2f}")

    # One row per preset; the columns follow ``fields`` so the resulting
    # dicts keep this key order.
    fields = (
        'd_model', 'n_heads', 'n_layers', 'd_ff', 'dropout',
        'learning_rate', 'weight_decay', 'batch_size', 'num_epochs', 'patience',
    )
    presets = {
        'low': (256, 8, 6, 1024, 0.5, 0.001, 1e-5, 32, 15, 5),
        'medium': (128, 8, 4, 512, 0.5, 0.0005, 1e-4, 16, 15, 5),
        'high': (32, 2, 2, 128, 0.5, 0.0001, 1e-3, 8, 30, 5),
    }

    if risk_level == 'auto':
        # Fewer samples per input value -> smaller, more regularised preset
        risk_level = 'high' if ratio < 5 else ('medium' if ratio < 15 else 'low')

    config = dict(zip(fields, presets[risk_level]))
    config['batch_size'] = min(config['batch_size'], max(4, sample_count // 10))

    print(f"Using '{risk_level}' risk configuration")

    return config

# ================================
# 6. MAIN PIPELINE
# ================================

def run_simple_lob_transformer(data_dir, risk_level='auto', save_dir=None):
    """
    End-to-end pipeline: load data, configure, train, save and report.

    Steps, in order: load the preprocessed data from ``data_dir``, pick a
    configuration with ``get_simple_config``, build the three data loaders,
    create a ``SimpleLOBTransformer``, train it with ``SimpleTrainer``, save
    the checkpoint (and the scaler, when one was loaded) into ``save_dir``,
    write the visualizations and print a performance summary.

    Args:
        data_dir: directory passed to ``load_preprocessed_csv_data``.
        risk_level: preset name forwarded to ``get_simple_config``.
        save_dir: output directory; created if missing. When ``None`` a
            hard-coded Kaggle working path is used.

    Returns:
        Tuple ``(model, trainer, results)`` where ``results`` is the value
        returned by ``SimpleTrainer.train``.
    """
    print("="*80)
    print("SIMPLE LOB TRANSFORMER - CORE METRICS ONLY")
    print("="*80)

    # Create save directory
    # NOTE(review): the fallback is a Kaggle-specific path; pass save_dir
    # explicitly when running elsewhere.
    if save_dir is None:
        save_dir = '/kaggle/working/dataset_setelah_modeling_no_dropout/TPIA'
    os.makedirs(save_dir, exist_ok=True)

    # 1. Load data
    print("\n1. Loading data...")
    data = load_preprocessed_csv_data(data_dir, debug=True)

    # 2. Get configuration
    print("\n2. Configuration...")
    data_info = {
        'n_train_samples': len(data['X_train']),
        'n_features': data['n_features'],
        'n_timesteps': data['n_timesteps']
    }

    config = get_simple_config(data_info, risk_level)

    print(f"\nFinal configuration:")
    for key, value in config.items():
        print(f"  {key}: {value}")

    # 3. Create data loaders (only the training loader is shuffled)
    print("\n3. Creating data loaders...")
    train_dataset = TensorDataset(data['X_train'], data['y_train'])
    val_dataset = TensorDataset(data['X_val'], data['y_val'])
    test_dataset = TensorDataset(data['X_test'], data['y_test'])

    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False)

    print(f"  Train batches: {len(train_loader)}")
    print(f"  Val batches: {len(val_loader)}")
    print(f"  Test batches: {len(test_loader)}")

    # 4. Create model
    print("\n4. Creating model...")
    model = SimpleLOBTransformer(
        input_dim=data['n_features'],
        d_model=config['d_model'],
        n_heads=config['n_heads'],
        n_layers=config['n_layers'],
        d_ff=config['d_ff'],
        dropout=config['dropout'],
        max_seq_len=data['n_timesteps']
    )

    total_params = sum(p.numel() for p in model.parameters())
    print(f"  Total parameters: {total_params:,}")

    # 5. Train model (train() also runs the final evaluation on all splits)
    print("\n5. Training...")
    trainer = SimpleTrainer(model, train_loader, val_loader, test_loader, config)
    results = trainer.train()
    print_formatted_metrics(results)


    # 6. Save model: the checkpoint stores the weights together with the
    # configuration and the constructor arguments needed to rebuild the model.
    print("\n6. Saving model...")
    model_path = os.path.join(save_dir, 'simple_lob_transformer.pth')
    torch.save({
        'model_state_dict': model.state_dict(),
        'config': config,
        'data_info': data_info,
        'model_architecture': {
            'input_dim': data['n_features'],
            'd_model': config['d_model'],
            'n_heads': config['n_heads'],
            'n_layers': config['n_layers'],
            'd_ff': config['d_ff'],
            'dropout': config['dropout'],
            'max_seq_len': data['n_timesteps']
        }
    }, model_path)

    # Store the scaler next to the checkpoint when one was loaded
    if data['scaler'] is not None:
        scaler_path = os.path.join(save_dir, 'model_scaler.pkl')
        with open(scaler_path, 'wb') as f:
            pickle.dump(data['scaler'], f)

    print(f"Model saved to: {model_path}")

    # 7. Create visualizations
    print("\n7. Creating visualizations...")
    create_simple_visualizations(trainer, results, save_dir)

    # 8. Final summary
    print("\n" + "="*80)
    print("FINAL PERFORMANCE SUMMARY")
    print("="*80)

    test_metrics = results['test']['metrics']
    print(f"\nTest Set Performance:")
    print(f"  MSE:  {test_metrics['MSE']:.6f}")
    print(f"  RMSE: {test_metrics['RMSE']:.6f}")
    print(f"  MAE:  {test_metrics['MAE']:.6f}")
    print(f"  R¬≤:   {test_metrics['R2']:.4f}")
    print(f"Accuracy: {test_metrics['Accuracy']:.2%}")  # the .2% format multiplies the fraction by 100 itself


    # Overfitting analysis: compare train and validation R^2
    train_r2 = results['train']['metrics']['R2']
    val_r2 = results['val']['metrics']['R2']
    test_r2 = results['test']['metrics']['R2']

    print(f"\nGeneralization Analysis:")
    print(f"  Train R¬≤: {train_r2:.4f}")
    print(f"  Val R¬≤:   {val_r2:.4f}")
    print(f"  Test R¬≤:  {test_r2:.4f}")

    # gap > 0.1: train is clearly better than validation.
    # gap < -0.05: validation is better than train.
    # NOTE(review): the second case is labelled "Underfitting"; confirm that
    # this wording is intended.
    overfitting_gap = train_r2 - val_r2
    if overfitting_gap > 0.1:
        print(f"  ‚ö†Ô∏è  Overfitting detected (gap: {overfitting_gap:.4f})")
    elif overfitting_gap < -0.05:
        print(f"  ‚ö†Ô∏è  Underfitting detected (gap: {overfitting_gap:.4f})")
    else:
        print(f"  ‚úÖ Good generalization (gap: {overfitting_gap:.4f})")

    return model, trainer, results

# ================================
# 7. PREDICTION INTERFACE
# ================================

class SimpleLOBPredictor:
    """
    Inference wrapper around a saved ``SimpleLOBTransformer`` checkpoint.

    The checkpoint must contain ``'model_architecture'``,
    ``'model_state_dict'`` and ``'config'``. An optional pickled scaler is
    loaded into ``self.scaler``; ``predict`` itself does not apply it.
    """
    def __init__(self, model_path, scaler_path=None):
        checkpoint = torch.load(model_path, map_location=device)

        # Rebuild the network from the constructor arguments stored in the checkpoint
        arch = checkpoint['model_architecture']
        arch_keys = ('input_dim', 'd_model', 'n_heads', 'n_layers',
                     'd_ff', 'dropout', 'max_seq_len')
        self.model = SimpleLOBTransformer(**{key: arch[key] for key in arch_keys})

        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.to(device)
        self.model.eval()

        # Optional scaler.
        # NOTE: pickle.load executes arbitrary code from the file; only load
        # scaler files from a trusted source.
        self.scaler = None
        if scaler_path and os.path.exists(scaler_path):
            with open(scaler_path, 'rb') as scaler_file:
                self.scaler = pickle.load(scaler_file)

        self.config = checkpoint['config']
        print(f"Model loaded successfully!")
        print(f"Input shape expected: (batch_size, {arch['max_seq_len']}, {arch['input_dim']})")

    def predict(self, X):
        """
        Run the model on ``X`` and return the result as a squeezed numpy array.

        Non-tensor input is converted with ``torch.FloatTensor``; a 2-D input
        is treated as a single sequence and gets a leading batch dimension.
        """
        batch = X if torch.is_tensor(X) else torch.FloatTensor(X)

        if batch.dim() == 2:
            batch = batch.unsqueeze(0)

        batch = batch.to(device)

        self.model.eval()
        with torch.no_grad():
            return self.model(batch).cpu().numpy().squeeze()

    def predict_next_price(self, lob_sequence):
        """
        Return the model output for one LOB sequence as a Python float.

        The conversion requires ``predict`` to yield a single value.
        """
        return float(self.predict(lob_sequence))

# ================================
# 8. UTILITY FUNCTIONS
# ================================

def compare_models_performance(results_dict):
    """
    Print and return a table comparing the test metrics of several models.

    Args:
        results_dict: mapping of model name to a results dict that contains
            ``['test']['metrics']`` with ``MSE``, ``RMSE``, ``MAE``, ``R2``
            and ``Accuracy``.

    Returns:
        DataFrame with one row per metric and one column per model, in the
        iteration order of ``results_dict``. An empty ``results_dict`` yields
        a frame with the five metric rows and no columns.
    """
    print("\n" + "="*80)
    print("MODEL PERFORMANCE COMPARISON")
    print("="*80)

    # (row label, key inside the metrics dict) in display order
    metric_rows = [
        ('MSE', 'MSE'),
        ('RMSE', 'RMSE'),
        ('MAE', 'MAE'),
        ('R¬≤', 'R2'),
        ('Accuracy', 'Accuracy'),
    ]

    # Build the frame in one step so the row labels are attached even when
    # there are no models; assigning the index afterwards fails on an empty
    # frame.
    columns = {}
    for model_name, results in results_dict.items():
        test_metrics = results['test']['metrics']
        columns[model_name] = [test_metrics[key] for _, key in metric_rows]

    metrics_df = pd.DataFrame(columns, index=[label for label, _ in metric_rows])
    print(metrics_df.round(6))

    return metrics_df

def save_results_to_csv(results, save_path):
    """
    Save the test-set targets, predictions and their difference to a CSV file.

    Args:
        results: either the full results mapping (with a ``'test'`` entry)
            or a single split's dict holding ``'targets'`` and
            ``'predictions'``.
        save_path: destination CSV path.

    The file has the columns ``actual``, ``predicted`` and ``error``
    (``actual - predicted``).
    """
    # Accept both the full results mapping and one split's dict
    test_data = results['test'] if 'test' in results else results

    # Flatten to 1-D so that column-vector targets (one value per row) line
    # up with the predictions instead of broadcasting into a matrix.
    actual = np.asarray(test_data['targets']).reshape(-1)
    predicted = np.asarray(test_data['predictions']).reshape(-1)

    df = pd.DataFrame({
        'actual': actual,
        'predicted': predicted,
        'error': actual - predicted
    })

    df.to_csv(save_path, index=False)
    print(f"Results saved to: {save_path}")

def calculate_percentage_improvement(baseline_r2, new_r2):
    """
    Relative change of an R^2 score with respect to a baseline, in percent.

    For a baseline that is zero or negative no ratio is computed: the result
    is ``inf`` when the new score is positive and ``0`` otherwise.
    """
    if baseline_r2 <= 0:
        if new_r2 > 0:
            return float('inf')
        return 0

    delta = new_r2 - baseline_r2
    return (delta / abs(baseline_r2)) * 100


def regression_accuracy(y_true, y_pred, tolerance=0.2, min_absolute_tolerance=0.1):
    """
    Fraction of predictions that fall within a tolerance band around the target.

    A prediction counts as correct when its absolute error is at most
    ``max(tolerance * |target|, min_absolute_tolerance)``. Inputs are
    flattened; pairs in which either value is NaN or infinite are ignored.

    Returns:
        A value between 0 and 1 (a fraction, not a percentage); ``0.0`` when
        no valid pair remains.
    """
    actual = np.array(y_true).reshape(-1)
    predicted = np.array(y_pred).reshape(-1)

    # Keep only the pairs in which both values are finite
    finite = np.isfinite(actual) & np.isfinite(predicted)
    actual, predicted = actual[finite], predicted[finite]

    if actual.size == 0:
        return 0.0

    # Relative band with an absolute floor, so targets near zero still get
    # a usable tolerance
    allowed = np.maximum(tolerance * np.abs(actual), min_absolute_tolerance)
    within = np.abs(predicted - actual) <= allowed

    return np.mean(within)


# ================================
# 9. BATCH EVALUATION
# ================================

def evaluate_multiple_configurations(data_dir, configs_to_test=None, save_dir=None):
    """
    Train one model per risk preset and report which one scores best.

    Args:
        data_dir: directory with the preprocessed data.
        configs_to_test: preset names to try; defaults to
            ``['low', 'medium', 'high']``.
        save_dir: root output directory (default ``'./model_comparison'``).
            Each preset is saved under ``model_<preset>`` inside it and the
            comparison table is written to ``configuration_comparison.csv``.

    Returns:
        ``(results_dict, best_config)`` where ``best_config`` is the preset
        with the highest test R^2, or ``({}, None)`` when every preset failed.
        A preset that raises is reported on stdout and skipped.
    """
    configs_to_test = ['low', 'medium', 'high'] if configs_to_test is None else configs_to_test
    save_dir = './model_comparison' if save_dir is None else save_dir
    os.makedirs(save_dir, exist_ok=True)

    results_dict = {}

    for risk_level in configs_to_test:
        print(f"\n{'='*60}")
        print(f"TESTING CONFIGURATION: {risk_level.upper()}")
        print(f"{'='*60}")

        try:
            preset_dir = os.path.join(save_dir, f'model_{risk_level}')
            _model, _trainer, preset_results = run_simple_lob_transformer(
                data_dir=data_dir,
                risk_level=risk_level,
                save_dir=preset_dir
            )
            results_dict[risk_level] = preset_results

        except Exception as e:
            # Best effort: one failing preset must not abort the comparison
            print(f"Error with {risk_level} configuration: {e}")
            continue

    if not results_dict:
        print("No successful configurations to compare!")
        return {}, None

    # Compare results
    print(f"\n{'='*80}")
    print("CONFIGURATION COMPARISON")
    print(f"{'='*80}")

    comparison_df = compare_models_performance(results_dict)

    # The winner is the column with the largest value in the R^2 row
    r2_row = comparison_df.loc['R¬≤']
    best_config = r2_row.idxmax()
    best_r2 = r2_row.max()

    print(f"\nBest Configuration: {best_config}")
    print(f"Best R¬≤ Score: {best_r2:.4f}")

    # Save comparison
    comparison_df.to_csv(os.path.join(save_dir, 'configuration_comparison.csv'))

    return results_dict, best_config

# ================================
# 10. EXAMPLE USAGE
# ================================

def example_usage():
    """
    Print a usage example covering training, metrics, prediction,
    configuration comparison and CSV export.

    The example is only printed, never executed.
    """
    print("="*60)
    print("EXAMPLE USAGE")
    print("="*60)

    # Step 5 passes the full ``results`` mapping: save_results_to_csv looks
    # up the 'test' split itself.
    example_code = '''
    # 1. Simple training
    model, trainer, results = run_simple_lob_transformer(
        data_dir='/path/to/preprocessed_data',
        risk_level='auto',  # or 'low', 'medium', 'high'
        save_dir='/path/to/save/model'
    )

    # 2. View core metrics
    test_metrics = results['test']['metrics']
    print(f"MSE: {test_metrics['MSE']:.6f}")
    print(f"RMSE: {test_metrics['RMSE']:.6f}")
    print(f"MAE: {test_metrics['MAE']:.6f}")
    print(f"R¬≤: {test_metrics['R2']:.4f}")

    # 3. Make predictions
    predictor = SimpleLOBPredictor(
        model_path='/path/to/save/model/simple_lob_transformer.pth',
        scaler_path='/path/to/save/model/model_scaler.pkl'
    )

    # Predict on new data
    next_price = predictor.predict_next_price(new_lob_sequence)
    print(f"Predicted next mid-price: {next_price}")

    # 4. Test multiple configurations
    results_dict, best_config = evaluate_multiple_configurations(
        data_dir='/path/to/preprocessed_data',
        configs_to_test=['low', 'medium', 'high']
    )

    # 5. Save results for analysis
    save_results_to_csv(results, '/path/to/predictions.csv')
    '''

    print(example_code)

# ================================
# 11. MAIN EXECUTION
# ================================

# Script entry point: trains a single model on the dataset below and prints
# its test-set metrics.
if __name__ == "__main__":
    print("Simplified LOB Transformer Module Loaded!")

    # Show example usage
    # example_usage()

    # Directory with the preprocessed data (a Google Drive path); adjust it
    # for your environment.
    data_dir = '/content/drive/MyDrive/SKRIPSI/dataset_setelah_preprocessing/minMaxScaller/preprocessed_data_minmax_scaller_TPIA'

    # Single model training; save_dir is not passed, so the function's
    # built-in default output directory is used.
    model, trainer, results = run_simple_lob_transformer(
        data_dir=data_dir,
        risk_level='auto'
    )

    # Display final metrics clearly
    print("\n" + "="*50)
    print("CORE METRICS SUMMARY")
    print("="*50)
    test_metrics = results['test']['metrics']
    print(f"MSE:  {test_metrics['MSE']:.6f}")
    print(f"RMSE: {test_metrics['RMSE']:.6f}")
    print(f"MAE:  {test_metrics['MAE']:.6f}")
    print(f"R¬≤:   {test_metrics['R2']:.4f}")
    print("="*50)

Using device: cpu
Simplified LOB Transformer Module Loaded!
SIMPLE LOB TRANSFORMER - CORE METRICS ONLY

1. Loading data...
Loading preprocessed data from: /content/drive/MyDrive/SKRIPSI/dataset_setelah_preprocessing/minMaxScaller/preprocessed_data_minmax_scaller_TPIA
Metadata loaded: ['original_data_shape', 'preprocessed_data_shape', 'window_size', 'n_features', 'n_samples_train', 'n_samples_val', 'n_samples_test', 'scaling_method', 'feature_names', 'timestamp_included', 'creation_date']
Scaler loaded successfully
Train data: 1080 samples from train_data.csv
Val data: 231 samples from val_data.csv
Test data: 232 samples from test_data.csv
train - Timesteps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
train - Features: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53]
train - Shape will be: (1080, 10, 54)
validation - Timesteps: [0, 1, 2, 3, 4, 

## **Modeling Menggunakan Dropout**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import time
import math
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import copy
import os
import json
import pickle
import re
import warnings

# Additional imports for visualization
# Optional plotly imports; PLOTLY_AVAILABLE records whether they succeeded.
try:
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
    import plotly.express as px
    PLOTLY_AVAILABLE = True
except ImportError:
    PLOTLY_AVAILABLE = False
    print("Plotly not available. Interactive visualizations will be skipped.")

# NOTE: this silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Setup device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set visualization style
plt.style.use('default')
try:
    sns.set_palette("husl")
except Exception:
    # The palette is purely cosmetic, so a failure here is ignored on
    # purpose. Catching Exception (rather than a bare except) still lets
    # KeyboardInterrupt and SystemExit propagate.
    pass

# ================================
# 1. SIMPLIFIED DATA LOADER
# ================================

def load_preprocessed_csv_data(data_dir, debug=True):
    """
    Load the preprocessed train/val/test CSV splits and convert them to tensors.

    Args:
        data_dir: Directory containing ``metadata.json``, an optional
            ``scaler.pkl`` and the split files ``train_data.csv``,
            ``val_data.csv`` (or ``validation_data.csv``) and ``test_data.csv``.
        debug: When True, print the timestep/feature indices and the resulting
            array shape of every split.

    Returns:
        dict with float tensors ``X_train``/``X_val``/``X_test`` shaped
        (samples, timesteps, features), the matching ``y_*`` tensors, the
        unpickled ``scaler`` (None when no scaler file exists) and the
        ``n_timesteps``/``n_features`` derived from the training split.

    Raises:
        FileNotFoundError: ``metadata.json`` or one of the splits is missing.
        ValueError: a split has no recognisable target column.
    """
    print(f"Loading preprocessed data from: {data_dir}")

    # Load metadata
    metadata_path = os.path.join(data_dir, 'metadata.json')
    if not os.path.exists(metadata_path):
        raise FileNotFoundError(f"Metadata file not found at {metadata_path}")

    with open(metadata_path, 'r') as f:
        metadata = json.load(f)

    print(f"Metadata loaded: {list(metadata.keys())}")

    # Load scaler (optional)
    scaler_path = os.path.join(data_dir, 'scaler.pkl')
    scaler = None
    if os.path.exists(scaler_path):
        # NOTE(security): pickle.load can execute arbitrary code. Only point
        # data_dir at scaler files produced by your own preprocessing run.
        with open(scaler_path, 'rb') as f:
            scaler = pickle.load(f)
        print("Scaler loaded successfully")

    # Load data files with flexible naming
    datasets = {}
    file_mappings = {
        'train': ['train_data.csv'],
        'val': ['val_data.csv', 'validation_data.csv'],
        'test': ['test_data.csv']
    }

    for split, possible_names in file_mappings.items():
        for file_name in possible_names:
            file_path = os.path.join(data_dir, file_name)
            if os.path.exists(file_path):
                datasets[split] = pd.read_csv(file_path)
                print(f"{split.capitalize()} data: {len(datasets[split])} samples from {file_name}")
                break
        else:
            # No candidate file name exists for this split
            raise FileNotFoundError(f"{split.capitalize()} data not found. Tried: {possible_names}")

    # Column names such as "t3_f17" encode (timestep, feature) positions
    feature_pattern = re.compile(r't(\d+)_f(\d+)')

    def extract_features_targets(df, debug_name=""):
        """Split one DataFrame into a 3D feature array and a target vector."""
        # Find target column
        if 'target' in df.columns:
            target_col = 'target'
        else:
            target_candidates = [col for col in df.columns
                                 if any(x in col.lower() for x in ['mid_price', 'target', 'price'])]
            if not target_candidates:
                raise ValueError(f"No target column found in {debug_name} data")
            target_col = target_candidates[0]
            print(f"Using '{target_col}' as target for {debug_name}")

        y = df[target_col].values

        # Extract features (look for time-feature pattern)
        feature_cols = []
        for col in df.columns:
            match = feature_pattern.search(col)
            if match:
                feature_cols.append((col, int(match.group(1)), int(match.group(2))))

        if feature_cols:
            timesteps = sorted({t for _, t, _ in feature_cols})
            features = sorted({f for _, _, f in feature_cols})
        else:
            # Fallback: use all non-target columns as a single timestep.
            # Indices are assigned AFTER filtering so they are contiguous;
            # numbering by position in df.columns would leave gaps whenever an
            # excluded column precedes a feature, and those features were
            # previously dropped (left as zeros).
            exclude_cols = {'target', 'timestamp', 'index', target_col}
            flat_cols = [col for col in df.columns
                         if col not in exclude_cols
                         and not any(x in col.lower() for x in ['timestamp', 'index'])]
            feature_cols = [(col, 0, i) for i, col in enumerate(flat_cols)]
            print(f"Warning: No time-feature pattern found, using {len(feature_cols)} columns as features")
            timesteps = [0]
            features = list(range(len(feature_cols)))

        # Determine dimensions
        n_timesteps = len(timesteps)
        n_features = len(features)
        n_samples = len(df)

        if debug:
            print(f"{debug_name} - Timesteps: {timesteps}")
            print(f"{debug_name} - Features: {features}")
            print(f"{debug_name} - Shape will be: ({n_samples}, {n_timesteps}, {n_features})")

        # Map raw indices to array positions once instead of re-sorting and
        # linearly searching for every column.
        t_index = {t: i for i, t in enumerate(timesteps)}
        f_index = {f: i for i, f in enumerate(features)}

        # Create and fill 3D array
        X = np.zeros((n_samples, n_timesteps, n_features))
        for col_name, t, f in feature_cols:
            col_data = df[col_name].values
            # Infinite readings are zeroed out
            col_data = np.where(np.isinf(col_data), 0, col_data)
            X[:, t_index[t], f_index[f]] = col_data

        # Handle remaining NaN values (and inf, defensively)
        X = np.nan_to_num(X, nan=0.0, posinf=1e6, neginf=-1e6)

        # Clip extreme values to prevent gradient issues
        X = np.clip(X, -1e6, 1e6)
        y = np.clip(y, -1e6, 1e6)

        # Clean NaN/inf in targets (np.clip leaves NaN untouched)
        if np.any(np.isnan(y)) or np.any(np.isinf(y)):
            print(f"Warning: Cleaning NaN/inf in {debug_name} targets")
            y = np.nan_to_num(y, nan=0.0, posinf=1e6, neginf=-1e6)

        return X, y, n_timesteps, n_features

    # Process all datasets; dimensions are reported from the training split
    X_train, y_train, n_timesteps, n_features = extract_features_targets(datasets['train'], "train")
    X_val, y_val, _, _ = extract_features_targets(datasets['val'], "validation")
    X_test, y_test, _, _ = extract_features_targets(datasets['test'], "test")

    # Data validation
    print(f"\nData validation:")
    print(f"Train: X{X_train.shape}, y{y_train.shape}")
    print(f"Val:   X{X_val.shape}, y{y_val.shape}")
    print(f"Test:  X{X_test.shape}, y{y_test.shape}")

    # Convert to tensors
    data = {
        'X_train': torch.FloatTensor(X_train),
        'y_train': torch.FloatTensor(y_train),
        'X_val': torch.FloatTensor(X_val),
        'y_val': torch.FloatTensor(y_val),
        'X_test': torch.FloatTensor(X_test),
        'y_test': torch.FloatTensor(y_test),
        'scaler': scaler,
        'n_timesteps': n_timesteps,
        'n_features': n_features
    }

    return data

# ================================
# 2. ENHANCED TRANSFORMER MODEL WITH COMPREHENSIVE DROPOUT
# ================================

class EnhancedPositionalEncoding(nn.Module):
    """
    Fixed sinusoidal position table added to the input, with two dropout stages.

    A lighter dropout (half the configured rate) is applied to the incoming
    embeddings, the position table is added, and the full-rate dropout is
    applied to the sum. The table is stored as the non-trainable buffer ``pe``
    of shape (1, max_len, d_model).
    """
    def __init__(self, d_model, max_len=1000, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.embedding_dropout = nn.Dropout(p=dropout * 0.5)  # applied before the table is added

        positions = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        frequencies = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = positions * frequencies

        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine
        # columns, so only the first d_model // 2 frequencies are used.
        table[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        self.register_buffer('pe', table.unsqueeze(0))

    def forward(self, x):
        # Sequence length is read from dimension 1 of the input
        positional = self.pe[:, :x.size(1), :]
        regularised = self.embedding_dropout(x)
        return self.dropout(regularised + positional)

class EnhancedLOBTransformer(nn.Module):
    """
    Transformer-encoder regressor for LOB windows with dropout at every stage.

    Pipeline: input dropout -> LayerNorm -> linear projection to ``d_model`` ->
    dropout -> positional encoding -> Transformer encoder -> dropout ->
    average over the sequence axis -> dropout -> three-stage MLP head that
    halves the width each stage and ends in a single output unit.
    """
    def __init__(self, input_dim, d_model=128, n_heads=8, n_layers=4, d_ff=512,
                 dropout=0.1, input_dropout=0.1, layer_dropout=0.1, output_dropout=0.2,
                 max_seq_len=100):
        super().__init__()

        self.input_dim, self.d_model = input_dim, d_model

        # Input stage
        self.input_dropout = nn.Dropout(p=input_dropout)
        self.input_norm = nn.LayerNorm(input_dim)
        self.input_projection = nn.Linear(input_dim, d_model)
        self.projection_dropout = nn.Dropout(p=dropout)

        # Positional encoding (carries its own dropout)
        self.pos_encoding = EnhancedPositionalEncoding(d_model, max_seq_len, dropout)

        # Encoder stack; batch_first so inputs are (batch, seq, d_model)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=n_heads,
                dim_feedforward=d_ff,
                dropout=dropout,
                batch_first=True,
                activation='gelu'
            ),
            num_layers=n_layers
        )

        # Dropout applied once to the encoder output
        self.inter_layer_dropout = nn.Dropout(p=layer_dropout)

        # Pooling over the sequence axis
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.pool_dropout = nn.Dropout(p=output_dropout * 0.5)

        # Output head: Linear -> LayerNorm -> GELU -> Dropout, three times,
        # with a decreasing dropout rate, followed by the final Linear to 1.
        widths = [d_model, d_model // 2, d_model // 4, d_model // 8]
        rates = [output_dropout, output_dropout * 0.8, output_dropout * 0.6]
        head_layers = []
        for wide, narrow, rate in zip(widths[:-1], widths[1:], rates):
            head_layers.extend([
                nn.Linear(wide, narrow),
                nn.LayerNorm(narrow),
                nn.GELU(),
                nn.Dropout(rate),
            ])
        head_layers.append(nn.Linear(widths[-1], 1))
        self.output_head = nn.Sequential(*head_layers)

        # Initialize weights of every Linear / LayerNorm submodule
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-uniform weights and zero bias for Linear; identity LayerNorm."""
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
        elif isinstance(module, nn.LayerNorm):
            nn.init.ones_(module.weight)
            nn.init.zeros_(module.bias)

    def forward(self, x, mask=None):
        # Input stage: dropout, normalisation, projection, dropout
        embedded = self.input_projection(self.input_norm(self.input_dropout(x)))
        embedded = self.projection_dropout(embedded)

        # Positional information, then the encoder stack; ``mask`` is passed
        # through as the key-padding mask
        encoded = self.transformer_encoder(self.pos_encoding(embedded),
                                           src_key_padding_mask=mask)
        encoded = self.inter_layer_dropout(encoded)

        # Pool over the sequence: transpose so the pooled axis is last
        pooled = self.global_pool(encoded.transpose(1, 2)).squeeze(-1)
        pooled = self.pool_dropout(pooled)

        # One value per sample
        return self.output_head(pooled).squeeze(-1)

# ================================
# 3. ENHANCED TRAINER WITH REGULARIZATION
# ================================

class EnhancedTrainer:
    """
    Training loop with AdamW, optional L1 penalty, gradient clipping,
    plateau-based learning-rate reduction and early stopping.

    Args:
        model: Module mapping a feature batch to one prediction per sample.
        train_loader, val_loader, test_loader: Iterables yielding ``(X, y)``
            tensor batches.
        config: dict with ``learning_rate``, ``weight_decay``, ``num_epochs``
            and ``patience``; ``l1_lambda`` is optional (default 0.0).

    Attributes set during training:
        train_losses, val_losses: per-epoch mean losses.
        lr_history: learning rate in effect during each epoch.
        best_val_loss, best_model_state: best validation loss seen and a deep
            copy of the weights that produced it.
    """
    def __init__(self, model, train_loader, val_loader, test_loader, config):
        # Remember the device so the methods do not depend on the global name
        self.device = device
        self.model = model.to(self.device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.config = config

        # Loss function
        self.criterion = nn.MSELoss()

        # L1 regularization coefficient
        self.l1_lambda = config.get('l1_lambda', 0.0)

        # Optimizer (gradient clipping is applied in train_epoch)
        self.optimizer = optim.AdamW(
            self.model.parameters(),
            lr=config['learning_rate'],
            weight_decay=config['weight_decay'],
            eps=1e-8,
            betas=(0.9, 0.999)
        )

        # Halve the learning rate after 5 epochs without validation improvement.
        # The ``verbose`` flag is deprecated in recent PyTorch releases, so
        # learning-rate changes are reported explicitly in train() instead.
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer,
            mode='min',
            factor=0.5,
            patience=5,
            min_lr=1e-7
        )

        # Tracking
        self.train_losses = []
        self.val_losses = []
        self.lr_history = []
        self.best_val_loss = float('inf')
        self.best_model_state = None
        self.patience_counter = 0

    @staticmethod
    def _mean_loss(total_loss, valid_batches):
        """Mean batch loss, or NaN when every batch was skipped.

        Returning 0.0 for "no usable batch" would look like a perfect score
        and let a model that only produces NaN be saved as the best one.
        """
        if valid_batches == 0:
            return float('nan')
        return total_loss / valid_batches

    def calculate_l1_loss(self):
        """Calculate L1 regularization loss (sum of absolute parameter values)"""
        l1_loss = 0
        for param in self.model.parameters():
            l1_loss += torch.norm(param, 1)
        return l1_loss

    def train_epoch(self):
        """Run one optimisation pass over ``train_loader``.

        Batches with NaN inputs, outputs or loss are skipped.

        Returns:
            (mean loss over the batches that were used, predictions, targets)
        """
        self.model.train()
        total_loss = 0
        all_preds = []
        all_targets = []
        valid_batches = 0

        for batch_idx, (X, y) in enumerate(self.train_loader):
            X, y = X.to(self.device), y.to(self.device)

            # Skip problematic batches
            if torch.isnan(X).any() or torch.isnan(y).any():
                continue

            self.optimizer.zero_grad()

            # Forward pass
            outputs = self.model(X)

            # Skip if output contains NaN
            if torch.isnan(outputs).any():
                continue

            # Calculate primary loss
            loss = self.criterion(outputs, y)

            # Add L1 regularization if specified
            if self.l1_lambda > 0:
                loss = loss + self.l1_lambda * self.calculate_l1_loss()

            # Skip if loss is NaN
            if torch.isnan(loss):
                continue

            # Backward pass with gradient clipping; a failing batch is
            # reported and skipped rather than aborting the epoch
            try:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
                self.optimizer.step()

                total_loss += loss.item()
                all_preds.extend(outputs.detach().cpu().numpy())
                all_targets.extend(y.detach().cpu().numpy())
                valid_batches += 1

            except RuntimeError as e:
                print(f"Skipping batch {batch_idx} due to error: {e}")
                continue

        return self._mean_loss(total_loss, valid_batches), all_preds, all_targets

    def validate(self):
        """Evaluate on ``val_loader``; see evaluate_loader for the return value."""
        return self.evaluate_loader(self.val_loader)

    def train(self):
        """Train until ``num_epochs`` or early stopping, then restore the best
        weights and return the result of final_evaluation()."""
        print("Starting enhanced training with comprehensive dropout...")
        start_time = time.time()

        for epoch in range(self.config['num_epochs']):
            # Training
            train_loss, train_preds, train_targets = self.train_epoch()
            self.train_losses.append(train_loss)

            # Validation
            val_loss, val_preds, val_targets = self.validate()
            self.val_losses.append(val_loss)

            # Learning rate scheduling; record the rate used for this epoch
            # and report when the scheduler lowers it
            previous_lr = self.optimizer.param_groups[0]['lr']
            self.lr_history.append(previous_lr)
            self.scheduler.step(val_loss)
            current_lr = self.optimizer.param_groups[0]['lr']
            if current_lr != previous_lr:
                print(f"Learning rate reduced: {previous_lr:.2e} -> {current_lr:.2e}")

            # Calculate R² for monitoring
            train_r2 = r2_score(train_targets, train_preds) if len(train_targets) > 0 else 0
            val_r2 = r2_score(val_targets, val_preds) if len(val_targets) > 0 else 0

            # Calculate overfitting indicator
            overfitting_gap = train_r2 - val_r2

            # Early stopping check (a NaN validation loss never counts as an
            # improvement because the comparison is False)
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                self.best_model_state = copy.deepcopy(self.model.state_dict())
                self.patience_counter = 0
            else:
                self.patience_counter += 1

            # Print progress with overfitting monitoring
            print(f"Epoch {epoch+1:3d}/{self.config['num_epochs']} | "
                  f"Train Loss: {train_loss:.6f} | Train R²: {train_r2:.4f} | "
                  f"Val Loss: {val_loss:.6f} | Val R²: {val_r2:.4f} | "
                  f"Gap: {overfitting_gap:.4f}")

            # Early stopping
            if self.patience_counter >= self.config['patience']:
                print(f"Early stopping at epoch {epoch+1}")
                break

        # Load best model
        if self.best_model_state is not None:
            self.model.load_state_dict(self.best_model_state)

        training_time = time.time() - start_time
        print(f"Training completed in {training_time/60:.2f} minutes")

        return self.final_evaluation()

    def evaluate_loader(self, loader):
        """Evaluate the model on ``loader`` without gradient tracking.

        Batches with NaN inputs, outputs or loss are skipped entirely, so the
        returned predictions always correspond to the batches in the loss.

        Returns:
            (mean loss or NaN if no batch was usable, predictions, targets)
        """
        self.model.eval()
        total_loss = 0
        all_preds = []
        all_targets = []
        valid_batches = 0

        with torch.no_grad():
            for X, y in loader:
                X, y = X.to(self.device), y.to(self.device)

                if torch.isnan(X).any() or torch.isnan(y).any():
                    continue

                outputs = self.model(X)

                if torch.isnan(outputs).any():
                    continue

                loss = self.criterion(outputs, y)

                if torch.isnan(loss):
                    continue

                total_loss += loss.item()
                all_preds.extend(outputs.cpu().numpy())
                all_targets.extend(y.cpu().numpy())
                valid_batches += 1

        return self._mean_loss(total_loss, valid_batches), all_preds, all_targets

    def calculate_core_metrics(self, y_true, y_pred):
        """
        Calculate only core metrics: MSE, RMSE, MAE, R²

        Returns a dict with keys 'MSE', 'RMSE', 'MAE' and 'R2'; all values are
        NaN when either input is empty.
        """
        if len(y_true) == 0 or len(y_pred) == 0:
            return {'MSE': float('nan'), 'RMSE': float('nan'), 'MAE': float('nan'), 'R2': float('nan')}

        y_true, y_pred = np.array(y_true), np.array(y_pred)

        mse = mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_true, y_pred)
        r2 = r2_score(y_true, y_pred)

        return {
            'MSE': mse,
            'RMSE': rmse,
            'MAE': mae,
            'R2': r2
        }

    def final_evaluation(self):
        """
        Final evaluation with core metrics only

        Returns a dict keyed by 'train', 'val' and 'test'; each entry holds
        'loss', 'predictions', 'targets' and 'metrics'.
        """
        print("\n" + "="*60)
        print("FINAL EVALUATION - ENHANCED MODEL WITH DROPOUT")
        print("="*60)

        results = {}
        for name, loader in [('train', self.train_loader), ('val', self.val_loader), ('test', self.test_loader)]:
            loss, preds, targets = self.evaluate_loader(loader)
            metrics = self.calculate_core_metrics(targets, preds)

            results[name] = {
                'loss': loss,
                'predictions': preds,
                'targets': targets,
                'metrics': metrics
            }

            print(f"{name.capitalize():5} | MSE: {metrics['MSE']:.6f} | RMSE: {metrics['RMSE']:.6f} | "
                  f"MAE: {metrics['MAE']:.6f} | R²: {metrics['R2']:.4f}")

        # Overfitting analysis: gap between training and validation R²
        train_r2 = results['train']['metrics']['R2']
        val_r2 = results['val']['metrics']['R2']
        overfitting_gap = train_r2 - val_r2

        print(f"\nOverfitting Analysis:")
        print(f"Train-Val Gap: {overfitting_gap:.4f}")
        if overfitting_gap > 0.1:
            print("⚠️  High overfitting detected")
        elif overfitting_gap > 0.05:
            print("⚠️  Moderate overfitting detected")
        else:
            print("✅ Good generalization")

        return results

# ================================
# 4. ENHANCED CONFIGURATION
# ================================

def get_enhanced_config(data_info, risk_level='medium'):
    """
    Pick a hyper-parameter preset ('low', 'medium' or 'high' overfitting risk).

    ``data_info`` may provide ``n_train_samples``, ``n_features`` and
    ``n_timesteps`` (defaults 1000, 10, 10). With ``risk_level='auto'`` the
    preset is chosen from samples / (features * timesteps): below 5 -> 'high',
    below 15 -> 'medium', otherwise 'low'. The preset's batch size is capped
    at max(4, n_train_samples // 10). Returns a new dict; an unknown
    ``risk_level`` raises KeyError.
    """
    n_samples = data_info.get('n_train_samples', 1000)
    n_features = data_info.get('n_features', 10)
    n_timesteps = data_info.get('n_timesteps', 10)

    sample_feature_ratio = n_samples / (n_features * n_timesteps)

    print(f"Data analysis:")
    print(f"  Samples: {n_samples}")
    print(f"  Features per timestep: {n_features}")
    print(f"  Timesteps: {n_timesteps}")
    print(f"  Sample/Feature ratio: {sample_feature_ratio:.2f}")

    # One row per preset; columns follow ``setting_names``
    setting_names = (
        'd_model', 'n_heads', 'n_layers', 'd_ff',
        'dropout', 'input_dropout', 'layer_dropout', 'output_dropout',
        'learning_rate', 'weight_decay', 'l1_lambda',
        'batch_size', 'num_epochs', 'patience',
    )
    preset_rows = {
        'low':    (256, 8, 6, 1024, 0.3, 0.2, 0.3, 0.3, 0.001, 1e-5, 0.0, 32, 15, 5),
        'medium': (128, 8, 4, 512, 0.4, 0.25, 0.35, 0.4, 0.0005, 1e-4, 1e-6, 16, 15, 5),
        'high':   (32, 2, 2, 128, 0.3, 0.2, 0.25, 0.3, 0.0005, 1e-3, 1e-5, 8, 30, 5),
    }

    # Resolve 'auto' from the sample/feature ratio
    if risk_level == 'auto':
        if sample_feature_ratio < 5:
            risk_level = 'high'
        elif sample_feature_ratio < 15:
            risk_level = 'medium'
        else:
            risk_level = 'low'

    config = dict(zip(setting_names, preset_rows[risk_level]))

    # Keep batches small relative to the data set, but never below 4
    batch_cap = max(4, n_samples // 10)
    if batch_cap < config['batch_size']:
        config['batch_size'] = batch_cap

    print(f"Using '{risk_level}' risk configuration")
    print(f"Dropout settings:")
    for label, key in (('General', 'dropout'), ('Input', 'input_dropout'),
                       ('Layer', 'layer_dropout'), ('Output', 'output_dropout')):
        print(f"  {label} dropout: {config[key]}")

    return config

# ================================
# 5. ENHANCED PREDICTION INTERFACE
# ================================

class EnhancedLOBPredictor:
    """
    Inference wrapper around a saved EnhancedLOBTransformer checkpoint.

    The checkpoint must contain ``model_architecture`` (constructor arguments),
    ``model_state_dict`` and ``config``. An optional pickled scaler is loaded
    into ``self.scaler`` when ``scaler_path`` points to an existing file.
    """
    def __init__(self, model_path, scaler_path=None):
        # Remember the device so the methods do not depend on the global name
        self.device = device

        # Load model
        checkpoint = torch.load(model_path, map_location=self.device)

        # Rebuild the network from the stored architecture
        arch = checkpoint['model_architecture']
        self.model = EnhancedLOBTransformer(
            input_dim=arch['input_dim'],
            d_model=arch['d_model'],
            n_heads=arch['n_heads'],
            n_layers=arch['n_layers'],
            d_ff=arch['d_ff'],
            dropout=arch['dropout'],
            input_dropout=arch['input_dropout'],
            layer_dropout=arch['layer_dropout'],
            output_dropout=arch['output_dropout'],
            max_seq_len=arch['max_seq_len']
        )

        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.to(self.device)
        self.model.eval()  # Set to evaluation mode (disables dropout)

        # Load scaler
        self.scaler = None
        if scaler_path and os.path.exists(scaler_path):
            # NOTE(security): pickle.load can execute arbitrary code. Only
            # load scaler files produced by your own preprocessing run.
            with open(scaler_path, 'rb') as f:
                self.scaler = pickle.load(f)

        self.config = checkpoint['config']
        print(f"Enhanced model loaded successfully!")
        print(f"Input shape expected: (batch_size, {arch['max_seq_len']}, {arch['input_dim']})")
        print(f"Dropout configuration loaded: {arch['dropout']:.2f} (disabled during inference)")

    def _prepare_input(self, X):
        """Convert ``X`` to a float tensor on the model device; a 2D input is
        treated as a single sample and given a leading batch dimension."""
        if not torch.is_tensor(X):
            X = torch.FloatTensor(X)

        if len(X.shape) == 2:
            X = X.unsqueeze(0)

        return X.to(self.device)

    def predict(self, X):
        """
        Make predictions on new data (dropout disabled automatically in eval mode)

        Returns a NumPy array with one value per sample, squeezed (a single
        sample yields a 0-d array).
        """
        X = self._prepare_input(X)

        self.model.eval()  # Ensure dropout is disabled
        with torch.no_grad():
            prediction = self.model(X)
            return prediction.cpu().numpy().squeeze()

    def predict_with_uncertainty(self, X, n_samples=10):
        """
        Predict with uncertainty estimation using Monte Carlo Dropout

        The model is run ``n_samples`` times with dropout active.

        Returns:
            (mean, std) taken across the stochastic passes only: scalars for a
            single sample, arrays with one entry per sample for a batch.
        """
        X = self._prepare_input(X)

        # Enable dropout for uncertainty estimation
        self.model.train()
        try:
            predictions = []
            with torch.no_grad():
                for _ in range(n_samples):
                    pred = self.model(X)
                    predictions.append(pred.cpu().numpy().squeeze())
        finally:
            # Always restore eval mode, even if a forward pass fails
            self.model.eval()

        predictions = np.array(predictions)
        # Reduce over the pass axis only; reducing over every axis would mix
        # different samples of a batch into one number
        mean_pred = np.mean(predictions, axis=0)
        std_pred = np.std(predictions, axis=0)

        return mean_pred, std_pred

# ================================
# 6. COMPREHENSIVE VISUALIZATION MODULE
# ================================

def _plot_predictions_vs_actual(position, targets, preds, color, title):
    """Draw one predicted-vs-actual scatter panel with the identity line."""
    plt.subplot(4, 3, position)
    plt.scatter(targets, preds, alpha=0.6, s=20, color=color)
    min_val = min(min(targets), min(preds))
    max_val = max(max(targets), max(preds))
    plt.plot([min_val, max_val], [min_val, max_val], 'r--', linewidth=2)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title(title, fontsize=12, fontweight='bold')
    plt.grid(True, alpha=0.3)


def create_comprehensive_visualization(model, trainer, results, config, save_dir=None):
    """
    Create the 4x3 summary figure for the LOB Transformer training results.

    Args:
        model: Accepted for interface compatibility; not used by the plots.
        trainer: Provides ``train_losses``, ``val_losses`` and
            ``patience_counter``; ``lr_history`` is used when available.
        results: dict with 'train', 'val' and 'test' entries, each holding
            'predictions', 'targets' and 'metrics' (MSE, RMSE, MAE, R2).
        config: Training configuration (``patience``, ``learning_rate`` and
            the dropout rates).
        save_dir: When given, the figure is written to
            ``<save_dir>/comprehensive_analysis.png`` (directory is created
            if missing). The figure is closed afterwards, not shown.
    """
    print("\n" + "="*60)
    print("CREATING COMPREHENSIVE VISUALIZATIONS")
    print("="*60)

    # Create figure with subplots
    fig = plt.figure(figsize=(20, 24))

    train_r2 = results['train']['metrics']['R2']
    val_r2 = results['val']['metrics']['R2']
    test_r2 = results['test']['metrics']['R2']

    # 1. Training Loss Curves
    plt.subplot(4, 3, 1)
    epochs = range(1, len(trainer.train_losses) + 1)
    plt.plot(epochs, trainer.train_losses, 'b-', label='Training Loss', linewidth=2)
    plt.plot(epochs, trainer.val_losses, 'r-', label='Validation Loss', linewidth=2)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training vs Validation Loss', fontsize=14, fontweight='bold')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Mark the best epoch if training stopped early
    if trainer.patience_counter >= config['patience']:
        best_epoch = len(trainer.train_losses) - trainer.patience_counter
        plt.axvline(x=best_epoch, color='green', linestyle='--', alpha=0.7,
                   label=f'Early Stop (Epoch {best_epoch})')
        plt.legend()

    # 2. Predictions vs Actual - Training Set
    # (previously titled "Test" and annotated with the test R²)
    train_preds = results['train']['predictions']
    train_targets = results['train']['targets']
    r2_text = f"R² = {train_r2:.4f}" if -10 < train_r2 < 1 else "R² = N/A"
    _plot_predictions_vs_actual(2, train_targets, train_preds, 'blue',
                                f'Training: Predictions vs Actual\n{r2_text}')

    # 3. Predictions vs Actual - Validation Set
    val_preds = results['val']['predictions']
    val_targets = results['val']['targets']
    _plot_predictions_vs_actual(3, val_targets, val_preds, 'orange',
                                f'Validation: Predictions vs Actual\nR² = {val_r2:.4f}')

    # 4. Predictions vs Actual - Test Set
    test_preds = results['test']['predictions']
    test_targets = results['test']['targets']
    _plot_predictions_vs_actual(4, test_targets, test_preds, 'green',
                                f'Test: Predictions vs Actual\nR² = {test_r2:.4f}')

    # 5. Residual Analysis - Test Set
    plt.subplot(4, 3, 5)
    test_residuals = np.array(test_preds) - np.array(test_targets)
    plt.scatter(test_preds, test_residuals, alpha=0.6, s=20, color='purple')
    plt.axhline(y=0, color='r', linestyle='--', linewidth=2)
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')
    plt.title('Residual Analysis (Test Set)', fontsize=12, fontweight='bold')
    plt.grid(True, alpha=0.3)

    # 6. Residual Distribution with a fitted normal curve
    plt.subplot(4, 3, 6)
    plt.hist(test_residuals, bins=50, density=True, alpha=0.7, color='skyblue', edgecolor='black')
    mu, sigma = stats.norm.fit(test_residuals)
    x = np.linspace(min(test_residuals), max(test_residuals), 100)
    plt.plot(x, stats.norm.pdf(x, mu, sigma), 'r-', linewidth=2, label=f'Normal Fit (μ={mu:.4f}, σ={sigma:.4f})')
    plt.xlabel('Residuals')
    plt.ylabel('Density')
    plt.title('Residual Distribution', fontsize=12, fontweight='bold')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # 7. Performance Metrics Comparison
    plt.subplot(4, 3, 7)
    datasets = ['Train', 'Validation', 'Test']
    r2_scores = [train_r2, val_r2, test_r2]

    bars = plt.bar(datasets, r2_scores, color=['blue', 'orange', 'green'], alpha=0.7)
    plt.ylabel('R² Score')
    plt.title('R² Score Comparison', fontsize=12, fontweight='bold')
    # R² can be negative; extend the axis downwards so such bars stay visible
    plt.ylim(min(0, min(r2_scores) * 1.1), max(1, max(r2_scores) * 1.1))

    # Add value labels on bars
    for bar, score in zip(bars, r2_scores):
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                f'{score:.4f}', ha='center', va='bottom', fontweight='bold')
    plt.grid(True, alpha=0.3)

    # 8. Overfitting Analysis
    plt.subplot(4, 3, 8)
    overfitting_gap = train_r2 - val_r2
    generalization_gap = val_r2 - test_r2

    gaps = ['Train-Val Gap', 'Val-Test Gap']
    gap_values = [overfitting_gap, generalization_gap]
    colors = ['red' if gap > 0.1 else 'orange' if gap > 0.05 else 'green' for gap in gap_values]

    bars = plt.bar(gaps, gap_values, color=colors, alpha=0.7)
    plt.ylabel('Performance Gap')
    plt.title('Overfitting Analysis', fontsize=12, fontweight='bold')
    plt.axhline(y=0.05, color='orange', linestyle='--', alpha=0.7, label='Moderate Threshold')
    plt.axhline(y=0.1, color='red', linestyle='--', alpha=0.7, label='High Threshold')

    # Add value labels
    for bar, gap in zip(bars, gap_values):
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.005,
                f'{gap:.4f}', ha='center', va='bottom', fontweight='bold')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # 9. Error Metrics Comparison
    plt.subplot(4, 3, 9)
    metrics = ['MSE', 'RMSE', 'MAE']
    test_metrics = results['test']['metrics']
    metric_values = [test_metrics['MSE'], test_metrics['RMSE'], test_metrics['MAE']]

    bars = plt.bar(metrics, metric_values, color='lightcoral', alpha=0.7)
    plt.ylabel('Error Value')
    plt.title('Test Set Error Metrics', fontsize=12, fontweight='bold')
    plt.yscale('log')  # Log scale for better visualization

    # Add value labels
    for bar, value in zip(bars, metric_values):
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() * 1.1,
                f'{value:.6f}', ha='center', va='bottom', fontweight='bold', rotation=45)
    plt.grid(True, alpha=0.3)

    # 10. Time Series Prediction Sample (first 100 points)
    plt.subplot(4, 3, 10)
    n_points = min(100, len(test_targets))
    indices = range(n_points)

    plt.plot(indices, test_targets[:n_points], 'b-', label='Actual', linewidth=2, alpha=0.8)
    plt.plot(indices, test_preds[:n_points], 'r-', label='Predicted', linewidth=2, alpha=0.8)
    plt.xlabel('Sample Index')
    plt.ylabel('Value')
    plt.title('Time Series Prediction Sample', fontsize=12, fontweight='bold')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # 11. Learning Rate Schedule: use the per-epoch history when the trainer
    # recorded one, otherwise fall back to the configured constant rate
    plt.subplot(4, 3, 11)
    lr_values = getattr(trainer, 'lr_history', None)
    if not lr_values or len(lr_values) != len(epochs):
        lr_values = [config['learning_rate']] * len(epochs)
    plt.plot(epochs, lr_values, 'g-', linewidth=2)
    plt.xlabel('Epoch')
    plt.ylabel('Learning Rate')
    plt.title('Learning Rate Schedule', fontsize=12, fontweight='bold')
    plt.yscale('log')
    plt.grid(True, alpha=0.3)

    # 12. Dropout Configuration Visualization
    plt.subplot(4, 3, 12)
    dropout_types = ['Input', 'Projection', 'Layer', 'Output']
    dropout_values = [
        config.get('input_dropout', 0),
        config.get('dropout', 0),
        config.get('layer_dropout', 0),
        config.get('output_dropout', 0)
    ]

    bars = plt.bar(dropout_types, dropout_values, color='steelblue', alpha=0.7)
    plt.ylabel('Dropout Rate')
    plt.title('Dropout Configuration', fontsize=12, fontweight='bold')
    # Avoid a degenerate (0, 0) axis when every rate is zero
    plt.ylim(0, max(dropout_values) * 1.2 or 1.0)

    # Add value labels
    for bar, value in zip(bars, dropout_values):
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                f'{value:.2f}', ha='center', va='bottom', fontweight='bold')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()

    # Save the comprehensive visualization
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
        output_path = os.path.join(save_dir, 'comprehensive_analysis.png')
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Comprehensive visualization saved to: {output_path}")

    # plt.show()
    plt.close(fig)

def create_interactive_visualization(results, save_dir=None):
    """
    Build an interactive 2x2 Plotly dashboard of the model's predictions.

    Panels:
        1. Train and validation predicted-vs-actual scatter.
        2. Test predicted-vs-actual scatter.
        3. Test residuals (prediction - target) against predictions.
        4. Bar chart of the R² score of each split.

    Args:
        results: Mapping with 'train', 'val' and 'test' entries, each holding
            'targets', 'predictions' and a 'metrics' mapping with an 'R2' key.
        save_dir: Optional directory. When truthy, the figure is written to
            '<save_dir>/interactive_analysis.html'.

    Returns:
        None. Returns early, after printing a notice, when Plotly is not
        available.
    """
    if not PLOTLY_AVAILABLE:
        print("Plotly not available. Skipping interactive visualization.")
        return

    print("\nCreating interactive visualizations...")

    def _as_flat_array(values):
        # Accept Python lists as well as NumPy arrays of any shape. The
        # previous `a + b` idiom only concatenated for lists; for arrays it
        # added element-wise (or failed on unequal lengths).
        return np.asarray(values).ravel()

    train_targets = _as_flat_array(results['train']['targets'])
    train_preds = _as_flat_array(results['train']['predictions'])
    val_targets = _as_flat_array(results['val']['targets'])
    val_preds = _as_flat_array(results['val']['predictions'])
    test_targets = _as_flat_array(results['test']['targets'])
    test_preds = _as_flat_array(results['test']['predictions'])

    r2_by_split = {
        split: results[split]['metrics']['R2']
        for split in ('train', 'val', 'test')
    }

    # Create subplot structure
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Training vs Validation Predictions', 'Test Set Predictions',
                        'Residual Analysis', 'Performance Metrics'),
        specs=[[{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": False}, {"secondary_y": False}]]
    )

    # 1. Training vs Validation Predictions
    # The diagonal spans the joint range of all train/val targets and predictions.
    train_val_series = (train_targets, val_targets, train_preds, val_preds)
    min_val = float(min(series.min() for series in train_val_series))
    max_val = float(max(series.max() for series in train_val_series))

    fig.add_trace(
        go.Scatter(x=[min_val, max_val], y=[min_val, max_val],
                   mode='lines', name='Perfect Prediction',
                   line=dict(color='red', dash='dash')),
        row=1, col=1
    )

    fig.add_trace(
        go.Scatter(x=train_targets, y=train_preds, mode='markers',
                   name=f'Train (R²={r2_by_split["train"]:.4f})',
                   marker=dict(color='blue', opacity=0.6)),
        row=1, col=1
    )

    fig.add_trace(
        go.Scatter(x=val_targets, y=val_preds, mode='markers',
                   name=f'Validation (R²={r2_by_split["val"]:.4f})',
                   marker=dict(color='orange', opacity=0.6)),
        row=1, col=1
    )

    # 2. Test Set Predictions
    test_lo = float(test_targets.min())
    test_hi = float(test_targets.max())

    fig.add_trace(
        go.Scatter(x=[test_lo, test_hi], y=[test_lo, test_hi],
                   mode='lines', name='Perfect Prediction',
                   line=dict(color='red', dash='dash'), showlegend=False),
        row=1, col=2
    )

    fig.add_trace(
        go.Scatter(x=test_targets, y=test_preds, mode='markers',
                   name=f'Test (R²={r2_by_split["test"]:.4f})',
                   marker=dict(color='green', opacity=0.6)),
        row=1, col=2
    )

    # 3. Residual Analysis
    test_residuals = test_preds - test_targets

    fig.add_trace(
        go.Scatter(x=test_preds, y=test_residuals, mode='markers',
                   name='Residuals', marker=dict(color='purple', opacity=0.6)),
        row=2, col=1
    )

    fig.add_trace(
        go.Scatter(x=[float(test_preds.min()), float(test_preds.max())], y=[0, 0],
                   mode='lines', name='Zero Line',
                   line=dict(color='red', dash='dash'), showlegend=False),
        row=2, col=1
    )

    # 4. Performance Metrics
    datasets = ['Train', 'Validation', 'Test']
    r2_scores = [r2_by_split['train'], r2_by_split['val'], r2_by_split['test']]

    fig.add_trace(
        go.Bar(x=datasets, y=r2_scores, name='R² Scores',
               marker=dict(color=['blue', 'orange', 'green'])),
        row=2, col=2
    )

    # Update layout
    fig.update_layout(
        title_text="Enhanced LOB Transformer - Interactive Analysis",
        title_x=0.5,
        showlegend=True,
        height=800,
        font=dict(size=12)
    )

    # Update axes labels
    fig.update_xaxes(title_text="Actual Values", row=1, col=1)
    fig.update_yaxes(title_text="Predicted Values", row=1, col=1)
    fig.update_xaxes(title_text="Actual Values", row=1, col=2)
    fig.update_yaxes(title_text="Predicted Values", row=1, col=2)
    fig.update_xaxes(title_text="Predicted Values", row=2, col=1)
    fig.update_yaxes(title_text="Residuals", row=2, col=1)
    fig.update_xaxes(title_text="Dataset", row=2, col=2)
    fig.update_yaxes(title_text="R² Score", row=2, col=2)

    if save_dir:
        fig.write_html(f'{save_dir}/interactive_analysis.html')
        print(f"Interactive visualization saved to: {save_dir}/interactive_analysis.html")

    # The figure is intentionally not displayed inline; it is only written to disk.


def create_uncertainty_visualization(model_predictor, X_test, y_test, n_samples=20, save_dir=None):
    """
    Plot uncertainty-estimation diagnostics (Monte Carlo Dropout) for a random
    subset of the test set.

    Up to 100 test samples are drawn without replacement. For each one,
    `model_predictor.predict_with_uncertainty` is called on a batch of one
    sample and its two return values are used as the prediction mean and
    standard deviation. Four panels are drawn: predictions with error bars,
    uncertainty vs. absolute error, the uncertainty histogram, and the
    empirical coverage of the 68% / 95% intervals.

    Args:
        model_predictor: Object exposing
            `predict_with_uncertainty(x, n_samples=...)` that returns a
            `(mean, std)` pair.
        X_test: Indexable test inputs (indexed with an integer index array and
            with slices).
        y_test: Test targets aligned with `X_test`.
        n_samples: Number of stochastic passes forwarded to the predictor.
        save_dir: Optional directory. When truthy, the figure is saved to
            '<save_dir>/uncertainty_analysis.png'.

    Returns:
        Tuple `(predictions, uncertainties)` of 1-D NumPy arrays for the
        sampled subset.

    Raises:
        ValueError: If `X_test` is empty.
    """
    print("\nCreating uncertainty estimation visualization...")

    # Select subset for uncertainty analysis (computational efficiency)
    n_points = min(100, len(X_test))
    if n_points == 0:
        raise ValueError("X_test is empty; cannot build uncertainty visualization")

    indices = np.random.choice(len(X_test), n_points, replace=False)
    X_subset = X_test[indices]
    # Flatten so that (n, 1)-shaped targets line up with 1-D predictions.
    y_subset = np.asarray(y_test[indices]).ravel()

    predictions = []
    uncertainties = []

    # Generate predictions with uncertainty, one sample (batch of size 1) at a time
    for start in range(n_points):
        mean_pred, std_pred = model_predictor.predict_with_uncertainty(
            X_subset[start:start + 1], n_samples=n_samples
        )
        predictions.append(mean_pred)
        uncertainties.append(std_pred)

    # Each call may return a scalar or an array shaped (1,) / (1, 1). Without
    # flattening, an (n, 1) array broadcasts against the (n,) targets into an
    # (n, n) matrix and corrupts every statistic below.
    # NOTE(review): assumes one predicted value per sample - confirm against
    # predict_with_uncertainty.
    predictions = np.asarray(predictions).ravel()
    uncertainties = np.asarray(uncertainties).ravel()

    # Create uncertainty visualization
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # 1. Predictions with uncertainty bands
    ax = axes[0, 0]
    ax.scatter(y_subset, predictions, alpha=0.6, color='blue', s=30)
    ax.errorbar(y_subset, predictions, yerr=uncertainties,
                fmt='none', alpha=0.3, color='red')
    min_val = min(y_subset.min(), predictions.min())
    max_val = max(y_subset.max(), predictions.max())
    ax.plot([min_val, max_val], [min_val, max_val], 'r--', linewidth=2)
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Predicted Values')
    ax.set_title('Predictions with Uncertainty Bands')
    ax.grid(True, alpha=0.3)

    # 2. Uncertainty vs Prediction Error
    ax = axes[0, 1]
    prediction_errors = np.abs(predictions - y_subset)
    ax.scatter(uncertainties, prediction_errors, alpha=0.6, color='green', s=30)
    ax.set_xlabel('Prediction Uncertainty (Std)')
    ax.set_ylabel('Prediction Error')
    ax.set_title('Uncertainty vs Prediction Error')
    ax.grid(True, alpha=0.3)

    # Add correlation coefficient
    corr_coef = np.corrcoef(uncertainties, prediction_errors)[0, 1]
    ax.text(0.05, 0.95, f'Correlation: {corr_coef:.3f}',
            transform=ax.transAxes, bbox=dict(boxstyle="round", facecolor='wheat'))

    # 3. Uncertainty distribution
    ax = axes[1, 0]
    ax.hist(uncertainties, bins=20, density=True, alpha=0.7, color='skyblue', edgecolor='black')
    ax.set_xlabel('Prediction Uncertainty (Std)')
    ax.set_ylabel('Density')
    ax.set_title('Distribution of Prediction Uncertainties')
    ax.grid(True, alpha=0.3)

    # 4. Confidence intervals analysis
    confidence_levels = [0.68, 0.95]  # approximately 1σ and 2σ of a normal distribution
    coverage_rates = []

    for conf_level in confidence_levels:
        # Two-sided interval: half of the excluded mass lies in each tail.
        z_score = stats.norm.ppf((1 + conf_level) / 2)
        lower_bound = predictions - z_score * uncertainties
        upper_bound = predictions + z_score * uncertainties

        coverage = np.mean((y_subset >= lower_bound) & (y_subset <= upper_bound))
        coverage_rates.append(coverage)

    ax = axes[1, 1]
    ax.bar([f'{int(conf*100)}%' for conf in confidence_levels],
           coverage_rates, alpha=0.7, color=['orange', 'red'])
    # Dashed lines mark the nominal coverage each bar should reach.
    ax.axhline(y=confidence_levels[0], color='orange', linestyle='--', alpha=0.7)
    ax.axhline(y=confidence_levels[1], color='red', linestyle='--', alpha=0.7)
    ax.set_ylabel('Actual Coverage Rate')
    ax.set_title('Confidence Interval Coverage')
    ax.grid(True, alpha=0.3)

    # Add value labels
    for position, rate in enumerate(coverage_rates):
        ax.text(position, rate + 0.02, f'{rate:.3f}', ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()

    if save_dir:
        plt.savefig(f'{save_dir}/uncertainty_analysis.png', dpi=300, bbox_inches='tight')
        print(f"Uncertainty visualization saved to: {save_dir}/uncertainty_analysis.png")

    # The figure is not shown inline; close it to release its memory.
    plt.close()

    return predictions, uncertainties

def print_detailed_results_summary(results, config):
    """
    Print a detailed, structured summary of the model configuration and results.

    Sections printed: model configuration, dropout configuration, per-split
    metrics table, train/val/test R² gaps, an overfitting interpretation and a
    quality rating based on the test R².

    Args:
        results: Mapping with 'train', 'val' and 'test' entries, each holding a
            'metrics' mapping with 'MSE', 'RMSE', 'MAE' and 'R2'. An optional
            'ThresholdAccuracy' (a fraction) is shown as a percentage and
            defaults to 0 when absent.
        config: Mapping with 'd_model', 'n_heads', 'n_layers' and 'd_ff'.
            The dropout entries ('input_dropout', 'dropout', 'layer_dropout',
            'output_dropout') are optional and are reported as 0 when missing,
            matching how the dropout bar chart in this file reads them.

    Returns:
        None. Everything is written to stdout.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("DETAILED RESULTS SUMMARY - ENHANCED LOB TRANSFORMER")
    print(banner)

    # Model Configuration Summary
    print("\n📊 MODEL CONFIGURATION:")
    print("   Architecture: Enhanced LOB Transformer")
    print(f"   Model Dimension: {config['d_model']}")
    print(f"   Attention Heads: {config['n_heads']}")
    print(f"   Encoder Layers: {config['n_layers']}")
    print(f"   Feed Forward Dim: {config['d_ff']}")

    # Dropout Configuration
    print("\n🛡️  DROPOUT CONFIGURATION:")
    dropout_rows = (
        ('Input Dropout', 'input_dropout'),
        ('General Dropout', 'dropout'),
        ('Layer Dropout', 'layer_dropout'),
        ('Output Dropout', 'output_dropout'),
    )
    for label, key in dropout_rows:
        print(f"   {label}: {config.get(key, 0):.3f}")

    # Performance Metrics
    print("\n📈 PERFORMANCE METRICS:")
    print(f"   {'Dataset':<12} {'MSE':<12} {'RMSE':<12} {'MAE':<12} {'R²':<8} {'Accuracy':<10}")
    print(f"   {'-'*72}")

    for dataset_name in ['train', 'val', 'test']:
        metrics = results[dataset_name]['metrics']
        # ThresholdAccuracy is stored as a fraction; display it as a percentage.
        acc_percent = metrics.get('ThresholdAccuracy', 0) * 100
        print(f"   {dataset_name.capitalize():<12} "
              f"{metrics['MSE']:<12.6f} "
              f"{metrics['RMSE']:<12.6f} "
              f"{metrics['MAE']:<12.6f} "
              f"{metrics['R2']:<8.4f} "
              f"{acc_percent:<.2f}%")

    # Overfitting Analysis
    train_r2 = results['train']['metrics']['R2']
    val_r2 = results['val']['metrics']['R2']
    test_r2 = results['test']['metrics']['R2']

    overfitting_gap = train_r2 - val_r2
    generalization_gap = val_r2 - test_r2

    print("\n🔍 OVERFITTING ANALYSIS:")
    print(f"   Train R²: {train_r2:.4f}")
    print(f"   Val R²:   {val_r2:.4f}")
    print(f"   Test R²:  {test_r2:.4f}")
    print(f"   Train-Val Gap: {overfitting_gap:.4f}")
    print(f"   Val-Test Gap:  {generalization_gap:.4f}")

    # Interpretation
    print("\n💡 INTERPRETATION:")
    if overfitting_gap > 0.1:
        print("   ⚠️  HIGH OVERFITTING DETECTED")
        print("      Consider increasing dropout rates or reducing model complexity")
    elif overfitting_gap > 0.05:
        print("   ⚠️  MODERATE OVERFITTING")
        print("      Dropout is helping but could be increased")
    else:
        print("   ✅ EXCELLENT REGULARIZATION")
        print("      Dropout configuration is working well")

    if abs(generalization_gap) < 0.03:
        print("   ✅ Good generalization from validation to test")
    else:
        print("   ⚠️  Significant val-test gap detected")

    print("\n🎯 MODEL QUALITY ASSESSMENT:")
    if test_r2 > 0.8:
        print("   🌟 EXCELLENT performance (R² > 0.8)")
    elif test_r2 > 0.6:
        print("   ✅ GOOD performance (R² > 0.6)")
    elif test_r2 > 0.4:
        print("   ⚠️  FAIR performance (R² > 0.4)")
    else:
        print("   ❌ POOR performance (R² < 0.4)")

    print(banner)

def create_all_visualizations(model, trainer, results, config, save_dir=None):
    """
    Main entry point that produces every standard report and visualization.

    Runs, in order: the printed results summary, the comprehensive static
    figure and the interactive figure. The interactive step is best-effort:
    any exception it raises is reported on stdout and then ignored.

    Args:
        model: Trained model, forwarded to the comprehensive visualization.
        trainer: Trainer object, forwarded to the comprehensive visualization.
        results: Per-split results mapping, forwarded to every step.
        config: Model/training configuration mapping.
        save_dir: Optional output directory forwarded to the plotting steps.

    Returns:
        None.
    """
    print("\n🎨 CREATING ALL VISUALIZATIONS...")

    # 1. Print detailed summary
    print_detailed_results_summary(results, config)

    # 2. Create comprehensive static visualization
    create_comprehensive_visualization(model, trainer, results, config, save_dir)

    # 3. Create interactive visualization (optional; must not abort the run)
    try:
        create_interactive_visualization(results, save_dir)
    except Exception as e:
        print(f"Interactive visualization skipped: {e}")

    print("\n✅ All standard visualizations completed!")


def calculate_threshold_accuracy(y_true, y_pred, threshold=0.01):
    """
    Compute the share of predictions whose relative error is within a tolerance.

    A prediction counts as correct when
    ``|y_true - y_pred| / |y_true| <= threshold`` (default: ±1%).

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values aligned with `y_true`.
        threshold: Maximum allowed relative error, as a fraction.

    Returns:
        Fraction of correct predictions in [0, 1] as a float, or NaN when the
        inputs are empty.
    """
    # Flatten both inputs so that (n, 1) targets compared with (n,) predictions
    # are matched element-wise instead of broadcasting to an (n, n) matrix.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    if y_true.size == 0:
        return float('nan')

    # Use the magnitude of the target: a signed denominator would make the
    # relative error negative for negative targets, so every such sample would
    # pass the `<= threshold` test. Zero targets get a tiny epsilon to avoid
    # division by zero.
    magnitude = np.abs(y_true)
    denominator = np.where(magnitude == 0, 1e-8, magnitude)
    relative_error = np.abs(y_true - y_pred) / denominator
    return float(np.mean(relative_error <= threshold))


# ================================
# 7. MAIN PIPELINE WITH ENHANCED MODEL
# ================================

def run_enhanced_lob_transformer(data_dir, risk_level='auto', save_dir=None):
    """
    Main pipeline for enhanced LOB transformer training with comprehensive dropout.

    Steps, in order:
        1. Load the preprocessed data from `data_dir`.
        2. Build the configuration from the data dimensions and `risk_level`.
        3. Wrap the train/val/test tensors in DataLoaders.
        4. Instantiate `EnhancedLOBTransformer`.
        5. Train with `EnhancedTrainer`, add a ±1% threshold accuracy to each
           split's metrics and draw the core-metrics figure (best-effort).
        6. Save the model checkpoint and, if present, the scaler.
        7. Print the final performance summary.
        8. Generate all visualizations and the uncertainty analysis
           (the latter best-effort).

    Args:
        data_dir: Directory passed to `load_preprocessed_csv_data`.
        risk_level: Value forwarded to `get_enhanced_config`.
        save_dir: Output directory for the checkpoint, scaler and figures.
            When None, a hard-coded Google Drive path is used. The directory
            is created if it does not exist.

    Returns:
        Tuple `(model, trainer, results)`.
    """
    banner = "=" * 80
    print(banner)
    print("ENHANCED LOB TRANSFORMER - COMPREHENSIVE DROPOUT FOR OVERFITTING PREVENTION")
    print(banner)

    # Create save directory
    if save_dir is None:
        save_dir = '/content/drive/MyDrive/SKRIPSI/dataset_setelah_modeling/minMaxScaller/BRPT'
    os.makedirs(save_dir, exist_ok=True)

    # 1. Load data
    print("\n1. Loading data...")
    data = load_preprocessed_csv_data(data_dir, debug=True)

    # 2. Get enhanced configuration
    print("\n2. Enhanced Configuration...")
    data_info = {
        'n_train_samples': len(data['X_train']),
        'n_features': data['n_features'],
        'n_timesteps': data['n_timesteps']
    }

    config = get_enhanced_config(data_info, risk_level)

    print("\nFinal enhanced configuration:")
    for key, value in config.items():
        print(f"  {key}: {value}")

    # 3. Create data loaders
    print("\n3. Creating data loaders...")
    train_dataset = TensorDataset(data['X_train'], data['y_train'])
    val_dataset = TensorDataset(data['X_val'], data['y_val'])
    test_dataset = TensorDataset(data['X_test'], data['y_test'])

    # Only the training loader is shuffled; val/test keep their stored order.
    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False)

    print(f"  Train batches: {len(train_loader)}")
    print(f"  Val batches: {len(val_loader)}")
    print(f"  Test batches: {len(test_loader)}")

    # 4. Create enhanced model
    print("\n4. Creating enhanced model with comprehensive dropout...")
    # Keep the constructor arguments in one mapping so the checkpoint records
    # exactly what the model was built with.
    model_architecture = {
        'input_dim': data['n_features'],
        'd_model': config['d_model'],
        'n_heads': config['n_heads'],
        'n_layers': config['n_layers'],
        'd_ff': config['d_ff'],
        'dropout': config['dropout'],
        'input_dropout': config['input_dropout'],
        'layer_dropout': config['layer_dropout'],
        'output_dropout': config['output_dropout'],
        'max_seq_len': data['n_timesteps']
    }
    model = EnhancedLOBTransformer(**model_architecture)

    total_params = sum(p.numel() for p in model.parameters())
    print(f"  Total parameters: {total_params:,}")

    # 5. Train enhanced model
    print("\n5. Training enhanced model...")
    trainer = EnhancedTrainer(model, train_loader, val_loader, test_loader, config)
    results = trainer.train()

    # Attach the threshold accuracy to every split so later reports can show it.
    for split in ['train', 'val', 'test']:
        y_true = results[split]['targets']
        y_pred = results[split]['predictions']
        acc = calculate_threshold_accuracy(y_true, y_pred, threshold=0.01)
        results[split]['metrics']['ThresholdAccuracy'] = acc

    print("\n🎨 CREATING CORE METRICS VISUALIZATION...")
    try:
        create_core_metrics_visualization(results, trainer, save_dir)
        print("✅ Core metrics visualization completed!")
    except Exception as e:
        # Best-effort: a plotting failure must not lose the trained model.
        print(f"⚠️ Core metrics visualization failed: {e}")

    # 6. Save enhanced model
    print("\n6. Saving enhanced model...")
    model_path = os.path.join(save_dir, 'enhanced_lob_transformer.pth')
    scaler_path = os.path.join(save_dir, 'enhanced_model_scaler.pkl')
    torch.save({
        'model_state_dict': model.state_dict(),
        'config': config,
        'data_info': data_info,
        'model_architecture': model_architecture
    }, model_path)

    if data['scaler'] is not None:
        with open(scaler_path, 'wb') as f:
            pickle.dump(data['scaler'], f)

    print(f"Enhanced model saved to: {model_path}")

    # 7. Final summary
    print("\n" + banner)
    print("ENHANCED MODEL PERFORMANCE SUMMARY")
    print(banner)

    test_metrics = results['test']['metrics']
    print("\nTest Set Performance:")
    print(f"  MSE:  {test_metrics['MSE']:.6f}")
    print(f"  RMSE: {test_metrics['RMSE']:.6f}")
    print(f"  MAE:  {test_metrics['MAE']:.6f}")
    print(f"  R²:   {test_metrics['R2']:.4f}")
    print(f"  Accuracy (±1% error): {test_metrics['ThresholdAccuracy']*100:.2f}%")

    # Enhanced overfitting analysis
    train_r2 = results['train']['metrics']['R2']
    val_r2 = results['val']['metrics']['R2']
    test_r2 = results['test']['metrics']['R2']

    print("\nEnhanced Generalization Analysis:")
    print(f"  Train R²: {train_r2:.4f}")
    print(f"  Val R²:   {val_r2:.4f}")
    print(f"  Test R²:  {test_r2:.4f}")

    overfitting_gap = train_r2 - val_r2
    generalization_gap = val_r2 - test_r2

    print("\nDropout Effectiveness:")
    print(f"  Train-Val Gap: {overfitting_gap:.4f}")
    print(f"  Val-Test Gap:  {generalization_gap:.4f}")

    if overfitting_gap > 0.1:
        print("  ⚠️  High overfitting (consider increasing dropout)")
    elif overfitting_gap > 0.05:
        print("  ⚠️  Moderate overfitting (dropout is helping)")
    else:
        print("  ✅ Excellent regularization (dropout working well)")

    # ================================
    # 8. CREATE COMPREHENSIVE VISUALIZATIONS
    # ================================

    print("\n🎨 GENERATING COMPREHENSIVE VISUALIZATIONS...")

    # Create all visualizations
    create_all_visualizations(model, trainer, results, config, save_dir)

    # Optional: Create uncertainty visualization
    try:
        print("\n📊 Creating uncertainty analysis...")
        # NOTE(review): scaler_path is passed even when no scaler was saved
        # above, so a file left over from an earlier run in the same directory
        # could be picked up - confirm how EnhancedLOBPredictor handles this.
        predictor = EnhancedLOBPredictor(model_path, scaler_path)

        create_uncertainty_visualization(
            predictor,
            data['X_test'].numpy(),
            data['y_test'].numpy(),
            n_samples=20,
            save_dir=save_dir
        )
    except Exception as e:
        print(f"⚠️  Uncertainty visualization skipped: {e}")

    print("\n✅ ALL VISUALIZATIONS COMPLETED!")
    print(f"📁 Results saved to: {save_dir}")

    return model, trainer, results

# ================================
# 8. EXAMPLE USAGE
# ================================

if __name__ == "__main__":
    # Script entry point: train the model on the BRPT min-max-scaled dataset
    # and print the headline test metrics.
    print("LOB Transformer with Comprehensive Dropout & Visualization Loaded!")

    # Example usage
    data_dir = '/content/drive/MyDrive/SKRIPSI/dataset_setelah_preprocessing/minMaxScaller/preprocessed_data_minmax_scaller_BRPT'

    # Train enhanced model with automatic visualization
    model, trainer, results = run_enhanced_lob_transformer(
        data_dir=data_dir,
        risk_level='auto',  # Will automatically select appropriate dropout levels
        save_dir='/content/drive/MyDrive/SKRIPSI/dataset_setelah_modeling/minMaxScaller/BRPT'
    )

    # Display enhanced metrics
    separator = "=" * 50
    print("\n" + separator)
    print("MODEL METRICS SUMMARY")
    print(separator)
    test_metrics = results['test']['metrics']
    print(f"MSE:  {test_metrics['MSE']:.6f}")
    print(f"RMSE: {test_metrics['RMSE']:.6f}")
    print(f"MAE:  {test_metrics['MAE']:.6f}")
    print(f"R²:   {test_metrics['R2']:.4f}")
    print(separator)

    print("📁 All files saved to the specified directory.")

Using device: cpu
LOB Transformer with Comprehensive Dropout & Visualization Loaded!
ENHANCED LOB TRANSFORMER - COMPREHENSIVE DROPOUT FOR OVERFITTING PREVENTION

1. Loading data...
Loading preprocessed data from: /content/drive/MyDrive/SKRIPSI/dataset_setelah_preprocessing/minMaxScaller/preprocessed_data_minmax_scaller_BRPT
Metadata loaded: ['original_data_shape', 'preprocessed_data_shape', 'window_size', 'n_features', 'n_samples_train', 'n_samples_val', 'n_samples_test', 'scaling_method', 'feature_names', 'timestamp_included', 'creation_date']
Scaler loaded successfully
Train data: 745 samples from train_data.csv
Val data: 159 samples from val_data.csv
Test data: 161 samples from test_data.csv
train - Timesteps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
train - Features: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53]
train - Shape will be