# In [5]:
# ต้องมาก่อนการใช้ TensorFlow ทุกอย่าง!
import os
import random
import numpy as np
import tensorflow as tf
from itertools import product

# ===== Stability and reproducibility settings =====
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts,
# so assigning it here presumably only affects child processes -- confirm.
os.environ['PYTHONHASHSEED'] = '42'
# Seed the Python, NumPy and TensorFlow random generators with the same value.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Limit TensorFlow to a single intra-op and a single inter-op thread
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)

from typing import Tuple, List, Dict, Any
import pandas as pd
from scipy.stats import ks_2samp, mannwhitneyu
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, SimpleRNN, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')

# Preprocess data
# The explicit conversion below expects day-first dates (%d/%m/%Y).
# dayfirst=True makes read_csv's own date parsing agree with that format;
# without it an ambiguous value such as 03/04/2020 may be read month-first,
# and the explicit format is never applied to an already-parsed column.
df = pd.read_csv("nvidia_10yr_data.csv", parse_dates=["Date"], dayfirst=True)
df['Date'] = pd.to_datetime(df['Date'], format="%d/%m/%Y")
df = df.sort_values("Date")

# Feature engineering
df['Return'] = df['Close'].pct_change()              # simple close-to-close return
df['Volatility'] = df['Close'].rolling(10).std()     # 10-row rolling std of Close
df['Price_Diff'] = df['High'] - df['Low']            # High minus Low per row
df['Volume_Log'] = np.log1p(df['Volume'])            # log(1 + Volume)
df['LogReturn'] = np.log(df['Close']).diff()         # log return (prediction target)

# Drop the rows left with NaN by the rolling / diff features
df.dropna(inplace=True)

X = df[['Return', 'Volatility', 'Price_Diff', 'Volume_Log']]
y = df['LogReturn']

class SequenceGenerator:
    """
    Turn a feature table and a target series into scaled, fixed-length
    windows for RNN-based models.

    Features and target are each standardised with their own scaler, which
    is kept on the instance so later data can reuse the fitted statistics.
    """
    def __init__(self, sequence_length: int = 30):
        self.sequence_length = sequence_length
        self.scaler_X = StandardScaler()
        self.scaler_y = StandardScaler()

    def create_sequences(self, X: pd.DataFrame, y: pd.Series, fit_scalers: bool = True):
        """
        Scale ``X`` and ``y`` and cut them into sliding windows.

        Each sample is the ``sequence_length`` scaled feature rows that come
        immediately before row ``i``, paired with the scaled target at row
        ``i``.

        Args:
            X: Feature rows in time order.
            y: Target values aligned with ``X``.
            fit_scalers: When True the scalers are (re)fitted on this data;
                when False the previously fitted scalers are applied.

        Returns:
            ``(X_seq, y_seq)`` as NumPy arrays. Both are empty when the
            input has no more than ``sequence_length`` rows.
        """
        target_column = y.values.reshape(-1, 1)
        if fit_scalers:
            features = self.scaler_X.fit_transform(X)
            targets = self.scaler_y.fit_transform(target_column).flatten()
        else:
            features = self.scaler_X.transform(X)
            targets = self.scaler_y.transform(target_column).flatten()

        window = self.sequence_length
        end_positions = range(window, len(features))
        windows = [features[end - window:end] for end in end_positions]
        labels = [targets[end] for end in end_positions]

        return np.array(windows), np.array(labels)

    def inverse_transform_y(self, y_scaled):
        """
        Map scaled target values back to the original target scale.
        """
        as_column = y_scaled.reshape(-1, 1)
        return self.scaler_y.inverse_transform(as_column).flatten()

class RNNRegressor:
    """
    Sequence regressor that can be backed by SimpleRNN, LSTM or GRU layers.

    The network is two stacked recurrent layers (the second with half the
    units), each followed by dropout, then a 25-unit ReLU dense layer and a
    single linear output. Inputs and targets are windowed and scaled by an
    internal SequenceGenerator.
    """
    def __init__(self, model_type: str = 'LSTM', sequence_length: int = 30,
                 units: int = 50, dropout_rate: float = 0.2,
                 learning_rate: float = 0.01, epochs: int = 100,
                 batch_size: int = 32, verbose: int = 0):

        self.model_type = model_type.upper()
        self.sequence_length = sequence_length
        self.units = units
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.model = None
        self.seq_generator = SequenceGenerator(sequence_length)

        # Reject anything other than the three supported layer families
        if self.model_type not in ('RNN', 'LSTM', 'GRU'):
            raise ValueError("model_type must be 'RNN', 'LSTM', or 'GRU'")

    def _get_layer_type(self):
        """
        Return the Keras recurrent layer class matching ``model_type``.
        """
        layer_by_name = {'RNN': SimpleRNN, 'LSTM': LSTM, 'GRU': GRU}
        return layer_by_name.get(self.model_type)

    def _build_model(self, input_shape):
        """
        Build and compile the network for windows of shape ``input_shape``.

        The model is compiled with Adam at ``learning_rate``, MSE loss and
        an MAE metric.
        """
        recurrent = self._get_layer_type()

        stack = [
            recurrent(self.units, return_sequences=True, input_shape=input_shape),
            Dropout(self.dropout_rate),
            recurrent(self.units // 2, return_sequences=False),
            Dropout(self.dropout_rate),
            Dense(25, activation='relu'),
            Dense(1),
        ]
        network = Sequential(stack)

        network.compile(
            optimizer=Adam(learning_rate=self.learning_rate),
            loss='mse',
            metrics=['mae'],
        )

        return network

    def fit(self, X: pd.DataFrame, y: pd.Series):
        """
        Fit the scalers, build a fresh network and train it on ``X``/``y``.

        Training stops early once the training loss has not improved for 10
        epochs, restoring the best weights.

        Raises:
            ValueError: If the data is too short to form a single window.

        Returns:
            self
        """
        windows, targets = self.seq_generator.create_sequences(X, y, fit_scalers=True)

        if len(windows) == 0:
            raise ValueError("Not enough data to create sequences")

        self.model = self._build_model((windows.shape[1], windows.shape[2]))

        stopper = EarlyStopping(
            monitor='loss',
            patience=10,
            restore_best_weights=True,
        )

        self.model.fit(
            windows, targets,
            epochs=self.epochs,
            batch_size=self.batch_size,
            callbacks=[stopper],
            verbose=self.verbose,
        )

        return self

    def predict(self, X: pd.DataFrame):
        """
        Predict the target, in its original scale, for every window in ``X``.

        The first ``sequence_length`` rows of ``X`` only provide history, so
        the result has ``len(X) - sequence_length`` entries, or is an empty
        array when ``X`` is too short.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if self.model is None:
            raise ValueError("Model not fitted yet")

        # The target is unknown at prediction time; a zero series is passed
        # only so the already-fitted scalers can be reused for windowing.
        placeholder_y = pd.Series([0] * len(X))
        windows, _ = self.seq_generator.create_sequences(
            X, placeholder_y, fit_scalers=False
        )

        if len(windows) == 0:
            return np.array([])

        scaled_prediction = self.model.predict(windows, verbose=0)
        return self.seq_generator.inverse_transform_y(scaled_prediction)

class LinearRegressionModel:
    """
    Ordinary linear regression on standardised features and target.
    """
    def __init__(self, fit_intercept: bool = True):
        self.fit_intercept = fit_intercept
        self.scaler_X = StandardScaler()
        self.scaler_y = StandardScaler()
        self.model = LinearRegression(fit_intercept=fit_intercept)
        self.is_fitted = False

    def fit(self, X: pd.DataFrame, y: pd.Series):
        """
        Fit both scalers and the regression on ``X``/``y``.

        Returns:
            self
        """
        scaled_features = self.scaler_X.fit_transform(X)
        scaled_target = self.scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten()

        self.model.fit(scaled_features, scaled_target)
        self.is_fitted = True

        return self

    def predict(self, X: pd.DataFrame):
        """
        Predict the target for each row of ``X`` in the original target scale.

        Raises:
            ValueError: If ``fit`` has not been called.
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted yet")

        scaled_features = self.scaler_X.transform(X)
        scaled_prediction = self.model.predict(scaled_features)

        # Undo the target scaling so callers get values comparable to y
        as_column = scaled_prediction.reshape(-1, 1)
        return self.scaler_y.inverse_transform(as_column).flatten()

class HyperparameterTuner:
    """
    Hyperparameter search for the RNN models.

    Candidate parameter sets are drawn from a grid (all combinations, or a
    random sample of them) and scored by the mean RMSE over TimeSeriesSplit
    folds. Lower is better.

    Attributes set while searching:
        best_params: Parameter dict with the lowest score so far.
        best_score: That score (``inf`` until a combination succeeds).
        tuning_results: One ``{'params', 'score'}`` dict per combination.
    """
    def __init__(self, model_type: str = 'LSTM', n_splits: int = 3,
                 param_grid: Dict[str, List[Any]] = None):
        """
        Args:
            model_type: 'RNN', 'LSTM' or 'GRU' (case-insensitive).
            n_splits: Number of TimeSeriesSplit folds per combination.
            param_grid: Optional mapping of RNNRegressor keyword name to the
                list of values to try. When omitted the built-in grid is used.
        """
        self.model_type = model_type.upper()
        self.n_splits = n_splits
        self.param_grid = param_grid
        self.best_params = {}
        self.best_score = float('inf')
        self.tuning_results = []

    def define_param_grid(self):
        """
        Return the parameter grid to search.

        This is a copy of the grid given to the constructor, or the built-in
        grid when none was given.
        """
        if self.param_grid is not None:
            return {name: list(values) for name, values in self.param_grid.items()}

        return {
            'sequence_length': [20, 30, 45],
            'learning_rate': [0.001, 0.01, 0.05],
            'batch_size': [16, 32, 64],
            'units': [32, 50, 64],
            'dropout_rate': [0.2, 0.3, 0.4],
            'epochs': [30, 50, 70]
        }

    def cross_validate_params(self, X: pd.DataFrame, y: pd.Series, params: dict):
        """
        Score one parameter set with time-series cross-validation.

        Args:
            X: Feature rows in time order.
            y: Target aligned with ``X``.
            params: Keyword arguments for RNNRegressor (``verbose`` defaults
                to 0 unless given).

        Returns:
            Mean RMSE over the folds that produced predictions, or ``inf``
            when no fold did.
        """
        tscv = TimeSeriesSplit(n_splits=self.n_splits)
        model_kwargs = {'verbose': 0, **params}
        scores = []

        for fold, (train_idx, test_idx) in enumerate(tscv.split(X), 1):
            try:
                X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
                y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

                model = RNNRegressor(model_type=self.model_type, **model_kwargs)

                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                if len(y_pred) > 0:
                    # The first sequence_length test rows only provide
                    # history, so predictions start after them.
                    y_test_aligned = y_test.iloc[model.seq_generator.sequence_length:]
                    y_test_aligned = y_test_aligned.iloc[:len(y_pred)]

                    rmse = np.sqrt(mean_squared_error(y_test_aligned, y_pred))
                    scores.append(rmse)

            except Exception as e:
                # Best effort: one failing fold must not abort the search,
                # but it is reported so an all-inf result can be diagnosed.
                print(f"[Hyperparameter Tuning] Fold {fold} failed for {params}: {e}")
                continue

        return np.mean(scores) if scores else float('inf')

    def grid_search(self, X: pd.DataFrame, y: pd.Series, max_combinations: int = 50):
        """
        Evaluate parameter combinations and remember the best one.

        When the grid has more than ``max_combinations`` combinations, that
        many are drawn at random (using the global ``random`` state).

        Returns:
            ``(best_params, best_score)``.
        """
        param_grid = self.define_param_grid()

        # Every possible combination of the grid values
        param_names = list(param_grid.keys())
        all_combinations = list(product(*param_grid.values()))

        # Cap the number of combinations to keep the run time bounded
        if len(all_combinations) > max_combinations:
            selected_combinations = random.sample(all_combinations, max_combinations)
        else:
            selected_combinations = all_combinations

        print(f"\n[Hyperparameter Tuning] Testing {len(selected_combinations)} parameter combinations for {self.model_type}...")

        for i, combination in enumerate(selected_combinations):
            params = dict(zip(param_names, combination))

            print(f"\n[{i+1}/{len(selected_combinations)}] Testing: {params}")

            score = self.cross_validate_params(X, y, params)

            self.tuning_results.append({
                'params': params.copy(),
                'score': score
            })

            if score < self.best_score:
                self.best_score = score
                self.best_params = params.copy()
                print(f"New best score: {score:.4f}")

        print(f"\n[Hyperparameter Tuning] Best score: {self.best_score:.4f}")
        print(f"Best parameters: {self.best_params}")

        return self.best_params, self.best_score

    def get_tuning_summary(self):
        """
        Return a printable report of the search: the best score and
        parameters, followed by the five best combinations.
        """
        if not self.tuning_results:
            return "No tuning results available"

        # Best (lowest) scores first
        sorted_results = sorted(self.tuning_results, key=lambda x: x['score'])

        parts = [
            f"\n{'='*60}\n",
            f"HYPERPARAMETER TUNING SUMMARY - {self.model_type}\n",
            f"{'='*60}\n",
            f"Total combinations tested: {len(self.tuning_results)}\n",
            f"Best score (RMSE): {self.best_score:.4f}\n",
            "Best parameters:\n",
        ]
        parts.extend(f"  {param}: {value}\n" for param, value in self.best_params.items())

        parts.append("\nTop 5 parameter combinations:\n")
        parts.append(f"{'-'*60}\n")

        for rank, result in enumerate(sorted_results[:5], 1):
            parts.append(f"{rank}. Score: {result['score']:.4f}\n")
            parts.extend(f"   {param}: {value}\n" for param, value in result['params'].items())
            parts.append("\n")

        return "".join(parts)

class DriftPointDetector:
    """
    Detect concept-drift points in time-series features.

    Two adjacent windows are compared at regular steps. A position becomes a
    drift candidate when, for at least one column, enough of three checks
    agree (Kolmogorov-Smirnov, Mann-Whitney U, Cohen's d). Candidates are
    then filtered to drop evenly spaced, pattern-like detections and points
    that are too close together.
    """
    def __init__(self, window_size: int = 120, threshold: float = 0.001,
                 step_size: int = 30, min_effect_size: float = 0.3,
                 stability_window: int = 60, confirmation_tests: int = 2):
        """
        Args:
            window_size: Rows in each of the two compared windows.
            threshold: p-value below which a statistical test counts as passed.
            step_size: Distance between consecutive tested positions.
            min_effect_size: Cohen's d above which the effect-size check passes.
            stability_window: Rows examined for stability ahead of the
                reference window.
            confirmation_tests: Checks (out of three) a column must pass.
        """
        self.window_size = window_size
        self.threshold = threshold
        self.step_size = step_size
        self.min_effect_size = min_effect_size
        self.stability_window = stability_window
        self.confirmation_tests = confirmation_tests
        self.drift_points_: List[int] = []

    def _calculate_effect_size(self, window1: pd.Series, window2: pd.Series) -> float:
        """Return Cohen's d between two samples (0 when the pooled std is 0)."""
        mean1, mean2 = window1.mean(), window2.mean()
        std1, std2 = window1.std(), window2.std()

        pooled_std = np.sqrt(((len(window1) - 1) * std1**2 + (len(window2) - 1) * std2**2) /
                           (len(window1) + len(window2) - 2))

        if pooled_std == 0:
            return 0

        return abs(mean1 - mean2) / pooled_std

    def _test_multiple_statistics(self, window1: pd.DataFrame, window2: pd.DataFrame) -> Tuple[int, float]:
        """
        Run the three checks on every column of the two windows.

        Returns:
            ``(passed_columns, min_p_value)``: the number of columns for
            which at least ``confirmation_tests`` checks passed, and the
            smallest p-value seen across all columns and tests.
        """
        passed_tests = 0
        min_p_value = 1.0

        for col in window1.columns:
            col_tests = 0
            col_p_values = []

            # Test 1: Kolmogorov-Smirnov. A test that cannot be computed for
            # this column is skipped; interrupts are no longer swallowed.
            try:
                _, p_value = ks_2samp(window1[col], window2[col])
                col_p_values.append(p_value)
                if p_value < self.threshold:
                    col_tests += 1
            except Exception:
                pass

            # Test 2: Mann-Whitney U
            try:
                _, p_value = mannwhitneyu(window1[col], window2[col], alternative='two-sided')
                col_p_values.append(p_value)
                if p_value < self.threshold:
                    col_tests += 1
            except Exception:
                pass

            # Test 3: effect size
            effect_size = self._calculate_effect_size(window1[col], window2[col])
            if effect_size > self.min_effect_size:
                col_tests += 1

            if col_p_values:
                min_p_value = min(min_p_value, min(col_p_values))

            if col_tests >= self.confirmation_tests:
                passed_tests += 1

        return passed_tests, min_p_value

    def _check_stability_before_drift(self, X: pd.DataFrame, position: int) -> bool:
        """
        Check whether the data ahead of the reference window was stable.

        The ``stability_window`` rows that end where the reference window
        starts are split in half and the halves compared per column with a
        KS test at ten times ``threshold``. Positions too early to have such
        a span are treated as stable.
        """
        if position < self.stability_window + self.window_size:
            return True

        stable_start = position - self.stability_window - self.window_size
        stable_end = position - self.window_size
        stable_window = X.iloc[stable_start:stable_end]

        mid_point = len(stable_window) // 2
        stable_part1 = stable_window.iloc[:mid_point]
        stable_part2 = stable_window.iloc[mid_point:]

        if len(stable_part1) == 0 or len(stable_part2) == 0:
            return True

        for col in X.columns:
            try:
                _, p_value = ks_2samp(stable_part1[col], stable_part2[col])
            except Exception:
                # An uncomputable test gives no evidence of instability
                continue
            if p_value < self.threshold * 10:
                return False

        return True

    def _remove_pattern_drifts(self, drift_candidates: List[Tuple[int, float]]) -> List[int]:
        """
        Filter candidates that look like a repeating pattern.

        With fewer than three candidates all positions are returned as
        given. Otherwise, when the gaps between candidates are nearly
        constant (coefficient of variation below 0.3), only the third with
        the smallest p-values is kept. Remaining positions must be at least
        ``2 * window_size`` apart.

        Args:
            drift_candidates: ``(position, p_value)`` pairs. Not modified.

        Returns:
            The retained positions.
        """
        if len(drift_candidates) < 3:
            return [pos for pos, _ in drift_candidates]

        # Work on sorted copies so the caller's list keeps its order
        by_position = sorted(drift_candidates, key=lambda c: c[0])
        intervals = [later[0] - earlier[0]
                     for earlier, later in zip(by_position, by_position[1:])]

        if np.std(intervals) / np.mean(intervals) < 0.3:
            by_significance = sorted(by_position, key=lambda c: c[1])
            keep_count = max(1, len(by_position) // 3)
            filtered_drifts = [pos for pos, _ in by_significance[:keep_count]]
        else:
            filtered_drifts = [pos for pos, _ in by_position]

        final_drifts = []
        min_distance = self.window_size * 2

        for pos in sorted(filtered_drifts):
            if not final_drifts or pos - final_drifts[-1] >= min_distance:
                final_drifts.append(pos)

        return final_drifts

    def detect(self, X: pd.DataFrame) -> List[int]:
        """
        Scan ``X`` and return the detected drift positions (row offsets).

        The result is also stored in ``drift_points_``.
        """
        self.drift_points_ = []
        n = len(X)
        drift_candidates = []

        for i in range(self.window_size, n - self.window_size, self.step_size):
            if not self._check_stability_before_drift(X, i):
                continue

            window1 = X.iloc[i - self.window_size:i]
            window2 = X.iloc[i:i + self.window_size]

            passed_tests, min_p_value = self._test_multiple_statistics(window1, window2)

            if passed_tests >= 1:
                drift_candidates.append((i, min_p_value))

        self.drift_points_ = self._remove_pattern_drifts(drift_candidates)

        return self.drift_points_

class AdaptiveFoldGenerator:
    """
    Build train/test folds from the segments between detected drift points.

    Each segment between consecutive drift points (plus the start and end of
    the data) becomes one fold: its leading part is the training set and its
    trailing ``test_ratio`` share is the test set.
    """
    def __init__(self, min_fold_size: int = 120, test_ratio: float = 0.2,
                 min_train_size: int = 100, min_test_size: int = 50):
        """
        Args:
            min_fold_size: Segments shorter than this are skipped.
            test_ratio: Share of each segment reserved for testing.
            min_train_size: A fold is kept only when its training part has
                strictly more rows than this.
            min_test_size: A fold is kept only when its test part has
                strictly more rows than this.
        """
        self.min_fold_size = min_fold_size
        self.test_ratio = test_ratio
        self.min_train_size = min_train_size
        self.min_test_size = min_test_size

    def split(self, X: pd.DataFrame, drift_points: List[int]) -> List[Tuple[np.ndarray, np.ndarray]]:
        """
        Return ``(train_idx, test_idx)`` positional index arrays per fold.

        Args:
            X: The data; only its length is used.
            drift_points: Row positions of the drift points. Any iterable of
                integers is accepted (list, tuple, NumPy array). Points are
                sorted and de-duplicated, and points outside ``(0, len(X))``
                are ignored.
        """
        n = len(X)
        # Normalise the boundaries. List concatenation with a NumPy array
        # would broadcast-add instead of extending, and unsorted points
        # would produce negative-length segments.
        interior = sorted({int(p) for p in drift_points if 0 < p < n})
        points = [0, *interior, n]

        folds = []
        for start, end in zip(points, points[1:]):
            fold_length = end - start

            if fold_length < self.min_fold_size:
                continue

            split = int(start + (1 - self.test_ratio) * fold_length)
            train_idx = np.arange(start, split)
            test_idx = np.arange(split, end)

            if len(train_idx) > self.min_train_size and len(test_idx) > self.min_test_size:
                folds.append((train_idx, test_idx))

        return folds

class DriftAdaptiveTimeSeriesCV:
    """
    Cross-validation over folds split at drift points, for the RNN models
    and Linear Regression.
    """
    def __init__(self, model_type: str = 'LSTM', model_params: dict = None):
        """
        Args:
            model_type: 'RNN', 'LSTM', 'GRU' or 'LINEAR' (case-insensitive).
            model_params: Keyword arguments for the model constructor. When
                omitted (or empty), defaults matching ``model_type`` are used.
        """
        self.model_type = model_type.upper()
        self.model_params = model_params or self._default_params()

    def _default_params(self) -> dict:
        """Return default constructor arguments suited to ``model_type``."""
        # The RNN defaults are not valid LinearRegressionModel arguments,
        # so the linear model gets its own default.
        if self.model_type == 'LINEAR':
            return {'fit_intercept': True}
        return {
            'sequence_length': 30,
            'units': 50,
            'dropout_rate': 0.3,
            'learning_rate': 0.001,
            'epochs': 50,
            'batch_size': 32,
            'verbose': 0
        }

    def _build_model(self):
        """Create an unfitted model for ``model_type``; ValueError if unknown."""
        if self.model_type in ['RNN', 'LSTM', 'GRU']:
            return RNNRegressor(model_type=self.model_type, **self.model_params)
        if self.model_type == 'LINEAR':
            return LinearRegressionModel(**self.model_params)
        raise ValueError(f"Unknown model type: {self.model_type}")

    def run(self, X: pd.DataFrame, y: pd.Series, drift_points: List[int]) -> Tuple[List[float], List[float]]:
        """
        Train and evaluate one model per drift-delimited fold.

        A fold whose training or prediction raises is reported and skipped.

        Returns:
            ``(rmse_per_fold, mae_per_fold)``; both empty when no valid fold
            could be generated.

        Raises:
            ValueError: If ``model_type`` is not supported (raised when the
                first fold's model is built).
        """
        fold_gen = AdaptiveFoldGenerator()
        metrics_rmse, metrics_mae = [], []

        folds = fold_gen.split(X, drift_points)
        if not folds:
            print("Warning: No valid folds generated by AdaptiveFoldGenerator!")
            return [], []

        is_sequence_model = self.model_type in ['RNN', 'LSTM', 'GRU']

        for i, (train_idx, test_idx) in enumerate(folds):
            print(f"\n[Adaptive Fold {i+1}] Training {self.model_type}...")

            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

            model = self._build_model()

            try:
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                if is_sequence_model:
                    if len(y_pred) == 0:
                        print(f"[Adaptive Fold {i+1}] No predictions generated (insufficient data)")
                        continue
                    # The first sequence_length test rows only provide
                    # history, so predictions start after them.
                    y_true = y_test.iloc[model.seq_generator.sequence_length:]
                    y_true = y_true.iloc[:len(y_pred)]
                else:
                    y_true = y_test

                rmse = np.sqrt(mean_squared_error(y_true, y_pred))
                mae = mean_absolute_error(y_true, y_pred)

                print(f"[Adaptive Fold {i+1}] RMSE={rmse:.3f}, MAE={mae:.3f}")

                metrics_rmse.append(rmse)
                metrics_mae.append(mae)

            except Exception as e:
                print(f"[Adaptive Fold {i+1}] Error: {e}")
                continue

        return metrics_rmse, metrics_mae

class BaselineTimeSeriesCV:
    """
    Plain TimeSeriesSplit cross-validation for the RNN models and Linear
    Regression.
    """
    def __init__(self, model_type: str = 'LSTM', model_params: dict = None, n_splits: int = 5):
        """
        Args:
            model_type: 'RNN', 'LSTM', 'GRU' or 'LINEAR' (case-insensitive).
            model_params: Keyword arguments for the model constructor. When
                omitted (or empty), defaults matching ``model_type`` are used.
            n_splits: Number of TimeSeriesSplit folds.
        """
        self.model_type = model_type.upper()
        self.model_params = model_params or self._default_params()
        self.n_splits = n_splits

    def _default_params(self) -> dict:
        """Return default constructor arguments suited to ``model_type``."""
        # The RNN defaults are not valid LinearRegressionModel arguments,
        # so the linear model gets its own default.
        if self.model_type == 'LINEAR':
            return {'fit_intercept': True}
        return {
            'sequence_length': 30,
            'units': 50,
            'dropout_rate': 0.3,
            'learning_rate': 0.001,
            'epochs': 50,
            'batch_size': 32,
            'verbose': 0
        }

    def _build_model(self):
        """Create an unfitted model for ``model_type``; ValueError if unknown."""
        if self.model_type in ['RNN', 'LSTM', 'GRU']:
            return RNNRegressor(model_type=self.model_type, **self.model_params)
        if self.model_type == 'LINEAR':
            return LinearRegressionModel(**self.model_params)
        raise ValueError(f"Unknown model type: {self.model_type}")

    def run(self, X: pd.DataFrame, y: pd.Series) -> Tuple[List[float], List[float]]:
        """
        Train and evaluate one model per TimeSeriesSplit fold.

        A fold whose training or prediction raises is reported and skipped.

        Returns:
            ``(rmse_per_fold, mae_per_fold)``.

        Raises:
            ValueError: If ``model_type`` is not supported (raised when the
                first fold's model is built).
        """
        tscv = TimeSeriesSplit(n_splits=self.n_splits)
        metrics_rmse, metrics_mae = [], []

        is_sequence_model = self.model_type in ['RNN', 'LSTM', 'GRU']

        for i, (train_idx, test_idx) in enumerate(tscv.split(X)):
            print(f"\n[Baseline Fold {i+1}] Training {self.model_type}...")

            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

            model = self._build_model()

            try:
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                if is_sequence_model:
                    if len(y_pred) == 0:
                        print(f"[Baseline Fold {i+1}] No predictions generated (insufficient data)")
                        continue
                    # The first sequence_length test rows only provide
                    # history, so predictions start after them.
                    y_true = y_test.iloc[model.seq_generator.sequence_length:]
                    y_true = y_true.iloc[:len(y_pred)]
                else:
                    y_true = y_test

                rmse = np.sqrt(mean_squared_error(y_true, y_pred))
                mae = mean_absolute_error(y_true, y_pred)

                print(f"[Baseline Fold {i+1}] RMSE={rmse:.3f}, MAE={mae:.3f}")

                metrics_rmse.append(rmse)
                metrics_mae.append(mae)

            except Exception as e:
                print(f"[Baseline Fold {i+1}] Error: {e}")
                continue

        return metrics_rmse, metrics_mae

class EnhancedModelComparison:
    """
    เปรียบเทียบประสิทธิภาพของ RNN, LSTM, GRU และ Linear Regression พร้อม hyperparameter tuning
    """
    def __init__(self, enable_tuning: bool = True, tuning_max_combinations: int = 30):
        self.enable_tuning = enable_tuning
        self.tuning_max_combinations = tuning_max_combinations
        self.models = ['RNN', 'LSTM', 'GRU', 'LINEAR']
        self.optimized_params = {}
        self.tuning_summaries = {}
        
    def tune_hyperparameters(self, X: pd.DataFrame, y: pd.Series):
        """
        ทำ hyperparameter tuning สำหรับ RNN models ทั้งหมด
        """
        print("\n" + "="*80)
        print("STARTING HYPERPARAMETER TUNING")
        print("="*80)
        
        for model_type in ['RNN', 'LSTM', 'GRU']:
            print(f"\n[TUNING] Starting hyperparameter tuning for {model_type}...")
            
            tuner = HyperparameterTuner(model_type=model_type, n_splits=3)
            best_params, best_score = tuner.grid_search(X, y, max_combinations=self.tuning_max_combinations)
            
            self.optimized_params[model_type] = best_params
            self.tuning_summaries[model_type] = tuner.get_tuning_summary()
            
            print(f"[TUNING] Completed tuning for {model_type}")
        
        # Linear Regression ไม่ต้อง tune
        self.optimized_params['LINEAR'] = {'fit_intercept': True}
        
        print("\n" + "="*80)
        print("HYPERPARAMETER TUNING COMPLETED")
        print("="*80)
        
    def compare_models(self, X: pd.DataFrame, y: pd.Series, drift_points: List[int]):
        """
        เปรียบเทียบโมเดลทั้งหมดด้วย adaptive CV พร้อมการใช้ optimized parameters
        """
        # ทำ hyperparameter tuning ก่อนถ้าเปิดใช้งาน
        if self.enable_tuning:
            self.tune_hyperparameters(X, y)
        else:
            # ใช้ default parameters
            default_rnn_params = {
                'sequence_length': 30,
                'units': 50,
                'dropout_rate': 0.3,
                'learning_rate': 0.001,
                'epochs': 50,
                'batch_size': 32,
                'verbose': 0
            }
            for model_type in ['RNN', 'LSTM', 'GRU']:
                self.optimized_params[model_type] = default_rnn_params
            self.optimized_params['LINEAR'] = {'fit_intercept': True}
        
        results = {}
        
        for model_type in self.models:
            print(f"\n{'='*50}")
            print(f"Testing {model_type} Model with Optimized Parameters")
            print(f"{'='*50}")
            
            params = self.optimized_params[model_type]
            print(f"Using parameters: {params}")
            
            # Adaptive CV
            drift_cv = DriftAdaptiveTimeSeriesCV(model_type, params)
            drift_rmse, drift_mae = drift_cv.run(X, y, drift_points)
            
            # Baseline CV
            baseline_cv = BaselineTimeSeriesCV(model_type, params, n_splits=5)
            base_rmse, base_mae = baseline_cv.run(X, y)
            
            results[model_type] = {
                'adaptive_rmse': drift_rmse,
                'adaptive_mae': drift_mae,
                'baseline_rmse': base_rmse,
                'baseline_mae': base_mae,
                'optimized_params': params
            }
        
        return results
    
    def print_summary(self, results: dict):
        """
        พิมพ์สรุปผลลัพธ์การเปรียบเทียบพร้อมแสดง optimized parameters
        """
        print("\n" + "="*80)
        print("MODEL COMPARISON SUMMARY WITH OPTIMIZED PARAMETERS")
        print("="*80)
        
        # แสดงผลการ tuning ก่อน
        if self.enable_tuning:
            print("\n" + "="*60)
            print("HYPERPARAMETER TUNING RESULTS")
            print("="*60)
            
            for model_type in ['RNN', 'LSTM', 'GRU']:
                if model_type in self.tuning_summaries:
                    print(self.tuning_summaries[model_type])
        
        # แสดงผลการเปรียบเทียบโมเดล
        print("\n" + "="*60)
        print("MODEL PERFORMANCE COMPARISON")
        print("="*60)
        
        performance_summary = []
        
        for model_type in self.models:
            if model_type in results:
                print(f"\n{model_type} Results:")
                print("-" * 40)
                
                # แสดง optimized parameters
                print(f"Optimized Parameters: {results[model_type]['optimized_params']}")
                
                # Adaptive results
                if results[model_type]['adaptive_rmse'] and results[model_type]['adaptive_mae']:
                    avg_rmse = np.mean(results[model_type]['adaptive_rmse'])
                    avg_mae = np.mean(results[model_type]['adaptive_mae'])
                    print(f"Adaptive CV - Avg RMSE: {avg_rmse:.4f}, Avg MAE: {avg_mae:.4f}")
                    
                    performance_summary.append({
                        'model': model_type,
                        'adaptive_rmse': avg_rmse,
                        'adaptive_mae': avg_mae,
                        'params': results[model_type]['optimized_params']
                    })
                else:
                    print("Adaptive CV - No valid results")
                
                # Baseline results
                if results[model_type]['baseline_rmse'] and results[model_type]['baseline_mae']:
                    avg_rmse = np.mean(results[model_type]['baseline_rmse'])
                    avg_mae = np.mean(results[model_type]['baseline_mae'])
                    print(f"Baseline CV - Avg RMSE: {avg_rmse:.4f}, Avg MAE: {avg_mae:.4f}")
                else:
                    print("Baseline CV - No valid results")
        
        # แสดง ranking ของโมเดล
        if performance_summary:
            print("\n" + "="*60)
            print("MODEL RANKING (Based on Adaptive CV RMSE)")
            print("="*60)
            
            sorted_models = sorted(performance_summary, key=lambda x: x['adaptive_rmse'])
            
            for i, model_info in enumerate(sorted_models, 1):
                print(f"\n{i}. {model_info['model']}")
                print(f"   RMSE: {model_info['adaptive_rmse']:.4f}")
                print(f"   MAE: {model_info['adaptive_mae']:.4f}")
                print(f"   Best Parameters: {model_info['params']}")
        
        # หาโมเดลที่ดีที่สุด
        best_model_info = self._find_best_model_with_params(results)
        if best_model_info:
            print(f"\n" + "="*60)
            print(f"🏆 BEST MODEL: {best_model_info['model']}")
            print(f"   RMSE: {best_model_info['score']:.4f}")
            print(f"   Optimal Parameters: {best_model_info['params']}")
            print("="*60)
    
    def _find_best_model_with_params(self, results: dict):
        """
        Pick the model with the lowest mean RMSE and report its parameters.

        For every entry of ``self.models`` that is present in ``results`` the
        mean of ``'adaptive_rmse'`` is used as the score; when that list is
        empty the mean of ``'baseline_rmse'`` is used instead. Models with
        neither are skipped.

        Returns a dict with the keys ``'model'``, ``'score'`` and
        ``'params'``, or ``None`` when no model has any RMSE values.
        """
        winner = None
        lowest = float('inf')

        for name in self.models:
            if name not in results:
                continue

            entry = results[name]
            # Adaptive scores take priority; baseline is only a fallback.
            rmse_values = entry['adaptive_rmse'] or entry['baseline_rmse']
            if not rmse_values:
                continue

            mean_rmse = np.mean(rmse_values)
            if mean_rmse < lowest:
                lowest = mean_rmse
                winner = {
                    'model': name,
                    'score': mean_rmse,
                    'params': entry['optimized_params'],
                }

        return winner
    
    def get_best_params_for_model(self, model_type: str):
        """
        Look up the stored best parameters for one model.

        The name is upper-cased before the lookup in
        ``self.optimized_params``; ``None`` is returned when there is no
        entry for it.
        """
        return self.optimized_params.get(model_type.upper())

# Example run: drift detection followed by a tuned model comparison
if __name__ == "__main__":
    wide_rule = "=" * 80

    # 1) Locate drift points in the feature matrix
    print("Starting Drift Detection...")
    detector = DriftPointDetector(
        window_size=120,
        threshold=0.001,
        step_size=30,
        min_effect_size=0.3,
        stability_window=60,
        confirmation_tests=2,
    )
    drift_points = detector.detect(X)
    print(f"Detected {len(drift_points)} drift points")

    # 2) Compare every model, tuning hyperparameters first
    print("\nStarting Enhanced Model Comparison with Hyperparameter Tuning...")

    # Tuning takes longer; the combination cap bounds the run time.
    # Construct with enable_tuning=False instead to use the default
    # parameters (faster).
    enhanced_comparator = EnhancedModelComparison(
        enable_tuning=True,
        tuning_max_combinations=25,
    )

    results = enhanced_comparator.compare_models(X, y, drift_points)
    enhanced_comparator.print_summary(results)

    # Report the best parameters recorded for each model
    print("\n" + wide_rule)
    print("BEST PARAMETERS FOR EACH MODEL")
    print(wide_rule)

    for model_type in ('RNN', 'LSTM', 'GRU', 'LINEAR'):
        best_params = enhanced_comparator.get_best_params_for_model(model_type)
        if not best_params:
            continue
        print(f"\n{model_type} Best Parameters:")
        for param, value in best_params.items():
            print(f"  {param}: {value}")

    # Turn drift-point row positions into day/month/year date strings
    drift_rows = df.iloc[drift_points]
    drift_dates_formatted = drift_rows['Date'].dt.strftime('%d/%m/%Y').tolist()

    print("\n" + wide_rule)
    print("DRIFT DETECTION RESULTS")
    print(wide_rule)
    print(f"Detected Drift Points: {len(drift_points)}")
    print("Drift Dates:")
    for i, date in enumerate(drift_dates_formatted, 1):
        print(f"  {i}. {date}")
    print(wide_rule)

Starting Drift Detection...
Detected 8 drift points

Starting Enhanced Model Comparison with Hyperparameter Tuning...

STARTING HYPERPARAMETER TUNING

[TUNING] Starting hyperparameter tuning for RNN...

[Hyperparameter Tuning] Testing 25 parameter combinations for RNN...

[1/25] Testing: {'sequence_length': 45, 'learning_rate': 0.05, 'batch_size': 16, 'units': 32, 'dropout_rate': 0.4, 'epochs': 30}
New best score: 0.0312

[2/25] Testing: {'sequence_length': 20, 'learning_rate': 0.01, 'batch_size': 32, 'units': 32, 'dropout_rate': 0.4, 'epochs': 30}

[3/25] Testing: {'sequence_length': 20, 'learning_rate': 0.001, 'batch_size': 16, 'units': 64, 'dropout_rate': 0.4, 'epochs': 50}

[4/25] Testing: {'sequence_length': 30, 'learning_rate': 0.001, 'batch_size': 32, 'units': 50, 'dropout_rate': 0.2, 'epochs': 70}

[5/25] Testing: {'sequence_length': 30, 'learning_rate': 0.001, 'batch_size': 16, 'units': 32, 'dropout_rate': 0.4, 'epochs': 50}

[6/25] Testing: {'sequence_length': 20, 'learning_r

In [6]:
# ต้องมาก่อนการใช้ TensorFlow ทุกอย่าง!
import os
import random
import numpy as np
import tensorflow as tf
from itertools import product

# ===== Stability and reproducibility settings =====
# Seed Python's, NumPy's and TensorFlow's random number generators with 42.
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts,
# so assigning it here presumably only affects child processes — confirm.
os.environ['PYTHONHASHSEED'] = '42'
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Restrict TensorFlow to one intra-op and one inter-op thread.
# NOTE(review): TensorFlow may reject these calls once its runtime has been
# initialized (e.g. when this cell is re-run in the same kernel) — confirm.
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)

from typing import Tuple, List, Dict, Any
import pandas as pd
from scipy.stats import ks_2samp, mannwhitneyu
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, SimpleRNN, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')

# Preprocess data
# The Date column is written day-first (dd/mm/YYYY, see the format below), so
# read_csv is told to parse it that way. Without dayfirst=True an ambiguous
# value such as 03/04/2020 can be read month-first, and the to_datetime call
# below cannot repair it because the column is already datetime by then.
df = pd.read_csv("nvidia_10yr_data.csv", parse_dates=["Date"], dayfirst=True)
# Strict parse in case read_csv left the column as strings; an already
# parsed datetime column passes through unchanged.
df['Date'] = pd.to_datetime(df['Date'], format="%d/%m/%Y")
df = df.sort_values("Date")

# Feature engineering
df['Return'] = df['Close'].pct_change()            # row-over-row fractional change of Close
df['Volatility'] = df['Close'].rolling(10).std()   # 10-row rolling standard deviation of Close
df['Price_Diff'] = df['High'] - df['Low']          # High minus Low of the same row
df['Volume_Log'] = np.log1p(df['Volume'])          # log(1 + Volume)

# Drop rows containing NaN (e.g. the warm-up rows of pct_change / rolling)
df.dropna(inplace=True)

# Feature matrix and target used by the rest of the notebook cell
X = df[['Return', 'Volatility', 'Price_Diff', 'Volume_Log']]
y = df['Close']

class SequenceGenerator:
    """
    Turn tabular features and a target into scaled sliding windows for
    RNN-based models.
    """
    def __init__(self, sequence_length: int = 30):
        self.sequence_length = sequence_length
        self.scaler_X = StandardScaler()
        self.scaler_y = StandardScaler()

    def create_sequences(self, X: pd.DataFrame, y: pd.Series, fit_scalers: bool = True):
        """
        Scale ``X`` and ``y`` and cut them into fixed-length windows.

        With ``fit_scalers=True`` both scalers are (re)fitted on the given
        data; otherwise the already fitted scalers are applied as they are.
        Each window holds the ``sequence_length`` scaled feature rows that
        precede a row, and is paired with that row's scaled target.

        Returns a ``(windows, targets)`` pair of numpy arrays.
        """
        if fit_scalers:
            scale_X = self.scaler_X.fit_transform
            scale_y = self.scaler_y.fit_transform
        else:
            scale_X = self.scaler_X.transform
            scale_y = self.scaler_y.transform

        features = scale_X(X)
        targets = scale_y(y.values.reshape(-1, 1)).flatten()

        # One window per row that has a full history of `span` rows before it
        span = self.sequence_length
        ends = range(span, len(features))
        windows = [features[end - span:end] for end in ends]
        labels = [targets[end] for end in ends]

        return np.array(windows), np.array(labels)

    def inverse_transform_y(self, y_scaled):
        """
        Map scaled target values back to the original scale (1-D result).
        """
        column = y_scaled.reshape(-1, 1)
        restored = self.scaler_y.inverse_transform(column)
        return restored.flatten()

class RNNRegressor:
    """
    Two-layer recurrent regressor that can be built from SimpleRNN, LSTM
    or GRU cells, selected by ``model_type``.
    """
    def __init__(self, model_type: str = 'LSTM', sequence_length: int = 30,
                 units: int = 50, dropout_rate: float = 0.2,
                 learning_rate: float = 0.01, epochs: int = 100,
                 batch_size: int = 32, verbose: int = 0):
        # Architecture and training settings
        self.model_type = model_type.upper()
        self.sequence_length = sequence_length
        self.units = units
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose

        # The network is created lazily in fit()
        self.model = None
        self.seq_generator = SequenceGenerator(sequence_length)

        # Reject anything other than the three supported recurrent cells
        if self.model_type not in ('RNN', 'LSTM', 'GRU'):
            raise ValueError("model_type must be 'RNN', 'LSTM', or 'GRU'")

    def _get_layer_type(self):
        """
        Return the Keras layer class that matches ``self.model_type``.
        """
        layer_by_name = {'RNN': SimpleRNN, 'LSTM': LSTM, 'GRU': GRU}
        return layer_by_name.get(self.model_type)

    def _build_model(self, input_shape):
        """
        Build and compile the network for windows of shape ``input_shape``.

        The stack is: recurrent layer (``units``) -> dropout -> recurrent
        layer (``units // 2``) -> dropout -> Dense(25, relu) -> Dense(1),
        compiled with Adam, MSE loss and an MAE metric.
        """
        recurrent = self._get_layer_type()
        half_units = self.units // 2

        network = Sequential([
            recurrent(self.units, return_sequences=True, input_shape=input_shape),
            Dropout(self.dropout_rate),
            recurrent(half_units, return_sequences=False),
            Dropout(self.dropout_rate),
            Dense(25, activation='relu'),
            Dense(1)
        ])
        network.compile(
            optimizer=Adam(learning_rate=self.learning_rate),
            loss='mse',
            metrics=['mae'],
        )

        return network

    def fit(self, X: pd.DataFrame, y: pd.Series):
        """
        Fit the scalers, build a fresh network and train it on ``X``/``y``.

        Raises ``ValueError`` when the data yields no windows. Returns
        ``self``.
        """
        windows, targets = self.seq_generator.create_sequences(X, y, fit_scalers=True)
        if len(windows) == 0:
            raise ValueError("Not enough data to create sequences")

        timesteps, n_features = windows.shape[1], windows.shape[2]
        self.model = self._build_model((timesteps, n_features))

        # Stop once the training loss has not improved for 10 epochs
        stopper = EarlyStopping(
            monitor='loss',
            patience=10,
            restore_best_weights=True,
        )

        self.model.fit(
            windows, targets,
            epochs=self.epochs,
            batch_size=self.batch_size,
            callbacks=[stopper],
            verbose=self.verbose,
        )

        return self

    def predict(self, X: pd.DataFrame):
        """
        Predict the target for every row of ``X`` that has a full window
        of preceding rows.

        Raises ``ValueError`` when called before ``fit``. Returns an empty
        array when ``X`` yields no windows; otherwise the predictions in
        the original target scale.
        """
        if self.model is None:
            raise ValueError("Model not fitted yet")

        # The targets are irrelevant here, so a zero series stands in for
        # them; the scalers fitted during fit() are reused unchanged.
        placeholder_y = pd.Series([0] * len(X))
        windows, _ = self.seq_generator.create_sequences(
            X, placeholder_y, fit_scalers=False
        )

        if len(windows) == 0:
            return np.array([])

        scaled = self.model.predict(windows, verbose=0)
        return self.seq_generator.inverse_transform_y(scaled)

class LinearRegressionModel:
    """
    Linear regression that standardizes both the features and the target.
    """
    def __init__(self, fit_intercept: bool = True):
        self.fit_intercept = fit_intercept
        self.scaler_X = StandardScaler()
        self.scaler_y = StandardScaler()
        self.model = LinearRegression(fit_intercept=fit_intercept)
        self.is_fitted = False

    def fit(self, X: pd.DataFrame, y: pd.Series):
        """
        Fit both scalers and the regression on ``X``/``y``; returns ``self``.
        """
        features = self.scaler_X.fit_transform(X)
        target = self.scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten()

        self.model.fit(features, target)
        self.is_fitted = True

        return self

    def predict(self, X: pd.DataFrame):
        """
        Predict the target for ``X`` in the original target scale.

        Raises ``ValueError`` when called before ``fit``.
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted yet")

        scaled_prediction = self.model.predict(self.scaler_X.transform(X))

        # Undo the target scaling and hand back a flat array
        restored = self.scaler_y.inverse_transform(scaled_prediction.reshape(-1, 1))
        return restored.flatten()

class HyperparameterTuner:
    """
    Hyperparameter search for the RNN models.

    Candidate settings are scored by the mean RMSE of ``RNNRegressor`` over
    a ``TimeSeriesSplit`` cross-validation. ``best_params``, ``best_score``
    and ``tuning_results`` live on the instance and are not reset between
    ``grid_search`` calls.
    """
    def __init__(self, model_type: str = 'LSTM', n_splits: int = 3):
        self.model_type = model_type.upper()
        self.n_splits = n_splits
        self.best_params = {}
        self.best_score = float('inf')
        self.tuning_results = []

    def define_param_grid(self):
        """
        Return the parameter grid (name -> list of candidate values).
        """
        param_grid = {
            'sequence_length': [20, 30, 45],
            'learning_rate': [0.001, 0.01, 0.05],
            'batch_size': [16, 32, 64],
            'units': [32, 50, 64],
            'dropout_rate': [0.2, 0.3, 0.4],
            'epochs': [30, 50, 70]
        }
        return param_grid

    def cross_validate_params(self, X: pd.DataFrame, y: pd.Series, params: dict):
        """
        Score one parameter set with time-series cross-validation.

        A fold that raises, or whose test part is too short to yield any
        prediction, is reported on stdout and left out of the average
        (previously such folds were dropped silently, which made a broken
        configuration indistinguishable from a merely poor one).

        Returns the mean RMSE over the folds that produced a score, or
        ``inf`` when none did.
        """
        tscv = TimeSeriesSplit(n_splits=self.n_splits)
        scores = []

        for fold, (train_idx, test_idx) in enumerate(tscv.split(X), 1):
            try:
                X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
                y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

                # Build the model from the parameter set under test
                model = RNNRegressor(
                    model_type=self.model_type,
                    sequence_length=params['sequence_length'],
                    learning_rate=params['learning_rate'],
                    batch_size=params['batch_size'],
                    units=params['units'],
                    dropout_rate=params['dropout_rate'],
                    epochs=params['epochs'],
                    verbose=0
                )

                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                if len(y_pred) == 0:
                    print(f"[Tuning Fold {fold}] No predictions generated (insufficient data)")
                    continue

                # The first `sequence_length` test rows have no full window
                # and therefore no prediction; drop them from the truth.
                y_test_aligned = y_test.iloc[model.seq_generator.sequence_length:]
                y_test_aligned = y_test_aligned.iloc[:len(y_pred)]

                rmse = np.sqrt(mean_squared_error(y_test_aligned, y_pred))
                scores.append(rmse)

            except Exception as e:
                # Best effort: keep tuning, but say which fold failed and why
                print(f"[Tuning Fold {fold}] Error: {e}")
                continue

        return np.mean(scores) if scores else float('inf')

    def grid_search(self, X: pd.DataFrame, y: pd.Series, max_combinations: int = 50):
        """
        Search the grid for the parameter set with the lowest CV RMSE.

        When the grid has more than ``max_combinations`` combinations a
        random sample of that size is evaluated instead of the full grid.
        Progress is printed for every combination.

        Returns ``(best_params, best_score)``.
        """
        param_grid = self.define_param_grid()

        # Enumerate every possible combination of grid values
        param_names = list(param_grid.keys())
        param_values = list(param_grid.values())
        all_combinations = list(product(*param_values))

        # Cap the number of combinations to keep the run time bounded
        if len(all_combinations) > max_combinations:
            selected_combinations = random.sample(all_combinations, max_combinations)
        else:
            selected_combinations = all_combinations

        print(f"\n[Hyperparameter Tuning] Testing {len(selected_combinations)} parameter combinations for {self.model_type}...")

        for i, combination in enumerate(selected_combinations):
            params = dict(zip(param_names, combination))

            print(f"\n[{i+1}/{len(selected_combinations)}] Testing: {params}")

            score = self.cross_validate_params(X, y, params)

            self.tuning_results.append({
                'params': params.copy(),
                'score': score
            })

            # Strict comparison: on ties the earlier combination is kept
            if score < self.best_score:
                self.best_score = score
                self.best_params = params.copy()
                print(f"New best score: {score:.4f}")

        print(f"\n[Hyperparameter Tuning] Best score: {self.best_score:.4f}")
        print(f"Best parameters: {self.best_params}")

        return self.best_params, self.best_score

    def get_tuning_summary(self):
        """
        Return a text report of the tuning run.

        The report lists the number of combinations tested, the best score
        and parameters, and the five best-scoring combinations. When
        nothing has been tested yet a short placeholder string is returned.
        """
        if not self.tuning_results:
            return "No tuning results available"

        # Lowest RMSE first
        sorted_results = sorted(self.tuning_results, key=lambda x: x['score'])

        parts = [
            f"\n{'='*60}\n",
            f"HYPERPARAMETER TUNING SUMMARY - {self.model_type}\n",
            f"{'='*60}\n",
            f"Total combinations tested: {len(self.tuning_results)}\n",
            f"Best score (RMSE): {self.best_score:.4f}\n",
            "Best parameters:\n",
        ]
        parts.extend(
            f"  {param}: {value}\n" for param, value in self.best_params.items()
        )

        parts.append("\nTop 5 parameter combinations:\n")
        parts.append(f"{'-'*60}\n")

        for rank, result in enumerate(sorted_results[:5], 1):
            parts.append(f"{rank}. Score: {result['score']:.4f}\n")
            parts.extend(
                f"   {param}: {value}\n" for param, value in result['params'].items()
            )
            parts.append("\n")

        return "".join(parts)

class DriftPointDetector:
    """
    Detect concept-drift points in time-series features.

    Two adjacent windows are compared with several statistical checks, and
    candidate points are post-filtered to guard against picking up a
    regular pattern instead of genuine drift.

    Parameters
    ----------
    window_size : number of rows in each of the two compared windows.
    threshold : p-value below which a statistical test counts as passed.
    step_size : stride, in rows, between evaluated positions.
    min_effect_size : Cohen's d above which the effect-size check passes.
    stability_window : number of rows examined before the reference window
        to decide whether the series was stable beforehand.
    confirmation_tests : number of checks (out of three) a column must
        pass to vote for drift.
    """
    def __init__(self, window_size: int = 120, threshold: float = 0.001,
                 step_size: int = 30, min_effect_size: float = 0.3,
                 stability_window: int = 60, confirmation_tests: int = 2):
        self.window_size = window_size
        self.threshold = threshold
        self.step_size = step_size
        self.min_effect_size = min_effect_size
        self.stability_window = stability_window
        self.confirmation_tests = confirmation_tests
        self.drift_points_: List[int] = []

    def _calculate_effect_size(self, window1: pd.Series, window2: pd.Series) -> float:
        """Return the absolute Cohen's d between the two windows (0 when the pooled std is 0)."""
        mean1, mean2 = window1.mean(), window2.mean()
        std1, std2 = window1.std(), window2.std()

        pooled_std = np.sqrt(((len(window1) - 1) * std1**2 + (len(window2) - 1) * std2**2) /
                           (len(window1) + len(window2) - 2))

        if pooled_std == 0:
            return 0

        return abs(mean1 - mean2) / pooled_std

    def _test_multiple_statistics(self, window1: pd.DataFrame, window2: pd.DataFrame) -> Tuple[int, float]:
        """
        Run the three drift checks on every column of the two windows.

        Per column the checks are: Kolmogorov-Smirnov, Mann-Whitney U
        (two-sided) and the effect size. A column votes for drift when at
        least ``confirmation_tests`` of them pass.

        Returns ``(number of voting columns, smallest p-value seen)``.
        """
        passed_tests = 0
        min_p_value = 1.0

        for col in window1.columns:
            col_tests = 0
            col_p_values = []

            # Test 1: Kolmogorov-Smirnov test
            try:
                _, p_value = ks_2samp(window1[col], window2[col])
                col_p_values.append(p_value)
                if p_value < self.threshold:
                    col_tests += 1
            except Exception:
                # Best effort: a test that cannot run casts no vote.
                # `Exception` (not a bare except) so Ctrl-C still works.
                pass

            # Test 2: Mann-Whitney U test
            try:
                _, p_value = mannwhitneyu(window1[col], window2[col], alternative='two-sided')
                col_p_values.append(p_value)
                if p_value < self.threshold:
                    col_tests += 1
            except Exception:
                pass

            # Test 3: Effect size check
            effect_size = self._calculate_effect_size(window1[col], window2[col])
            if effect_size > self.min_effect_size:
                col_tests += 1

            if col_p_values:
                min_p_value = min(min_p_value, min(col_p_values))

            if col_tests >= self.confirmation_tests:
                passed_tests += 1

        return passed_tests, min_p_value

    def _check_stability_before_drift(self, X: pd.DataFrame, position: int) -> bool:
        """
        Tell whether the stretch preceding the reference window is stable.

        The ``stability_window`` rows ending ``window_size`` rows before
        ``position`` are split in half and the halves compared per column
        with a KS test at ``threshold * 10``. Any column that differs marks
        the stretch as unstable. Positions too early to have such a
        stretch are treated as stable.
        """
        if position < self.stability_window + self.window_size:
            return True

        stable_start = position - self.stability_window - self.window_size
        stable_end = position - self.window_size
        stable_window = X.iloc[stable_start:stable_end]

        mid_point = len(stable_window) // 2
        stable_part1 = stable_window.iloc[:mid_point]
        stable_part2 = stable_window.iloc[mid_point:]

        # Nothing to compare when either half is empty
        if len(stable_part1) == 0 or len(stable_part2) == 0:
            return True

        for col in X.columns:
            try:
                _, p_value = ks_2samp(stable_part1[col], stable_part2[col])
            except Exception:
                # A column that cannot be tested does not veto the position
                continue
            if p_value < self.threshold * 10:
                return False

        return True

    def _remove_pattern_drifts(self, drift_candidates: List[Tuple[int, float]]) -> List[int]:
        """
        Filter out candidates that look like a regular pattern.

        ``drift_candidates`` holds ``(position, p_value)`` pairs and is not
        modified. With fewer than three candidates the positions are
        returned as given, without the minimum-distance filter. Otherwise,
        when the gaps between consecutive positions are nearly constant
        (coefficient of variation below 0.3), only the third of the
        candidates with the smallest p-values is kept. The survivors are
        then thinned so that consecutive points are at least
        ``2 * window_size`` rows apart.
        """
        if len(drift_candidates) < 3:
            return [pos for pos, _ in drift_candidates]

        # Work on sorted copies so the caller's list is left untouched
        by_position = sorted(drift_candidates, key=lambda x: x[0])
        intervals = [
            later[0] - earlier[0]
            for earlier, later in zip(by_position, by_position[1:])
        ]

        # At least three candidates means at least two intervals here
        interval_std = np.std(intervals)
        interval_mean = np.mean(intervals)

        # A zero mean (all positions identical) is not treated as regular,
        # which also avoids dividing by zero.
        is_regular = interval_mean > 0 and interval_std / interval_mean < 0.3

        if is_regular:
            # Stable sort: ties in p-value stay in position order
            by_significance = sorted(by_position, key=lambda x: x[1])
            keep_count = max(1, len(by_significance) // 3)
            filtered_drifts = [pos for pos, _ in by_significance[:keep_count]]
        else:
            filtered_drifts = [pos for pos, _ in by_position]

        final_drifts = []
        min_distance = self.window_size * 2

        for pos in sorted(filtered_drifts):
            if not final_drifts or pos - final_drifts[-1] >= min_distance:
                final_drifts.append(pos)

        return final_drifts

    def detect(self, X: pd.DataFrame) -> List[int]:
        """
        Scan ``X`` and return the row positions of the detected drift points.

        Every ``step_size`` rows the ``window_size`` rows before and after
        the position are compared. A position becomes a candidate when the
        preceding stretch is stable and at least one column votes for
        drift. The result is also stored in ``self.drift_points_``.
        """
        self.drift_points_ = []
        n = len(X)
        drift_candidates = []

        for i in range(self.window_size, n - self.window_size, self.step_size):
            if not self._check_stability_before_drift(X, i):
                continue

            window1 = X.iloc[i - self.window_size:i]
            window2 = X.iloc[i:i + self.window_size]

            passed_tests, min_p_value = self._test_multiple_statistics(window1, window2)

            if passed_tests >= 1:
                drift_candidates.append((i, min_p_value))

        self.drift_points_ = self._remove_pattern_drifts(drift_candidates)

        return self.drift_points_

class AdaptiveFoldGenerator:
    """
    Build train/test folds from the segments between detected drift points.

    Parameters
    ----------
    min_fold_size : segments shorter than this many rows are skipped.
    test_ratio : fraction of each segment, taken from its end, used as the
        test part.
    min_train_size : a fold is kept only when its train part has MORE than
        this many rows (default 100, the value that used to be hard-coded).
    min_test_size : a fold is kept only when its test part has MORE than
        this many rows (default 50, the value that used to be hard-coded).
    """
    def __init__(self, min_fold_size: int = 120, test_ratio: float = 0.2,
                 min_train_size: int = 100, min_test_size: int = 50):
        self.min_fold_size = min_fold_size
        self.test_ratio = test_ratio
        self.min_train_size = min_train_size
        self.min_test_size = min_test_size

    def split(self, X: pd.DataFrame, drift_points: List[int]) -> List[Tuple[np.ndarray, np.ndarray]]:
        """
        Split the rows of ``X`` into one chronological fold per segment.

        ``drift_points`` are row positions in ascending order; any iterable
        of integers is accepted (list, tuple, numpy array). Segments run
        from one boundary to the next, with 0 and ``len(X)`` as the outer
        boundaries.

        Returns a list of ``(train_idx, test_idx)`` position arrays.
        """
        folds = []
        # Build the boundary list element by element: `[0] + drift_points`
        # would broadcast instead of concatenate for a numpy array.
        points = [0, *(int(p) for p in drift_points), len(X)]

        for start, end in zip(points, points[1:]):
            fold_length = end - start

            if fold_length < self.min_fold_size:
                continue

            # The first (1 - test_ratio) of the segment trains, the rest tests
            split = int(start + (1 - self.test_ratio) * fold_length)
            train_idx = np.arange(start, split)
            test_idx = np.arange(split, end)

            if len(train_idx) > self.min_train_size and len(test_idx) > self.min_test_size:
                folds.append((train_idx, test_idx))

        return folds

class DriftAdaptiveTimeSeriesCV:
    """
    Cross-validate a model on folds delimited by drift points.

    Supports the RNN models ('RNN', 'LSTM', 'GRU') and linear regression
    ('LINEAR'). When ``model_params`` is omitted (or empty) a default set
    matching the model type is used.
    """
    def __init__(self, model_type: str = 'LSTM', model_params: dict = None):
        self.model_type = model_type.upper()
        if model_params:
            self.model_params = model_params
        elif self.model_type == 'LINEAR':
            # The recurrent defaults below are not valid keyword arguments
            # for LinearRegressionModel, so LINEAR gets its own default.
            self.model_params = {'fit_intercept': True}
        else:
            self.model_params = {
                'sequence_length': 30,
                'units': 50,
                'dropout_rate': 0.3,
                'learning_rate': 0.001,
                'epochs': 50,
                'batch_size': 32,
                'verbose': 0
            }

    def run(self, X: pd.DataFrame, y: pd.Series, drift_points: List[int]) -> Tuple[List[float], List[float]]:
        """
        Train and evaluate one model per drift-delimited fold.

        A fold whose training or prediction raises is reported and
        skipped, as is an RNN fold that yields no predictions.

        Returns ``(rmse_per_fold, mae_per_fold)``; both lists are empty
        when no fold could be generated.

        Raises ``ValueError`` for an unknown ``model_type``.
        """
        fold_gen = AdaptiveFoldGenerator()
        metrics_rmse, metrics_mae = [], []

        folds = fold_gen.split(X, drift_points)
        if not folds:
            print("Warning: No valid folds generated by AdaptiveFoldGenerator!")
            return [], []

        is_recurrent = self.model_type in ['RNN', 'LSTM', 'GRU']

        for i, (train_idx, test_idx) in enumerate(folds):
            print(f"\n[Adaptive Fold {i+1}] Training {self.model_type}...")

            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

            if is_recurrent:
                model = RNNRegressor(model_type=self.model_type, **self.model_params)
            elif self.model_type == 'LINEAR':
                model = LinearRegressionModel(**self.model_params)
            else:
                raise ValueError(f"Unknown model type: {self.model_type}")

            try:
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                if is_recurrent:
                    if len(y_pred) > 0:
                        # The first `sequence_length` test rows have no full
                        # window and so no prediction; drop them from the truth.
                        y_test_aligned = y_test.iloc[model.seq_generator.sequence_length:]
                        y_test_aligned = y_test_aligned.iloc[:len(y_pred)]

                        rmse = np.sqrt(mean_squared_error(y_test_aligned, y_pred))
                        mae = mean_absolute_error(y_test_aligned, y_pred)
                    else:
                        print(f"[Adaptive Fold {i+1}] No predictions generated (insufficient data)")
                        continue
                else:
                    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
                    mae = mean_absolute_error(y_test, y_pred)

                print(f"[Adaptive Fold {i+1}] RMSE={rmse:.3f}, MAE={mae:.3f}")

                metrics_rmse.append(rmse)
                metrics_mae.append(mae)

            except Exception as e:
                # Best effort: report the failing fold and carry on
                print(f"[Adaptive Fold {i+1}] Error: {e}")
                continue

        return metrics_rmse, metrics_mae

class BaselineTimeSeriesCV:
    """
    Cross-validate a model with a plain ``TimeSeriesSplit``.

    Supports the RNN models ('RNN', 'LSTM', 'GRU') and linear regression
    ('LINEAR'). When ``model_params`` is omitted (or empty) a default set
    matching the model type is used.
    """
    def __init__(self, model_type: str = 'LSTM', model_params: dict = None, n_splits: int = 5):
        self.model_type = model_type.upper()
        if model_params:
            self.model_params = model_params
        elif self.model_type == 'LINEAR':
            # The recurrent defaults below are not valid keyword arguments
            # for LinearRegressionModel, so LINEAR gets its own default.
            self.model_params = {'fit_intercept': True}
        else:
            self.model_params = {
                'sequence_length': 30,
                'units': 50,
                'dropout_rate': 0.3,
                'learning_rate': 0.001,
                'epochs': 50,
                'batch_size': 32,
                'verbose': 0
            }
        self.n_splits = n_splits

    def run(self, X: pd.DataFrame, y: pd.Series) -> Tuple[List[float], List[float]]:
        """
        Train and evaluate one model per ``TimeSeriesSplit`` fold.

        A fold whose training or prediction raises is reported and
        skipped, as is an RNN fold that yields no predictions.

        Returns ``(rmse_per_fold, mae_per_fold)``.

        Raises ``ValueError`` for an unknown ``model_type``.
        """
        tscv = TimeSeriesSplit(n_splits=self.n_splits)
        metrics_rmse, metrics_mae = [], []

        is_recurrent = self.model_type in ['RNN', 'LSTM', 'GRU']

        for i, (train_idx, test_idx) in enumerate(tscv.split(X)):
            print(f"\n[Baseline Fold {i+1}] Training {self.model_type}...")

            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

            if is_recurrent:
                model = RNNRegressor(model_type=self.model_type, **self.model_params)
            elif self.model_type == 'LINEAR':
                model = LinearRegressionModel(**self.model_params)
            else:
                raise ValueError(f"Unknown model type: {self.model_type}")

            try:
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                if is_recurrent:
                    if len(y_pred) > 0:
                        # The first `sequence_length` test rows have no full
                        # window and so no prediction; drop them from the truth.
                        y_test_aligned = y_test.iloc[model.seq_generator.sequence_length:]
                        y_test_aligned = y_test_aligned.iloc[:len(y_pred)]

                        rmse = np.sqrt(mean_squared_error(y_test_aligned, y_pred))
                        mae = mean_absolute_error(y_test_aligned, y_pred)
                    else:
                        print(f"[Baseline Fold {i+1}] No predictions generated (insufficient data)")
                        continue
                else:
                    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
                    mae = mean_absolute_error(y_test, y_pred)

                print(f"[Baseline Fold {i+1}] RMSE={rmse:.3f}, MAE={mae:.3f}")

                metrics_rmse.append(rmse)
                metrics_mae.append(mae)

            except Exception as e:
                # Best effort: report the failing fold and carry on
                print(f"[Baseline Fold {i+1}] Error: {e}")
                continue

        return metrics_rmse, metrics_mae

class EnhancedModelComparison:
    """
    Compare RNN, LSTM, GRU and Linear Regression models, optionally after
    hyperparameter tuning of the recurrent models.

    Attributes:
        enable_tuning: When True, ``compare_models`` runs a grid search for
            every recurrent model before evaluating; otherwise the default
            parameter sets below are used.
        tuning_max_combinations: Forwarded to ``HyperparameterTuner.grid_search``
            as ``max_combinations``.
        models: Model types evaluated by ``compare_models``, in order.
        optimized_params: Maps model type -> parameter dict used for evaluation.
        tuning_summaries: Maps recurrent model type -> whatever
            ``HyperparameterTuner.get_tuning_summary()`` returned.
    """

    # Model types that go through hyperparameter tuning.
    RNN_MODEL_TYPES = ('RNN', 'LSTM', 'GRU')

    # Parameters used for the recurrent models when tuning is disabled.
    DEFAULT_RNN_PARAMS = {
        'sequence_length': 30,
        'units': 50,
        'dropout_rate': 0.3,
        'learning_rate': 0.001,
        'epochs': 50,
        'batch_size': 32,
        'verbose': 0
    }

    # Linear Regression is never tuned; it always uses these parameters.
    LINEAR_PARAMS = {'fit_intercept': True}

    def __init__(self, enable_tuning: bool = True, tuning_max_combinations: int = 30):
        self.enable_tuning = enable_tuning
        self.tuning_max_combinations = tuning_max_combinations
        self.models = ['RNN', 'LSTM', 'GRU', 'LINEAR']
        self.optimized_params = {}
        self.tuning_summaries = {}

    def _apply_default_params(self):
        """
        Fill ``optimized_params`` with the default (untuned) parameter sets.

        Every model receives its OWN copy of the defaults, so changing one
        model's parameters in place can never leak into another model's.
        """
        for model_type in self.RNN_MODEL_TYPES:
            self.optimized_params[model_type] = dict(self.DEFAULT_RNN_PARAMS)
        self.optimized_params['LINEAR'] = dict(self.LINEAR_PARAMS)

    def tune_hyperparameters(self, X: pd.DataFrame, y: pd.Series):
        """
        Run hyperparameter tuning for every recurrent model type.

        For each of RNN, LSTM and GRU a ``HyperparameterTuner`` (3 splits) is
        grid-searched; the best parameters are stored in ``optimized_params``
        and the tuner's summary in ``tuning_summaries``. Linear Regression is
        not tuned and gets its fixed parameter set.

        Args:
            X: Feature frame passed through to the tuner.
            y: Target series passed through to the tuner.
        """
        print("\n" + "="*80)
        print("STARTING HYPERPARAMETER TUNING")
        print("="*80)

        for model_type in self.RNN_MODEL_TYPES:
            print(f"\n[TUNING] Starting hyperparameter tuning for {model_type}...")

            tuner = HyperparameterTuner(model_type=model_type, n_splits=3)
            # grid_search returns (best_params, best_score); only the
            # parameters are kept here.
            best_params, _ = tuner.grid_search(X, y, max_combinations=self.tuning_max_combinations)

            self.optimized_params[model_type] = best_params
            self.tuning_summaries[model_type] = tuner.get_tuning_summary()

            print(f"[TUNING] Completed tuning for {model_type}")

        # Linear Regression does not need tuning.
        self.optimized_params['LINEAR'] = dict(self.LINEAR_PARAMS)

        print("\n" + "="*80)
        print("HYPERPARAMETER TUNING COMPLETED")
        print("="*80)

    def compare_models(self, X: pd.DataFrame, y: pd.Series, drift_points: List[int]):
        """
        Evaluate every model with drift-adaptive CV and with baseline CV.

        Parameters come from hyperparameter tuning when ``enable_tuning`` is
        True, otherwise from the default parameter sets.

        Args:
            X: Feature frame.
            y: Target series.
            drift_points: Drift positions forwarded to
                ``DriftAdaptiveTimeSeriesCV.run``.

        Returns:
            Dict mapping model type to a dict with the keys
            ``adaptive_rmse``, ``adaptive_mae``, ``baseline_rmse``,
            ``baseline_mae`` (the values returned by the two CV runners) and
            ``optimized_params`` (the parameters that were used).
        """
        # Tune first when enabled; otherwise fall back to the defaults.
        if self.enable_tuning:
            self.tune_hyperparameters(X, y)
        else:
            self._apply_default_params()

        results = {}

        for model_type in self.models:
            print(f"\n{'='*50}")
            print(f"Testing {model_type} Model with Optimized Parameters")
            print(f"{'='*50}")

            params = self.optimized_params[model_type]
            print(f"Using parameters: {params}")

            # Adaptive CV
            drift_cv = DriftAdaptiveTimeSeriesCV(model_type, params)
            drift_rmse, drift_mae = drift_cv.run(X, y, drift_points)

            # Baseline CV
            baseline_cv = BaselineTimeSeriesCV(model_type, params, n_splits=5)
            base_rmse, base_mae = baseline_cv.run(X, y)

            results[model_type] = {
                'adaptive_rmse': drift_rmse,
                'adaptive_mae': drift_mae,
                'baseline_rmse': base_rmse,
                'baseline_mae': base_mae,
                'optimized_params': params
            }

        return results

    def print_summary(self, results: dict):
        """
        Print the comparison report: tuning summaries (when tuning was
        enabled), per-model averages, a ranking by adaptive RMSE and the
        overall best model.

        Args:
            results: The dict returned by ``compare_models``.
        """
        print("\n" + "="*80)
        print("MODEL COMPARISON SUMMARY WITH OPTIMIZED PARAMETERS")
        print("="*80)

        # Tuning results come first.
        if self.enable_tuning:
            print("\n" + "="*60)
            print("HYPERPARAMETER TUNING RESULTS")
            print("="*60)

            for model_type in self.RNN_MODEL_TYPES:
                if model_type in self.tuning_summaries:
                    print(self.tuning_summaries[model_type])

        # Per-model performance.
        print("\n" + "="*60)
        print("MODEL PERFORMANCE COMPARISON")
        print("="*60)

        performance_summary = []

        for model_type in self.models:
            if model_type not in results:
                continue
            model_result = results[model_type]

            print(f"\n{model_type} Results:")
            print("-" * 40)

            # Parameters the model was evaluated with.
            print(f"Optimized Parameters: {model_result['optimized_params']}")

            # Adaptive results (only models with adaptive scores are ranked).
            if model_result['adaptive_rmse'] and model_result['adaptive_mae']:
                avg_rmse = np.mean(model_result['adaptive_rmse'])
                avg_mae = np.mean(model_result['adaptive_mae'])
                print(f"Adaptive CV - Avg RMSE: {avg_rmse:.4f}, Avg MAE: {avg_mae:.4f}")

                performance_summary.append({
                    'model': model_type,
                    'adaptive_rmse': avg_rmse,
                    'adaptive_mae': avg_mae,
                    'params': model_result['optimized_params']
                })
            else:
                print("Adaptive CV - No valid results")

            # Baseline results
            if model_result['baseline_rmse'] and model_result['baseline_mae']:
                avg_rmse = np.mean(model_result['baseline_rmse'])
                avg_mae = np.mean(model_result['baseline_mae'])
                print(f"Baseline CV - Avg RMSE: {avg_rmse:.4f}, Avg MAE: {avg_mae:.4f}")
            else:
                print("Baseline CV - No valid results")

        # Model ranking, lowest adaptive RMSE first.
        if performance_summary:
            print("\n" + "="*60)
            print("MODEL RANKING (Based on Adaptive CV RMSE)")
            print("="*60)

            sorted_models = sorted(performance_summary, key=lambda x: x['adaptive_rmse'])

            for i, model_info in enumerate(sorted_models, 1):
                print(f"\n{i}. {model_info['model']}")
                print(f"   RMSE: {model_info['adaptive_rmse']:.4f}")
                print(f"   MAE: {model_info['adaptive_mae']:.4f}")
                print(f"   Best Parameters: {model_info['params']}")

        # Overall best model.
        best_model_info = self._find_best_model_with_params(results)
        if best_model_info:
            print("\n" + "="*60)
            print(f"🏆 BEST MODEL: {best_model_info['model']}")
            print(f"   RMSE: {best_model_info['score']:.4f}")
            print(f"   Optimal Parameters: {best_model_info['params']}")
            print("="*60)

    def _find_best_model_with_params(self, results: dict):
        """
        Find the model with the lowest average RMSE.

        The adaptive-CV RMSE is the criterion; a model without adaptive
        results is scored by its baseline-CV RMSE instead. Ties keep the
        model that appears first in ``self.models``.

        Args:
            results: The dict returned by ``compare_models``.

        Returns:
            Dict with keys ``model``, ``score`` and ``params``, or None when
            no model has any RMSE values.
        """
        best_model_info = None
        best_score = float('inf')

        for model_type in self.models:
            if model_type not in results:
                continue
            model_result = results[model_type]

            # Prefer adaptive scores; fall back to baseline when there are none.
            scores = model_result['adaptive_rmse'] or model_result['baseline_rmse']
            if not scores:
                continue

            avg_rmse = np.mean(scores)
            if avg_rmse < best_score:
                best_score = avg_rmse
                best_model_info = {
                    'model': model_type,
                    'score': avg_rmse,
                    'params': model_result['optimized_params']
                }

        return best_model_info

    def get_best_params_for_model(self, model_type: str):
        """
        Return the stored parameters for ``model_type`` (case-insensitive),
        or None when nothing is stored for that model.
        """
        return self.optimized_params.get(model_type.upper())

# ตัวอย่างการใช้งานที่ถูกต้องพร้อม hyperparameter tuning
if __name__ == "__main__":
    # Step 1: detect drift points on the feature frame X.
    print("Starting Drift Detection...")
    detector_settings = {
        'window_size': 120,
        'threshold': 0.001,
        'step_size': 30,
        'min_effect_size': 0.3,
        'stability_window': 60,
        'confirmation_tests': 2,
    }
    detector = DriftPointDetector(**detector_settings)
    drift_points = detector.detect(X)
    print(f"Detected {len(drift_points)} drift points")

    # Step 2: compare all models. Hyperparameter tuning is switched on here
    # (the slower option); tuning_max_combinations caps the number of
    # parameter sets tried to save time.
    # For a faster run with default parameters, build the comparator with
    # EnhancedModelComparison(enable_tuning=False) instead.
    print("\nStarting Enhanced Model Comparison with Hyperparameter Tuning...")
    enhanced_comparator = EnhancedModelComparison(
        enable_tuning=True,
        tuning_max_combinations=25,
    )

    results = enhanced_comparator.compare_models(X, y, drift_points)
    enhanced_comparator.print_summary(results)

    # Step 3: list the parameters stored for each model.
    wide_rule = "=" * 80
    print("\n" + wide_rule)
    print("BEST PARAMETERS FOR EACH MODEL")
    print(wide_rule)

    for model_type in ['RNN', 'LSTM', 'GRU', 'LINEAR']:
        best_params = enhanced_comparator.get_best_params_for_model(model_type)
        if not best_params:
            continue
        print(f"\n{model_type} Best Parameters:")
        for param, value in best_params.items():
            print(f"  {param}: {value}")

    # Step 4: turn the drift point positions into day/month/year date strings.
    drift_rows = df.iloc[drift_points]
    drift_dates_formatted = drift_rows['Date'].dt.strftime('%d/%m/%Y').tolist()

    print("\n" + wide_rule)
    print("DRIFT DETECTION RESULTS")
    print(wide_rule)
    print(f"Detected Drift Points: {len(drift_points)}")
    print("Drift Dates:")
    for i, date in enumerate(drift_dates_formatted, 1):
        print(f"  {i}. {date}")
    print(wide_rule)

Starting Drift Detection...
Detected 8 drift points

Starting Enhanced Model Comparison with Hyperparameter Tuning...

STARTING HYPERPARAMETER TUNING

[TUNING] Starting hyperparameter tuning for RNN...

[Hyperparameter Tuning] Testing 25 parameter combinations for RNN...

[1/25] Testing: {'sequence_length': 45, 'learning_rate': 0.05, 'batch_size': 16, 'units': 32, 'dropout_rate': 0.4, 'epochs': 30}
New best score: 26.9411

[2/25] Testing: {'sequence_length': 20, 'learning_rate': 0.01, 'batch_size': 32, 'units': 32, 'dropout_rate': 0.4, 'epochs': 30}
New best score: 23.8188

[3/25] Testing: {'sequence_length': 20, 'learning_rate': 0.001, 'batch_size': 16, 'units': 64, 'dropout_rate': 0.4, 'epochs': 50}
New best score: 22.9561

[4/25] Testing: {'sequence_length': 30, 'learning_rate': 0.001, 'batch_size': 32, 'units': 50, 'dropout_rate': 0.2, 'epochs': 70}

[5/25] Testing: {'sequence_length': 30, 'learning_rate': 0.001, 'batch_size': 16, 'units': 32, 'dropout_rate': 0.4, 'epochs': 50}

[6