In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier

def build_lstm_model(input_shape):
    """
    Assemble and compile the stacked LSTM binary classifier.

    Layers, in order: LSTM(200), Dropout(0.3), LSTM(250), Dropout(0.4),
    LSTM(150), Dropout(0.3), LSTM(100), Dropout(0.3), Dense(50) and a
    single-unit sigmoid output. Every LSTM layer and the 50-unit Dense layer
    uses an L2 kernel regularizer with factor 0.01.

    Args:
        input_shape: Value forwarded as ``input_shape`` to the first LSTM layer.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.0005,
        binary cross-entropy loss, accuracy metric).
    """
    penalty = 0.01

    network = Sequential()

    # The first three recurrent layers return full sequences so the LSTM that
    # follows receives one vector per timestep.
    network.add(LSTM(200, input_shape=input_shape, return_sequences=True,
                     use_bias=True, kernel_regularizer=l2(penalty)))
    network.add(Dropout(0.3))
    network.add(LSTM(250, return_sequences=True, use_bias=True,
                     kernel_regularizer=l2(penalty)))
    network.add(Dropout(0.4))
    network.add(LSTM(150, return_sequences=True, use_bias=True,
                     kernel_regularizer=l2(penalty)))
    network.add(Dropout(0.3))

    # The last recurrent layer emits only its final output.
    network.add(LSTM(100, return_sequences=False, kernel_regularizer=l2(penalty)))
    network.add(Dropout(0.3))

    # NOTE(review): no activation is passed to this Dense layer, so it is
    # linear ahead of the sigmoid output - confirm this is intended.
    network.add(Dense(50, kernel_regularizer=l2(penalty)))
    network.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(learning_rate=0.0005)
    network.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return network

def train_lstm_model(X_lstm_train, y_lstm_train):
    """
    Train LSTM model

    Builds the network with ``build_lstm_model`` from the last two dimensions
    of ``X_lstm_train`` and fits it for up to 100 epochs with batch size 32,
    using ``validation_split=0.2`` for the monitored validation loss.

    Args:
        X_lstm_train: 3-dimensional array-like exposing ``.shape``; its second
            and third dimensions become the model's input shape.
        y_lstm_train: Targets passed to ``fit`` together with ``X_lstm_train``.

    Returns:
        Tuple ``(lstm_model, history)``: the fitted model and the object
        returned by ``fit``.

    Raises:
        ValueError: If ``X_lstm_train`` is not 3-dimensional.
    """
    # Fail early with a readable message instead of an IndexError on shape[2].
    n_dims = len(X_lstm_train.shape)
    if n_dims != 3:
        raise ValueError(
            "X_lstm_train must be 3-dimensional (samples, timesteps, features); "
            f"got {n_dims} dimension(s)"
        )

    print("\n= LSTM Model Training =")
    lstm_model = build_lstm_model((X_lstm_train.shape[1], X_lstm_train.shape[2]))
    print("LSTM Architecture:")
    lstm_model.summary()

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    # The scheduler's patience must be shorter than early stopping's. With
    # equal patience the reduction typically triggers on the same epoch that
    # early stopping ends training, so the lower learning rate would rarely
    # be used.
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

    history = lstm_model.fit(
        X_lstm_train, y_lstm_train,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping, lr_scheduler],
        verbose=1
    )
    return lstm_model, history

def train_random_forest(X_train, y_train):
    """
    Train Random Forest model with GridSearchCV

    Runs a 5-fold, F1-scored grid search over ``n_estimators``, ``max_depth``
    and ``min_samples_split``, then prints the 5-fold cross-validated accuracy
    of the winning configuration on the same training data.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.

    Returns:
        The ``best_estimator_`` selected by the grid search.
    """
    print("\n= Random Forest Model Training =")
    param_grid_rf = {
        'n_estimators': [50, 100, 200, 300],
        'max_depth': [5, 10, 20, 50],
        'min_samples_split': [2, 5, 10, 20]
    }
    # n_estimators is not set on the base estimator: the grid overrides it for
    # every candidate, so a value here would be dead and misleading.
    grid_search_rf = GridSearchCV(
        RandomForestClassifier(random_state=42, class_weight='balanced'),
        param_grid_rf, cv=5, scoring='f1', n_jobs=-1
    )
    grid_search_rf.fit(X_train, y_train)
    rf_model = grid_search_rf.best_estimator_
    print(f"Best RF parameters: {grid_search_rf.best_params_}")
    # NOTE(review): this accuracy is measured on the data that was also used to
    # pick the hyper-parameters, so it is likely optimistic.
    rf_scores = cross_val_score(rf_model, X_train, y_train, cv=5, scoring='accuracy')
    print(f"Random Forest CV Accuracy: {rf_scores.mean():.3f} ± {rf_scores.std():.3f}")
    return rf_model

def train_lightgbm(X_train, y_train):
    """
    Tune a LightGBM classifier with a grid search and report its CV accuracy.

    A class-weight-balanced ``LGBMClassifier`` is searched over
    ``n_estimators``, ``max_depth`` and ``learning_rate`` with 5-fold
    cross-validation scored by F1. The selected estimator's 5-fold accuracy
    on the same data is then printed.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The ``best_estimator_`` selected by the grid search.
    """
    print("\n= LightGBM Model Training =")

    search_space = {
        'n_estimators': [50, 100, 200],
        'max_depth': [5, 10, 20],
        'learning_rate': [0.01, 0.1, 0.2],
    }
    base_estimator = lgb.LGBMClassifier(random_state=42, class_weight='balanced')

    search = GridSearchCV(
        estimator=base_estimator,
        param_grid=search_space,
        scoring='f1',
        cv=5,
        n_jobs=-1,
    )
    search.fit(X_train, y_train)
    print(f"Best LightGBM parameters: {search.best_params_}")

    tuned = search.best_estimator_
    accuracies = cross_val_score(tuned, X_train, y_train, cv=5, scoring='accuracy')
    mean_acc = accuracies.mean()
    std_acc = accuracies.std()
    print(f"LightGBM CV Accuracy: {mean_acc:.3f} ± {std_acc:.3f}")
    return tuned

def train_xgboost(X_train, y_train):
    """
    Train XGBoost model with GridSearchCV

    The classifier's ``scale_pos_weight`` is set to the ratio of label-0 to
    label-1 samples in ``y_train``. A 5-fold, F1-scored grid search then tunes
    ``n_estimators``, ``max_depth`` and ``learning_rate``, and the selected
    estimator's 5-fold accuracy on the same data is printed.

    Args:
        X_train: Training features.
        y_train: Binary training labels (0 / 1); any array-like accepted by
            ``numpy.asarray``.

    Returns:
        The ``best_estimator_`` selected by the grid search.

    Raises:
        ValueError: If ``y_train`` contains no samples labelled 1, because the
            class-weight ratio would require dividing by zero.
    """
    # Count with numpy: this is vectorised and also works for plain lists,
    # where `y_train == 0` would compare the whole list to 0.
    labels = np.asarray(y_train)
    n_negative = int(np.count_nonzero(labels == 0))
    n_positive = int(np.count_nonzero(labels == 1))
    if n_positive == 0:
        raise ValueError(
            "y_train contains no positive (label 1) samples; "
            "cannot compute scale_pos_weight"
        )

    print("\n= XGBoost Model Training =")
    xgb_model = xgb.XGBClassifier(random_state=42, scale_pos_weight=n_negative / n_positive)
    param_grid_xgb = {
        'n_estimators': [50, 100, 200],
        'max_depth': [5, 10, 20],
        'learning_rate': [0.01, 0.1, 0.2]
    }
    grid_search_xgb = GridSearchCV(xgb_model, param_grid_xgb, cv=5, scoring='f1', n_jobs=-1)
    grid_search_xgb.fit(X_train, y_train)
    xgb_model = grid_search_xgb.best_estimator_
    print(f"Best XGBoost parameters: {grid_search_xgb.best_params_}")
    xgb_scores = cross_val_score(xgb_model, X_train, y_train, cv=5, scoring='accuracy')
    print(f"XGBoost CV Accuracy: {xgb_scores.mean():.3f} ± {xgb_scores.std():.3f}")
    return xgb_model

def build_ann_model(input_dim):
    """
    Assemble and compile the fully connected binary classifier.

    Layers, in order: Dense(128, relu), Dropout(0.3), Dense(64, relu),
    Dropout(0.3), Dense(75, relu), Dropout(0.3), Dense(32, relu) and a
    single-unit sigmoid output.

    Args:
        input_dim: Value forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.001,
        binary cross-entropy loss, accuracy metric).
    """
    network = Sequential()

    network.add(Dense(128, input_dim=input_dim, activation='relu', use_bias=True))
    network.add(Dropout(0.3))
    network.add(Dense(64, activation='relu', use_bias=True))
    network.add(Dropout(0.3))
    # NOTE(review): the width grows from 64 back up to 75 here - confirm the
    # 75 is intended.
    network.add(Dense(75, activation='relu', use_bias=True))
    network.add(Dropout(0.3))
    network.add(Dense(32, activation='relu'))
    network.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(learning_rate=0.001)
    network.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return network

def train_ann_model(X_train, y_train):
    """
    Train ANN model

    Builds the network with ``build_ann_model`` from the second dimension of
    ``X_train`` and fits it for up to 50 epochs with batch size 64, using
    ``validation_split=0.2`` for the monitored validation loss.

    Args:
        X_train: 2-dimensional array-like exposing ``.shape``; its second
            dimension becomes the model's ``input_dim``.
        y_train: Targets passed to ``fit`` together with ``X_train``.

    Returns:
        Tuple ``(ann_model, ann_history)``: the fitted model and the object
        returned by ``fit``.

    Raises:
        ValueError: If ``X_train`` is not 2-dimensional.
    """
    # Fail early with a readable message instead of an IndexError on shape[1]
    # (1-D input) or a shape mismatch deep inside Keras (3-D input).
    n_dims = len(X_train.shape)
    if n_dims != 2:
        raise ValueError(
            "X_train must be 2-dimensional (samples, features); "
            f"got {n_dims} dimension(s)"
        )

    print("\n= ANN Model Training =")
    ann_model = build_ann_model(X_train.shape[1])

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    # The scheduler's patience must be shorter than early stopping's. With
    # equal patience the reduction typically triggers on the same epoch that
    # early stopping ends training, so the lower learning rate would rarely
    # be used.
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

    ann_history = ann_model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=64,
        validation_split=0.2,
        callbacks=[early_stopping, lr_scheduler],
        verbose=1
    )
    return ann_model, ann_history

def train_catboost(X_train, y_train):
    """
    Train CatBoost model with GridSearchCV

    Runs a 5-fold, F1-scored grid search over ``iterations``, ``depth`` and
    ``learning_rate``, then prints the 5-fold cross-validated accuracy of the
    winning configuration on the same training data.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.

    Returns:
        The ``best_estimator_`` selected by the grid search.
    """
    print("\n= CatBoost Model Training =")
    cat_model = CatBoostClassifier(
        random_state=42,
        verbose=0,
        # The search below is scored with F1 and every other tree-based
        # trainer in this file re-weights the classes; do the same here so
        # the minority class is not under-weighted.
        auto_class_weights='Balanced',
        # The grid search fits candidates in parallel (n_jobs=-1). Without
        # this, every worker writes its logs into the same default
        # `catboost_info` directory.
        allow_writing_files=False,
    )
    param_grid_cat = {
        'iterations': [100, 200],
        'depth': [4, 6, 8],
        'learning_rate': [0.01, 0.1, 0.2]
    }
    grid_search_cat = GridSearchCV(cat_model, param_grid_cat, cv=5, scoring='f1', n_jobs=-1)
    grid_search_cat.fit(X_train, y_train)
    cat_model = grid_search_cat.best_estimator_
    print(f"Best CatBoost parameters: {grid_search_cat.best_params_}")
    cat_scores = cross_val_score(cat_model, X_train, y_train, cv=5, scoring='accuracy')
    print(f"CatBoost CV Accuracy: {cat_scores.mean():.3f} ± {cat_scores.std():.3f}")
    return cat_model

def train_extra_trees(X_train, y_train):
    """
    Tune an Extra Trees classifier with a grid search and report its CV accuracy.

    A class-weight-balanced ``ExtraTreesClassifier`` is searched over
    ``n_estimators``, ``max_depth`` and ``min_samples_split`` with 5-fold
    cross-validation scored by F1. The selected estimator's 5-fold accuracy
    on the same data is then printed.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The ``best_estimator_`` selected by the grid search.
    """
    print("\n= Extra Trees Model Training =")

    search_space = {
        'n_estimators': [50, 100, 200],
        'max_depth': [5, 10, 20],
        'min_samples_split': [2, 5, 10],
    }
    base_estimator = ExtraTreesClassifier(random_state=42, class_weight='balanced')

    search = GridSearchCV(
        estimator=base_estimator,
        param_grid=search_space,
        scoring='f1',
        cv=5,
        n_jobs=-1,
    )
    search.fit(X_train, y_train)
    print(f"Best Extra Trees parameters: {search.best_params_}")

    tuned = search.best_estimator_
    accuracies = cross_val_score(tuned, X_train, y_train, cv=5, scoring='accuracy')
    mean_acc = accuracies.mean()
    std_acc = accuracies.std()
    print(f"Extra Trees CV Accuracy: {mean_acc:.3f} ± {std_acc:.3f}")
    return tuned