In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import f1_score, classification_report
from sklearn.utils import class_weight

# 1. Load Preprocessed Data
# Train/validation features and labels, the test features, and the test IDs
# used to build the submission are all read from the working directory.
X_train = pd.read_csv('X_train_processed.csv')
y_train = pd.read_csv('y_train_processed.csv')
X_val = pd.read_csv('X_val_processed.csv')
y_val = pd.read_csv('y_val_processed.csv')
X_test = pd.read_csv('test_processed.csv')
test_ids = pd.read_csv('test_ids.csv')

# 2. Calculate Class Weights (Crucial for Imbalanced Data)
# 'balanced' weighting gives the rarer class a larger weight, which tells the
# model to pay more attention to it during training.
classes = np.unique(y_train['RiskFlag'])
weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train['RiskFlag']
)
# Key each weight by its actual label value. compute_class_weight returns the
# weights in the same order as `classes`, so zipping the two is always correct,
# whereas enumerate() is only right when the labels happen to be 0..n-1.
class_weight_dict = dict(zip(classes.tolist(), weights))
print(f"Class Weights: {class_weight_dict}")

# 3. Define Neural Network Architecture
def build_model(input_dim):
    """Build and compile a small feed-forward binary classifier.

    Architecture: Dense(64, relu) -> BatchNorm -> Dropout(0.3) ->
    Dense(32, relu) -> BatchNorm -> Dropout(0.3) -> Dense(1, sigmoid).

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer,
        binary cross-entropy loss and an accuracy metric.
    """
    model = keras.Sequential([
        # Declare the input explicitly instead of passing `input_shape` to the
        # first Dense layer; Keras warns against the latter for Sequential models.
        keras.Input(shape=(input_dim,)),

        # First hidden block
        layers.Dense(64, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),  # Regularization to prevent overfitting

        # Second hidden block
        layers.Dense(32, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),

        # Output Layer (Sigmoid for binary classification)
        layers.Dense(1, activation='sigmoid')
    ])

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

# Size the network's input to the number of feature columns in the training set.
model = build_model(X_train.shape[1])

# 4. Train the Model
# Stop once val_loss has not improved for 10 consecutive epochs and roll the
# model back to the weights from its best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    patience=10, 
    restore_best_weights=True,
    monitor='val_loss'
)

print("Training model...")
# class_weight re-weights the loss per class using the dictionary computed earlier.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=512,
    epochs=50,
    callbacks=[early_stopping],
    class_weight=class_weight_dict,
    verbose=1
)

# 5. Optimize Threshold for F1 Score
# Standard threshold is 0.5, but shifting it often improves F1 in imbalanced data
val_probs = model.predict(X_val)
best_f1 = 0
best_thresh = 0.5  # kept if no candidate threshold yields an F1 above 0

# Sweep candidate thresholds from 0.10 upward in steps of 0.05 (stopping short
# of ~0.90) and keep the first one that achieves the highest validation F1.
for thresh in np.arange(0.1, 0.9, 0.05):
    val_preds = (val_probs > thresh).astype(int)
    score = f1_score(y_val, val_preds)
    if score > best_f1:
        best_f1 = score
        best_thresh = thresh

print(f"Best Threshold: {best_thresh}")
print(f"Best Validation F1: {best_f1}")

# 6. Generate Submission
# Apply the threshold tuned on the validation set to the test probabilities.
test_probs = model.predict(X_test)
test_preds = (test_probs > best_thresh).astype(int)

# flatten() collapses the prediction array to 1-D so it can be used as a column.
# NOTE(review): IDs and predictions are paired by row position — this assumes
# test_ids.csv is in the same row order as test_processed.csv; confirm.
submission = pd.DataFrame({
    'ProfileID': test_ids['ProfileID'],
    'RiskFlag': test_preds.flatten()
})

submission.to_csv('submission_neural_network.csv', index=False)
print("Submission saved successfully.")

Class Weights: {0: np.float64(0.5657877426100444), 1: np.float64(4.300099989474792)}
Training model...
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.6067 - loss: 0.6932 - val_accuracy: 0.6494 - val_loss: 0.6233
Epoch 2/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.6544 - loss: 0.6183 - val_accuracy: 0.6660 - val_loss: 0.6055
Epoch 3/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.6697 - loss: 0.6057 - val_accuracy: 0.6786 - val_loss: 0.5963
Epoch 4/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.6777 - loss: 0.6013 - val_accuracy: 0.6813 - val_loss: 0.5956
Epoch 5/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.6777 - loss: 0.5983 - val_accuracy: 0.6787 - val_loss: 0.5922
Epoch 6/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.6793 - loss: 0.5969 - val_accuracy: 0.6819 - val_loss: 0.5910
Epoch 7/100
[1m320/320[0m [32m━

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import f1_score
from sklearn.utils import class_weight
import optuna

# 1. Load Data
# Train/validation features and labels, the test features, and the test IDs
# used to build the submission are all read from the working directory.
print("Loading data...")
X_train = pd.read_csv('X_train_processed.csv')
y_train = pd.read_csv('y_train_processed.csv')
X_val = pd.read_csv('X_val_processed.csv')
y_val = pd.read_csv('y_val_processed.csv')
X_test = pd.read_csv('test_processed.csv')
test_ids = pd.read_csv('test_ids.csv')

# 2. Compute Class Weights
# 'balanced' weighting gives the rarer class a larger weight in the loss.
classes = np.unique(y_train['RiskFlag'])
weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train['RiskFlag']
)
# Key each weight by its actual label value. compute_class_weight returns the
# weights in the same order as `classes`, so zipping the two is always correct,
# whereas enumerate() is only right when the labels happen to be 0..n-1.
class_weight_dict = dict(zip(classes.tolist(), weights))

# 3. Define the Optuna Objective Function
def objective(trial):
    """Optuna objective: train one candidate network and score it by validation F1.

    Tuned hyperparameters: number of hidden layers (1-3); per layer the unit
    count (32-256), activation ('relu' or 'swish') and dropout rate (0.1-0.5);
    and the Adam learning rate (1e-4 to 1e-2, log scale).

    Reads the notebook-level ``X_train``, ``y_train``, ``X_val``, ``y_val`` and
    ``class_weight_dict``.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The best F1 score on the validation set over a sweep of decision
        thresholds (0 if no threshold produces a positive F1).
    """
    # Every trial builds a fresh model. Clearing Keras' global state first keeps
    # memory from growing as models from earlier trials accumulate.
    keras.backend.clear_session()

    # --- Hyperparameters to Tune ---

    # Number of hidden layers (1 to 3)
    n_layers = trial.suggest_int('n_layers', 1, 3)

    model = keras.Sequential()
    # keras.Input replaces InputLayer(input_shape=...), whose `input_shape`
    # argument is deprecated in Keras 3.
    model.add(keras.Input(shape=(X_train.shape[1],)))

    for i in range(n_layers):
        # Number of neurons in this layer
        n_units = trial.suggest_int(f'n_units_l{i}', 32, 256)
        # Activation function
        activation = trial.suggest_categorical(f'activation_l{i}', ['relu', 'swish'])
        # Dropout rate
        dropout_rate = trial.suggest_float(f'dropout_l{i}', 0.1, 0.5)

        model.add(layers.Dense(n_units, activation=activation))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(dropout_rate))

    # Output Layer
    model.add(layers.Dense(1, activation='sigmoid'))

    # Learning Rate
    lr = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # --- Training (Faster for Tuning) ---
    # We use a smaller patience here to speed up the search
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=5, restore_best_weights=True
    )

    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=20,  # Keep epochs low for tuning trials
        batch_size=1024,  # Larger batch size for speed
        callbacks=[early_stopping],
        class_weight=class_weight_dict,
        verbose=0
    )

    # --- Evaluation ---
    # We must find the best threshold for this specific trial model
    val_probs = model.predict(X_val, verbose=0)

    best_f1_trial = 0
    # Search for best threshold for this specific configuration
    for thresh in np.arange(0.3, 0.8, 0.05):
        preds = (val_probs > thresh).astype(int)
        score = f1_score(y_val, preds)
        if score > best_f1_trial:
            best_f1_trial = score

    return best_f1_trial

# 4. Run the Optimization
# The study maximizes the value returned by `objective` (validation F1).
print("Starting Optuna optimization...")
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=15) # Increase n_trials (e.g., 50) for better results

print("\n------------------------------------------------")
print("Best Hyperparameters found:")
print(study.best_params)
print(f"Best Validation F1: {study.best_value}")
print("------------------------------------------------\n")

# 5. Retrain Final Model with Best Parameters
# Rebuild the winning architecture from the stored trial parameters and train
# it with a longer epoch budget and more patience than the tuning trials used.
print("Retraining best model on full settings...")
best_params = study.best_params

final_model = keras.Sequential()
# keras.Input replaces InputLayer(input_shape=...), whose `input_shape`
# argument is deprecated in Keras 3.
final_model.add(keras.Input(shape=(X_train.shape[1],)))

# Parameter names mirror the ones suggested inside `objective`.
for i in range(best_params['n_layers']):
    final_model.add(layers.Dense(
        best_params[f'n_units_l{i}'],
        activation=best_params[f'activation_l{i}']
    ))
    final_model.add(layers.BatchNormalization())
    final_model.add(layers.Dropout(best_params[f'dropout_l{i}']))

final_model.add(layers.Dense(1, activation='sigmoid'))

final_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=best_params['learning_rate']),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Train for longer now
early_stopping_final = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=12, restore_best_weights=True
)

final_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100, # More epochs for the final model
    batch_size=512,
    callbacks=[early_stopping_final],
    class_weight=class_weight_dict,
    verbose=1
)

# 6. Final Threshold Tuning & Submission
# Finer sweep (step 0.01) than the one used inside the tuning trials.
val_probs = final_model.predict(X_val)
best_thresh = 0.5  # kept if no candidate threshold yields an F1 above 0
best_f1 = 0

for thresh in np.arange(0.2, 0.9, 0.01):
    preds = (val_probs > thresh).astype(int)
    score = f1_score(y_val, preds)
    if score > best_f1:
        best_f1 = score
        best_thresh = thresh

print(f"Final Best Threshold: {best_thresh}")
print(f"Final Validation F1: {best_f1}")

# Predict on Test
test_probs = final_model.predict(X_test)
test_preds = (test_probs > best_thresh).astype(int)

# flatten() collapses the prediction array to 1-D so it can be used as a column.
submission = pd.DataFrame({
    'ProfileID': test_ids['ProfileID'],
    'RiskFlag': test_preds.flatten()
})

submission.to_csv('submission_optuna_nn.csv', index=False)
print("Saved: submission_optuna_nn.csv")

  from .autonotebook import tqdm as notebook_tqdm


Loading data...


[I 2025-11-27 15:35:17,191] A new study created in memory with name: no-name-d05efe5f-2b62-4f41-81c3-e980d64ab485


Starting Optuna optimization...


[I 2025-11-27 15:35:27,035] Trial 0 finished with value: 0.3574620196604111 and parameters: {'n_layers': 1, 'n_units_l0': 81, 'activation_l0': 'relu', 'dropout_l0': 0.14954875264315076, 'learning_rate': 0.005138860819606371}. Best is trial 0 with value: 0.3574620196604111.
[I 2025-11-27 15:36:14,988] Trial 1 finished with value: 0.36357018054746654 and parameters: {'n_layers': 3, 'n_units_l0': 40, 'activation_l0': 'relu', 'dropout_l0': 0.2565649101756041, 'n_units_l1': 164, 'activation_l1': 'relu', 'dropout_l1': 0.46436062044423554, 'n_units_l2': 42, 'activation_l2': 'swish', 'dropout_l2': 0.49932418386929367, 'learning_rate': 0.0022020646344007833}. Best is trial 1 with value: 0.36357018054746654.
[I 2025-11-27 15:36:33,224] Trial 2 finished with value: 0.3536749831422792 and parameters: {'n_layers': 2, 'n_units_l0': 254, 'activation_l0': 'relu', 'dropout_l0': 0.2116607417097156, 'n_units_l1': 141, 'activation_l1': 'swish', 'dropout_l1': 0.3271720149355225, 'learning_rate': 0.00057870


------------------------------------------------
Best Hyperparameters found:
{'n_layers': 3, 'n_units_l0': 166, 'activation_l0': 'relu', 'dropout_l0': 0.36887637171147036, 'n_units_l1': 141, 'activation_l1': 'swish', 'dropout_l1': 0.17453414673573905, 'n_units_l2': 94, 'activation_l2': 'swish', 'dropout_l2': 0.49460188951906864, 'learning_rate': 0.0020909704067507326}
Best Validation F1: 0.3655300328149522
------------------------------------------------

Retraining best model on full settings...
Epoch 1/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 15ms/step - accuracy: 0.6378 - loss: 0.6588 - val_accuracy: 0.6761 - val_loss: 0.6033
Epoch 2/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.6777 - loss: 0.6008 - val_accuracy: 0.6700 - val_loss: 0.6134
Epoch 3/100
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.6818 - loss: 0.5965 - val_accuracy: 0.6672 - val_loss: 0.6016
Epoch

In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# 1. Load Data
# Raw train/test tables plus a separate file of test IDs used for the submission.
# NOTE(review): the submission later pairs test_ids with predictions made from
# test_df by row position, so this assumes test_ids.csv has the same row order
# as test_updated.csv — confirm.
train_df = pd.read_csv('train_updated.csv')
test_df = pd.read_csv('test_updated.csv')
test_ids = pd.read_csv('test_ids.csv')

# ---------------------------------------------------------
# 2. FEATURE ENGINEERING (Crucial for boosting score)
# ---------------------------------------------------------
def create_features(df):
    """Return a copy of ``df`` with four engineered columns appended.

    New columns, in order:
        Loan_to_Income   -- RequestedSum / (AnnualEarnings + 1)
        Monthly_Burden   -- RequestedSum * (1 + OfferRate / 100) / RepayPeriod
        Income_per_Age   -- AnnualEarnings / (ApplicantYears + 1)
        Trust_x_Accounts -- TrustMetric * (ActiveAccounts + 1)

    The input frame is not modified.
    """
    requested = df['RequestedSum']
    earnings = df['AnnualEarnings']

    # Total owed under a simple-interest approximation: principal * (1 + rate/100).
    owed = requested * (1 + df['OfferRate'] / 100)

    # assign() works on a copy, so the caller's frame is left untouched.
    return df.assign(
        Loan_to_Income=requested / (earnings + 1),
        Monthly_Burden=owed / df['RepayPeriod'],
        Income_per_Age=earnings / (df['ApplicantYears'] + 1),
        Trust_x_Accounts=df['TrustMetric'] * (df['ActiveAccounts'] + 1),
    )

# Apply the same feature engineering to both the training and the test tables.
print("Creating features...")
train_df = create_features(train_df)
test_df = create_features(test_df)

# Separate Target
# The target and the ID column are excluded from the model inputs.
y = train_df['RiskFlag'].values
train_X_raw = train_df.drop(['RiskFlag', 'ProfileID'], axis=1)
test_X_raw = test_df.drop(['ProfileID'], axis=1)

# ---------------------------------------------------------
# 3. PREPROCESSING
# ---------------------------------------------------------
# Columns with dtype 'object' are treated as categorical; everything else as numerical.
categorical_cols = train_X_raw.select_dtypes(include=['object']).columns
numerical_cols = train_X_raw.select_dtypes(exclude=['object']).columns

# Standard Scaling for Numerical, OneHot for Categorical
# handle_unknown='ignore' lets the encoder accept categories that appear only
# in the test data; sparse_output=False makes it return a dense array.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_cols)
    ],
    verbose_feature_names_out=False
)

print("Preprocessing data...")
# The transformer is fitted on the training features only and then reused for the test set.
# NOTE(review): it is fitted on the full training set before the K-fold split
# below, so each validation fold contributes to the scaling statistics.
X = preprocessor.fit_transform(train_X_raw)
X_test = preprocessor.transform(test_X_raw)

# ---------------------------------------------------------
# 4. DEFINE MODEL ARCHITECTURE
# ---------------------------------------------------------
def get_model(input_dim):
    """Build and compile a three-hidden-layer binary classifier.

    Architecture: Dense(256) -> Dense(128) -> Dense(64), each with 'swish'
    activation followed by BatchNormalization and Dropout (0.3, 0.3, 0.2),
    then a single sigmoid output unit.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A compiled ``keras.Sequential`` model using Adam (learning rate 0.001),
        binary cross-entropy loss and an accuracy metric.
    """
    model = keras.Sequential([
        # keras.Input replaces InputLayer(input_shape=...), whose `input_shape`
        # argument is deprecated in Keras 3.
        keras.Input(shape=(input_dim,)),

        # Layer 1: Wide & Regularized
        layers.Dense(256, activation='swish'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),

        # Layer 2
        layers.Dense(128, activation='swish'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),

        # Layer 3
        layers.Dense(64, activation='swish'),
        layers.BatchNormalization(),
        layers.Dropout(0.2),

        # Output
        layers.Dense(1, activation='sigmoid')
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

# ---------------------------------------------------------
# 5. STRATIFIED K-FOLD TRAINING (Ensembling)
# ---------------------------------------------------------
# Using 5 folds usually gives a 0.2% - 0.5% boost over a single split
k = 5
# Stratified splitting keeps the class ratio similar in every fold; the fixed
# random_state makes the fold assignment repeatable.
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

test_predictions = np.zeros(len(X_test))  # running mean of test probabilities over folds
oof_predictions = np.zeros(len(X)) # Out-of-fold predictions
scores = []

print(f"\nStarting {k}-Fold Training...")

for fold, (train_idx, val_idx) in enumerate(kf.split(X, y)):
    print(f"\n--- Fold {fold+1} / {k} ---")

    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    # A fresh model per fold so that no weights carry over between folds.
    model = get_model(X.shape[1])

    # Callbacks
    early_stopping = callbacks.EarlyStopping(
        monitor='val_accuracy', # Monitoring Accuracy as per your goal
        patience=8,
        restore_best_weights=True,
        mode='max'
    )

    # Halve the learning rate when val_loss has not improved for 3 epochs.
    reduce_lr = callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-6
    )

    # Train
    # Note: NOT using class_weights to maximize Accuracy
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=60,
        batch_size=512,
        callbacks=[early_stopping, reduce_lr],
        verbose=0
    )

    # Evaluate
    val_preds = model.predict(X_val).flatten()
    # Record this fold's held-out probabilities. Previously the array was
    # allocated but never written, so it stayed all zeros.
    oof_predictions[val_idx] = val_preds
    val_acc = accuracy_score(y_val, (val_preds > 0.5).astype(int))
    print(f"Fold {fold+1} Accuracy: {val_acc:.5f}")
    scores.append(val_acc)

    # Accumulate Test Predictions
    # Dividing by k as we go makes the final array the mean over all folds.
    test_predictions += model.predict(X_test).flatten() / k

print(f"\nAverage Accuracy: {np.mean(scores):.5f}")

# ---------------------------------------------------------
# 6. SUBMISSION
# ---------------------------------------------------------
# Threshold 0.5 is standard for maximizing Accuracy on well-calibrated models
final_preds_binary = (test_predictions > 0.5).astype(int)

submission = pd.DataFrame({
    'ProfileID': test_ids['ProfileID'],
    'RiskFlag': final_preds_binary
})

submission.to_csv('submission_nn_ensemble_improved.csv', index=False)
print("File saved: submission_nn_ensemble_improved.csv")

Creating features...
Preprocessing data...

Starting 5-Fold Training...

--- Fold 1 / 5 ---




[1m1277/1277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step
Fold 1 Accuracy: 0.88704
[1m1596/1596[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step

--- Fold 2 / 5 ---




[1m1277/1277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step
Fold 2 Accuracy: 0.88650
[1m1596/1596[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step

--- Fold 3 / 5 ---




[1m1277/1277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step
Fold 3 Accuracy: 0.88694
[1m1596/1596[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step

--- Fold 4 / 5 ---




[1m1277/1277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step
Fold 4 Accuracy: 0.88626
[1m1596/1596[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step

--- Fold 5 / 5 ---




[1m1277/1277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step
Fold 5 Accuracy: 0.88604
[1m1596/1596[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step

Average Accuracy: 0.88656
File saved: submission_nn_ensemble_improved.csv


In [7]:
import os

# Suppress TF logs
# TF_CPP_MIN_LOG_LEVEL must be set before TensorFlow is imported to take effect,
# so it is assigned ahead of the `import tensorflow` line below. If TensorFlow
# was already imported earlier in the same kernel, restart the kernel for this
# setting to apply.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score
import optuna

# ==============================================================================
# 1. LOAD DATA & FEATURE ENGINEERING
# ==============================================================================
print("Loading Data...")
train_df = pd.read_csv('train_updated.csv')
test_df = pd.read_csv('test_updated.csv')

# Extract IDs for final submission
# Taken from test_df itself so the IDs share the row order of the test features.
test_ids = test_df[['ProfileID']]

def create_features(df):
    """
    Return a copy of ``df`` with four derived interaction columns appended.

    Added columns, in order: Loan_to_Income, Income_Stability, Monthly_Burden,
    Trust_x_Accounts. The input frame itself is left unchanged.
    """
    enriched = df.copy()

    # Total repayment under a simple-interest assumption (feature purposes only).
    repay_total = df['RequestedSum'] * (1 + df['OfferRate'] / 100)

    # The +1 in the denominators/multiplier below guards against zero values.
    derived = {
        # Heavy loans relative to income are risky.
        'Loan_to_Income': df['RequestedSum'] / (df['AnnualEarnings'] + 1),
        # Earnings per year of work duration.
        'Income_Stability': df['AnnualEarnings'] / (df['WorkDuration'] + 1),
        # Estimated repayment spread over the repayment period.
        'Monthly_Burden': repay_total / df['RepayPeriod'],
        # Interaction between trust score and number of active accounts.
        'Trust_x_Accounts': df['TrustMetric'] * (df['ActiveAccounts'] + 1),
    }
    for column_name, values in derived.items():
        enriched[column_name] = values

    return enriched

# Apply the same feature engineering to both the training and the test tables.
print("Engineering Features...")
train_df = create_features(train_df)
test_df = create_features(test_df)

# Separate Target and Features
# The target and the ID column are excluded from the model inputs.
y = train_df['RiskFlag'].values
train_X_raw = train_df.drop(['RiskFlag', 'ProfileID'], axis=1)
test_X_raw = test_df.drop(['ProfileID'], axis=1)

# ==============================================================================
# 2. PREPROCESSING (SCALING & ENCODING)
# ==============================================================================
print("Preprocessing Data...")

# Identify column types automatically
# Columns with dtype 'object' are treated as categorical; everything else as numerical.
cat_cols = train_X_raw.select_dtypes(include=['object']).columns
num_cols = train_X_raw.select_dtypes(exclude=['object']).columns

# Create Transformer Pipeline
preprocessor = ColumnTransformer(
    transformers=[
        # Standardize numerical features to zero mean and unit variance
        # (StandardScaler does not rescale to a 0-1 range)
        ('num', StandardScaler(), num_cols),
        # One-Hot Encode categorical features (handle new categories gracefully)
        ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols)
    ],
    verbose_feature_names_out=False
)

# Transform Data
# The transformer is fitted on the training features only and then reused for the test set.
X = preprocessor.fit_transform(train_X_raw)
X_test = preprocessor.transform(test_X_raw)

print(f"Processed Data Shape: {X.shape}")

# ==============================================================================
# 3. OPTUNA HYPERPARAMETER TUNING
# ==============================================================================
print("\n--- Starting Optuna Hyperparameter Tuning ---")

def objective(trial):
    """
    Objective function for Optuna to maximize Validation Accuracy.
    Optimizes: Layers, Neurons, Dropout, Learning Rate, and Class Weight.

    Reads the notebook-level ``X`` and ``y``. Each call trains one candidate
    model on a fixed stratified 80/20 split of that data.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters and to
            report intermediate values to the pruner.

    Returns:
        The highest ``val_accuracy`` recorded across the training epochs.
    """
    # Every trial builds a fresh model. Clearing Keras' global state first keeps
    # memory from growing as models from earlier (including pruned) trials
    # accumulate.
    keras.backend.clear_session()

    # 1. Suggest Hyperparameters
    n_layers = trial.suggest_int('n_layers', 1, 3)
    dropout = trial.suggest_float('dropout', 0.1, 0.5)
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)

    # Critical: Search for the "Sweet Spot" class weight
    # 1.0 = No weighting, >1.0 = Focus more on Fraud class
    pos_weight = trial.suggest_float('pos_weight', 1.0, 4.0)

    # 2. Build Model
    model = keras.Sequential()
    # keras.Input replaces InputLayer(input_shape=...), whose `input_shape`
    # argument is deprecated in Keras 3.
    model.add(keras.Input(shape=(X.shape[1],)))

    for i in range(n_layers):
        # Tune neurons per layer
        units = trial.suggest_int(f'units_l{i}', 64, 512, step=64)
        # Tune activation function
        activation = trial.suggest_categorical(f'act_l{i}', ['relu', 'swish'])

        model.add(layers.Dense(units, activation=activation))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(dropout))

    model.add(layers.Dense(1, activation='sigmoid'))

    # 3. Compile
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # 4. Train on a Split (Fast validation for tuning)
    # The fixed random_state gives every trial the same split, so trial scores
    # are directly comparable.
    X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

    # Pruning: Stops unpromising trials early to save time
    pruning_callback = optuna.integration.TFKerasPruningCallback(trial, 'val_accuracy')

    history = model.fit(
        X_tr, y_tr,
        validation_data=(X_val, y_val),
        epochs=15,
        batch_size=1024,
        class_weight={0: 1.0, 1: pos_weight},
        callbacks=[pruning_callback],
        verbose=0
    )

    # Return best validation accuracy
    return max(history.history['val_accuracy'])

# Run the Study
# The study maximizes the value returned by `objective`; the median pruner
# works together with the pruning callback used inside `objective`.
study = optuna.create_study(direction='maximize', pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=30) # Run 30 trials (increase to 50+ for better results)

print("\nBest Hyperparameters Found:")
print(study.best_params)

# ==============================================================================
# 4. FINAL ENSEMBLE TRAINING (STRATIFIED K-FOLD)
# ==============================================================================
print("\n--- Training Final Ensemble (Stratified K-Fold) ---")

# Retrieve best parameters
# .get() falls back to 1.0 / 0.001 if the corresponding key is missing.
best_params = study.best_params
pos_weight_optimal = best_params.get('pos_weight', 1.0)
learning_rate_optimal = best_params.get('lr', 0.001)

def build_best_model():
    """Reconstructs the model using the winning hyperparameters.

    Reads the notebook-level ``best_params``, ``learning_rate_optimal`` and
    ``X`` (for the input width). Parameter names mirror the ones suggested
    inside ``objective``.

    Returns:
        A compiled ``keras.Sequential`` model using Adam, binary cross-entropy
        loss and an accuracy metric.
    """
    model = keras.Sequential()
    # keras.Input replaces InputLayer(input_shape=...), whose `input_shape`
    # argument is deprecated in Keras 3.
    model.add(keras.Input(shape=(X.shape[1],)))

    # A single dropout rate is shared by all hidden layers, so look it up once.
    dropout = best_params['dropout']

    for i in range(best_params['n_layers']):
        units = best_params[f'units_l{i}']
        activation = best_params[f'act_l{i}']

        model.add(layers.Dense(units, activation=activation))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(dropout))

    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate_optimal),
                  loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Setup 5-Fold Cross Validation
# Stratified splitting keeps the class ratio similar in every fold; the fixed
# random_state makes the fold assignment repeatable.
folds = 5
kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=42)

test_preds_accum = np.zeros(len(X_test))  # running mean of test probabilities over folds
cv_scores = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X, y)):
    print(f"Training Fold {fold+1}/{folds}...")
    
    X_train_fold, X_val_fold = X[train_idx], X[val_idx]
    y_train_fold, y_val_fold = y[train_idx], y[val_idx]
    
    # A fresh model per fold so that no weights carry over between folds.
    model = build_best_model()
    
    # Callbacks for final training
    # Early stopping tracks val_accuracy, while the learning-rate schedule tracks val_loss.
    early_stop = callbacks.EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4)
    
    # The positive class is weighted by the value found during tuning.
    model.fit(
        X_train_fold, y_train_fold,
        validation_data=(X_val_fold, y_val_fold),
        epochs=100,
        batch_size=512,
        class_weight={0: 1.0, 1: pos_weight_optimal},
        callbacks=[early_stop, reduce_lr],
        verbose=0
    )
    
    # Evaluate
    # Fold accuracy is measured at a fixed 0.5 decision threshold.
    val_probs = model.predict(X_val_fold, verbose=0)
    val_pred_binary = (val_probs > 0.5).astype(int)
    acc = accuracy_score(y_val_fold, val_pred_binary)
    cv_scores.append(acc)
    print(f"  > Fold {fold+1} Accuracy: {acc:.5f}")
    
    # Predict on Test Set (Add to accumulator)
    # Dividing by `folds` as we go makes the final array the mean over all folds.
    test_preds_accum += model.predict(X_test, verbose=0).flatten() / folds

print(f"\nAverage Ensemble Accuracy: {np.mean(cv_scores):.5f}")

# ==============================================================================
# 5. SUBMISSION GENERATION
# ==============================================================================
# Threshold the fold-averaged probabilities at 0.5 to get the final labels.
final_predictions = (test_preds_accum > 0.5).astype(int)

submission = pd.DataFrame({
    'ProfileID': test_ids['ProfileID'],
    'RiskFlag': final_predictions
})

filename = 'submission_final_ensemble.csv'
submission.to_csv(filename, index=False)
print(f"Done! Submission saved to '{filename}'")

Loading Data...
Engineering Features...
Preprocessing Data...


[I 2025-11-27 16:19:20,229] A new study created in memory with name: no-name-1a7e1e51-276f-41fb-abf8-091e103e8aa6


Processed Data Shape: (204277, 35)

--- Starting Optuna Hyperparameter Tuning ---


[I 2025-11-27 16:20:32,873] Trial 0 finished with value: 0.8862100839614868 and parameters: {'n_layers': 2, 'dropout': 0.26416284176820964, 'lr': 0.001204339050938255, 'pos_weight': 3.6057490363002174, 'units_l0': 320, 'act_l0': 'swish', 'units_l1': 256, 'act_l1': 'swish'}. Best is trial 0 with value: 0.8862100839614868.
[I 2025-11-27 16:21:07,037] Trial 1 finished with value: 0.8861122131347656 and parameters: {'n_layers': 1, 'dropout': 0.2466554072608799, 'lr': 0.0018769010906410377, 'pos_weight': 1.5679141901343998, 'units_l0': 256, 'act_l0': 'relu'}. Best is trial 0 with value: 0.8862100839614868.
[I 2025-11-27 16:22:26,896] Trial 2 finished with value: 0.8851086497306824 and parameters: {'n_layers': 3, 'dropout': 0.3663047618298919, 'lr': 0.0015843213254559988, 'pos_weight': 2.050945217264732, 'units_l0': 192, 'act_l0': 'relu', 'units_l1': 64, 'act_l1': 'swish', 'units_l2': 512, 'act_l2': 'swish'}. Best is trial 0 with value: 0.8862100839614868.
[I 2025-11-27 16:23:02,296] Trial 3


Best Hyperparameters Found:
{'n_layers': 2, 'dropout': 0.17596652935603746, 'lr': 0.00014122197921819941, 'pos_weight': 2.6105081009030013, 'units_l0': 512, 'act_l0': 'swish', 'units_l1': 448, 'act_l1': 'swish'}

--- Training Final Ensemble (Stratified K-Fold) ---
Training Fold 1/5...
  > Fold 1 Accuracy: 0.88364
Training Fold 2/5...




  > Fold 2 Accuracy: 0.86536
Training Fold 3/5...




  > Fold 3 Accuracy: 0.88552
Training Fold 4/5...




  > Fold 4 Accuracy: 0.88567
Training Fold 5/5...




  > Fold 5 Accuracy: 0.88327

Average Ensemble Accuracy: 0.88069
Done! Submission saved to 'submission_final_ensemble.csv'
