##### x_9, x_10, x_12, x_13 show data heavily compressed near zero, with long tails extending to extremely large values (confirming the $10^{20}$ order of magnitude seen in the initial inspection).

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
import warnings

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# --- 1. Configuration and Data Loading ---

# Check for GPU (CUDA/MPS)
# Preference order: CUDA first, then Apple MPS, then plain CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
elif torch.backends.mps.is_available():
    DEVICE = torch.device('mps')
else:
    DEVICE = torch.device('cpu')
print(f"Using device: {DEVICE}")

try:
    df_train = pd.read_csv('train.csv')
except FileNotFoundError:
    print("Error: 'train.csv' not found.")
    # `exit()` is a helper injected by the `site` module for interactive use;
    # raising SystemExit works everywhere and reports failure with a
    # non-zero status instead of the success status `exit()` would give.
    raise SystemExit(1)

# --- 2. PyTorch Components (MLP Architecture) ---

class TabularDataset(Dataset):
    """In-memory dataset that pairs each feature row with its target.

    Both inputs are converted to float32 tensors when the dataset is built.
    The targets gain an extra axis at position 1, so a 1-D array of length
    n is stored with shape (n, 1).
    """

    def __init__(self, features, targets):
        feature_tensor = torch.tensor(features, dtype=torch.float32)
        target_tensor = torch.tensor(targets, dtype=torch.float32)
        self.features = feature_tensor
        self.targets = torch.unsqueeze(target_tensor, 1)

    def __len__(self):
        # The dataset length is the length of the feature tensor.
        return len(self.features)

    def __getitem__(self, idx):
        # Index both tensors with the same key so rows stay aligned.
        sample = self.features[idx]
        label = self.targets[idx]
        return sample, label

class MLP(nn.Module):
    """Feed-forward binary classifier: input_size -> 128 -> 64 -> 32 -> 1.

    Each hidden layer is followed by ReLU; the first two hidden layers are
    also followed by Dropout(0.3). `forward` applies a sigmoid to the final
    linear output, so the model returns values in (0, 1).
    """

    def __init__(self, input_size):
        super().__init__()
        # Layer order fixes the Sequential indices and therefore the
        # state_dict keys (layer_stack.0, .3, .6, .8).
        layers = [
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        logits = self.layer_stack(x)
        return torch.sigmoid(logits)

# --- 3. Preprocessing Pipeline ---

def preprocess_data(df):
    """Turn the raw frame into a standardized feature matrix plus labels.

    Steps, in order:
      1. Copy `df`, cast 'Y' to int and replace +/-inf with NaN.
      2. Drop the 'id' and 'Y' columns to obtain the feature frame.
      3. For every feature column containing NaN, append a 0/1 indicator
         column named '<col>_nan'.
      4. Replace x_9, x_10, x_12 and x_13 with log1p(|x|).
      5. Fill the remaining NaN values with per-column medians.
      6. Standardize every column with a freshly fitted StandardScaler.

    Returns:
        (X_scaled, y, scaler): the scaled feature array, the integer label
        array and the fitted scaler.

    NOTE(review): the medians and the scaler are fitted on every row of
    `df`, and neither the medians nor the indicator-column list is returned.
    train_and_save_model splits this output into folds afterwards, so the
    validation rows contribute to these statistics.
    """
    frame = df.copy()
    frame['Y'] = frame['Y'].astype(int)
    frame = frame.replace([np.inf, -np.inf], np.nan)
    features = frame.drop(columns=['id', 'Y'])

    # Missing-value flags are computed before any transform or imputation.
    has_missing = features.isnull().any()
    for name in features.columns[has_missing].tolist():
        features[f'{name}_nan'] = features[name].isnull().astype(int)

    # Compress the heavy-tailed columns; abs() keeps log1p defined for
    # negative inputs.
    for name in ('x_9', 'x_10', 'x_12', 'x_13'):
        features[name] = np.log1p(features[name].abs())

    # Median imputation over every column, indicator columns included.
    features_filled = features.fillna(features.median())

    # Scaling (MANDATORY for NNs)
    scaler = StandardScaler()
    scaled = scaler.fit_transform(features_filled)

    labels = frame['Y'].values
    return scaled, labels, scaler

# --- 4. Training and Saving Logic ---

def _train_one_epoch(model, loader, criterion, optimizer):
    """Run one optimization pass over `loader`, updating `model` in place."""
    model.train()
    for X_batch, y_batch in loader:
        X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
        optimizer.zero_grad()
        loss = criterion(model(X_batch), y_batch)
        loss.backward()
        optimizer.step()


def _validation_auc(model, loader):
    """Return the ROC-AUC of `model`'s outputs over all batches in `loader`."""
    model.eval()
    val_targets, val_predictions = [], []
    with torch.no_grad():
        for X_batch, y_batch in loader:
            outputs = model(X_batch.to(DEVICE))
            val_predictions.extend(outputs.cpu().numpy().flatten())
            val_targets.extend(y_batch.cpu().numpy().flatten())
    return roc_auc_score(val_targets, val_predictions)


def train_and_save_model(X, y, scaler, epochs=100, batch_size=64, learning_rate=1e-4,
                         n_splits=5, patience=15, model_path='best_mlp_model.pth'):
    """Cross-validate an MLP and checkpoint the best-scoring weights.

    A fresh MLP is trained for each stratified fold. After every epoch the
    validation ROC-AUC is computed; whenever it exceeds the best value seen
    across all folds so far, the model's state_dict is written to
    `model_path`, overwriting any earlier checkpoint.

    Args:
        X: 2-D feature array, indexed by row with the fold indices.
        y: label array aligned with `X`.
        scaler: not used during training; returned unchanged.
        epochs: maximum number of epochs per fold.
        batch_size: batch size for both the training and validation loaders.
        learning_rate: Adam learning rate.
        n_splits: number of stratified folds (previously hard-coded to 5).
        patience: consecutive epochs without a fold-level AUC improvement
            before the fold stops early (previously hard-coded to 15).
        model_path: checkpoint destination (previously hard-coded to
            'best_mlp_model.pth').

    Returns:
        (overall_best_auc, scaler): the highest validation AUC observed in
        any fold and epoch, and the scaler that was passed in.

    NOTE(review): the checkpoint is selected on the same validation folds
    whose AUC is reported, so the printed figure is the maximum over all
    folds and epochs rather than a held-out estimate.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    overall_best_auc = 0

    for fold, (train_index, val_index) in enumerate(skf.split(X, y)):
        print(f"\n--- Starting Fold {fold+1}/{n_splits} ---")

        train_dataset = TabularDataset(X[train_index], y[train_index])
        val_dataset = TabularDataset(X[val_index], y[val_index])

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        # Initialize model for THIS fold
        model = MLP(X.shape[1]).to(DEVICE)
        criterion = nn.BCELoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        fold_best_auc = 0
        patience_counter = 0

        for epoch in range(epochs):
            _train_one_epoch(model, train_loader, criterion, optimizer)
            val_auc = _validation_auc(model, val_loader)

            # --- MODEL SAVING LOGIC ---
            if val_auc > fold_best_auc:
                fold_best_auc = val_auc
                patience_counter = 0

                # Only checkpoint when this also beats every earlier fold.
                if fold_best_auc > overall_best_auc:
                    overall_best_auc = fold_best_auc
                    torch.save(model.state_dict(), model_path)
                    print(f"  -> NEW GLOBAL BEST AUC: {overall_best_auc:.4f} (Saved model state)")
            else:
                patience_counter += 1
                if patience_counter >= patience:  # Early stopping patience
                    print(f"  -> Early stopping at epoch {epoch+1}. Best AUC: {fold_best_auc:.4f}")
                    break

    print("\n====================================")
    print(f"Training Complete. FINAL BEST AUC-ROC saved: {overall_best_auc:.4f}")
    print("====================================")

    return overall_best_auc, scaler

# --- Main Execution ---
if __name__ == '__main__':
    # 1. Preprocess Data
    # Yields the standardized feature array, the integer labels and the
    # fitted scaler, all derived from the module-level `df_train` frame.
    X_scaled, y, scaler = preprocess_data(df_train)
    
    # 2. Train and Save Model
    # This function will generate 'best_mlp_model.pth'
    # (written each time a validation AUC beats the best seen so far).
    # The returned (best AUC, scaler) tuple is not captured here.
    train_and_save_model(X_scaled, y, scaler)

Using device: cuda

--- Starting Fold 1/5 ---
  -> NEW GLOBAL BEST AUC: 0.7699 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7784 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7807 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7827 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7841 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7851 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7855 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7862 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7867 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7869 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7873 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7879 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7880 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7881 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7886 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7888 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7889 (Saved model state)
  -> NEW GLOBAL BEST AUC: 0.7892 (Saved model state)
