In [None]:
# Import necessary libraries for data analysis and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Configure visualization settings
# NOTE(review): the data-loading cell later calls `sns.set_theme(style="whitegrid")`,
# which also sets the plotting context, so this font scale may not survive that call.
# Confirm which of the two settings is the intended one.
sns.set_context("notebook", font_scale=1.5)

# Location of the dataset workbook, given relative to the notebook's working directory
# so that the notebook does not depend on a machine-specific absolute path.
file_path = './data/Dataset4_EWS.xlsx'



## Real-World dataset

The data come from Bloomberg and consist of weekly observations of:

- Market and macroeconomic indicators (e.g., indices, rates).
- A response variable `Y` indicating **anomalous periods** (e.g., market stress events).

In [None]:
# Plot styling used by the figures below
sns.set_theme(style="whitegrid")
plt.rcParams['figure.figsize'] = [10, 6]

# Read the two sheets of the workbook: 'Markets' holds the dated observations
# (and the anomaly label, when present); 'Metadata' describes the tickers.
data_df = pd.read_excel(file_path, sheet_name='Markets')
metadata_df = pd.read_excel(file_path, sheet_name='Metadata')

print("Data columns:", data_df.columns.tolist())

# Locate the date column (falling back to the first column) and the label column
date_col = data_df.columns[0] if 'Date' not in data_df.columns else 'Date'
y_col = None if 'Y' not in data_df.columns else 'Y'

# Parse the dates day-first (dd/mm/yy) and index the frame by them
data_df[date_col] = pd.to_datetime(data_df[date_col], dayfirst=True)
data_df = data_df.set_index(date_col)

# Features are every column except the label; without a label there is no y
if y_col:
    y = data_df[y_col].values
    X_df = data_df.drop(y_col, axis=1)
else:
    y = None
    X_df = data_df

# Headline facts about the dataset
first_date = X_df.index.min().strftime('%m/%d/%Y')
last_date = X_df.index.max().strftime('%m/%d/%Y')
print(f"Data shape: {X_df.shape}")
print(f"Total number of records: {len(X_df)}")
print(f"Time period: from {first_date} to {last_date}")
print(f"Frequency: {pd.infer_freq(X_df.index) or 'Weekly'}")
print(f"Number of variables: {X_df.shape[1]}")
if y_col:
    is_anomaly = (y == 1)
    print(f"Number of anomalies: {np.sum(is_anomaly)} ({np.mean(is_anomaly)*100:.2f}%)")

# Work out which metadata columns carry the ticker and its description:
# prefer the named columns, otherwise fall back to column positions.
ticker_col = 'ticker' if 'ticker' in metadata_df.columns else metadata_df.columns[0]
if 'description' in metadata_df.columns:
    desc_col = 'description'
elif len(metadata_df.columns) > 1:
    desc_col = metadata_df.columns[1]
else:
    desc_col = ticker_col

# One summary record per feature: description (or the ticker itself when the
# metadata sheet has no entry for it) plus basic statistics and missing-value counts.
enhanced_metadata = []
for ticker in X_df.columns:
    meta_row = metadata_df[metadata_df[ticker_col] == ticker]
    description = ticker if meta_row.empty else meta_row[desc_col].values[0]

    series = X_df[ticker]
    missing_mask = series.isna()

    record = {
        'Ticker': ticker,
        'Description': description,
        'Mean': series.mean(),
        'Std.Dev': series.std(),
        'Min': series.min(),
        'Max': series.max(),
        'Missing values': missing_mask.sum(),
        'Missing (%)': f"{missing_mask.mean()*100:.2f}%"
    }
    enhanced_metadata.append(record)

enhanced_meta_df = pd.DataFrame(enhanced_metadata)

print("\nMetadata and statistics:")
display(enhanced_meta_df)


# Mixture model of Gaussian distributions
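- The data are assumed to be generated from a mixture of Gaussian distributions.
- Before any modelling, the next cell makes each series stationary (log-differences for indices and currencies, first differences for interest rates) and builds shuffled train/test splits.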

In [None]:
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Make the data stationary. The transform depends on the kind of variable, so the
# tickers present in X_df are first grouped by type (order follows X_df.columns).
indices_currencies = [
    col for col in X_df.columns
    if col in {
        'XAUBGNL', 'BDIY', 'CRY', 'Cl1', 'DXY', 'EMUSTRUU', 'GBP', 'JPY', 'LF94TRUU',
        'LF98TRUU', 'LG30TRUU', 'LMBITR', 'LP01TREU', 'LUACTRUU', 'LUMSTRUU',
        'MXBR', 'MXCN', 'MXEU', 'MXIN', 'MXJP', 'MXRU', 'MXUS', 'VIX',
    }
]

interest_rates = [
    col for col in X_df.columns
    if col in {
        'EONIA', 'GTDEM10Y', 'GTDEM2Y', 'GTDEM30Y', 'GTGBP20Y', 'GTGBP2Y', 'GTGBP30Y',
        'GTITL10YR', 'GTITL2YR', 'GTITL30YR', 'GTJPY10YR', 'GTJPY2YR', 'GTJPY30YR',
        'US0001M', 'USGG3M', 'USGG2YR', 'GT10', 'USGG30YR',
    }
]

# Differencing consumes the first observation, so the result starts at the second date.
# Columns that belong to neither group (other than ECSURPUS) are not carried over.
stationary_columns = {}

# Indices and currencies are treated as strictly positive -> log-differences
stationary_columns.update(
    (col, np.diff(np.log(X_df[col]))) for col in indices_currencies
)

# Interest rates can be negative or very close to 0 -> plain first differences
stationary_columns.update(
    (col, np.diff(X_df[col])) for col in interest_rates
)

# The Bloomberg Economic US Surprise Index is kept in levels (treated as already stationary)
if 'ECSURPUS' in X_df.columns:
    stationary_columns['ECSURPUS'] = X_df['ECSURPUS'].values[1:]

stationary_df = pd.DataFrame(stationary_columns, index=X_df.index[1:])

# Drop the first label as well so that it lines up with the differenced rows
y_stationary = None if y is None else y[1:]

# Plain numpy views of the features / labels (note: this rebinds the notebook-level `y`)
X = stationary_df.values
y = y_stationary

# Shuffle the rows; this deliberately breaks the temporal autocorrelation
X_shuffled, y_shuffled = shuffle(X, y, random_state=42)

# Normal vs. anomalous observations
X_anomaly = X_shuffled[y_shuffled == 1]
X_normal = X_shuffled[y_shuffled == 0]

# 80 / 20 train / test split of the shuffled data
X_train, X_test, y_train, y_test = train_test_split(
    X_shuffled, y_shuffled, test_size=0.2, random_state=42
)

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")


# Plot differences 
- The differenced (stationary) series are plotted separately for each class to show how the two classes differ.

As the histograms show, the distributions of the differenced series for the two classes (`y=0` and `y=1`) are not clearly separated.

In [None]:
fig = plt.figure(figsize=(20, 40))

# Grid layout: 4 panels per row, with as many rows as needed (ceiling division)
num_cols = 4
num_rows = (len(stationary_df.columns) + num_cols - 1) // num_cols

# (label value, colour, legend entry) for the two overlaid histograms
class_styles = [(0, 'blue', 'y=0'), (1, 'red', 'y=1')]

# One panel per differenced series, with the two classes drawn on top of each other
for i, col in enumerate(stationary_df.columns):
    if col == 'y':
        # A column literally named 'y' is skipped (its grid slot stays empty)
        continue

    ax = fig.add_subplot(num_rows, num_cols, i + 1)
    for label_value, color, legend_label in class_styles:
        class_values = stationary_df.loc[y_stationary == label_value, col].dropna()
        ax.hist(class_values, bins=30, color=color, alpha=0.5, label=legend_label)

    ax.set_title(col)
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')
    ax.legend()

fig.tight_layout()  # Keep titles and axis labels from overlapping
plt.show()

In [None]:
# Re-derive the raw features and the full-length labels BEFORE using them: an earlier
# cell rebinds `y` to the shortened label vector aligned with the differenced data,
# which would no longer line up with the rows of X_df.
X_df = data_df.drop(y_col, axis=1)
y = data_df[y_col].values

print("X_df shape", X_df.shape)

plt.figure(figsize=(20, 40))
# Grid layout: 4 panels per row, with as many rows as needed (ceiling division)
num_cols = 4
num_rows = -(-len(X_df.columns) // num_cols)

# For each raw (non-differenced) feature, overlay separate histograms for y=0 and y=1
for i, col in enumerate(X_df.columns):
    if col != 'y':  # A column literally named 'y' is skipped (its grid slot stays empty)
        plt.subplot(num_rows, num_cols, i + 1)

        # Class 0 (blue)
        plt.hist(X_df[y == 0][col].dropna(),
                 bins=30, color='blue', alpha=0.5, label='y=0')

        # Class 1 (red)
        plt.hist(X_df[y == 1][col].dropna(),
                 bins=30, color='red', alpha=0.5, label='y=1')

        plt.title(col)
        plt.xlabel('Value')
        plt.ylabel('Frequency')
        plt.legend()

plt.tight_layout()  # Adjust layout to prevent overlap
plt.show()

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_curve, auc, confusion_matrix, precision_score, recall_score, f1_score
from scipy import stats
import pandas as pd

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_curve, auc, confusion_matrix, precision_score, recall_score, f1_score
from scipy import stats
import pandas as pd

class BayesianLayer(nn.Module):
    """Linear -> optional BatchNorm1d -> activation -> dropout building block.

    Args:
        in_features: Size of the last dimension of the input.
        out_features: Size of the last dimension of the output.
        dropout_prob: Dropout probability applied after the activation.
        activation: One of 'relu', 'leaky_relu', 'selu', 'gelu', 'swish'.
            Any other value falls back to GELU.
        use_batch_norm: Whether to normalize the linear output with BatchNorm1d.

    The output has the same shape as the input except for the last dimension,
    which becomes ``out_features``.
    """

    def __init__(self, in_features, out_features, dropout_prob=0.1, activation='gelu', use_batch_norm=True):
        super(BayesianLayer, self).__init__()
        self.linear = nn.Linear(in_features, out_features)
        self.dropout = nn.Dropout(dropout_prob)
        self.use_batch_norm = use_batch_norm

        if use_batch_norm:
            self.batch_norm = nn.BatchNorm1d(out_features)

        # Activation lookup. 'swish' maps to F.silu (x * sigmoid(x)); using the library
        # function instead of a lambda keeps the module picklable.
        activations = {
            'relu': F.relu,
            'leaky_relu': F.leaky_relu,
            'selu': F.selu,
            'gelu': F.gelu,
            'swish': F.silu,
        }
        self.activation = activations.get(activation, F.gelu)  # Default

        # Initialize weights with He initialization for better gradient flow
        nn.init.kaiming_uniform_(self.linear.weight)

    def forward(self, x):
        """Apply the layer to ``x`` (features in the last dimension)."""
        x = self.linear(x)
        if self.use_batch_norm:
            if x.dim() == 2:
                x = self.batch_norm(x)
            else:
                # BatchNorm1d normalizes dim 1 of a 2-D input. Fold any other layout
                # (a single 1-D sample, or extra leading dimensions) into
                # (N, out_features), normalize, then restore the caller's shape.
                # A lone sample still requires eval mode: batch statistics cannot be
                # computed from one value per channel.
                original_shape = x.shape
                x = self.batch_norm(x.reshape(-1, original_shape[-1]))
                x = x.reshape(original_shape)
        x = self.activation(x)
        x = self.dropout(x)
        return x

# Residual block for better gradient flow
class ResidualBlock(nn.Module):
    """Two same-width BayesianLayer modules wrapped in a skip connection.

    Args:
        features: Width of the block (input and output size are identical).
        dropout_prob: Dropout probability passed to both inner layers.
        activation: Activation name passed to both inner layers.
        use_batch_norm: Whether the inner layers use batch normalization.
    """

    def __init__(self, features, dropout_prob=0.1, activation='gelu', use_batch_norm=True):
        super(ResidualBlock, self).__init__()
        self.layer1 = BayesianLayer(features, features, dropout_prob, activation, use_batch_norm)
        self.layer2 = BayesianLayer(features, features, dropout_prob, activation, use_batch_norm)

    def forward(self, x):
        """Return ``layer2(layer1(x)) + x``."""
        out = self.layer2(self.layer1(x))
        # Out-of-place add: an in-place `out += x` overwrites the tensor returned by
        # layer2, which autograd may still need for the backward pass (for example a
        # ReLU output when dropout is inactive), and then backward() raises.
        return out + x

# Improved Bayesian Neural Network with Residual Connections
class BayesianNN(nn.Module):
    """Feed-forward binary classifier whose uncertainty is estimated with MC Dropout.

    Args:
        input_dim: Number of input features.
        hidden_dims: Width of each hidden layer, in order.
        dropout_probs: Dropout probability for each hidden layer; needs at least
            one entry per entry of ``hidden_dims``.
        activation: Activation name forwarded to every BayesianLayer.
        use_batch_norm: Whether the hidden layers use batch normalization.
        use_residual: When True, a hidden layer whose width equals the previous
            layer's width is built as a ResidualBlock instead of a BayesianLayer.
    """

    def __init__(self, input_dim, hidden_dims=[32, 64, 64, 32], dropout_probs=[0.1, 0.2, 0.2, 0.1], 
                 activation='gelu', use_batch_norm=True, use_residual=True):
        super(BayesianNN, self).__init__()

        self.use_residual = use_residual
        self.layers = nn.ModuleList()

        # Input layer
        self.layers.append(BayesianLayer(input_dim, hidden_dims[0],
                                        dropout_probs[0], activation, use_batch_norm))

        # Hidden layers; a residual block is only possible when the width does not change
        for i in range(1, len(hidden_dims)):
            if use_residual and hidden_dims[i] == hidden_dims[i-1]:
                self.layers.append(ResidualBlock(hidden_dims[i-1], dropout_probs[i],
                                                activation, use_batch_norm))
            else:
                self.layers.append(BayesianLayer(hidden_dims[i-1], hidden_dims[i],
                                                dropout_probs[i], activation, use_batch_norm))

        # Output layer: a single logit, squashed to a probability in forward()
        self.output_layer = nn.Linear(hidden_dims[-1], 1)

    def forward(self, x):
        """Return the predicted probability of the positive class, shape (N, 1)."""
        for layer in self.layers:
            x = layer(x)
        x = torch.sigmoid(self.output_layer(x))
        return x

    def predict_with_uncertainty(self, x, num_samples=100):
        """Run ``num_samples`` stochastic forward passes (MC Dropout).

        Only the dropout modules are switched to training mode. Everything else
        (notably batch normalization) stays in eval mode, so predictions use the
        learned running statistics and those statistics are not overwritten with
        inference data. The model's previous train/eval mode is restored afterwards.

        Args:
            x: Input tensor of shape (N, input_dim).
            num_samples: Number of stochastic passes; with a single pass the
                standard deviation is not defined (NaN).

        Returns:
            Tuple ``(mean_pred, std_pred, stacked_preds)`` where ``stacked_preds``
            has shape (N, num_samples, 1) and the mean / standard deviation are
            taken over the sample dimension with ``keepdim=True``.
        """
        was_training = self.training
        self.eval()
        for module in self.modules():
            if isinstance(module, nn.Dropout):
                module.train()

        try:
            with torch.no_grad():
                predictions = [self(x) for _ in range(num_samples)]
        finally:
            self.train(was_training)

        # Stack all predictions along a new sample dimension
        stacked_preds = torch.stack(predictions, dim=1)

        # Mean and standard deviation over the samples
        mean_pred = torch.mean(stacked_preds, dim=1, keepdim=True)
        std_pred = torch.std(stacked_preds, dim=1, keepdim=True)

        return mean_pred, std_pred, stacked_preds

# Learning rate scheduler
class CosineWarmupScheduler(optim.lr_scheduler._LRScheduler):
    """Linear warmup followed by cosine annealing, stepped once per epoch.

    Args:
        optimizer: Wrapped optimizer.
        warmup_epochs: Number of epochs over which the learning rate rises
            linearly from 0 towards each group's base learning rate.
        max_epochs: Epoch at which the cosine decay reaches ``eta_min``.
        eta_min: Learning rate at the end of the cosine decay.
        last_epoch: Index of the last epoch (-1 starts a fresh schedule).
    """

    def __init__(self, optimizer, warmup_epochs, max_epochs, eta_min=0, last_epoch=-1):
        # These must be set before the base constructor runs, because it performs an
        # initial step that calls get_lr().
        self.warmup_epochs = warmup_epochs
        self.max_epochs = max_epochs
        self.eta_min = eta_min
        super(CosineWarmupScheduler, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every parameter group for ``last_epoch``."""
        if self.last_epoch < self.warmup_epochs:
            # Linear warmup (the very first epoch therefore runs at learning rate 0)
            alpha = self.last_epoch / self.warmup_epochs
            return [base_lr * alpha for base_lr in self.base_lrs]

        # Cosine annealing. Progress is clamped to 1 so the rate stays at eta_min once
        # max_epochs is passed instead of climbing back up the cosine, and a schedule
        # with no room for decay (max_epochs <= warmup_epochs) is treated as finished
        # rather than dividing by zero.
        decay_epochs = self.max_epochs - self.warmup_epochs
        if decay_epochs <= 0:
            progress = 1.0
        else:
            progress = min(1.0, (self.last_epoch - self.warmup_epochs) / decay_epochs)
        cosine_decay = 0.5 * (1 + np.cos(np.pi * progress))
        return [self.eta_min + (base_lr - self.eta_min) * cosine_decay for base_lr in self.base_lrs]

In [None]:
def train_and_evaluate_improved_bnn(X, y, hidden_dims=[32, 64, 64, 32],
                                  dropout_probs=[0.1, 0.2, 0.2, 0.1], 
                                  activation='gelu', use_batch_norm=True, use_residual=True,
                                  epochs=300, batch_size=32, lr=0.001, weight_decay=1e-4,
                                  warmup_epochs=30, class_weights=None):
    """
    Train a BayesianNN on a binary target and evaluate it with MC-dropout predictions.

    Pipeline: stratified 70/15/15 train/validation/test split, standardization fitted
    on the training split, AdamW with warmup + cosine learning-rate schedule, early
    stopping on the validation loss (the best weights are restored after training),
    decision threshold chosen on the validation split, metrics and diagnostic plots
    on the test split.

    Args:
        X: Feature array of shape (n_samples, n_features).
        y: Binary labels (cast to int).
        hidden_dims, dropout_probs, activation, use_batch_norm, use_residual:
            Architecture options forwarded to BayesianNN.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.
        lr: Peak learning rate; the cosine decay ends at ``lr / 10``.
        weight_decay: AdamW weight decay.
        warmup_epochs: Number of linear warmup epochs.
        class_weights: ``None`` for an unweighted loss, ``'auto'`` to weight the
            positive class by the negative/positive ratio of the training split, or a
            sequence ``[weight_for_0, weight_for_1]``. The weights affect the training
            loss only; the validation loss used for early stopping is unweighted.

    Returns:
        Tuple ``(bnn_model, scaler, optimal_threshold)``: the trained model (in eval
        mode), the fitted StandardScaler, and the F1-maximizing probability threshold
        found on the validation split.

    NOTE(review): the splits are random rather than chronological. If the rows are
    time-ordered, later observations can end up in the training split; confirm that
    this is intended.
    """
    # Ensure y is binary and has the right format
    y = y.astype(int)

    # Split data - stratify to handle class imbalance
    X_train, X_val_test, y_train, y_val_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y)
    X_val, X_test, y_val, y_test = train_test_split(
        X_val_test, y_val_test, test_size=0.5, random_state=42, stratify=y_val_test)

    # Standardize features using statistics of the training split only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.FloatTensor(y_train).reshape(-1, 1)
    X_val_tensor = torch.FloatTensor(X_val)
    y_val_tensor = torch.FloatTensor(y_val).reshape(-1, 1)
    X_test_tensor = torch.FloatTensor(X_test)

    # Create data loaders. BatchNorm cannot compute batch statistics from a single
    # sample, so a trailing training batch of size 1 is dropped when batch norm is on.
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    n_train = len(train_dataset)
    drop_last = bool(use_batch_norm and n_train > batch_size and n_train % batch_size == 1)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              drop_last=drop_last)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Initialize model
    input_dim = X_train.shape[1]
    bnn_model = BayesianNN(input_dim, hidden_dims, dropout_probs,
                           activation, use_batch_norm, use_residual)

    # Resolve the class weights. The string check comes first so that array-like
    # weights are never compared element-wise against 'auto'.
    if class_weights is None:
        pass
    elif isinstance(class_weights, str) and class_weights == 'auto':
        counts = np.bincount(y_train, minlength=2)
        class_weights = torch.FloatTensor([1.0, counts[0] / max(counts[1], 1)])  # Inverse frequency
    else:
        class_weights = torch.FloatTensor(class_weights)

    # Unweighted BCE; per-sample weights are applied manually in the training loop
    criterion = nn.BCELoss()

    # Define optimizer with weight decay (L2 regularization)
    optimizer = optim.AdamW(bnn_model.parameters(), lr=lr, weight_decay=weight_decay)

    # Learning rate scheduler
    scheduler = CosineWarmupScheduler(optimizer, warmup_epochs, epochs, eta_min=lr/10)

    # Training loop
    train_losses = []
    val_losses = []
    lr_history = []  # learning rate actually used in each epoch
    min_val_loss = float('inf')
    patience = 0.3 * epochs  # Early stopping patience
    patience_counter = 0
    best_state = None

    for epoch in range(epochs):
        bnn_model.train()
        lr_history.append(optimizer.param_groups[0]['lr'])
        epoch_loss = 0.0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = bnn_model(batch_X)

            # Apply class weights if needed
            if class_weights is not None:
                batch_weights = torch.ones_like(batch_y)
                for class_index in range(len(class_weights)):
                    batch_weights[batch_y == class_index] = class_weights[class_index]
                loss = F.binary_cross_entropy(outputs, batch_y, weight=batch_weights)
            else:
                loss = criterion(outputs, batch_y)

            loss.backward()

            # Gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(bnn_model.parameters(), max_norm=1.0)

            optimizer.step()
            epoch_loss += loss.item()

        # Update learning rate (once per epoch)
        scheduler.step()

        # Track losses
        train_losses.append(epoch_loss / len(train_loader))

        # Validation step
        bnn_model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                val_loss += criterion(bnn_model(batch_X), batch_y).item()
        val_losses.append(val_loss / len(val_loader))

        # Early stopping
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            # state_dict() returns references to the live parameters, so every tensor
            # is cloned; otherwise the snapshot would keep changing as training goes on.
            best_state = {name: tensor.detach().clone()
                          for name, tensor in bnn_model.state_dict().items()}
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

        # Print progress every 50 epochs
        if (epoch + 1) % 50 == 0:
            print(f'Epoch {epoch+1}/{epochs}, Loss: {train_losses[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}, LR: {optimizer.param_groups[0]["lr"]:.6f}')

    # Restore the best model once, after training has finished (this also covers the
    # early-stopping exit)
    if best_state is not None:
        bnn_model.load_state_dict(best_state)

    # Plot training loss
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Training loss')
    plt.plot(val_losses, label='Validation loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    # Plot the learning rate recorded during training
    plt.subplot(1, 2, 2)
    plt.plot(lr_history)
    plt.title('Learning Rate Schedule')
    plt.xlabel('Epoch')
    plt.ylabel('Learning Rate')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # Choose the decision threshold on the VALIDATION split so that the test metrics
    # below are not tuned on the data they are reported on.
    bnn_model.eval()
    val_mean, _, _ = bnn_model.predict_with_uncertainty(X_val_tensor)
    val_probs = val_mean.detach().cpu().numpy().flatten()

    thresholds = np.linspace(0.1, 0.9, 100)
    f1_scores = [
        f1_score(y_val, (val_probs > threshold).astype(int), zero_division=0)
        for threshold in thresholds
    ]
    optimal_threshold = thresholds[np.argmax(f1_scores)]
    print(f"Optimal threshold for F1 score: {optimal_threshold:.3f}")

    # Make test predictions with uncertainty (MC Dropout)
    mean_pred, std_pred, _ = bnn_model.predict_with_uncertainty(X_test_tensor)
    test_probs = mean_pred.detach().cpu().numpy().flatten()
    test_stds = std_pred.detach().cpu().numpy().flatten()

    # Hand the model back in eval mode regardless of what prediction left behind
    bnn_model.eval()

    # Convert probabilities to class labels with the chosen threshold
    y_pred_class = (test_probs > optimal_threshold).astype(int)
    y_test_flat = y_test.flatten()

    # Calculate metrics
    accuracy = accuracy_score(y_test_flat, y_pred_class)
    prec = precision_score(y_test_flat, y_pred_class, zero_division=0)
    rec = recall_score(y_test_flat, y_pred_class, zero_division=0)
    f1 = f1_score(y_test_flat, y_pred_class, zero_division=0)
    conf_matrix = confusion_matrix(y_test_flat, y_pred_class)

    print(f"Test accuracy: {accuracy:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Confusion Matrix: \n{conf_matrix}")

    # ROC curve
    fpr, tpr, _ = roc_curve(y_test_flat, test_probs)
    roc_auc = auc(fpr, tpr)

    plt.figure(figsize=(12, 10))

    # ROC curve
    plt.subplot(2, 2, 1)
    plt.plot(fpr, tpr, label=f'AUC = {roc_auc:.3f}')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # F1 Score vs Threshold (computed on the validation split)
    plt.subplot(2, 2, 2)
    plt.plot(thresholds, f1_scores)
    plt.axvline(x=optimal_threshold, color='r', linestyle='--', label=f'Optimal = {optimal_threshold:.2f}')
    plt.xlabel('Threshold')
    plt.ylabel('F1 Score')
    plt.title('F1 Score vs Threshold (validation)')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Uncertainty vs. error correlation
    errors = np.abs(y_test_flat - test_probs)
    plt.subplot(2, 2, 3)
    plt.scatter(test_stds, errors, alpha=0.5)
    plt.xlabel('Predictive Uncertainty (std)')
    plt.ylabel('Absolute Error')
    plt.title('Uncertainty vs Error Correlation')
    plt.grid(True, alpha=0.3)

    # Add a trend line; a regression is only defined when the uncertainties vary
    if test_stds.size > 1 and test_stds.max() > test_stds.min():
        slope, intercept, r_value, _, _ = stats.linregress(test_stds, errors)
        trend_x = np.linspace(test_stds.min(), test_stds.max(), 100)
        plt.plot(trend_x, slope * trend_x + intercept, 'r-', label=f'R² = {r_value**2:.3f}')
        plt.legend()

    plt.tight_layout()
    plt.show()

    # Calibration plot
    plt.figure(figsize=(8, 6))
    # Group predictions into equal-width probability bins
    n_bins = 10
    bin_edges = np.linspace(0, 1, n_bins + 1)
    bin_indices = np.clip(np.digitize(test_probs, bin_edges) - 1, 0, n_bins - 1)

    # Per-bin sample count, summed predicted probability and number of positives
    bin_counts = np.bincount(bin_indices, minlength=n_bins)
    prob_sums = np.bincount(bin_indices, weights=test_probs, minlength=n_bins)
    positive_sums = np.bincount(bin_indices, weights=y_test_flat, minlength=n_bins)

    # Only bins that contain samples have a defined mean (avoids division by zero)
    valid_bins = bin_counts > 0
    bin_probs = prob_sums[valid_bins] / bin_counts[valid_bins]
    bin_freqs = positive_sums[valid_bins] / bin_counts[valid_bins]

    # Plot the observed frequency against the mean predicted probability of each bin,
    # matching the x-axis label
    plt.plot(bin_probs, bin_freqs, 'o-', label='Model')
    plt.plot([0, 1], [0, 1], 'k--', label='Perfectly calibrated')
    plt.xlabel('Mean predicted probability')
    plt.ylabel('Actual frequency')
    plt.title('Calibration Plot')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

    return bnn_model, scaler, optimal_threshold

In [None]:
# Labels and raw (non-differenced) feature matrix, taken straight from the loaded sheet
y = data_df[y_col].values
X = data_df.drop(y_col, axis=1).values

# Hyper-parameters for this run, gathered in one place
bnn_settings = dict(
    # Deeper network with varying widths. No two consecutive widths are equal, so
    # BayesianNN creates no ResidualBlock for this layout even with use_residual=True.
    hidden_dims=[32, 128, 256, 128, 32],
    dropout_probs=[0.1, 0.2, 0.2, 0.2, 0.1],  # One dropout rate per hidden layer
    activation='gelu',
    use_batch_norm=True,
    use_residual=True,
    epochs=1000,
    batch_size=32,
    lr=0.001,
    weight_decay=1e-4,  # L2 regularization
    warmup_epochs=100,
    class_weights='auto',  # Weight the positive class by the class imbalance
)

model, scaler, threshold = train_and_evaluate_improved_bnn(X, y, **bnn_settings)

print("BNN training and evaluation complete.")