# <p style="background-image: url(https://thumb.ac-illust.com/ef/efba71f003602d81b40951d0040bbab0_t.jpeg);font-family:consolas;font-size:120%;color:#ffffff;text-align:center;border-radius:20px 20px; padding:5px;"> About </p>

This notebook is created for participation in the “Predicting Loan Payback” Playground Prediction Competition on Kaggle. The objective is to build a **PyTorch** model that predicts whether a borrower will pay back their loan, based on their personal and financial attributes.

# <p style="background-image: url(https://thumb.ac-illust.com/ef/efba71f003602d81b40951d0040bbab0_t.jpeg);font-family:consolas;font-size:120%;color:#ffffff;text-align:center;border-radius:20px 20px; padding:5px"> Imports </p>

In [None]:
import numpy as np
import pandas as pd
import math
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import warnings
warnings.filterwarnings('ignore')

# <p style="background-image: url(https://thumb.ac-illust.com/ef/efba71f003602d81b40951d0040bbab0_t.jpeg);font-family:consolas;font-size:120%;color:#ffffff;text-align:center;border-radius:20px 20px; padding:5px"> Reading Data </p>

In [None]:
# Load the competition CSVs; the `id` column becomes the index of each frame.
train, test = (
    pd.read_csv(csv_path, index_col="id")
    for csv_path in (
        "/kaggle/input/playground-series-s5e11/train.csv",
        "/kaggle/input/playground-series-s5e11/test.csv",
    )
)

In [None]:
# Preview the first five training rows (five is the `head` default).
train.head(n=5)

Checking for `null` values in the dataset.

In [None]:
# Per-column count of missing values in the training frame.
train.isnull().sum()

In [None]:
# Per-column count of missing values in the test frame.
test.isnull().sum()

Selecting the `categorical_cols` and the `numerical_cols` from the dataset.

In [None]:
# Column groups are derived from `test`, so only columns present in the test
# frame are listed (presumably this excludes the target -- confirm).
categorical_cols = test.select_dtypes(include='object').columns
numerical_cols = test.select_dtypes(include=['int64', 'float64']).columns

for group_label, group_cols in (
    ("categorical", categorical_cols),
    ("numerical", numerical_cols),
):
    print(f"The {group_label} value columns are: {group_cols.values}")

# <p style="background-image: url(https://thumb.ac-illust.com/ef/efba71f003602d81b40951d0040bbab0_t.jpeg);font-family:consolas;font-size:120%;color:#ffffff;text-align:center;border-radius:20px 20px; padding:5px"> Visualizations </p>

The following plot shows the count of **loan_paid_back**.

In [None]:
# Class balance of the target, drawn as a pie chart.
y_counts = train['loan_paid_back'].value_counts()

# One colour per class, evenly spaced along the 'flare' colormap.
cmap = plt.get_cmap('flare')
colors = cmap(np.linspace(0, 1, len(y_counts)))

fig, ax = plt.subplots(figsize=(4, 4))
ax.pie(
    y_counts.values,
    labels=y_counts.index,
    colors=colors,
    autopct='%1.1f%%',
    startangle=90,
    counterclock=False,
    textprops={'color': 'white'},
)
ax.set_title('Distribution of loan_paid_back')
fig.tight_layout()
plt.show()

Visualize the distribution of each categorical feature in relation to the target variable `loan_paid_back` using count plots.

In [None]:
# Grid of count plots: one panel per categorical feature, split by the target.
n_plots = len(categorical_cols)
n_rows = 2
# The column count is derived from the same row count that the grid uses, so
# no row is left permanently empty. At least one column keeps the figure
# valid even when there are no categorical features.
cols_per_row = max(1, math.ceil(n_plots / n_rows))

fig, axes = plt.subplots(
    n_rows, cols_per_row, figsize=(5 * cols_per_row, 10), squeeze=False
)
axes = axes.ravel()

for ax, col in zip(axes, categorical_cols):
    sns.countplot(x=col, hue='loan_paid_back', data=train, palette='mako', ax=ax)
    ax.set_title(f"{col} vs loan_paid_back count")
    ax.tick_params(axis='x', rotation=90)

# Hide panels left over when the grid has more cells than features.
for ax in axes[n_plots:]:
    ax.set_visible(False)

fig.tight_layout()
plt.show()

Visualize the distribution of each numerical feature in relation to the target variable `loan_paid_back` using histogram plots.

In [None]:
# Grid of histograms: one panel per numerical feature, split by the target.
n_plots = len(numerical_cols)
n_rows = 2
# The column count is derived from the same row count that the grid uses, so
# no row is left permanently empty. At least one column keeps the figure
# valid even when there are no numerical features.
cols_per_row = max(1, math.ceil(n_plots / n_rows))

fig, axes = plt.subplots(
    n_rows, cols_per_row, figsize=(5 * cols_per_row, 10), squeeze=False
)
axes = axes.ravel()

for ax, col in zip(axes, numerical_cols):
    sns.histplot(
        x=col, hue='loan_paid_back', data=train,
        fill=True, palette='mako', bins=30, ax=ax,
    )
    ax.set_title(f"{col} vs loan_paid_back count")

# Hide panels left over when the grid has more cells than features.
for ax in axes[n_plots:]:
    ax.set_visible(False)

fig.tight_layout()
plt.show()

# <p style="background-image: url(https://thumb.ac-illust.com/ef/efba71f003602d81b40951d0040bbab0_t.jpeg);font-family:consolas;font-size:120%;color:#ffffff;text-align:center;border-radius:20px 20px; padding:5px"> Data Preprocessing </p>

`LabelEncoder` is used to encode the categorical values.

In [None]:
# Integer-encode every categorical column. The single encoder is refit on each
# training column, and that column's mapping is then applied to the test column.
# NOTE(review): a category that appears only in `test` would presumably make
# `transform` fail -- confirm this cannot happen for this dataset.
encoder = LabelEncoder()
for col in categorical_cols:
    train_codes = encoder.fit_transform(train[col])
    test_codes = encoder.transform(test[col])
    train[col] = train_codes
    test[col] = test_codes

`MinMaxScaler` is used to scale the values before feeding into the neural network.

In [None]:
# Learn the min-max ranges from the training features only, then apply the same
# fitted scaler to both frames so the test data never influences the scaling.
scaler = MinMaxScaler()
scaler.fit(train[numerical_cols])
train[numerical_cols] = scaler.transform(train[numerical_cols])
test[numerical_cols] = scaler.transform(test[numerical_cols])

The dataset is partitioned into feature matrix **X** and target vector **y** to prepare it for model training.

In [None]:
# The target column becomes y; every remaining column is a model feature.
y = train['loan_paid_back']
X = train.drop(columns=['loan_paid_back'])

Using `train_test_split` to split **X** and **y** into a training set and a held-out validation set.

In [None]:
# Stratified 80/20 hold-out split. Despite the `_test` suffix, X_test / y_test
# serve as the validation set for the rest of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=21,
)

`CUDA` is set up for faster training.

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Converting from `pandas.core.frame.DataFrame` to `torch.Tensor`.

In [None]:
def _to_float_tensor(frame):
    """Copy a pandas object's values into a float32 tensor placed on `device`."""
    return torch.tensor(frame.values.astype(np.float32)).to(device)


# Labels get a trailing dimension so they line up with the model's (batch, 1) logits.
X_train_tensor = _to_float_tensor(X_train)
y_train_tensor = _to_float_tensor(y_train).unsqueeze(1)

# The hold-out split (named X_test / y_test) is used as the validation set.
X_val_tensor = _to_float_tensor(X_test)
y_val_tensor = _to_float_tensor(y_test).unsqueeze(1)

# Competition test features, for which no labels are available here.
X_test_tensor = _to_float_tensor(test)

Setting up the `DataLoaders` from the tensors.

In [None]:
# Pair features with labels so each batch yields (features, label) tensors.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# Both loaders share the batch size; only the training batches are shuffled.
loader_kwargs = {"batch_size": 32}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

# <p style="background-image: url(https://thumb.ac-illust.com/ef/efba71f003602d81b40951d0040bbab0_t.jpeg);font-family:consolas;font-size:120%;color:#ffffff;text-align:center;border-radius:20px 20px; padding:5px"> Model Training </p>

The architecture is designed to balance expressive power with regularization, ensuring strong generalization on tabular data.

#### Layer Breakdown
- **Input Layer**: Accepts `input_dim` features.
- **Hidden Layer 1**: 
  - `Linear(input_dim → 256)`
  - `BatchNorm1d(256)`
  - `LeakyReLU`
  - `Dropout(0.3)`
- **Hidden Layer 2**:
  - `Linear(256 → 128)`
  - `BatchNorm1d(128)`
  - `ELU`
  - `Dropout(0.3)`
- **Hidden Layer 3**:
  - `Linear(128 → 64)`
  - `BatchNorm1d(64)`
  - `ReLU`
  - `Dropout(0.2)`
- **Hidden Layer 4**:
  - `Linear(64 → 32)`
  - `ReLU`
- **Output Layer**:
  - `Linear(32 → 1)` → Outputs raw logits for binary classification

#### Design Rationale
- **Batch Normalization** stabilizes training and accelerates convergence.
- **LeakyReLU** mitigates dying neuron issues in early layers.
- **Dropout** adds regularization to prevent overfitting.
- **Final ReLU** introduces non-linearity before the output layer.

This architecture is optimized for tabular datasets and integrates well with `BCEWithLogitsLoss`, which expects raw logits as input.

In [None]:
class BinaryClassifier(nn.Module):
    """Feed-forward network mapping a feature vector to a single raw logit.

    Layer widths are input_dim -> 256 -> 128 -> 64 -> 32 -> 1. The first three
    hidden layers use batch normalisation and dropout; the fourth has only a
    ReLU. No sigmoid is applied, so the output is meant for a logit-based loss
    such as `BCEWithLogitsLoss`.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Layers are created in forward order and stored under `self.net`, so
        # state-dict keys follow the `net.<index>.<param>` pattern.
        layers = [
            # Hidden layer 1
            nn.Linear(input_dim, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Dropout(0.3),
            # Hidden layer 2
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ELU(),
            nn.Dropout(0.3),
            # Hidden layer 3
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),
            # Hidden layer 4 (no batch norm, no dropout)
            nn.Linear(64, 32),
            nn.ReLU(),
            # Output: one raw logit per sample
            nn.Linear(32, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return raw logits for a batch of feature vectors `x`."""
        return self.net(x)

The model is trained with a weighted loss function to handle class imbalance, and a learning rate scheduler is used to adaptively reduce the learning rate based on validation performance.

**Key Components:**
- `BinaryClassifier`: A feedforward neural network with four hidden layers, using BatchNorm, LeakyReLU/ELU/ReLU activations, and Dropout.
- `BCEWithLogitsLoss`: Weighted using `pos_weight` to address class imbalance.
- `Adam Optimizer`: Initialized with a learning rate of `0.005`.
- `ReduceLROnPlateau`: Monitors ROC AUC and reduces LR by half if no improvement for 5 epochs.

This setup ensures robust training while preventing overfitting and stagnation in learning.

In [None]:
# Build the network with one input unit per feature column and move it to the device.
model = BinaryClassifier(input_dim=X_train.shape[1]).to(device)

# Up-weight the positive class by the negative/positive ratio of the training labels.
num_neg = (y_train == 0).sum()
num_pos = (y_train == 1).sum()
pos_weight_value = float(num_neg / num_pos)
# Create the weight as float32 directly on the target device so its dtype
# matches the float32 logits, instead of inheriting float64 from numpy.
pos_weight = torch.tensor([pos_weight_value], dtype=torch.float32, device=device)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
# mode='max' because the scheduler is stepped with the validation ROC AUC.
# The deprecated `verbose` flag is omitted: newer PyTorch releases no longer
# accept it, and the training loop already prints the current learning rate.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=5
)

Train the `BinaryClassifier` using a standard supervised learning loop with performance tracking and early stopping. The model is evaluated every epoch on both training and validation sets, with metrics logged for analysis.

#### Epoch-wise Training
- **Loss Function**: `BCEWithLogitsLoss` with `pos_weight` to handle class imbalance.
- **Optimizer**: Adam with initial LR = 0.005.
- **Scheduler**: `ReduceLROnPlateau` monitors ROC AUC and reduces LR if no improvement for 5 epochs.

#### Metrics Tracked
- `Train/Val Loss`
- `Train/Val Accuracy`
- `Validation ROC AUC`

#### Early Stopping
- Monitors best ROC AUC.
- Stops training if no improvement for **20 consecutive epochs**.

#### Highlights
- Predictions are thresholded at 0.5 after applying `sigmoid`.
- ROC AUC is computed using raw probabilities for better discrimination.
- Model weights are saved when ROC AUC improves.

This setup ensures efficient training while guarding against overfitting and learning stagnation.

In [None]:
# --- Training configuration and metric history ---
best_auc = 0.0
patience = 20      # epochs without ROC AUC improvement before stopping early
counter = 0        # epochs elapsed since the last improvement
num_epochs = 200
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
roc_aucs = []

# `state_dict()` returns references to the live parameters, so a snapshot must
# be cloned or it keeps changing as training continues. Starting from a copy
# of the initial weights also guarantees `best_model_state` is always defined.
best_model_state = {
    name: tensor.detach().clone() for name, tensor in model.state_dict().items()
}

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        optimizer.zero_grad()
        logits = model(batch_X)
        loss = criterion(logits, batch_y)
        loss.backward()
        optimizer.step()
        # The loss is a batch mean; weight it by batch size for an exact epoch mean.
        running_loss += loss.item() * batch_X.size(0)
        preds = (torch.sigmoid(logits) > 0.5).float()
        correct += (preds == batch_y).sum().item()
        total += batch_y.size(0)
    epoch_train_loss = running_loss / total
    epoch_train_accuracy = correct / total
    train_losses.append(epoch_train_loss)
    train_accuracies.append(epoch_train_accuracy)

    # ---- Validation pass ----
    model.eval()
    val_running_loss = 0.0
    val_correct = 0
    val_total = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            logits = model(batch_X)
            loss = criterion(logits, batch_y)
            val_running_loss += loss.item() * batch_X.size(0)
            probs = torch.sigmoid(logits)
            preds = (probs > 0.5).float()
            # reshape(-1) stays 1-D even for a final batch of one row, where
            # squeeze() would give a 0-d array that extend() cannot iterate.
            all_preds.extend(probs.reshape(-1).cpu().numpy())
            all_labels.extend(batch_y.reshape(-1).cpu().numpy())

            val_correct += (preds == batch_y).sum().item()
            val_total += batch_y.size(0)

    epoch_val_loss = val_running_loss / val_total
    epoch_val_accuracy = val_correct / val_total
    val_losses.append(epoch_val_loss)
    val_accuracies.append(epoch_val_accuracy)

    # ROC AUC is computed on probabilities and drives both the LR scheduler
    # and early stopping.
    roc_auc = roc_auc_score(all_labels, all_preds)
    roc_aucs.append(roc_auc)
    scheduler.step(roc_auc)
    if ((epoch+1)%5 == 0):
        print(f"Epoch {epoch+1}: Train_Loss={epoch_train_loss:.4f}, Val_Loss={epoch_val_loss:.4f}, "
          f"Train_Acc={epoch_train_accuracy:.4f}, Val_Acc={epoch_val_accuracy:.4f}, ROC_AUC={roc_auc:.4f}")
        for param_group in optimizer.param_groups:
            print(f"Current LR: {param_group['lr']}")

    if roc_auc > best_auc:
        best_auc = roc_auc
        # Clone every tensor so later optimizer steps cannot overwrite the checkpoint.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping.")
            break

#### Training Metrics Visualization

To monitor model performance across epochs, we plot the following metrics:

#### What We Track
- **Loss**: Measures prediction error for both training and validation sets.
- **Accuracy**: Percentage of correct predictions.
- **ROC AUC**: Evaluates classification quality, especially for imbalanced data.

#### Visualization Details
- Color palette: `Paired` from Matplotlib for clear contrast.
- Layout: 3 side-by-side subplots for easy comparison.
- Epochs: X-axis spans from 1 to the final training epoch.

#### Insights
- **Loss Trends**: Helps detect overfitting or underfitting.
- **Accuracy Curves**: Useful for gauging generalization.
- **ROC AUC Curve**: Key metric for binary classification robustness.

This visual summary provides a quick diagnostic of training dynamics and helps guide model tuning decisions.


In [None]:
# One x-axis tick per completed epoch (training may have stopped early).
epochs = range(1, len(train_losses) + 1)
cmap = plt.get_cmap('Paired')

fig, (ax_loss, ax_acc, ax_auc) = plt.subplots(1, 3, figsize=(15, 4))

# Panel 1: training vs validation loss.
ax_loss.plot(epochs, train_losses, label='Train Loss', color=cmap(0))
ax_loss.plot(epochs, val_losses, label='Val Loss', color=cmap(1))
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Loss per Epoch')
ax_loss.legend()

# Panel 2: training vs validation accuracy.
ax_acc.plot(epochs, train_accuracies, label='Train Acc', color=cmap(2))
ax_acc.plot(epochs, val_accuracies, label='Val Acc', color=cmap(3))
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Accuracy per Epoch')
ax_acc.legend()

# Panel 3: validation ROC AUC.
ax_auc.plot(epochs, roc_aucs, label='Val ROC AUC', color=cmap(4))
ax_auc.set_xlabel('Epoch')
ax_auc.set_ylabel('ROC AUC')
ax_auc.set_title('ROC AUC per Epoch')
ax_auc.legend()

fig.tight_layout()
plt.show()

# <p style="background-image: url(https://thumb.ac-illust.com/ef/efba71f003602d81b40951d0040bbab0_t.jpeg);font-family:consolas;font-size:120%;color:#ffffff;text-align:center;border-radius:20px 20px; padding:5px"> Inference </p>

Loading the *best* model state.

In [None]:
# Load the state dict stored in `best_model_state` by the training loop
# (strict key matching is the default and is spelled out here).
model.load_state_dict(best_model_state, strict=True)

#### Validation ROC AUC

- Switched to `eval` mode and disabled gradients.
- Predicted probabilities using `sigmoid`.
- Calculated ROC AUC with `roc_auc_score`; the value is printed by the cell below.

This score reflects how well the model separates the two classes.

In [None]:
# Score the hold-out split (X_val_tensor / y_test) in eval mode, without gradients.
model.eval()
with torch.no_grad():
    val_logits = model(X_val_tensor)
val_probs = torch.sigmoid(val_logits).squeeze().cpu().numpy()
val_auc = roc_auc_score(y_test, val_probs)
print(f"ROC AUC Score: {val_auc:.4f}")

In [None]:
# Predict probabilities for the competition test features in eval mode.
model.eval()
with torch.no_grad():
    test_logits = model(X_test_tensor)
# Convert logits to probabilities and move them to a flat NumPy array on the CPU.
test_probs = torch.sigmoid(test_logits).squeeze().cpu().numpy()

# <p style="background-image: url(https://thumb.ac-illust.com/ef/efba71f003602d81b40951d0040bbab0_t.jpeg);font-family:consolas;font-size:120%;color:#ffffff;text-align:center;border-radius:20px 20px; padding:5px"> Submission </p>

In [None]:
# Pair each prediction with the id of the row it was computed from. `test` was
# read with the `id` column as its index and `test_probs` follows the row order
# of `test`, so the ids and predictions are aligned by construction rather than
# relying on a separately loaded file having the same row order.
submission = pd.DataFrame({
    "id": test.index,
    "loan_paid_back": test_probs
})

submission.to_csv("submission.csv", index=False)