In [38]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, roc_auc_score

In [39]:
df = pd.read_csv('UCI_Credit_Card.csv')

In [40]:
target = 'default.payment.next.month'
X = df.drop(columns=[target, 'ID'])
y = df[target].values.astype(np.float32)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2, stratify=y
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [41]:
X_train_t = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_t = torch.tensor(X_test_scaled, dtype=torch.float32)

y_train_t = torch.tensor(y_train,dtype=torch.float32).unsqueeze(1)
y_test_t = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

train_ds = TensorDataset(X_train_t, y_train_t)
test_ds = TensorDataset(X_test_t, y_test_t)
print(X_train_t.shape)
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=64, shuffle=False)
input_dim = X_train_t.shape[1]

torch.Size([24000, 23])


In [42]:
class CreditNet(nn.Module):
    def __init__(self, input_dim=23):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.35),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.35),
            nn.Linear(32, 1)
        )
        
    def forward(self, X):
        return self.net(X)
    
model = CreditNet(input_dim)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)


In [43]:
num_epochs = 15
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        logits = model(X_batch)
        loss = criterion(logits, y_batch)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * X_batch.size(0)
    
    epoch_loss = running_loss / len(train_loader)
    
    
    model.eval()
    all_probs = []
    all_true = []
    
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            logits = model(X_batch)
            probs = torch.sigmoid(logits)
            all_probs.append(probs.cpu().numpy())
            all_true.append(y_batch.cpu().numpy())
            
    all_probs = np.vstack(all_probs).ravel()
    all_true = np.vstack(all_true).ravel()
    
    y_pred = (all_probs > 0.5).astype(int)
    
    acc = accuracy_score(all_true, y_pred)
    auc = roc_auc_score(all_true, all_probs)
    
    print(f"Epoch {epoch+1}/{num_epochs} | train loss: {epoch_loss:.4f} | "
          f"test acc: {acc:.4f} | test AUC: {auc:.4f}")
    

Epoch 1/15 | train loss: 32.1436 | test acc: 0.8082 | test AUC: 0.7150
Epoch 2/15 | train loss: 28.5315 | test acc: 0.8162 | test AUC: 0.7452
Epoch 3/15 | train loss: 28.0089 | test acc: 0.8180 | test AUC: 0.7578
Epoch 4/15 | train loss: 27.7445 | test acc: 0.8175 | test AUC: 0.7638
Epoch 5/15 | train loss: 27.5448 | test acc: 0.8195 | test AUC: 0.7677
Epoch 6/15 | train loss: 27.4084 | test acc: 0.8187 | test AUC: 0.7699
Epoch 7/15 | train loss: 27.2947 | test acc: 0.8195 | test AUC: 0.7667
Epoch 8/15 | train loss: 27.2609 | test acc: 0.8182 | test AUC: 0.7690
Epoch 9/15 | train loss: 27.1836 | test acc: 0.8198 | test AUC: 0.7698
Epoch 10/15 | train loss: 27.0990 | test acc: 0.8203 | test AUC: 0.7705
Epoch 11/15 | train loss: 27.0300 | test acc: 0.8190 | test AUC: 0.7727
Epoch 12/15 | train loss: 27.0150 | test acc: 0.8182 | test AUC: 0.7687
Epoch 13/15 | train loss: 26.9967 | test acc: 0.8190 | test AUC: 0.7718
Epoch 14/15 | train loss: 26.9350 | test acc: 0.8190 | test AUC: 0.7681
E