### 1) Import Necessary Libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

### Loading and Preparing Dataset

In [2]:
# import dataset
loan_data = pd.read_csv('preprocessed_loans50k.csv')

# Convert 'status' to numerical labels
status_mapping = {
    'Safe': 0,
    'Risky': 1
}

loan_data['status'] = loan_data['status'].map(status_mapping)

# Separate features and labels
X = loan_data.drop('status', axis=1)
y = loan_data['status']

# Use LabelEncoder to encode 'Safe' as 0 and 'Risky' as 1
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for training and testing data
train_dataset = data.TensorDataset(X_train, y_train)
test_dataset = data.TensorDataset(X_test, y_test)

train_loader = data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = data.DataLoader(test_dataset, batch_size=64, shuffle=False)


### 3) Defining Binary Classification Model

class LoanBinaryModel(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(LoanBinaryModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

In [3]:
class LoanBinaryModel(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(LoanBinaryModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.relu(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out


### 4) Training Binary Classification Model

In [4]:
def train_binary_model(model, train_loader, test_loader, num_epochs, learning_rate, target_accuracy, device):
    model.to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)
    writer = SummaryWriter()

    total_steps = len(train_loader)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device).view(-1, 1)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        average_loss = total_loss / len(train_loader)
        writer.add_scalar('Loss/train', average_loss, epoch + 1)

        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device).view(-1, 1)
                outputs = model(inputs)
                predicted = (outputs >= 0.5).float()
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = 100 * correct / total
        writer.add_scalar('Accuracy/test', accuracy, epoch + 1)

        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}, Accuracy: {accuracy:.2f}%')

        if accuracy >= target_accuracy:
            print(f"Reached target accuracy of {target_accuracy:.2f}%.")
            break

    writer.flush()
    writer.close()

# Set the device to GPU if available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the model
input_dim = X_train.shape[1]
hidden_dim = 32
model = LoanBinaryModel(input_dim, hidden_dim)

# Train the model with stopping criterion
num_epochs = 100
learning_rate = 0.00001
target_accuracy = 95.0  # Stop training when the accuracy reaches 95%
train_binary_model(model, train_loader, test_loader, num_epochs, learning_rate, target_accuracy, device)

Epoch [1/100], Loss: 16.4306, Accuracy: 83.12%
Epoch [2/100], Loss: 16.4301, Accuracy: 83.12%
Epoch [3/100], Loss: 16.4304, Accuracy: 83.12%
Epoch [4/100], Loss: 16.4297, Accuracy: 83.12%
Epoch [5/100], Loss: 16.4292, Accuracy: 83.12%
Epoch [6/100], Loss: 16.4286, Accuracy: 83.12%
Epoch [7/100], Loss: 16.4292, Accuracy: 83.12%
Epoch [8/100], Loss: 16.4299, Accuracy: 83.12%
Epoch [9/100], Loss: 16.4299, Accuracy: 83.12%
Epoch [10/100], Loss: 16.4306, Accuracy: 83.12%
Epoch [11/100], Loss: 16.4308, Accuracy: 83.12%
Epoch [12/100], Loss: 16.4304, Accuracy: 83.12%
Epoch [13/100], Loss: 16.4308, Accuracy: 83.12%
Epoch [14/100], Loss: 16.4297, Accuracy: 83.12%
Epoch [15/100], Loss: 16.4304, Accuracy: 83.12%
Epoch [16/100], Loss: 16.4301, Accuracy: 83.12%
Epoch [17/100], Loss: 16.4301, Accuracy: 83.12%
Epoch [18/100], Loss: 16.4299, Accuracy: 83.12%
Epoch [19/100], Loss: 16.4301, Accuracy: 83.12%
Epoch [20/100], Loss: 16.4304, Accuracy: 83.12%
Epoch [21/100], Loss: 16.4292, Accuracy: 83.12%
E

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

def evaluate_model(model, test_loader, device):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device).view(-1, 1)
            outputs = model(inputs)
            predicted = (outputs >= 0.5).float()  # Convert probabilities to binary predictions
            y_pred.extend(predicted.cpu().numpy())
            y_true.extend(labels.cpu().numpy())

    # Convert lists to numpy arrays
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Create the confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    print("Confusion Matrix:")
    print(cm)

    # Print classification report with zero_division parameter
    report = classification_report(y_true, y_pred, target_names=['Safe', 'Risky'], zero_division=1)
    print("\nClassification Report:")
    print(report)

# Evaluate the trained model and print the confusion matrix
evaluate_model(model, test_loader, device)
