In [313]:
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [314]:
train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.10

df = pd.read_csv("loan_approval_dataset_normalized.csv")
# delete the last row
df = df.drop(df.index[-1])
# suffle the data
df = df.sample(frac=1, random_state=12).reset_index(drop=True)

X = df.drop([" loan_status"], axis=1)
Y = df[" loan_status"]

# train is now 75% of the entire data set
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=1 - train_ratio,
    random_state=12,
)

# test is now 10% of the initial data set
# validation is now 15% of the initial data set
x_val, x_test, y_val, y_test = train_test_split(
    x_test,
    y_test,
    test_size=test_ratio / (test_ratio + validation_ratio),
    random_state=12,
)

In [315]:
num_rows, num_columns = x_train.shape

# Print the size
print(f"Number of rows: {num_rows}")
print(f"Number of columns: {num_columns}")

Number of rows: 3201
Number of columns: 11


In [316]:
class Net(nn.Module):
    def __init__(self, D_in, H=64, D_out=1):
        super().__init__()
        self.fc1 = nn.Linear(D_in, H)
        self.fc2 = nn.Linear(H, D_out)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        
        return x.squeeze()

In [317]:
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        x = self.features[idx]
        y = self.labels[idx]
        return x, y

In [318]:
model = Net(D_in=x_train.shape[1]).to(device)

# Define your loss function and optimizer
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define batch size and create data loaders
batch_size = 64
train_dataset = CustomDataset(x_train.values, y_train.values)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = CustomDataset(x_val.values, y_val.values)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Training loop
num_epochs = 1000

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    i=0
    for inputs, labels in train_loader:
        i+=1
        inputs, labels = inputs.to(device), labels.to(device)
    
        optimizer.zero_grad()
        outputs = model(inputs)
     
        loss = criterion(outputs.squeeze(), labels.squeeze())  
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Print training loss for this epoch
    if (epoch+1) % 100 == 0:
        print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {total_loss / len(train_loader)}")

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels.float())
            val_loss += loss.item()

    # Print validation loss for this epoch
    if (epoch+1) % 100 == 0:
        print(f"Epoch {epoch + 1}/{num_epochs}, Validation Loss: {val_loss / len(val_loader)}")
        print('----------------------------------------------------------------------------------')


Epoch 100/1000, Training Loss: 0.1750908616848071
Epoch 100/1000, Validation Loss: 0.18533098921179772
----------------------------------------------------------------------------------
Epoch 200/1000, Training Loss: 0.13162506182447495
Epoch 200/1000, Validation Loss: 0.15247788652777672
----------------------------------------------------------------------------------
Epoch 300/1000, Training Loss: 0.11690299147192727
Epoch 300/1000, Validation Loss: 0.14303407333791257
----------------------------------------------------------------------------------
Epoch 400/1000, Training Loss: 0.1063396961955518
Epoch 400/1000, Validation Loss: 0.26786382794380187
----------------------------------------------------------------------------------
Epoch 500/1000, Training Loss: 0.10234918854400224
Epoch 500/1000, Validation Loss: 0.2667809769511223
----------------------------------------------------------------------------------
Epoch 600/1000, Training Loss: 0.09771211749837533
Epoch 600/1000, V

In [319]:
model.eval()
test_dataset = CustomDataset(x_test.values, y_test.values)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
test_loss = 0.0
correct_predictions = 0
total_samples = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels.squeeze())
        test_loss += loss.item()

        # Calculate accuracy
        predicted_classes = (outputs >= 0.5).int()  # Convert probabilities to binary predictions
        correct_predictions += (predicted_classes == labels).sum().item()
        total_samples += len(labels)

# Print test loss and accuracy
print(f"Test Loss: {test_loss / len(test_loader)}")
print(f"Test Accuracy: {correct_predictions / total_samples * 100:.2f}%")

Test Loss: 0.11444443836808205
Test Accuracy: 96.49%
