### General Concept

Given a dataset with several features related to credit score, create a model that predicts the credit score according to the features

### 1. Process Data

In [288]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [289]:
# set which device to use first
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [290]:
# put data into dataframe
df = pd.read_csv("creditscores.csv")

df.head()

Unnamed: 0,Age,Gender,Income,Education,Marital Status,Number of Children,Home Ownership,Credit Score
0,25,Female,50000,Bachelor's Degree,Single,0,Rented,High
1,30,Male,100000,Master's Degree,Married,2,Owned,High
2,35,Female,75000,Doctorate,Married,1,Owned,High
3,40,Male,125000,High School Diploma,Single,0,Owned,High
4,45,Female,100000,Bachelor's Degree,Married,3,Owned,High


In [291]:
# create label encoder to make strings into categorized numericals
le = LabelEncoder()

# list of columns needed to be encoded
le_cols = ["Gender", "Education", "Marital Status", "Home Ownership", "Credit Score"]

# encode every listed column
for column in df.columns:
    if column in le_cols:
        le.fit(df[column])
        df[column] = le.transform(df[column])


df.head(30)

Unnamed: 0,Age,Gender,Income,Education,Marital Status,Number of Children,Home Ownership,Credit Score
0,25,0,50000,1,1,0,1,1
1,30,1,100000,4,0,2,0,1
2,35,0,75000,2,0,1,0,1
3,40,1,125000,3,1,0,0,1
4,45,0,100000,1,0,3,0,1
5,50,1,150000,4,0,0,0,1
6,26,0,40000,0,1,0,1,0
7,31,1,60000,1,1,0,1,0
8,36,0,80000,4,0,2,0,1
9,41,1,105000,2,1,0,0,1


In [292]:
# split data
X_train, X_test, y_train, y_test = train_test_split(df.drop(["Credit Score"], axis=1), 
                                                    df["Credit Score"], 
                                                    test_size=0.2, 
                                                    random_state=53)

# normalize features
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# convert data to pytorch tensors and to device
X_train = torch.tensor(X_train).to(device).type(torch.float32)
X_test = torch.tensor(X_test).to(device).type(torch.float32)
y_train = torch.from_numpy(y_train.values).to(device).type(torch.float32)
y_test = torch.from_numpy(y_test.values).to(device).type(torch.float32)


### 2. Create Model

In [293]:
class CreditClassifier(nn.Module):
    def __init__(self):
        super().__init__()

        self.layer_stack = nn.Sequential(
            nn.Linear(7, 12),
            nn.ReLU(),
            nn.Linear(12, 3)
        )

    def forward(self, x):
        return self.layer_stack(x)
    
model = CreditClassifier().to(device)
model

CreditClassifier(
  (layer_stack): Sequential(
    (0): Linear(in_features=7, out_features=12, bias=True)
    (1): ReLU()
    (2): Linear(in_features=12, out_features=3, bias=True)
  )
)

In [294]:
# define loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [295]:
y_logits = model(X_test)
y_probs = torch.softmax(y_logits, dim=1)
y_probs[:5]

tensor([[0.2379, 0.3055, 0.4566],
        [0.2403, 0.3136, 0.4461],
        [0.2717, 0.3513, 0.3770],
        [0.2525, 0.3944, 0.3531],
        [0.2459, 0.3692, 0.3849]], device='cuda:0', grad_fn=<SliceBackward0>)

In [296]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100 
    return acc

### 3. Train Model

In [297]:
# set seed
torch.manual_seed(69)

# no of epochs
epochs = 1000

# train test loop
for epoch in range(epochs):

    # TRAINING
    model.train()

    # forward pass
    y_logits = model(X_train)
    y_pred = torch.softmax(y_logits, dim=1).argmax(dim=1)

    y_train = y_train.type(torch.LongTensor).to(device)

    # metrics
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_train, y_pred)

    # opt zero grad
    optimizer.zero_grad()

    # loss backwards
    loss.backward()

    # opt step
    optimizer.step()

    # TESTING
    model.eval()

    with torch.inference_mode():

        # forward pass
        test_logits = model(X_test)
        test_pred = torch.softmax(test_logits, dim=1).argmax(dim=1)

        # test metrics
        y_test = y_test.type(torch.LongTensor).to(device)
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_test, test_pred)

    # print results
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Train Loss: {loss:.4f} | Train Accuracy: {acc:.2f}% | Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")

Epoch: 0 | Train Loss: 1.1374 | Train Accuracy: 39.69% | Test Loss: 1.0219 | Test Accuracy: 60.61%
Epoch: 100 | Train Loss: 0.1012 | Train Accuracy: 97.71% | Test Loss: 0.0704 | Test Accuracy: 100.00%
Epoch: 200 | Train Loss: 0.0385 | Train Accuracy: 99.24% | Test Loss: 0.0171 | Test Accuracy: 100.00%
Epoch: 300 | Train Loss: 0.0275 | Train Accuracy: 99.24% | Test Loss: 0.0085 | Test Accuracy: 100.00%
Epoch: 400 | Train Loss: 0.0207 | Train Accuracy: 99.24% | Test Loss: 0.0051 | Test Accuracy: 100.00%
Epoch: 500 | Train Loss: 0.0145 | Train Accuracy: 99.24% | Test Loss: 0.0030 | Test Accuracy: 100.00%
Epoch: 600 | Train Loss: 0.0102 | Train Accuracy: 100.00% | Test Loss: 0.0020 | Test Accuracy: 100.00%
Epoch: 700 | Train Loss: 0.0072 | Train Accuracy: 100.00% | Test Loss: 0.0014 | Test Accuracy: 100.00%
Epoch: 800 | Train Loss: 0.0053 | Train Accuracy: 100.00% | Test Loss: 0.0011 | Test Accuracy: 100.00%
Epoch: 900 | Train Loss: 0.0039 | Train Accuracy: 100.00% | Test Loss: 0.0008 | Te

### 4. Evaluate Model

In [298]:
model.eval()

with torch.inference_mode():
    eval_logits = model(X_test[0:5])

eval_prob = torch.softmax(eval_logits, dim=1)
eval_pred = eval_prob.argmax(dim=1)

eval_pred, y_test[0:5]

(tensor([1, 1, 2, 1, 2], device='cuda:0'),
 tensor([1, 1, 2, 1, 2], device='cuda:0'))