### General Concept

Given a dataset with several features related to credit score, create a model that predicts the credit score according to the features

### 1. Process Data

In [34]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [11]:
# set which device to use first
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [3]:
# put data into dataframe
df = pd.read_csv("creditscores.csv")

df.head()

Unnamed: 0,Age,Gender,Income,Education,Marital Status,Number of Children,Home Ownership,Credit Score
0,25,Female,50000,Bachelor's Degree,Single,0,Rented,High
1,30,Male,100000,Master's Degree,Married,2,Owned,High
2,35,Female,75000,Doctorate,Married,1,Owned,High
3,40,Male,125000,High School Diploma,Single,0,Owned,High
4,45,Female,100000,Bachelor's Degree,Married,3,Owned,High


In [15]:
# create label encoder to make strings into categorized numericals
le = LabelEncoder()

# list of columns needed to be encoded
le_cols = ["Gender", "Education", "Marital Status", "Home Ownership"]

# encode every listed column
for column in df.columns:
    if column in le_cols:
        le.fit(df[column])
        df[column] = le.transform(df[column])


df.head()

Unnamed: 0,Age,Gender,Income,Education,Marital Status,Number of Children,Home Ownership,Credit Score
0,25,0,50000,1,1,0,1,High
1,30,1,100000,4,0,2,0,High
2,35,0,75000,2,0,1,0,High
3,40,1,125000,3,1,0,0,High
4,45,0,100000,1,0,3,0,High


In [40]:
# split data
X_train, X_test, y_train, y_test = train_test_split(df.drop(["Credit Score"], axis=1), 
                                                    df["Credit Score"], 
                                                    test_size=0.2, 
                                                    random_state=69)

# convert data to pytorch tensors and to device
X_train = torch.tensor(X_train.values).to(device)
X_test = torch.tensor(X_test.values).to(device)
y_train = torch.from_numpy(y_train.values).to(device)
y_test = torch.from_numpy(y_test.values).to(device)

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.

### 2. Create Model

In [10]:
class CreditClassifier(nn.Module):
    def __init__(self):
        super().__init__()

        self.layer_stack = nn.Sequential(
            nn.Linear(7, 8),
            nn.ReLU(),
            nn.Linear(8, 4),
            nn.ReLU(),
            nn.Linear(4, 3)
        )

    def forward(self, x):
        return self.layer_stack(x)
    
model = CreditClassifier().to(device)

In [29]:
# define loss and optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

### 3. Train Model

In [None]:
# set seed
torch.manual_seed(69)

# no of epochs
epochs = 1000

# train test loop
for epoch in range(epochs):

    # TRAINING
    model.train()

    # forward pass
    