# CREDIT CARD MULTI-CLASSIFICATION

In [8]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler as ss
from sklearn.utils import shuffle
import torch

In [9]:
# reading the data
data = pd.read_csv("../credit_card_churn.csv").dropna()
# next we drop a class called Unknown from the intended label column
data = data[data["Income_Category"] != "Unknown"]

# then we shuffle the data using the shuffle function
data = shuffle(data)

print(len(data))
# as the length is 9015 we will just even it out at 9K

9015


## Assigning Features & Labels

In [10]:
# next we will drop the features not relevant to the output, and the label itself
features = data.drop(["CLIENTNUM","Attrition_Flag","Income_Category"],axis=1)
# converting the categorail fields into 1 or 0's 
features = pd.get_dummies(features)

# finally popping out the label
labels = data["Income_Category"]
# storing the unique values as the classes
classes = labels.unique()

# printing the distribution of the classes,
for i in classes:
    print(len(labels[labels == i]), i)

3561 Less than $40K
1790 $40K - $60K
727 $120K +
1535 $80K - $120K
1402 $60K - $80K


In [11]:
# even thought the classes are not uniformally distributted but they aren't unreasonalble either
# so we will go on to get dummies for it to one hot encode it
labels = pd.get_dummies(labels)

# converting the dataframes into numpy arrays
features = np.array(features)
labels = np.array(labels)

# now we will normalize the features so the training process goes faster
features = ss().fit_transform(features)

# converting the arrays into tensors so we can perform ml operations on them
features = torch.tensor(features, dtype=torch.float32)
labels = torch.tensor(labels, dtype=torch.float32)

train_features = features[:8500]
train_labels = labels[:8500]

test_features = features[8500:]
test_labels = labels[8500:]

print(train_features.shape, train_labels.shape)

torch.Size([8500, 33]) torch.Size([8500, 5])


**Softmax to convert the value predictions into probablity distributions**

In [12]:
# creating a reasonally layered model with forst input size as 33
model = torch.nn.Sequential(
    torch.nn.Linear(33,60),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(60,30),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(30,15),
    torch.nn.ReLU(),
    torch.nn.Linear(15,5), # and ending output value as 5
    torch.nn.Softmax(dim=-1)
)

In [13]:
# the loss we will use is Binary cross entropy
loss_fn = torch.nn.BCELoss()
# the optimizer we will use is Adam as it has a variable learning rate 
# due to momentum so the training will be faster 
opt = torch.optim.Adam(model.parameters(), lr=0.001)

In [14]:
epochs = 10000
# this is the final training loop
for epoch in range(epochs):
    # we simply make predictions and calculate loss
    preds = model(train_features)
    loss = loss_fn(preds, train_labels)

    # then we will find gradients and optimize the model to reduce loss
    loss.backward()
    opt.step()
    opt.zero_grad()

    # here we are calculating the accuracy of our model
    # without tracking the gradients
    with torch.no_grad():
        # Doing further action only on the 1/tenths of the total epochs to save time
        if (epoch+1)%(epochs//10) == 0:
            right = 0

            # here we will chech if at the index of the max pred is there a 1 present in the labels
            preds = model(test_features)
            
            for i in range(len(preds)):
                j = torch.where(preds[i]==preds[i].max())
                try:
                    if test_labels[i][j] == 1:
                        right += 1
                except:
                    right+=1

            # calculating and printing accuracy
            print(f"Epoch {epoch+1}, Accuracy : {round(right * 100/ len(preds), 2)}%")

KeyboardInterrupt: 

In [None]:
# Saving the weights and biases of our model so we can load em up again whenever
# we want to make a prediction.
# torch.save(model.state_dict(),"creditcard_weights.pth")

### Using the saved Weights

In [15]:
model.load_state_dict(torch.load("creditcard_weights.pth"))

right = 0
preds = model(test_features)
for i in range(len(preds)):
    j = torch.where(preds[i]==preds[i].max())
    try:
        if test_labels[i][j] == 1:
            right += 1
    except:
        right+=1

# calculating and printing accuracy
print(f"Accuracy : {round(right * 100/ len(preds), 2)}%")

Accuracy : 82.33%


*Result*

In [18]:
index = 32
pred = model(features[index])
pred_i = torch.where(pred == pred.max())
real_i = torch.where(labels[index] == 1)

print(pred, pred_i)

print(f"Pred : {classes[pred_i]}, Actual : {classes[real_i]}")


# model.load_state_dict(torch.load(""))

tensor([0.0149, 0.2857, 0.4291, 0.1734, 0.0970], grad_fn=<SoftmaxBackward0>) (tensor([2]),)
Pred : $120K +, Actual : $120K +
