## starting a fresh with neural networks

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

## important pytorch modules

In [2]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import os
from torch.utils.data import DataLoader

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [3]:
data = pd.read_csv('data/clean_data.csv')

In [4]:
data.head()

Unnamed: 0,UNIQUEID,DISBURSED_AMOUNT,ASSET_COST,LTV,BRANCH_ID,MANUFACTURER_ID,EMPLOYMENT_TYPE,DISBURSAL_DATE,STATE_ID,AADHAR_FLAG,...,NEW_ACCTS_IN_LAST_SIX_MONTHS,DELINQUENT_ACCTS_IN_LAST_SIX_MONTHS,AVERAGE_ACCT_AGE,CREDIT_HISTORY_LENGTH,NO_OF_INQUIRIES,LOAN_DEFAULT,APPLICANTS_AGE,DISBURSAL_AGE,DISBURSAL_MONTH,DATES
0,420825,0.439324,0.230606,0.875911,0,0,1,2018-03-08,0,1,...,0,0,0.0,0.0,0,0,0.313725,1.0,3,0.508065
1,417566,0.49028,0.289956,0.877732,0,0,2,2018-01-08,0,1,...,0,0,0.0,0.0,0,0,0.294118,1.0,1,0.032258
2,539055,0.473295,0.268703,0.849499,0,0,2,2018-09-26,0,1,...,0,0,0.0,0.0,1,1,0.45098,1.0,9,0.395161
3,518279,0.513588,0.300784,0.878415,0,0,2,2018-09-19,0,1,...,0,0,1.75,2.0,0,0,0.196078,1.0,9,0.33871
4,529269,0.35951,0.292764,0.576958,0,0,1,2018-09-23,0,1,...,0,0,0.0,0.0,0,0,0.235294,1.0,9,0.370968


In [5]:
del data['DISBURSAL_DATE']

In [6]:
for col in data.columns:
    data[col] = data[col].astype(np.float32)

In [7]:
X = data.drop(['UNIQUEID', 'LOAN_DEFAULT'], axis=1)
data['LOAN_DEFAULT'] = [int(i) for i in data['LOAN_DEFAULT']]
y = data['LOAN_DEFAULT']
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=7)

In [8]:
x_train.shape

(92804, 30)

creating custom dataset for our dataset

In [9]:
class LoanDefaulDataset(Dataset):
    def __init__(self, X, Y):
        self.x = X
        self.y = Y
        self.len = len(self.x)
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

In [10]:
train_data = LoanDefaulDataset(x_train.values, y_train.values)
test_data = LoanDefaulDataset(x_test.values, y_test.values)

In [11]:
batch_size = 64
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True, drop_last=True)

In [12]:
next(iter(train_loader))

[tensor([[ 0.5503,  0.4987,  0.7404,  ...,  1.0000, 10.0000,  0.6048],
         [ 0.7356,  0.9244,  0.5385,  ...,  1.0000,  3.0000,  0.5161],
         [ 0.5277,  0.6659,  0.4570,  ...,  1.0000,  5.0000,  0.0484],
         ...,
         [ 0.6268,  0.4785,  0.8256,  ...,  1.0000,  7.0000,  0.0484],
         [ 0.5107,  0.3750,  0.7703,  ...,  1.0000,  9.0000,  0.3306],
         [ 0.1630,  0.1374,  0.4255,  ...,  1.0000,  9.0000,  0.4032]]),
 tensor([0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
         0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
         0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1])]

In [13]:
next(iter(test_loader))

[tensor([[ 0.4908,  0.2892,  0.8791,  ...,  1.0000, 11.0000,  0.7419],
         [ 0.1812,  0.2922,  0.2614,  ...,  1.0000, 10.0000,  0.5887],
         [ 0.2278,  0.0977,  0.6535,  ...,  1.0000, 10.0000,  0.6210],
         ...,
         [ 0.3726,  0.3124,  0.6191,  ...,  1.0000,  8.0000,  0.1694],
         [ 0.4794,  0.5149,  0.5435,  ...,  1.0000, 10.0000,  0.5484],
         [ 0.2071,  0.5452,  0.0355,  ...,  1.0000, 10.0000,  0.6290]]),
 tensor([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
         0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0])]

In [14]:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(30, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 2)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=30, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=512, bias=True)
    (7): ReLU()
    (8): Linear(in_features=512, out_features=512, bias=True)
    (9): ReLU()
    (10): Linear(in_features=512, out_features=2, bias=True)
  )
)


In [15]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [16]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 1000 == 0:
            loss, current = loss.item(), batch * len(X)
            # print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [17]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.8f}%, Avg loss: {test_loss:>8f} \n")

In [18]:
epochs = 1000
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_loader, model, loss_fn, optimizer)
    test(test_loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
Test Error: 
 Accuracy: 76.87409084%, Avg loss: 0.554852 

Epoch 2
-------------------------------
Test Error: 
 Accuracy: 76.87085825%, Avg loss: 0.545336 

Epoch 3
-------------------------------
Test Error: 
 Accuracy: 76.88378859%, Avg loss: 0.541951 

Epoch 4
-------------------------------
Test Error: 
 Accuracy: 76.87409084%, Avg loss: 0.540942 

Epoch 5
-------------------------------
Test Error: 
 Accuracy: 76.87085825%, Avg loss: 0.540120 

Epoch 6
-------------------------------
Test Error: 
 Accuracy: 76.87409084%, Avg loss: 0.539684 

Epoch 7
-------------------------------
Test Error: 
 Accuracy: 76.89671893%, Avg loss: 0.539176 

Epoch 8
-------------------------------
Test Error: 
 Accuracy: 76.86762567%, Avg loss: 0.539414 

Epoch 9
-------------------------------
Test Error: 
 Accuracy: 76.87409084%, Avg loss: 0.539068 

Epoch 10
-------------------------------
Test Error: 
 Accuracy: 76.87409084%, Avg loss: 0.539006 

Epoch 11


In [19]:
x_test.values[0]

array([ 0.49452686,  0.4932729 ,  0.5724044 , 43.        ,  3.        ,
        1.        , 17.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.35855922,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.15686275,  1.        , 10.        ,  0.5967742 ],
      dtype=float32)