Route: Goal 1 -> 1.2 DNN  
Purpose: to build a DNN model based on PyTorch  
Kernel: Python

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import datetime 
from time import time
import pandas as pd
import seaborn as sns
import torch.utils.data as Data
from sklearn.preprocessing import StandardScaler

In [5]:
# Data Segmentation Tool
from sklearn.model_selection import train_test_split

route = 'lending_club_onehot.csv'
lending_club_onehot = pd.read_csv(route,index_col=0)
lending_club_onehot.head()

Unnamed: 0,loan_amnt,funded_amnt_inv,int_rate,installment,grade,sub_grade,emp_length,annual_inc,dti,delinq_2yrs,...,addr_state_48,initial_list_status_0,initial_list_status_1,application_type_0,application_type_1,hardship_flag_0,hardship_flag_1,debt_settlement_flag_0,debt_settlement_flag_1,target
1,15000,15000.0,5.32,451.73,0,0,10,105000.0,14.24,0,...,0,1,0,1,0,1,0,1,0,0
2,4800,4800.0,7.99,150.4,0,4,10,110000.0,16.29,0,...,0,1,0,1,0,1,0,1,0,1
3,15000,15000.0,10.49,487.47,1,11,10,75000.0,10.38,0,...,0,0,1,1,0,1,0,1,0,0
4,3000,3000.0,5.32,90.35,0,0,9,78000.0,22.11,0,...,0,1,0,1,0,1,0,1,0,0
5,8000,8000.0,13.49,271.45,2,21,4,55000.0,9.56,1,...,0,1,0,1,0,1,0,1,0,0


In [7]:
# The following is to partition the original data, 
# to divide them into training set for training, verification set for verifying, test set for testing.
X = lending_club_onehot.iloc[:,:-1]
y = lending_club_onehot.iloc[:,-1:]

X_train, X_combined, y_train, y_combined = train_test_split(X, y, test_size=0.4, random_state=1)
X_verify, X_test, y_verify, y_test = train_test_split(X_combined, y_combined, test_size=0.5, random_state=1)

print(X.shape, X_train.shape, X_verify.shape, X_test.shape)
X_train.head()

(57094, 160) (34256, 160) (11419, 160) (11419, 160)


Unnamed: 0,loan_amnt,funded_amnt_inv,int_rate,installment,grade,sub_grade,emp_length,annual_inc,dti,delinq_2yrs,...,addr_state_47,addr_state_48,initial_list_status_0,initial_list_status_1,application_type_0,application_type_1,hardship_flag_0,hardship_flag_1,debt_settlement_flag_0,debt_settlement_flag_1
55257,20000,20000.0,15.59,699.1,2,24,10,65500.0,30.08,0,...,0,0,1,0,1,0,1,0,1,0
51038,19000,19000.0,18.99,696.37,3,32,5,50000.0,17.07,0,...,0,0,0,1,1,0,1,0,1,0
2898,11725,11725.0,17.99,423.83,3,31,6,122000.0,11.84,1,...,0,0,1,0,1,0,1,0,1,0
36501,10500,10500.0,30.99,346.13,6,64,5,35000.0,6.58,0,...,0,0,1,0,1,0,1,0,1,0
12876,35000,35000.0,11.49,1154.0,1,14,10,105091.0,15.11,0,...,0,0,0,1,1,0,1,0,1,0


In [16]:
# Network Test
def test(X,y,path):
    
    net = simple_DNN()
    net.load_state_dict(torch.load(path))
    
    X = torch.Tensor(np.array(X))
    
    prediction = net.predict(X).argmax(axis=1)
    
    summary = prediction.shape[0]
    cnt = 0
    for i in range(summary):
        if np.array(prediction)[i]==y['target'].iloc[i]: cnt += 1
    
    print(summary-cnt)
    print(cnt/summary)
    return cnt/summary

In [13]:
# Network Training Process
def train(X,y,batch_N,epoch, path, hot=False):
    
    net = simple_DNN()
    # According to the cold/hot start state, read the pre-training model
    if hot:
        net.load_state_dict(torch.load(path))
    
    # The loss function uses "Cross Entropy Loss"
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    # Transform data into torch.Tensor
    X = torch.Tensor(np.array(X))
    y = torch.Tensor(np.array(y))
    train_set = Data.TensorDataset(X,y)

    result_df = pd.DataFrame(columns = ['Epoch', 'Loss'])
    
    # The training has epoch turns，and the batch size is batch_N
    for i in range(epoch):
        t0 = time()
        # devide each batch
        train_loader = Data.DataLoader(
            dataset=train_set,
            batch_size=batch_N,
            shuffle=True,
            drop_last=True,
        )
        # Total loss Los
        Los = 0
        
        for step,(feature,label) in enumerate(train_loader):
        # data.shape is (100,95)
            label = label.long().reshape([-1])
            
            optimizer.zero_grad()
            prediction = net.predict(feature)
            loss = criterion(prediction,label)
            loss.backward()
            optimizer.step()
            Los += loss
    
        print('i=',i,'   Loss=',Los)
        
        print(time()-t0)
        
        torch.save(net.state_dict(), './dnn_onehot_1e-5_new/dnn.pth')
        result = test(X_verify, y_verify, './dnn_onehot_1e-5_new/dnn.pth')
        result_df.loc[i] = [i, result]
        
        if result >= 0.999:
            
            test(X_train, y_train, './dnn_onehot_1e-5_new/dnn.pth')
            test(X_verify, y_verify, './dnn_onehot_1e-5_new/dnn.pth')
            test(X_test, y_test, './dnn_onehot_1e-5_new/dnn.pth')
            result_test = test(X_test, y_test, './dnn_onehot_1e-5_new/dnn.pth')
            torch.save(net.state_dict(), './dnn_onehot_1e-5_new/dnn_' + str(i) +  '_' + str(result_test) + '_success.pth')
            
    torch.save(net.state_dict(), path)

In [17]:
class simple_DNN(nn.Module):
    def __init__(self):
        # Inherit the parent class
        super(simple_DNN,self).__init__()
        
        self.ten_layers = nn.Sequential(
            nn.Linear(160, 200), nn.BatchNorm1d(200), nn.LeakyReLU(True),
            
            nn.Linear(200, 300), nn.BatchNorm1d(300), nn.LeakyReLU(True),
            
            nn.Linear(300,300), nn.BatchNorm1d(300), nn.ReLU(True),
            nn.Linear(300,500), nn.BatchNorm1d(500), nn.ReLU(True),
            nn.Linear(500,500), nn.BatchNorm1d(500), nn.ReLU(True),
            nn.Linear(500,200), nn.BatchNorm1d(200), nn.ReLU(True),
            nn.Linear(200,200), nn.BatchNorm1d(200), nn.ReLU(True),
            nn.Linear(200,50), nn.BatchNorm1d(50), nn.ReLU(True),
            
            nn.Linear(50,10), nn.ß(10), nn.ReLU(True),
            
            # Finalize the classification results
            nn.Linear(10,2),
        )
        
    def predict(self,x):
        
        x = self.ten_layers(x)
    
        return x

In [18]:
# Put the training set X_train into training
batch_Num = 200
epoch = 600
hot = False
train(X_train,y_train,batch_Num,epoch,'DNN_lendingclub_onehot_BN_epoch_600_lr_1e-5.pth', hot)

i= 0    Loss= tensor(66.2187, grad_fn=<AddBackward0>)
5.598600149154663
425
0.9627813293633418
i= 1    Loss= tensor(47.4868, grad_fn=<AddBackward0>)
5.3793628215789795
185
0.9837989316052194
i= 2    Loss= tensor(38.4901, grad_fn=<AddBackward0>)
4.897463083267212
407
0.9643576495314826
i= 3    Loss= tensor(32.1536, grad_fn=<AddBackward0>)
4.812809228897095
98
0.9914178124179
i= 4    Loss= tensor(26.7370, grad_fn=<AddBackward0>)
4.836700916290283
113
0.9901042122777827
i= 5    Loss= tensor(23.1226, grad_fn=<AddBackward0>)
4.941549062728882
62
0.9945704527541817
i= 6    Loss= tensor(19.1825, grad_fn=<AddBackward0>)
4.951761245727539
252
0.9779315176460286
i= 7    Loss= tensor(16.4565, grad_fn=<AddBackward0>)
4.804059982299805
100
0.991242665732551
i= 8    Loss= tensor(14.5057, grad_fn=<AddBackward0>)
4.865347862243652
62
0.9945704527541817
i= 9    Loss= tensor(12.5875, grad_fn=<AddBackward0>)
4.854455947875977
80
0.9929941325860409
i= 10    Loss= tensor(10.9400, grad_fn=<AddBackward0>)
4.

15
0.9986863998598826
i= 84    Loss= tensor(0.9470, grad_fn=<AddBackward0>)
5.088438034057617
10
0.9991242665732551
25
0.9992702008407286
10
0.9991242665732551
9
0.9992118399159295
9
0.9992118399159295
i= 85    Loss= tensor(0.6448, grad_fn=<AddBackward0>)
5.0437541007995605
20
0.9982485331465102
i= 86    Loss= tensor(0.6077, grad_fn=<AddBackward0>)
4.965257167816162
12
0.9989491198879061
i= 87    Loss= tensor(0.3913, grad_fn=<AddBackward0>)
5.072646141052246
17
0.9985112531745337
i= 88    Loss= tensor(0.4514, grad_fn=<AddBackward0>)
5.018435955047607
13
0.9988615465452316
i= 89    Loss= tensor(0.6879, grad_fn=<AddBackward0>)
4.963150262832642
16
0.9985988265172082
i= 90    Loss= tensor(0.6937, grad_fn=<AddBackward0>)
5.031605005264282
10
0.9991242665732551
24
0.9992993928070995
10
0.9991242665732551
8
0.999299413258604
8
0.999299413258604
i= 91    Loss= tensor(0.8943, grad_fn=<AddBackward0>)
4.9637932777404785
22
0.9980733864611612
i= 92    Loss= tensor(0.5889, grad_fn=<AddBackward0>)


11
0.9990366932305806
8
0.9997664642690332
11
0.9990366932305806
6
0.9994745599439531
6
0.9994745599439531
i= 152    Loss= tensor(0.2293, grad_fn=<AddBackward0>)
4.86577296257019
13
0.9988615465452316
i= 153    Loss= tensor(0.3151, grad_fn=<AddBackward0>)
4.873547077178955
11
0.9990366932305806
8
0.9997664642690332
11
0.9990366932305806
7
0.9993869866012786
7
0.9993869866012786
i= 154    Loss= tensor(0.3386, grad_fn=<AddBackward0>)
4.903369903564453
11
0.9990366932305806
14
0.999591312470808
11
0.9990366932305806
8
0.999299413258604
8
0.999299413258604
i= 155    Loss= tensor(0.5597, grad_fn=<AddBackward0>)
4.854063987731934
17
0.9985112531745337
i= 156    Loss= tensor(0.4184, grad_fn=<AddBackward0>)
4.877140998840332
11
0.9990366932305806
23
0.9993285847734703
11
0.9990366932305806
9
0.9992118399159295
9
0.9992118399159295
i= 157    Loss= tensor(0.4688, grad_fn=<AddBackward0>)
4.965955972671509
31
0.9972852263770908
i= 158    Loss= tensor(0.2550, grad_fn=<AddBackward0>)
4.9438431262969

9
0.9992118399159295
8
0.9997664642690332
9
0.9992118399159295
6
0.9994745599439531
6
0.9994745599439531
i= 213    Loss= tensor(0.3993, grad_fn=<AddBackward0>)
5.0823681354522705
20
0.9982485331465102
i= 214    Loss= tensor(0.1816, grad_fn=<AddBackward0>)
4.916331052780151
14
0.9987739732025571
i= 215    Loss= tensor(0.1773, grad_fn=<AddBackward0>)
4.875935077667236
15
0.9986863998598826
i= 216    Loss= tensor(0.4250, grad_fn=<AddBackward0>)
4.866369962692261
14
0.9987739732025571
i= 217    Loss= tensor(0.2197, grad_fn=<AddBackward0>)
4.909236907958984
11
0.9990366932305806
11
0.9996788883699206
11
0.9990366932305806
6
0.9994745599439531
6
0.9994745599439531
i= 218    Loss= tensor(0.3738, grad_fn=<AddBackward0>)
4.906738042831421
19
0.9983361064891847
i= 219    Loss= tensor(0.3269, grad_fn=<AddBackward0>)
5.454704999923706
14
0.9987739732025571
i= 220    Loss= tensor(0.2903, grad_fn=<AddBackward0>)
4.851095914840698
16
0.9985988265172082
i= 221    Loss= tensor(0.3249, grad_fn=<AddBackw

i= 269    Loss= tensor(0.2755, grad_fn=<AddBackward0>)
4.897665023803711
13
0.9988615465452316
i= 270    Loss= tensor(0.2651, grad_fn=<AddBackward0>)
4.946232080459595
11
0.9990366932305806
17
0.9995037365716954
11
0.9990366932305806
9
0.9992118399159295
9
0.9992118399159295
i= 271    Loss= tensor(0.2529, grad_fn=<AddBackward0>)
4.9025421142578125
8
0.999299413258604
6
0.9998248482017749
8
0.999299413258604
7
0.9993869866012786
7
0.9993869866012786
i= 272    Loss= tensor(0.1543, grad_fn=<AddBackward0>)
4.907132148742676
10
0.9991242665732551
7
0.999795656235404
10
0.9991242665732551
9
0.9992118399159295
9
0.9992118399159295
i= 273    Loss= tensor(0.2033, grad_fn=<AddBackward0>)
4.924605131149292
10
0.9991242665732551
5
0.9998540401681457
10
0.9991242665732551
8
0.999299413258604
8
0.999299413258604
i= 274    Loss= tensor(0.1322, grad_fn=<AddBackward0>)
4.998012065887451
11
0.9990366932305806
13
0.9996205044371789
11
0.9990366932305806
11
0.9990366932305806
11
0.9990366932305806
i= 275 

8
0.999299413258604
8
0.999299413258604
i= 325    Loss= tensor(0.1805, grad_fn=<AddBackward0>)
5.252955913543701
12
0.9989491198879061
i= 326    Loss= tensor(0.1713, grad_fn=<AddBackward0>)
5.507203817367554
9
0.9992118399159295
8
0.9997664642690332
9
0.9992118399159295
8
0.999299413258604
8
0.999299413258604
i= 327    Loss= tensor(0.1614, grad_fn=<AddBackward0>)
5.6034767627716064
8
0.999299413258604
2
0.9999416160672583
8
0.999299413258604
6
0.9994745599439531
6
0.9994745599439531
i= 328    Loss= tensor(0.1759, grad_fn=<AddBackward0>)
6.540149927139282
8
0.999299413258604
8
0.9997664642690332
8
0.999299413258604
10
0.9991242665732551
10
0.9991242665732551
i= 329    Loss= tensor(0.1166, grad_fn=<AddBackward0>)
6.164115905761719
16
0.9985988265172082
i= 330    Loss= tensor(0.1447, grad_fn=<AddBackward0>)
5.163996934890747
14
0.9987739732025571
i= 331    Loss= tensor(0.1538, grad_fn=<AddBackward0>)
5.349443197250366
9
0.9992118399159295
5
0.9998540401681457
9
0.9992118399159295
10
0.999

7
0.9993869866012786
7
0.9993869866012786
i= 378    Loss= tensor(0.2378, grad_fn=<AddBackward0>)
5.224629878997803
15
0.9986863998598826
i= 379    Loss= tensor(0.0887, grad_fn=<AddBackward0>)
5.22758674621582
12
0.9989491198879061
i= 380    Loss= tensor(0.0854, grad_fn=<AddBackward0>)
5.422539949417114
15
0.9986863998598826
i= 381    Loss= tensor(0.2448, grad_fn=<AddBackward0>)
6.0381927490234375
9
0.9992118399159295
9
0.9997372723026623
9
0.9992118399159295
8
0.999299413258604
8
0.999299413258604
i= 382    Loss= tensor(0.1374, grad_fn=<AddBackward0>)
4.942538022994995
12
0.9989491198879061
i= 383    Loss= tensor(0.1251, grad_fn=<AddBackward0>)
4.9550580978393555
9
0.9992118399159295
3
0.9999124241008874
9
0.9992118399159295
11
0.9990366932305806
11
0.9990366932305806
i= 384    Loss= tensor(0.1688, grad_fn=<AddBackward0>)
4.938041925430298
13
0.9988615465452316
i= 385    Loss= tensor(0.1688, grad_fn=<AddBackward0>)
4.920762300491333
10
0.9991242665732551
8
0.9997664642690332
10
0.99912

10
0.9991242665732551
i= 429    Loss= tensor(0.1096, grad_fn=<AddBackward0>)
4.963712215423584
13
0.9988615465452316
i= 430    Loss= tensor(0.1584, grad_fn=<AddBackward0>)
4.906419038772583
11
0.9990366932305806
8
0.9997664642690332
11
0.9990366932305806
10
0.9991242665732551
10
0.9991242665732551
i= 431    Loss= tensor(0.1096, grad_fn=<AddBackward0>)
4.9237329959869385
9
0.9992118399159295
3
0.9999124241008874
9
0.9992118399159295
6
0.9994745599439531
6
0.9994745599439531
i= 432    Loss= tensor(0.0596, grad_fn=<AddBackward0>)
4.943170070648193
9
0.9992118399159295
4
0.9998832321345166
9
0.9992118399159295
8
0.999299413258604
8
0.999299413258604
i= 433    Loss= tensor(0.0480, grad_fn=<AddBackward0>)
5.029950857162476
10
0.9991242665732551
1
0.9999708080336291
10
0.9991242665732551
11
0.9990366932305806
11
0.9990366932305806
i= 434    Loss= tensor(0.0252, grad_fn=<AddBackward0>)
4.953242778778076
8
0.999299413258604
2
0.9999416160672583
8
0.999299413258604
8
0.999299413258604
8
0.999299

7
0.9993869866012786
i= 479    Loss= tensor(0.0993, grad_fn=<AddBackward0>)
5.6614649295806885
13
0.9988615465452316
i= 480    Loss= tensor(0.0862, grad_fn=<AddBackward0>)
5.763411998748779
11
0.9990366932305806
2
0.9999416160672583
11
0.9990366932305806
11
0.9990366932305806
11
0.9990366932305806
i= 481    Loss= tensor(0.1569, grad_fn=<AddBackward0>)
5.8390867710113525
17
0.9985112531745337
i= 482    Loss= tensor(0.1794, grad_fn=<AddBackward0>)
5.6749560832977295
12
0.9989491198879061
i= 483    Loss= tensor(0.1950, grad_fn=<AddBackward0>)
6.083028078079224
12
0.9989491198879061
i= 484    Loss= tensor(0.1488, grad_fn=<AddBackward0>)
5.757183074951172
11
0.9990366932305806
4
0.9998832321345166
11
0.9990366932305806
10
0.9991242665732551
10
0.9991242665732551
i= 485    Loss= tensor(0.0908, grad_fn=<AddBackward0>)
5.951754808425903
9
0.9992118399159295
3
0.9999124241008874
9
0.9992118399159295
9
0.9992118399159295
9
0.9992118399159295
i= 486    Loss= tensor(0.1898, grad_fn=<AddBackward0>)

1
0.9999708080336291
10
0.9991242665732551
10
0.9991242665732551
10
0.9991242665732551
i= 530    Loss= tensor(0.0533, grad_fn=<AddBackward0>)
4.942251920700073
11
0.9990366932305806
2
0.9999416160672583
11
0.9990366932305806
7
0.9993869866012786
7
0.9993869866012786
i= 531    Loss= tensor(0.3611, grad_fn=<AddBackward0>)
5.028597831726074
15
0.9986863998598826
i= 532    Loss= tensor(0.2158, grad_fn=<AddBackward0>)
4.918733835220337
14
0.9987739732025571
i= 533    Loss= tensor(0.0389, grad_fn=<AddBackward0>)
4.88569712638855
8
0.999299413258604
4
0.9998832321345166
8
0.999299413258604
9
0.9992118399159295
9
0.9992118399159295
i= 534    Loss= tensor(0.0522, grad_fn=<AddBackward0>)
5.00239896774292
10
0.9991242665732551
3
0.9999124241008874
10
0.9991242665732551
10
0.9991242665732551
10
0.9991242665732551
i= 535    Loss= tensor(0.0285, grad_fn=<AddBackward0>)
5.029851913452148
9
0.9992118399159295
1
0.9999708080336291
9
0.9992118399159295
9
0.9992118399159295
9
0.9992118399159295
i= 536   

10
0.9991242665732551
9
0.9992118399159295
9
0.9992118399159295
i= 580    Loss= tensor(0.2115, grad_fn=<AddBackward0>)
4.989241123199463
13
0.9988615465452316
i= 581    Loss= tensor(0.1556, grad_fn=<AddBackward0>)
4.891281843185425
9
0.9992118399159295
2
0.9999416160672583
9
0.9992118399159295
8
0.999299413258604
8
0.999299413258604
i= 582    Loss= tensor(0.0503, grad_fn=<AddBackward0>)
4.962843894958496
8
0.999299413258604
1
0.9999708080336291
8
0.999299413258604
10
0.9991242665732551
10
0.9991242665732551
i= 583    Loss= tensor(0.0688, grad_fn=<AddBackward0>)
5.055004119873047
9
0.9992118399159295
4
0.9998832321345166
9
0.9992118399159295
9
0.9992118399159295
9
0.9992118399159295
i= 584    Loss= tensor(0.1097, grad_fn=<AddBackward0>)
4.992244005203247
8
0.999299413258604
2
0.9999416160672583
8
0.999299413258604
6
0.9994745599439531
6
0.9994745599439531
i= 585    Loss= tensor(0.1037, grad_fn=<AddBackward0>)
5.040976047515869
8
0.999299413258604
3
0.9999124241008874
8
0.999299413258604

In [10]:
test(X_train, y_train, 'dnn_335_success_99.95.pth')

9
0.9997898178421298


In [11]:
test(X_test, y_test, 'dnn_335_success_99.95.pth')

7
0.9995095978702536
