# Modelling workflow

In [1]:
from models.dataset import DataFrameDataset
from pathlib import Path
# Location of the loan data CSV, relative to the notebook's working directory.
DATA_PATH = Path('repo', 'loan_data.csv')

# Build the dataset wrapper straight from the CSV file.
dataset = DataFrameDataset.from_csv(DATA_PATH)

In [2]:
# Print the dataset's string form as a quick sanity check of what was loaded
# (sample count, feature count and target column appear in the output below).
print(dataset)

DataFrameDataset with 19997 samples, 13 features, target column: loan_status


In [3]:
# Preview the first five rows of the underlying frame.
# NOTE(review): the displayed frame carries an 'Unnamed: 0' column, which
# usually means the CSV was written together with its index -- confirm that
# this column is not being fed to the model as a feature.
dataset.df.head(n=5)

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,-0.451005,0,0,-0.738289,0.284695,3,-1.134235,5,0.417089,-0.82374,-0.468077,1.149147,1,0
1,-0.958398,1,0,-0.442347,-0.898926,3,-0.455572,4,-0.881605,-0.117102,-0.725478,-0.119689,0,1
2,0.056388,0,3,-1.020443,0.115606,2,-1.19803,1,-1.241293,-0.218051,0.046725,1.307752,0,1
3,-0.958398,1,4,-0.644173,-0.729837,1,-0.380165,0,-0.506002,0.488587,-0.468077,-0.417072,0,1
4,-0.451005,0,1,-0.31805,-0.560748,0,-0.938177,1,-0.324567,-0.924688,-0.725478,-0.218817,1,0


In [4]:
import random

import numpy as np
import torch

from models.neural_network import BaseLoanNN

# Fix the random seeds before anything stochastic happens, so that a
# Restart & Run All reproduces the same initial weights (and, as far as the
# downstream code draws from these generators, the same split and batches).
# NOTE(review): Arena's splitting/shuffling logic is not visible from this
# notebook; confirm it relies on one of the generators seeded here.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Instantiate the network with its default configuration; the trailing bare
# expression displays the layer summary.
model = BaseLoanNN()
model

BaseLoanNN(
  (layer_1): Linear(in_features=13, out_features=50, bias=True)
  (layer_2): Linear(in_features=50, out_features=50, bias=True)
  (output_layer): Linear(in_features=50, out_features=1, bias=True)
)

In [5]:
# Loss function and optimiser used for training
import torch.nn as nn
import torch.optim as optim

# Step size for Adam, named so it is easy to find and tune.
LEARNING_RATE = 1e-3

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# BCEWithLogitsLoss applies the sigmoid itself.
# NOTE(review): confirm BaseLoanNN.forward returns raw logits rather than
# probabilities, otherwise the sigmoid would be applied twice.
criterion = nn.BCEWithLogitsLoss()

In [7]:
from models.arena import Arena
# Value passed as Arena's test_split; presumably the fraction of samples
# held out for the final test evaluation -- confirm in models.arena.
TEST_SPLIT = 0.1

# Bundle model, optimiser, loss and data into a single training harness.
arena = Arena(model, optimizer, criterion, dataset, test_split=TEST_SPLIT)

In [8]:
# Training schedule, named so the values are easy to find and tune.
EPOCHS = 20
BATCH_SIZE = 32

# Per-epoch training and validation BCE are printed as training proceeds.
arena.train(epochs=EPOCHS, batch_size=BATCH_SIZE)

Epoch [1/20], BCE: 0.3322
Validation BCE: 0.2733
Epoch [2/20], BCE: 0.2585
Validation BCE: 0.2583
Epoch [3/20], BCE: 0.2531
Validation BCE: 0.2568
Epoch [4/20], BCE: 0.2499
Validation BCE: 0.2528
Epoch [5/20], BCE: 0.2464
Validation BCE: 0.2554
Epoch [6/20], BCE: 0.2453
Validation BCE: 0.2608
Epoch [7/20], BCE: 0.2439
Validation BCE: 0.2610
Epoch [8/20], BCE: 0.2426
Validation BCE: 0.2507
Epoch [9/20], BCE: 0.2408
Validation BCE: 0.2468
Epoch [10/20], BCE: 0.2399
Validation BCE: 0.2460
Epoch [11/20], BCE: 0.2385
Validation BCE: 0.2462
Epoch [12/20], BCE: 0.2367
Validation BCE: 0.2458
Epoch [13/20], BCE: 0.2362
Validation BCE: 0.2424
Epoch [14/20], BCE: 0.2355
Validation BCE: 0.2444
Epoch [15/20], BCE: 0.2360
Validation BCE: 0.2479
Epoch [16/20], BCE: 0.2344
Validation BCE: 0.2455
Epoch [17/20], BCE: 0.2328
Validation BCE: 0.2424
Epoch [18/20], BCE: 0.2319
Validation BCE: 0.2409
Epoch [19/20], BCE: 0.2311
Validation BCE: 0.2442
Epoch [20/20], BCE: 0.2296
Validation BCE: 0.2431


In [9]:
# Run the evaluation step on the trained model. It prints nothing itself;
# presumably it populates arena.results, which the next cell prints --
# confirm in models.arena.
arena.evaluate()

In [11]:
# Use print() rather than a bare expression so the results are shown in their
# str() form (one metric per line in the output below).
print(arena.results)

Average Train BCE: 0.2448
Average Validation BCE: 0.2502
Average Test BCE: 0.2337

