# Neural Network from the ground up - Bank Fraud

Based on Chapter 4 of the fast.ai book *Deep Learning for Coders with fastai and PyTorch*.

### Data Normalization

In [123]:
from fastcore.all import *
from fastai.data.all import *
from pandas import *
import numpy as np
import torch.utils.data as data_utils

# Load the raw dataset from "data.csv" (path is relative to the working directory).
df = pd.read_csv("data.csv")

def normalize(x, max):
    """Scale ``x`` by dividing it by ``max``.

    Works on anything that supports ``/`` (plain numbers as well as
    array-like objects). Note that the ``max`` parameter shadows the
    built-in of the same name inside this function.
    """
    scaled = x / max
    return scaled

# Column extrema used for scaling. They are kept as module-level names so any
# other cell that reads them keeps working.
current_address_months_count_max = df.current_address_months_count.max()
customer_age_max = df.customer_age.max()
days_since_request_max = df.days_since_request.max()
intended_balcon_amount_max = df.intended_balcon_amount.max()
intended_balcon_amount_min = df.intended_balcon_amount.min()
zip_count_4w_max = df.zip_count_4w.max()
velocity_24h_max = df.velocity_24h.max()
velocity_6h_max = df.velocity_6h.max()
velocity_4w_max = df.velocity_4w.max()
bank_branch_count_8w_max = df.bank_branch_count_8w.max()
date_of_birth_distinct_emails_4w_max = df.date_of_birth_distinct_emails_4w.max()
credit_risk_score_max = df.credit_risk_score.max()
credit_risk_score_min = df.credit_risk_score.min()
proposed_credit_limit_max = df.proposed_credit_limit.max()

# Columns that are simply divided by their maximum. `normalize` is applied to
# the whole Series at once (vectorized) instead of once per row via `.apply`,
# which yields the same values without a Python-level call for every row.
max_scaled_columns = {
    "current_address_months_count": current_address_months_count_max,
    "customer_age": customer_age_max,
    "days_since_request": days_since_request_max,
    "zip_count_4w": zip_count_4w_max,
    "velocity_24h": velocity_24h_max,
    "velocity_6h": velocity_6h_max,
    "velocity_4w": velocity_4w_max,
    "bank_branch_count_8w": bank_branch_count_8w_max,
    "date_of_birth_distinct_emails_4w": date_of_birth_distinct_emails_4w_max,
    "proposed_credit_limit": proposed_credit_limit_max,
}
for column, column_max in max_scaled_columns.items():
    df[column] = normalize(df[column], column_max)

# Columns that are shifted by their minimum first and then divided by the
# range (min-max scaling). Subtracting the minimum is identical to the previous
# `+ abs(min)` whenever the minimum is <= 0, and stays correct when it is > 0.
df["intended_balcon_amount"] = normalize(
    df.intended_balcon_amount - intended_balcon_amount_min,
    intended_balcon_amount_max - intended_balcon_amount_min,
)
df["credit_risk_score"] = normalize(
    df.credit_risk_score - credit_risk_score_min,
    credit_risk_score_max - credit_risk_score_min,
)

# Mark the string-valued columns as categoricals.
for column in ("payment_type", "employment_status", "housing_status", "device_os"):
    df[column] = df[column].astype("category")

# NOTE(review): this overwrites `source` with a copy of `employment_status`,
# which looks like a copy-paste slip (probably meant `df.source.astype(...)`).
# It is left unchanged here because correcting it changes the number of one-hot
# columns, and later cells hard-code a feature width of 52. Fix both together.
df.source = df.employment_status.astype("category")

# Persist the normalized frame (the DataFrame index is written as well).
df.to_csv("normalized.csv")


### Set Up Training and Validation Datasets

In [140]:
def _build_xy(pos_frame, neg_frame):
    """Stack positive rows above negative rows and build matching labels.

    Returns a float32 feature tensor with one row per input row, and an int64
    label tensor holding 1 for every row of ``pos_frame`` followed by 0 for
    every row of ``neg_frame``.
    """
    # Converting with an explicit dtype keeps this working when the frame mixes
    # boolean/integer dummy columns with float columns.
    features = torch.cat([
        torch.tensor(frame.to_numpy(dtype=np.float32))
        for frame in (pos_frame, neg_frame)
    ])
    labels = torch.cat([
        torch.ones(len(pos_frame), dtype=torch.long),
        torch.zeros(len(neg_frame), dtype=torch.long),
    ])
    return features, labels


# One-hot encode the categorical columns, then split rows by label and drop the
# label column so only features remain.
dummies = pd.get_dummies(df)
print(dummies.shape)
fraud = dummies[dummies.fraud_bool == 1].drop('fraud_bool', axis=1)
not_fraud = dummies[dummies.fraud_bool == 0].drop('fraud_bool', axis=1)
print(fraud.shape)

# Per-class 80/20 split with a fixed seed; the validation rows are whatever was
# not sampled into the training part.
fraud_train = fraud.sample(frac=.8, random_state=100)
fraud_valid = fraud.drop(fraud_train.index)
not_fraud_train = not_fraud.sample(frac=.8, random_state=100)
not_fraud_valid = not_fraud.drop(not_fraud_train.index)

# Feature width taken from the data instead of a hard-coded 52.
n_features = fraud.shape[1]

train_x_t, train_y = _build_xy(fraud_train, not_fraud_train)
print(train_x_t.shape)
train_x = train_x_t.view(-1, n_features)
dset = list(zip(train_x, train_y))

valid_x, valid_y = _build_xy(fraud_valid, not_fraud_valid)
valid_x = valid_x.view(-1, n_features)
valid_dset = list(zip(valid_x, valid_y))

(1000000, 53)
(11029, 52)
torch.Size([800000, 52])


### Linear

In [141]:
def init_params(size, std=1.0):
    """Create a trainable float tensor of shape ``size`` drawn from N(0, std**2).

    The values come from ``torch.randn`` scaled by ``std``; the result is cast
    to ``torch.FloatTensor`` and has gradient tracking switched on.
    """
    noise = torch.randn(size)
    scaled = (noise * std).type(torch.FloatTensor)
    return scaled.requires_grad_()

# Random starting parameters: one weight per input feature and a single bias.
weights = init_params(52)
print(weights)
bias = init_params(1)
print(bias)

# Show the tensor type of the weights.
print(weights.type())

def linear1(xb):
    """Affine model: ``xb @ weights + bias``.

    Reads the module-level ``weights`` and ``bias`` at call time, so it always
    uses whatever those names currently refer to.
    """
    return xb @ weights + bias

# Raw (pre-sigmoid) scores of the untrained linear model on the training set.
preds = linear1(train_x)
preds

tensor([-0.1349, -0.2689, -0.3426,  1.6692, -2.5082,  0.8137,  0.9905, -0.9003,
        -1.8515,  1.4286,  0.8886,  0.0733, -0.7074, -1.3049, -1.3353, -0.1335,
        -0.6165, -0.1443,  0.1219, -0.3225,  2.0532, -0.3747, -1.4065, -0.4709,
        -0.0100, -1.4380,  1.1074,  0.8684,  0.5591, -0.0363, -0.7992, -1.0435,
        -2.7727,  1.5963,  1.4730,  2.0461, -1.0994,  0.3424, -0.9222, -0.8537,
         0.5174, -0.3800, -1.0286, -0.0213,  1.2814, -0.4026, -3.0598,  0.2060,
         0.8690, -0.7434,  0.3271,  0.7673], requires_grad=True)
tensor([0.9328], requires_grad=True)
torch.FloatTensor


tensor([4.0164, 4.9184, 4.3605,  ..., 2.2637, 2.6816, 0.8623],
       grad_fn=<AddBackward0>)

In [142]:
# A positive raw score counts as a fraud prediction (1.0), anything else as 0.0;
# compare element-wise with the training labels.
corrects = train_y == (preds > 0.0).float()
corrects

tensor([ True,  True,  True,  ..., False, False, False])

In [143]:
# Fraction of training rows where the thresholded prediction equals the label.
corrects.float().mean().item()

0.048746250569820404

In [150]:
def mse(preds, targets):
    """Mean squared error between ``preds`` and ``targets``.

    Returns the mean of the squared element-wise differences as a scalar
    tensor. The earlier version also took the square root, which made it
    return the *root* mean squared error despite the name.
    """
    return ((preds - targets) ** 2).mean()

def measure_loss(predictions, targets):
    """Mean distance between sigmoid(predictions) and the 0/1 targets.

    Each raw prediction is squashed with a sigmoid. Where the target is 1 the
    per-sample loss is ``1 - p``, otherwise it is ``p``; the mean over all
    samples is returned as a scalar tensor.

    ``targets`` is reshaped to the shape of ``predictions`` when both hold the
    same number of elements. Without this, predictions of shape (N, 1) and
    targets of shape (N,) silently broadcast to an (N, N) matrix, so every
    prediction is scored against every target instead of its own.
    """
    predictions = predictions.sigmoid()
    if targets.shape != predictions.shape and targets.numel() == predictions.numel():
        targets = targets.reshape(predictions.shape)
    return torch.where(targets == 1, 1 - predictions, predictions).mean()

# Re-initialise the parameters with the weights as a column, so that
# `xb @ weights` gives one prediction per row. The feature count is read from
# `train_x` rather than hard-coded.
weights = init_params((train_x.shape[1], 1))
bias = init_params(1)

# The training set is laid out as all fraud rows followed by all non-fraud
# rows, so it is shuffled; without shuffling every mini-batch would contain a
# single class only.
dl = DataLoader(dset, batch_size=512, shuffle=True)
xb, yb = first(dl)
# Sanity-check the first batch: one label per row, one weight per feature.
assert xb.shape[0] == yb.shape[0] and xb.shape[1] == weights.shape[0]

# Validation batches are only scored, so their order does not matter.
valid_dl = DataLoader(valid_dset, batch_size=512)

In [151]:
def calc_grad(xb, yb, model):
    """Run ``model`` on ``xb``, score it against ``yb`` and backpropagate.

    The loss comes from ``measure_loss``. Calling ``backward()`` on it
    accumulates gradients into the tensors that took part in the computation.
    Nothing is returned.
    """
    measure_loss(model(xb), yb).backward()

def train_epoch(model, lr, params):
    """Run one pass of plain SGD over the module-level training loader ``dl``.

    For every mini-batch the gradients are computed with ``calc_grad``. Each
    tensor in ``params`` is then moved against its gradient by a step of size
    ``lr``, and its gradient is reset to zero for the next batch.
    """
    for batch_x, batch_y in dl:
        calc_grad(batch_x, batch_y, model)
        for param in params:
            # Update through `.data` so the step itself is not tracked by autograd.
            param.data.sub_(param.grad * lr)
            param.grad.zero_()

def batch_accuracy(xb, yb):
    """Fraction of predictions that match the labels at a 0.5 threshold.

    ``xb`` holds raw model outputs (despite the name); they are passed through
    a sigmoid and counted as positive when above 0.5. ``yb`` holds the labels.
    Returns a scalar float tensor.

    ``yb`` is reshaped to the shape of the predictions when both hold the same
    number of elements. Without this, outputs of shape (N, 1) and labels of
    shape (N,) silently broadcast to an (N, N) comparison, so each prediction
    is compared with every label rather than its own.
    """
    preds = xb.sigmoid()
    if yb.shape != preds.shape and yb.numel() == preds.numel():
        yb = yb.reshape(preds.shape)
    correct = (preds > 0.5) == yb
    return correct.float().mean()

def validate_epoch(model):
    """Score ``model`` on the module-level validation loader ``valid_dl``.

    ``batch_accuracy`` is computed per mini-batch and the per-batch values are
    averaged with equal weight for each batch. The result is returned as a
    Python float rounded to four decimals.
    """
    batch_scores = []
    for batch_x, batch_y in valid_dl:
        batch_scores.append(batch_accuracy(model(batch_x), batch_y))
    mean_score = torch.stack(batch_scores).mean()
    return round(mean_score.item(), 4)

# Smoke test: accuracy of the current linear model on the first 12 training rows.
batch_accuracy(linear1(train_x[:12]), train_y[:12])


tensor(0.8333)

In [152]:
# Validation accuracy of the linear model with its current parameters.
validate_epoch(linear1)

0.155

In [154]:
# Step size and the tensors the optimiser loop updates.
lr = 0.001
params = (weights, bias)

# One initial epoch; the validation result of this call is neither stored nor displayed.
train_epoch(linear1, lr, params)
validate_epoch(linear1)

# Twenty further epochs, printing the validation accuracy after each on one line.
for i in range(20):
    train_epoch(linear1, lr, params)
    epoch_accuracy = validate_epoch(linear1)
    print(epoch_accuracy, end=' ')
    

0.9879 0.9881 0.9882 0.9884 0.9885 0.9885 0.9886 0.9886 0.9886 0.9887 0.9887 0.9888 0.9888 0.9888 0.9888 0.9889 0.9889 0.9889 0.9889 0.9889 

In [156]:
from fastai.vision.all import *

# Small fully connected network: 52 inputs -> 30 hidden units (ReLU) -> 1 output score.
simple_net = nn.Sequential(nn.Linear(52, 30), nn.ReLU(), nn.Linear(30, 1))

# Bundle the training and validation loaders, then train with SGD using the
# same loss and accuracy functions as the hand-written loop.
dls = DataLoaders(dl, valid_dl)
learn = Learner(
    dls,
    simple_net,
    opt_func=SGD,
    loss_func=measure_loss,
    metrics=batch_accuracy,
)
learn.fit(n_epoch=40, lr=0.0001)


epoch,train_loss,valid_loss,batch_accuracy,time
0,0.490994,0.490455,0.723407,00:05
1,0.465529,0.465546,0.980727,00:05
2,0.439329,0.439903,0.988945,00:05
3,0.411961,0.413106,0.98897,00:05
4,0.383127,0.384863,0.98897,00:05
5,0.352739,0.355104,0.98897,00:05
6,0.321031,0.324072,0.98897,00:05
7,0.288605,0.29236,0.98897,00:05
8,0.25637,0.260867,0.98897,00:05
9,0.225371,0.230611,0.98897,00:05
