# MODELLING
## Logistic Regression with Tensorflow Keras
### Fraud Detection System Development using Deep Neural Network for Reported Transactional Data

#### Import Libraries and Dataset

In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np

In [6]:
df = pd.read_csv('user_data_clip99_minmax.csv', sep = ';')

In [7]:
df = df.rename(columns={"Unnamed: 0": "id"})
df = df.drop(columns = ['uid', 'id'])
df.head()

Unnamed: 0,is_scammer,trx_date,report_date,registereddate,birthday,is_verified,aqc_freq_prepaid_mobile,aqc_mean_prepaid_mobile_amount,aqc_freq_topup,aqc_freq_topup_within_7d,...,gender_Male,gender_None,job_position_KARYAWAN,job_position_LAINNYA,job_position_PEGAWAI_NS,job_position_PELAJAR,job_position_RUMAH_TANGGA,job_position_SPESIALIS,job_position_TIDAK_KERJA,job_position_WIRASWASTA
0,1,0.376934,0.005629,0.673512,0.909517,1,0.019455,0.456958,0.02275,0.007009,...,0,0,0,0,0,1,0,0,0,0
1,0,0.552039,0.63227,0.784524,0.948204,1,0.0,0.0,0.038576,0.074766,...,1,0,0,0,0,1,0,0,0,0
2,0,0.772855,0.696998,0.783631,0.766422,1,0.077821,0.324991,0.683482,0.271028,...,1,0,0,0,0,0,0,0,0,1
3,0,0.535865,0.550657,0.565476,0.974102,1,0.042802,0.200904,0.092977,0.042056,...,0,0,0,0,0,1,0,0,0,0
4,0,0.78692,0.71576,0.999702,0.688586,1,0.0,0.0,0.031652,0.056075,...,1,0,0,0,0,0,0,0,0,1


#### Dataset Splitting and Shuffling

In [8]:
# Split Train and Validation Dataset
#from sklearn.model_selection import train_test_split
SPLIT_SIZE = 0.7368421

train_len = int(len(df) * SPLIT_SIZE)

shuffled_files = df.sample(frac = 1, random_state = 1)

test_set = shuffled_files[train_len:]
train_set = shuffled_files[:train_len]

y_train = train_set['is_scammer']
x_train = train_set.drop(columns=['is_scammer'])
y_test = test_set['is_scammer']
x_test = test_set.drop(columns=['is_scammer'])

In [45]:
# Convert to tensors
x_train, y_train = tf.convert_to_tensor(x_train, dtype=tf.float32), tf.convert_to_tensor(y_train, dtype=tf.float32)
x_test, y_test = tf.convert_to_tensor(x_test, dtype=tf.float32), tf.convert_to_tensor(y_test, dtype=tf.float32)

In [36]:
y_test

<tf.Tensor: shape=(10716,), dtype=float32, numpy=array([1., 1., 1., ..., 1., 1., 1.], dtype=float32)>

#### Modelling

In [90]:
model = tf.keras.models.Sequential(
    tf.keras.layers.Dense(units=1, activation='sigmoid', input_dim=x_train.shape[1])
)

In [91]:
# Main Metrics: F1, precision
# Complementary Metrics: 

In [92]:
model.compile(
    optimizer='sgd',
    loss='binary_crossentropy', 
    metrics=[
        tf.keras.metrics.Precision(), 
        tf.keras.metrics.Recall(), 
        tf.keras.metrics.FalseNegatives(),
        tf.keras.metrics.FalsePositives(),
        tf.keras.metrics.TruePositives(),
        tf.keras.metrics.TrueNegatives()
    ]
)

In [93]:
history = model.fit(
    x_train, 
    y_train, 
    epochs=50, 
    validation_data=(x_test, y_test) 
#     callbacks=[metrics]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50


Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50


Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


#### Evaluation

In [108]:
history.history.keys()

dict_keys(['loss', 'precision_5', 'recall_5', 'false_negatives_2', 'false_positives_2', 'true_positives_2', 'true_negatives_2', 'val_loss', 'val_precision_5', 'val_recall_5', 'val_false_negatives_2', 'val_false_positives_2', 'val_true_positives_2', 'val_true_negatives_2'])

In [114]:
for key in history.history.keys():
    print(str(key) + " : " + str(history.history[key][49]))

loss : 0.008797332644462585
precision_5 : 1.0
recall_5 : 1.0
false_negatives_2 : 0.0
false_positives_2 : 0.0
true_positives_2 : 14809.0
true_negatives_2 : 15192.0
val_loss : 0.008578669279813766
val_precision_5 : 1.0
val_recall_5 : 1.0
val_false_negatives_2 : 0.0
val_false_positives_2 : 0.0
val_true_positives_2 : 5241.0
val_true_negatives_2 : 5475.0


#### Save Model

In [115]:
model.save("log_reg_keras.h5")