### All imports in one place

In [1]:
import pickle
import warnings
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from keras.layers import Dense
from keras.models import Sequential
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

### Ignoring warnings

In [2]:
warnings.filterwarnings('ignore')

### Loading dataset and labels

In [3]:
# Read the feature table and the label table from their CSV files
# (features first, labels second).
dataset, labels = map(
    pd.read_csv,
    ('corpora/x_train.csv', 'corpora/y_train.csv'),
)

### Some general information about the dataset

In [4]:
# Print the column / dtype / non-null summary for the features, then the labels.
for frame in (dataset, labels):
    frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14165 entries, 0 to 14164
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   step            14165 non-null  float64
 1   type            14165 non-null  object 
 2   amount          14165 non-null  float64
 3   nameOrig        14165 non-null  object 
 4   oldbalanceOrg   14165 non-null  float64
 5   newbalanceOrig  14165 non-null  float64
 6   nameDest        14165 non-null  object 
 7   oldbalanceDest  14165 non-null  float64
 8   newbalanceDest  14165 non-null  float64
 9   isFlaggedFraud  14165 non-null  float64
dtypes: float64(7), object(3)
memory usage: 1.1+ MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14165 entries, 0 to 14164
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   isFraud  14165 non-null  float64
dtypes: float64(1)
memory usage: 110.8 KB


### Replacing the type column values with integers

In [5]:
# Encode the transaction type as an integer code.
# The result is assigned back to the column explicitly: calling
# `replace(..., inplace=True)` on the Series returned by `dataset['type']`
# is chained in-place mutation, which pandas deprecates and which no longer
# updates `dataset` once copy-on-write is active.
# The final `astype` pins an integer dtype regardless of how the installed
# pandas version infers the result of `replace`, and fails loudly if a
# transaction type outside the mapping is still present as text.
dataset['type'] = dataset['type'].replace({
    'CASH_OUT' : 1,
    'PAYMENT'  : 2,
    'CASH_IN'  : 3,
    'TRANSFER' : 4,
    'DEBIT'    : 5
}).astype('int64')

### Replacing the leading letter of nameOrig and nameDest with a digit so they can be parsed as numbers

In [6]:
# Replace the leading character of each account id with a digit so the ids
# can later be parsed as numbers:
#   * origin ids always get the prefix '0';
#   * destination ids get '0' when they start with 'C' and '1' when they
#     start with 'M'; any other destination id is left untouched.
# Vectorised string operations replace the original per-row Python loop,
# which performed one `.at` read and one `.loc` write per row and column.
dataset['nameOrig'] = '0' + dataset['nameOrig'].str[1:]

# Prefix digit per row; NaN wherever the first character is neither 'C' nor 'M'.
dest_prefix = dataset['nameDest'].str[0].map({'C': '0', 'M': '1'})
dest_recoded = dest_prefix + dataset['nameDest'].str[1:]
# Keep the original value for rows that did not match either prefix.
dataset['nameDest'] = dest_recoded.where(dest_prefix.notna(), dataset['nameDest'])

### Type casting all the columns to float64

In [7]:
# `astype` returns a new frame, so the result must be assigned back for the
# cast to take effect; the bare expression only displayed what the dtypes
# would have been and left `dataset` unchanged.
dataset = dataset.astype('float64')
dataset.dtypes

step              float64
type              float64
amount            float64
nameOrig          float64
oldbalanceOrg     float64
newbalanceOrig    float64
nameDest          float64
oldbalanceDest    float64
newbalanceDest    float64
isFlaggedFraud    float64
dtype: object

### Scaling the numeric columns using the StandardScaler (all except `type` and `isFlaggedFraud`)

In [8]:
# Columns passed through the scaler. `type` and `isFlaggedFraud` are not
# listed and therefore keep their current values.
columns_to_scale = [
    'step', 'amount',
    'nameOrig', 'oldbalanceOrg', 'newbalanceOrig',
    'nameDest', 'oldbalanceDest', 'newbalanceDest',
]

# Fit the scaler on the selected columns and write the scaled values back
# into the same columns of `dataset`.
standardScaler = StandardScaler()
scaled_values = standardScaler.fit_transform(dataset[columns_to_scale])
dataset[columns_to_scale] = scaled_values

### Training sets

In [9]:
# The prepared feature frame and the label frame are used directly as the
# training inputs and targets; both names alias the existing objects.
x_train = dataset
y_train = labels

### Training and saving the KNN Classifier with K = 5

In [105]:
# Fit a k-nearest-neighbours classifier (k = 5) on the training data.
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(x_train, y_train)

# Persist the fitted model. The context manager closes (and thereby flushes)
# the file handle, which the original bare `open(...)` argument never did.
with open('models/knn_classifier.sav', 'wb') as model_file:
    pickle.dump(knn_classifier, model_file)

### Training and saving the SV Classifier with a linear kernel

In [106]:
# Fit a support vector classifier with a linear kernel on the training data.
svc_classifier = SVC(kernel='linear')
svc_classifier.fit(x_train, y_train)

# Persist the fitted model. The context manager closes (and thereby flushes)
# the file handle, which the original bare `open(...)` argument never did.
with open('models/svc_classifier.sav', 'wb') as model_file:
    pickle.dump(svc_classifier, model_file)

### Training and saving the DT Classifier with as many features as columns in dataset

In [107]:
# Fit a decision tree that may consider every feature column at each split
# (`max_features` equals the number of columns); `random_state` is fixed so
# repeated fits build the same tree.
dt_classifier = DecisionTreeClassifier(max_features=len(x_train.columns), random_state=0)
dt_classifier.fit(x_train, y_train)

# Persist the fitted model. The context manager closes (and thereby flushes)
# the file handle, which the original bare `open(...)` argument never did.
with open('models/dt_classifier.sav', 'wb') as model_file:
    pickle.dump(dt_classifier, model_file)

### Preparing data for Neural Network training

In [10]:
# Convert features and labels to NumPy arrays for Keras.
# Wrapping in `pd.DataFrame` first keeps this cell safe to re-run when the
# names already hold arrays from a previous execution.
features_frame = pd.DataFrame(x_train)
targets_frame = pd.DataFrame(y_train)
x_train = features_frame.to_numpy()
y_train = targets_frame.to_numpy()

### Defining and training Neural Network

In [13]:
# Small fully connected binary classifier: two ReLU hidden layers (12 and 8
# units) followed by a single sigmoid output unit, compiled with binary
# cross-entropy loss and the Adam optimizer.
# The input width is read from the training matrix instead of being
# hard-coded to 10, so the network stays in sync with the feature columns.
model = Sequential()
model.add(Dense(12, input_dim=x_train.shape[1], activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
# Train for 150 passes over the data in mini-batches of 10 rows.
model.fit(x_train, y_train, epochs=150, batch_size=10)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x273a5016710>

### Saving Neural Network

In [14]:
# Persist the network in two parts: the architecture as a JSON document and
# the trained weights in a separate .h5 file.
architecture_path = 'models/neural_network.json'
weights_path = 'models/neural_network.h5'

model_json = model.to_json()
with open(architecture_path, 'w') as json_file:
    json_file.write(model_json)

model.save_weights(weights_path)