In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.metrics import BinaryAccuracy, Precision, Recall

print(f"Version of TensorFlow : {tf.__version__}")


Version of TensorFlow : 2.15.0


In [4]:
data = pd.read_csv("creditcard_2023.csv")

In [5]:
data.head()

Unnamed: 0,id,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-0.260648,-0.469648,2.496266,-0.083724,0.129681,0.732898,0.519014,-0.130006,0.727159,...,-0.110552,0.217606,-0.134794,0.165959,0.12628,-0.434824,-0.08123,-0.151045,17982.1,0
1,1,0.9851,-0.356045,0.558056,-0.429654,0.27714,0.428605,0.406466,-0.133118,0.347452,...,-0.194936,-0.605761,0.079469,-0.577395,0.19009,0.296503,-0.248052,-0.064512,6531.37,0
2,2,-0.260272,-0.949385,1.728538,-0.457986,0.074062,1.419481,0.743511,-0.095576,-0.261297,...,-0.00502,0.702906,0.945045,-1.154666,-0.605564,-0.312895,-0.300258,-0.244718,2513.54,0
3,3,-0.152152,-0.508959,1.74684,-1.090178,0.249486,1.143312,0.518269,-0.06513,-0.205698,...,-0.146927,-0.038212,-0.214048,-1.893131,1.003963,-0.51595,-0.165316,0.048424,5384.44,0
4,4,-0.20682,-0.16528,1.527053,-0.448293,0.106125,0.530549,0.658849,-0.21266,1.049921,...,-0.106984,0.729727,-0.161666,0.312561,-0.414116,1.071126,0.023712,0.419117,14278.97,0


In [6]:
print(f"Shape of data : {data.shape}")

Shape of data : (568630, 31)


## Preprocessing

##### Checking for duplicated data points and removing them

In [7]:
duplicated_values = data.value_counts()
duplicated_values = duplicated_values.to_frame()
duplicated_values.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0,Unnamed: 19_level_0,Unnamed: 20_level_0,Unnamed: 21_level_0,Unnamed: 22_level_0,Unnamed: 23_level_0,Unnamed: 24_level_0,Unnamed: 25_level_0,Unnamed: 26_level_0,Unnamed: 27_level_0,Unnamed: 28_level_0,Unnamed: 29_level_0,Unnamed: 30_level_0,count
id,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class,Unnamed: 31_level_1
0,-0.260648,-0.469648,2.496266,-0.083724,0.129681,0.732898,0.519014,-0.130006,0.727159,0.637735,-0.98702,0.293438,-0.941386,0.54902,1.804879,0.215598,0.512307,0.333644,0.12427,0.091202,-0.110552,0.217606,-0.134794,0.165959,0.12628,-0.434824,-0.08123,-0.151045,17982.1,0,1
379089,0.211525,-0.172433,0.38909,-0.520573,0.605021,0.154907,0.629336,-0.149879,0.376518,0.485291,-0.062064,0.695621,-0.620391,0.341974,0.055285,0.501684,0.734376,0.561183,0.332665,-0.122305,-0.190367,-0.483677,0.040308,1.227441,-0.864953,0.369719,-0.180891,-0.130064,5114.32,1,1
379083,-0.056224,0.812127,-0.901054,1.340335,0.57243,-1.108707,-0.168552,0.044594,-1.323992,-0.901344,0.927204,-1.148801,-0.729588,-1.585337,0.657354,-0.459101,-0.253884,0.267426,-0.384456,0.549243,0.106539,-0.598813,-0.222497,-0.920982,-0.234792,0.728264,0.856948,0.942765,21612.5,1,1
379084,-0.661333,0.71846,-0.627551,1.009158,-0.610775,0.162125,-0.632635,0.014514,-1.03004,-0.75554,0.853292,-1.100166,-0.388581,-1.093695,0.672853,-1.310834,-1.149475,-1.409933,0.803254,0.051403,0.673335,-0.309568,0.318867,-0.280152,-0.691953,0.310704,0.617553,0.538272,3875.05,1,1
379085,0.908772,-0.120471,0.25935,0.334283,0.530768,0.306387,0.485395,-0.116964,-0.046796,0.492581,0.286629,0.413548,-1.123131,-0.019479,0.359567,1.681941,1.137343,1.205748,-1.577292,-0.327593,-0.131594,-0.258222,-0.100895,-0.415285,0.660461,0.153713,-0.21213,0.053075,13369.44,1,1
379086,-0.988588,0.210995,-0.685727,0.832897,-0.717356,0.14371,-0.654842,-0.027151,-0.7058,-0.723645,0.816733,-0.932878,-0.331674,-0.936884,0.922593,-0.937358,-1.074161,-0.904533,0.678118,-0.760861,-0.049257,0.656853,0.249006,-0.274331,-0.294853,-0.389519,-0.643844,0.237237,23063.22,1,1
379087,0.060282,0.576045,-0.739466,1.076765,1.078398,-0.947936,0.163517,-0.001971,-1.080344,-0.530279,1.011454,-0.68108,-0.825464,-1.279849,-0.577576,0.664725,0.632843,1.908391,-1.322008,0.190153,0.011185,-0.50718,-0.099939,-0.959833,-0.847559,0.172763,0.372449,0.61753,3659.93,1,1
379088,-0.920784,0.253166,-0.986008,0.576595,-0.612349,-1.121961,-0.514484,0.447629,-0.94091,-1.038442,0.958835,-1.041447,-0.325763,-1.195772,-0.176406,-0.837172,-1.001923,-0.81947,0.171095,0.055188,0.198699,-0.370894,-0.1696,-0.450099,0.008034,1.729979,1.57124,-0.89811,507.83,1,1
379090,-1.864083,1.771047,-1.801018,1.555376,-2.225492,-1.801494,-1.947294,2.786778,-1.685096,-1.692345,1.38075,-1.490524,1.82277,-1.128814,-0.07967,-1.666332,-1.889376,-1.874638,1.559518,-0.35662,0.783244,-0.419915,-0.882868,0.933353,0.522755,-0.105125,-0.764064,-1.02426,15846.81,1,1
379098,0.366211,0.555796,-0.764675,1.121016,1.010087,-0.950854,0.185136,-0.071981,-1.106592,-0.605405,1.245389,-0.857179,-2.556022,-1.329595,-0.302395,0.135132,0.583234,1.096294,-2.051636,0.151008,0.038532,-0.351939,-0.541486,-0.844571,2.217356,1.147506,0.456721,0.860927,22966.06,1,1


In [8]:
data = data.drop_duplicates()
data = data.reset_index()
data = data.drop(['index'], axis=1)
data.head()

Unnamed: 0,id,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-0.260648,-0.469648,2.496266,-0.083724,0.129681,0.732898,0.519014,-0.130006,0.727159,...,-0.110552,0.217606,-0.134794,0.165959,0.12628,-0.434824,-0.08123,-0.151045,17982.1,0
1,1,0.9851,-0.356045,0.558056,-0.429654,0.27714,0.428605,0.406466,-0.133118,0.347452,...,-0.194936,-0.605761,0.079469,-0.577395,0.19009,0.296503,-0.248052,-0.064512,6531.37,0
2,2,-0.260272,-0.949385,1.728538,-0.457986,0.074062,1.419481,0.743511,-0.095576,-0.261297,...,-0.00502,0.702906,0.945045,-1.154666,-0.605564,-0.312895,-0.300258,-0.244718,2513.54,0
3,3,-0.152152,-0.508959,1.74684,-1.090178,0.249486,1.143312,0.518269,-0.06513,-0.205698,...,-0.146927,-0.038212,-0.214048,-1.893131,1.003963,-0.51595,-0.165316,0.048424,5384.44,0
4,4,-0.20682,-0.16528,1.527053,-0.448293,0.106125,0.530549,0.658849,-0.21266,1.049921,...,-0.106984,0.729727,-0.161666,0.312561,-0.414116,1.071126,0.023712,0.419117,14278.97,0


In [9]:
print(f"Shape of updated data : {data.shape}")

Shape of updated data : (568630, 31)


##### Checking NaN values

In [10]:
data.isnull().sum()

id        0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

#### Checking the distribution of the Class and Undersampling

Here, output y is called class
* If Class = 1 -> Fraud Transaction
* If Class = 0 -> Legit Transaction

In [11]:
data['Class'].value_counts()

Class
0    284315
1    284315
Name: count, dtype: int64

The data exhibits a significant bias towards legitimate transactions. A basic model predicting legitimate transactions every time would yield an accuracy of over 99%, which is not ideal. Therefore, #undersampling the data is necessary.

In [12]:
legit_data = data[data['Class'] == 0]
fraud_data = data[data['Class'] == 1]

In [13]:
print(f"Shape of Legit Data : {legit_data.shape}")
print(f"Shape of Fraud Data : {fraud_data.shape}")

Shape of Legit Data : (284315, 31)
Shape of Fraud Data : (284315, 31)


Taking random samples from Legit Data same as size of Fraud Data

In [14]:
legit_data_updated = legit_data.sample(n = fraud_data.shape[0])

In [15]:

print(f"Shape of Legit Data : {legit_data_updated.shape}")
print(f"Shape of Fraud Data : {fraud_data.shape}")

Shape of Legit Data : (284315, 31)
Shape of Fraud Data : (284315, 31)


In [16]:
# Concatenate the data
data_updated = pd.concat([legit_data_updated, fraud_data], axis = 0)
data_updated

Unnamed: 0,id,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
55001,55001,-0.157158,-0.624370,1.904319,0.002548,-0.063530,1.569317,0.060122,-0.379401,1.219746,...,0.383390,-1.018242,-2.242913,-0.238905,1.015113,-0.398106,0.421939,0.227771,9232.58,0
218980,218980,-0.561426,-1.859024,0.952442,-1.917161,0.186360,-0.261528,-0.138172,-0.710571,1.689376,...,-0.299712,-0.162900,-3.940340,0.280969,0.599533,-0.818793,-0.264296,0.953107,12767.83,0
183068,183068,1.733065,-0.484927,-0.338547,-0.508672,1.844365,2.149494,0.423823,0.009915,0.542049,...,-0.097469,0.060500,0.030920,1.513812,0.658086,-1.289520,-0.234373,-0.254534,9319.29,0
83369,83369,0.161355,-0.722348,1.919677,-1.596573,-0.233721,0.402011,0.318655,-0.175508,-0.564956,...,-0.068540,0.547724,0.028902,0.633042,-0.876625,-0.279478,-0.251008,-0.289524,16565.23,0
113630,113630,-0.031355,-0.219666,1.487046,-0.116659,0.280620,0.567073,0.558993,-0.070880,0.330330,...,-0.092115,0.201758,-0.085954,0.423868,-0.401917,-0.580871,0.261089,0.372878,22246.09,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
568625,568625,-0.833437,0.061886,-0.899794,0.904227,-1.002401,0.481454,-0.370393,0.189694,-0.938153,...,0.167503,0.419731,1.288249,-0.900861,0.560661,-0.006018,3.308968,0.081564,4394.16,1
568626,568626,-0.670459,-0.202896,-0.068129,-0.267328,-0.133660,0.237148,-0.016935,-0.147733,0.483894,...,0.031874,0.388161,-0.154257,-0.846452,-0.153443,1.961398,-1.528642,1.704306,4653.40,1
568627,568627,-0.311997,-0.004095,0.137526,-0.035893,-0.042291,0.121098,-0.070958,-0.019997,-0.122048,...,0.140788,0.536523,-0.211100,-0.448909,0.540073,-0.755836,-0.487540,-0.268741,23572.85,1
568628,568628,0.636871,-0.516970,-0.300889,-0.144480,0.131042,-0.294148,0.580568,-0.207723,0.893527,...,-0.060381,-0.195609,-0.175488,-0.554643,-0.099669,-1.434931,-0.159269,-0.076251,10160.83,1


In [17]:
# randomly shuffling
data_updated = data_updated.sample(frac=1)
data_updated

Unnamed: 0,id,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
529137,529137,-0.928640,-0.238530,-0.757097,0.020714,0.117750,0.200009,-0.525105,-0.294222,-0.816412,...,-0.226424,1.428466,-0.231378,-0.429209,1.398836,1.700747,0.891586,-2.369334,10673.51,1
491226,491226,-0.295538,-0.924505,0.023364,1.059566,0.280535,0.693233,0.976836,-0.311143,0.185700,...,-0.217292,-0.157562,0.445011,1.974289,-0.962623,0.163325,0.130430,-1.861842,9255.71,1
318639,318639,-0.984914,0.414957,-0.552622,-0.247136,0.166065,0.791129,-0.656226,-1.566631,0.617445,...,2.680002,-2.183399,0.377818,0.200263,0.485012,1.074103,-0.009749,1.203711,18211.81,1
446685,446685,-0.909071,0.517082,-0.679894,0.699931,-0.721859,-0.724758,-0.674349,0.364500,-0.882138,...,0.345441,0.210109,-0.422754,0.349922,-0.064643,-0.521351,0.072704,1.840916,2454.39,1
490279,490279,1.005584,0.295857,-0.645788,1.023021,0.719596,-0.329439,0.094332,-0.095545,-0.670240,...,-0.010355,-0.353939,-0.137286,-0.206871,0.741441,0.388425,0.359051,0.560608,9943.88,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345037,345037,-1.138970,1.070851,-1.203811,0.729509,-1.121804,-1.404404,-0.899363,1.067754,-0.695637,...,0.263305,-0.476927,0.069326,-0.006757,0.528282,-0.587281,1.111546,0.705665,11922.76,1
306289,306289,-0.481239,1.069196,-0.963115,1.262110,0.254864,-1.582914,-0.294381,0.259746,-1.126491,...,0.163664,-0.453683,-0.431971,-0.428562,1.712918,0.934449,0.307238,0.683768,22772.46,1
515502,515502,-0.841052,0.348399,-0.732623,0.553494,-0.950673,0.288186,-0.519068,0.386787,-0.730611,...,0.269629,0.484955,0.175233,-0.612080,0.180018,0.300356,-0.372990,-0.322921,21432.67,1
167696,167696,-0.110155,-0.365570,1.450943,-1.065077,0.656764,1.279073,0.385734,0.013016,0.768832,...,0.028573,1.262582,-0.187531,-2.211880,-0.672890,1.572368,0.262641,0.341398,15385.03,0


In [18]:
data_updated['Class'].value_counts()

Class
1    284315
0    284315
Name: count, dtype: int64

### Splitting the dataset

In [19]:
X = data_updated.iloc[:250000,:-1]
Y = data_updated.iloc[:250000,-1]

In [20]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.35, shuffle=True)

In [21]:
print(f"Shape of X_train : {X_train.shape}")
print(f"Shape of X_test : {X_test.shape}")
print(f"Shape of Y_train : {Y_train.shape}")
print(f"Shape of Y_test : {Y_test.shape}")

Shape of X_train : (162500, 30)
Shape of X_test : (87500, 30)
Shape of Y_train : (162500,)
Shape of Y_test : (87500,)


In [22]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Feature Selection

In [None]:
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the classifier to the training data
rf_classifier.fit(X_train, y_train)

# Get feature importances from the trained model
feature_importances = rf_classifier.feature_importances_

# Create a DataFrame to display feature importances
feature_importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importances})

# Sort features by importance in descending order
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

# Plot the feature importances
plt.figure(figsize=(10, 6))
plt.barh(feature_importance_df['Feature'], feature_importance_df['Importance'])
plt.xlabel('Feature Importance')
plt.ylabel('Feature')
plt.title('Random Forest Feature Importance')
plt.show()

In [None]:
from sklearn.feature_selection import SelectFromModel
# Use SelectFromModel to automatically select features based on importance
sfm = SelectFromModel(clf)
sfm.fit(X_train, Y_train)

# Get selected features
selected_features = X_train.columns[sfm.get_support()]

# Transform the data to keep only selected features
X_train_selected = sfm.transform(X_train)
X_test_selected = sfm.transform(X_test)

# Print selected features
print("Selected Features:", selected_features)


## Models and Algorithm

### 1. Logistic Regression

In [23]:
logistic_r = LogisticRegression(penalty='l2', C=0.0001)
logistic_r.fit(X_train, Y_train)

In [24]:
Y_logistic_r = logistic_r.predict(X_test)
Y_logistic_r

array([0, 1, 1, ..., 0, 0, 0], dtype=int64)

In [25]:
from sklearn.model_selection import cross_val_score

# Assuming you have X_train and Y_train
logistic_r = LogisticRegression(penalty='l2', C=0.1)
cv_scores_precision = cross_val_score(logistic_r, X_train, Y_train, cv=5, scoring='precision')
print(cv_scores_precision)

[0.99888676 0.99913318 0.99913329 0.99888628 0.99925752]


In [26]:
print(f"Accuracy : {accuracy_score(Y_test, Y_logistic_r)*100}%")
print(f"Confusion Matrix : \n {confusion_matrix(Y_test, Y_logistic_r)}")

Accuracy : 97.74514285714285%
Confusion Matrix : 
 [[43545    43]
 [ 1930 41982]]


### 2.Decision Tree Classifier

In [31]:
from sklearn.model_selection import GridSearchCV

# Create a decision tree classifier with initial regularization
decision_tree_classification = DecisionTreeClassifier(
    random_state=42,
    max_depth=5,  # Start with a shallower tree
    min_samples_split=100,  # Require more samples to split
    min_samples_leaf=30,  # Require more samples at leaf nodes
    max_features='sqrt',  # Consider fewer features at each split
    ccp_alpha=0.001  # Enable cost complexity pruning
)

# Define a grid of hyperparameters to explore
param_grid = {
    'max_depth': [5, 10, 15],
    'min_samples_split': [50, 100, 150],
    'min_samples_leaf': [20, 30, 40],
    'ccp_alpha': [0.001, 0.006 , 0.01],
}

# Perform grid search cross-validation to find optimal hyperparameters
grid_search = GridSearchCV(decision_tree_classification, param_grid, cv=5)
grid_search.fit(X_train, Y_train)

# Retrieve the best model
best_model = grid_search.best_estimator_


In [32]:
Y_DTc= best_model.predict(X_test)

In [33]:
print(f"Accuracy : {accuracy_score(Y_test, Y_DTc)*100}%")
print(f"Confusion Matrix : \n {confusion_matrix(Y_test, Y_DTc)}")

Accuracy : 99.50742857142856%
Confusion Matrix : 
 [[43253   335]
 [   96 43816]]


### 3. Random Forest Classifier

In [34]:
random_forest_classification = RandomForestClassifier(n_estimators = 20,)
random_forest_classification.fit(X_train, Y_train)

In [35]:
Y_RF_class = random_forest_classification.predict(X_test)

In [36]:
print(f"Accuracy : {accuracy_score(Y_test, Y_RF_class)*100}%")
print(f"Confusion Matrix : \n {confusion_matrix(Y_test, Y_RF_class)}")

Accuracy : 99.97142857142856%
Confusion Matrix : 
 [[43581     7]
 [   18 43894]]


### 4. Neural Network approach

In [37]:
model = Sequential()
model.add(Input(X_train.shape[1]))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(16, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))




In [38]:
model.compile(loss = binary_crossentropy,
              optimizer = SGD(learning_rate = 0.001),
              metrics = [
                  BinaryAccuracy(name='accuracy'),
                  Precision(name='precision'),
                  Recall(name='recall')]
              )

In [39]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                1984      
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 128)               8320      
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 256)               33024     
                                                                 
 dropout_2 (Dropout)         (None, 256)               0         
                                                                 
 dense_3 (Dense)             (None, 16)                4

In [40]:
history = model.fit(X_train, Y_train, epochs = 10,  validation_split = 0.25)

Epoch 1/10

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [41]:
# i = np.arange(1, 151)

# fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
# fig.set_figheight(12)
# fig.set_figwidth(18)

# ax1.plot(i, history.history['loss'])
# ax1.plot(i, history.history['val_loss'])
# ax1.set_xlabel('Epoch')
# ax1.set_ylabel('Loss')
# ax1.set_title('Loss vs Epoch')
# ax1.legend(['Training', 'Validation'])

# ax2.plot(i, history.history['accuracy'])
# ax2.plot(i, history.history['val_accuracy'])
# ax2.set_xlabel('Epoch')
# ax2.set_ylabel('Accuracy')
# ax2.set_title('Accuracy vs Epoch')
# ax2.legend(['Training', 'Validation'])

# # If you want to plot precision and recall, you may need to calculate them during training and store in 'history'
# # Assuming history contains 'precision', 'val_precision', 'recall', 'val_recall'
# ax3.plot(i, history.history['precision'])
# ax3.plot(i, history.history['val_precision'])
# ax3.set_xlabel('Epoch')
# ax3.set_ylabel('Precision')
# ax3.set_title('Precision vs Epoch')
# ax3.legend(['Training', 'Validation'])

# ax4.plot(i, history.history['recall'])
# ax4.plot(i, history.history['val_recall'])
# ax4.set_xlabel('Epoch')
# ax4.set_ylabel('Recall')
# ax4.set_title('Recall vs Epoch')
# ax4.legend(['Training', 'Validation'])

# plt.show()


In [42]:
Y_neural_net = model.predict(X_test)
Y_neural_net = [1 if i > 0.5 else 0 for i in list(Y_neural_net)]



In [43]:
print(f"Accuracy : {accuracy_score(Y_test, Y_neural_net)*100}%")
print(f"Confusion Matrix : \n {confusion_matrix(Y_test, Y_neural_net)}")

Accuracy : 99.264%
Confusion Matrix : 
 [[43509    79]
 [  565 43347]]


## Conclusion


All models work good but Neural Network gives the highest accuracy marginally i.e. 96.47%

In [44]:
print(f"Accuracy by Logistic Regression : {accuracy_score(Y_test, Y_logistic_r)*100}%")
print(f"Accuracy by Decision Tree Classifier : {accuracy_score(Y_test, Y_DTc)*100}%")
print(f"Accuracy by Random Forest Classifier : {accuracy_score(Y_test, Y_RF_class)*100}%")
print(f"Accuracy by Neural Network : {accuracy_score(Y_test, Y_neural_net)*100}%")

Accuracy by Logistic Regression : 97.74514285714285%
Accuracy by Decision Tree Classifier : 99.50742857142856%
Accuracy by Random Forest Classifier : 99.97142857142856%
Accuracy by Neural Network : 99.264%


In [45]:
print(f"Confusion Matrix for Logistic Regression : \n {confusion_matrix(Y_test, Y_logistic_r)}")
print(f"Confusion Matrix for Decision Tree Classifier : \n {confusion_matrix(Y_test, Y_DTc)}")
print(f"Confusion Matrix for Random Forest Classifier : \n {confusion_matrix(Y_test, Y_RF_class)}")
print(f"Confusion Matrix for Neural Network : \n {confusion_matrix(Y_test, Y_neural_net)}")

Confusion Matrix for Logistic Regression : 
 [[43545    43]
 [ 1930 41982]]
Confusion Matrix for Decision Tree Classifier : 
 [[43253   335]
 [   96 43816]]
Confusion Matrix for Random Forest Classifier : 
 [[43581     7]
 [   18 43894]]
Confusion Matrix for Neural Network : 
 [[43509    79]
 [  565 43347]]


In [46]:
from sklearn.metrics import f1_score

f1 = f1_score(Y_test, Y_logistic_r)

print(f"F1 Score: {f1}")
############################################
f1 = f1_score(Y_test, Y_DTc)

print(f"F1 Score: {f1}")
############################################
f1 = f1_score(Y_test, Y_RF_class)

print(f"F1 Score: {f1}")
############################################
f1 = f1_score(Y_test, Y_neural_net)

print(f"F1 Score: {f1}")

F1 Score: 0.9770413209676856
F1 Score: 0.9951057765463361
F1 Score: 0.9997153041121475
F1 Score: 0.992626348210401
