# Importing necessary libraries

In [3]:
import tensorflow as tf
import pandas as pd
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout,BatchNormalization
from tensorflow.keras.layers import Conv1D, MaxPool1D
from tensorflow.keras.optimizers import Adam

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.metrics import classification_report
import seaborn as sns 
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve,auc
from sklearn.calibration import calibration_curve
import import_ipynb
from DataPreProcessing import new_df,X,y,X_train, X_test, y_train,y_test,sc

# Separate the dataset into non-fraudulent and fraudulent transactions

In [5]:
# Partition the transactions by label: rows whose `isFraud` flag equals 0 go
# to `non_fraud`, rows whose flag equals 1 go to `fraud`.
non_fraud = new_df.loc[new_df['isFraud'].eq(0)]
fraud = new_df.loc[new_df['isFraud'].eq(1)]

# Display the shapes of the non-fraudulent and fraudulent datasets

In [6]:
# (rows, columns) of each class subset, non-fraud first.
(non_fraud.shape, fraud.shape)

((6354407, 5), (8213, 5))

# Randomly sample the non-fraudulent dataset to match the size of the fraudulent dataset

In [7]:
# Undersample the majority class down to the number of fraud rows.
# A fixed `random_state` makes the draw repeatable across kernel restarts;
# without it every run selected a different subset of non-fraud rows.
non_fraud = non_fraud.sample(n=fraud.shape[0], random_state=42)
non_fraud.shape

(8213, 5)

# Combine fraud and non-fraud datasets to create a balanced dataset

In [8]:
# Stack the fraud rows on top of the non-fraud rows and renumber the index
# from 0.
# NOTE(review): `data` is not referenced again in the visible cells; training
# below uses X_train / y_train imported from DataPreProcessing — confirm
# whether this balanced frame was meant to feed the model.
data = pd.concat(objs=[fraud, non_fraud], axis=0, ignore_index=True)

# Count the number of fraudulent (1) and non-fraudulent (0) transactions in the combined dataset


In [9]:
# Tally the rows per `isFraud` label in the combined frame.
data["isFraud"].value_counts()

isFraud
1    8213
0    8213
Name: count, dtype: int64

# Convert y_train and y_test to numpy arrays


In [10]:
# Materialise both label containers as NumPy arrays (np.array copies its input).
y_train, y_test = (np.array(labels) for labels in (y_train, y_test))

# Reshape X_train and X_test to include a third dimension for compatibility with convolutional neural networks


In [11]:
# Append a trailing axis of length 1 so every sample becomes
# (n_features, 1), i.e. the (steps, channels) layout Conv1D takes as input.
X_train = X_train.reshape(*X_train.shape[:2], 1)
X_test = X_test.reshape(*X_test.shape[:2], 1)

# Display the new shapes of X_train and X_test after reshaping


In [12]:
# Shapes after the reshape, training split first.
(X_train.shape, X_test.shape)


((4771965, 4, 1), (1590655, 4, 1))

# Defining the architecture of a Convolutional Neural Network (CNN) 

In [13]:
# Number of training epochs.
epochs = 20

# Two Conv1D stages (kernel size 2), each followed by batch normalisation and
# dropout, then a dense head that ends in a single sigmoid unit.
model = Sequential([
    Conv1D(32, 2, activation='relu', input_shape=X_train[0].shape),
    BatchNormalization(),
    Dropout(0.2),

    Conv1D(64, 2, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),

    Dense(1, activation='sigmoid'),
])

# Display a summary of the model architecture, including layer types, output shapes, and number of parameters


In [14]:
# Print the layer-by-layer table (layer type, output shape, parameter count).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 3, 32)             96        
                                                                 
 batch_normalization (BatchN  (None, 3, 32)            128       
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 3, 32)             0         
                                                                 
 conv1d_1 (Conv1D)           (None, 2, 64)             4160      
                                                                 
 batch_normalization_1 (Batc  (None, 2, 64)            256       
 hNormalization)                                                 
                                                                 
 dropout_1 (Dropout)         (None, 2, 64)             0

# Compile the Model

In [15]:
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked as the reported metric.
# The step size is passed as `learning_rate`: `lr` is a deprecated alias that
# only warns on older Keras releases and is rejected by newer ones.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

  super(Adam, self).__init__(name, **kwargs)


# Train the Model

In [16]:
# Fit for `epochs` passes over the training split, evaluating on the test
# split after every epoch; the object returned by fit() is kept for later use.
model_training_results = model.fit(
    x=X_train,
    y=y_train,
    epochs=epochs,
    validation_data=(X_test, y_test),
    verbose=1,
)

Epoch 1/20


# Check whether the model is overfitting or underfitting

In [None]:
# Score both splits with the fitted network.
y_train_pred_cnn = model.predict(X_train)
y_val_pred_cnn = model.predict(X_test)

# Label a sample 1 when its score is strictly above 0.5, otherwise 0.
y_train_pred_cnn_binary = (y_train_pred_cnn > 0.5).astype(int)
y_test_pred_cnn_binary = (y_val_pred_cnn > 0.5).astype(int)

# Fraction of correctly labelled samples on each split.
train_accuracy_cnn = accuracy_score(y_true=y_train, y_pred=y_train_pred_cnn_binary)
val_accuracy_cnn = accuracy_score(y_true=y_test, y_pred=y_test_pred_cnn_binary)

In [None]:
# Keep four decimals. A bare round(x) collapses an accuracy in [0, 1] to the
# integer 0 or 1, which hid the real scores and made any later comparison of
# the two values meaningless.
train_accuracy = round(train_accuracy_cnn, 4)
val_accuracy = round(val_accuracy_cnn, 4)
print("Training Accuracy:", train_accuracy)
print("Validation Accuracy:", val_accuracy)


In [None]:
# Compare the two scores with a small tolerance so that noise-level
# differences are not reported as over- or underfitting.
ACCURACY_GAP_TOLERANCE = 0.01
accuracy_gap = train_accuracy - val_accuracy

if accuracy_gap > ACCURACY_GAP_TOLERANCE:
    print("The model may be overfitting.")
elif accuracy_gap < -ACCURACY_GAP_TOLERANCE:
    # NOTE(review): a higher validation score is only a weak hint of
    # underfitting; low scores on both splits are the usual signal.
    print("The model may be underfitting.")
else:
    print("The model's performance on training and validation sets are similar")


# Predicting a new result

In [None]:
 
# A single hand-written sample with four feature values.
input_data = np.array([[4, 181.00, 181.00, 0.00]])

# Put the sample on the same scale as the training data before predicting;
# the raw values were previously fed to the network unscaled.
# NOTE(review): assumes `sc` (imported from DataPreProcessing and otherwise
# unused in this notebook) is the scaler fitted on the training features and
# that X_train holds its output — confirm.
input_data_scaled = sc.transform(input_data)

# Reshape to (batch_size, sequence_length, num_features) = (1, 4, 1).
input_data_reshaped = input_data_scaled.reshape((1, input_data_scaled.shape[1], 1))

# Predict on the reshaped sample and show the raw model output.
predictions = model.predict(input_data_reshaped)
print(predictions)


In [None]:
 
# A second hand-written sample with four feature values.
input_data = np.array([[1, 9839.64, 170136.0, 160296.36]])

# Put the sample on the same scale as the training data before predicting;
# the raw values were previously fed to the network unscaled.
# NOTE(review): assumes `sc` (imported from DataPreProcessing and otherwise
# unused in this notebook) is the scaler fitted on the training features and
# that X_train holds its output — confirm.
input_data_scaled = sc.transform(input_data)

# Reshape to (batch_size, sequence_length, num_features) = (1, 4, 1).
input_data_reshaped = input_data_scaled.reshape((1, input_data_scaled.shape[1], 1))

# Predict on the reshaped sample and show the raw model output.
predictions = model.predict(input_data_reshaped)
print(predictions)


# Predicting the test set results

In [None]:
# Model outputs for every sample in the test split.
y_pred = model.predict(x=X_test)

In [None]:
# Decision cut-off: outputs at or above it are labelled 1, the rest 0.
# 0.5 is the conventional default when neither precision nor recall is
# deliberately favoured.
threshold = 0.5
y_pred_binary = (y_pred >= threshold).astype(int)

# Accuracy

In [None]:
# Test-set accuracy, displayed as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_binary)
round(100 * accuracy, 2)

# Classification Report

In [None]:
# Per-class precision / recall / F1 on the test split.
# The classes are named in the report itself (0 -> "No Fraud", 1 -> "Fraud");
# this replaces the stray string literal that previously served as a legend
# and was echoed as a second cell output.
print(classification_report(
    y_test,
    y_pred_binary,
    labels=[0, 1],
    target_names=['No Fraud', 'Fraud'],
))

# ROC Curve

In [None]:
# Make predictions on the test set
y_pred_proba = model.predict(X_test)

# Calculate ROC AUC score
ann_roc_auc = roc_auc_score(y_test, y_pred_proba)

# Calculate ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)

# Plot ROC curve
plt.figure()
plt.plot(fpr, tpr, label='CNN Classifier (area = %0.2f)' % ann_roc_auc)
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()


# Precision-Recall Curve

In [None]:
# Model outputs for the training split; the curve below therefore describes
# training data, as its title says.
y_prob_train_ann = model.predict(X_train)

# Precision/recall pairs across all score thresholds.
precision_ann, recall_ann, _ = precision_recall_curve(y_train, y_prob_train_ann)

# Shade the area under the precision-recall curve.
fig, ax = plt.subplots()
ax.fill_between(recall_ann, precision_ann)
ax.set_ylabel("Precision")
ax.set_xlabel("Recall")
ax.set_title("Train Precision-Recall curve")
plt.show()

# Area under the precision-recall curve.
# `precision_recall_curve` and `auc` are already imported in the notebook's
# import cell (and used above), so the duplicate mid-cell import was dropped.
auc_prc = auc(recall_ann, precision_ann)
print("AUC-PRC:", auc_prc)

# Confusion Matrix

In [None]:
# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred_binary)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()


# Calibration Curve

In [None]:
# Get predicted probabilities for the training set
y_prob_train_ann = model.predict(X_train)

# Calculate calibration curve
prob_true, prob_pred = calibration_curve(y_train, y_prob_train_ann, n_bins=10)

# Plot calibration curve
plt.plot(prob_pred, prob_true, marker='o', linestyle='-', label='Calibration Curve')
plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Perfectly calibrated')
plt.xlabel('Mean Predicted Probability')
plt.ylabel('Fraction of Positives')
plt.title('Calibration Curve')
plt.legend()
plt.show()
