<a href="https://colab.research.google.com/github/MahdiFaourr/MahdiFaourr/blob/main/credit_card_fraud_detection_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install opendatasets

In [None]:
!pip install scikeras

In [3]:
# Import necessary libraries and functions
import opendatasets as od
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split,cross_val_score,StratifiedKFold
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model

In [None]:
# Download the credit-card fraud dataset from Kaggle.
# The following cell reads the CSV from a folder named after the dataset,
# relative to the working directory.
DATASET_URL = "https://www.kaggle.com/datasets/nelgiriyewithana/credit-card-fraud-detection-dataset-2023"
od.download(DATASET_URL)

In [None]:
# Read the downloaded CSV into a DataFrame and display its first rows.
path_to_data = "credit-card-fraud-detection-dataset-2023/creditcard_2023.csv"
data = pd.read_csv(path_to_data)
data.head()

In [49]:
# Report the dimensions of the DataFrame (rows first, then columns).
n_rows, n_cols = data.shape
print("This data contains: "+str(n_rows)+" rows and "+str(n_cols)+" columns.")

This data contains: 568630 rows and 30 columns.


In [None]:
# Print pandas' built-in summary of the DataFrame (one line per column),
# which is a quick way to spot unexpected dtypes or missing values.
data.info()

In [None]:
# Count how many rows carry each value of the 'Class' label.
# (The histogram cell further down treats Class == 0 as Non-Fraud and
# Class == 1 as Fraud.)
data['Class'].value_counts()

In [9]:
# Remove the 'id' column so it is not picked up as a feature when the
# feature matrix is later built from every column except the label.
# errors="ignore" keeps the cell idempotent: re-running it after 'id' has
# already been dropped is a no-op instead of raising KeyError.
data = data.drop(columns="id", errors="ignore")

In [None]:
# Plot overlaid per-class histograms for the features V1 to V28
# on a 7x4 grid (one subplot per feature).
#
# The class masks do not depend on the feature being plotted, so they are
# computed once here instead of re-filtering the whole DataFrame twice on
# every loop iteration.
is_non_fraud = data['Class'] == 0
is_fraud = data['Class'] == 1

plt.figure(figsize=(16, 20))
for i in range(1, 29):
    feature = f'V{i}'
    plt.subplot(7, 4, i)
    # Same bins/alpha for both classes so the two distributions are comparable.
    plt.hist(data.loc[is_non_fraud, feature], bins=50, alpha=0.5, color='blue', label='Non-Fraud')
    plt.hist(data.loc[is_fraud, feature], bins=50, alpha=0.5, color='red', label='Fraud')
    plt.title(feature)
    plt.legend()
plt.tight_layout()
plt.show()


In [48]:
# Flag rows that are exact duplicates of an earlier row and report how many
# there are. Nothing is printed when there are none, and the duplicated rows
# are only counted here, not removed.
duplicates = data.duplicated()
duplicate_count = duplicates.sum()
if duplicate_count > 0:
    print("Duplicates found in the entire DataFrame.")
    print("The count of duplicated samples is :", duplicate_count, ".")


Duplicates found in the entire DataFrame.
The count of duplicated samples is : 1 .


In [21]:
# Scale every column by its own maximum (x / max(x)).
#
# NOTE(review): this caps each column at 1 only when its maximum is positive,
# and it does not bound values from below. The maxima are also computed on the
# full DataFrame before the train/test split in the next cell, so test rows
# influence the scaling, and the label column is divided as well. Confirm that
# this is intended.
data_max = data.max()
# Guard against division by zero: a column whose maximum is exactly 0 would
# turn into NaN/inf, so divide it by 1 instead (i.e. leave it unchanged).
data_max[data_max == 0] = 1
data = data.divide(data_max)

In [27]:
# Separate the features from the label and hold out 20% of the rows for testing.
# The label is selected by name rather than by position so the split does not
# silently break if the column order of the DataFrame ever changes.
x = data.drop(columns="Class").values
y = data["Class"].values
# stratify=y keeps the class proportions the same in the training and test
# sets, consistent with the StratifiedKFold cross-validation used later.
# random_state is fixed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=50, stratify=y
)

In [None]:
# Build a small fully connected network for binary classification.
model = Sequential([
    # First layer: its input width is read from the training matrix, so it
    # always matches the number of feature columns.
    Dense(32, input_dim=x_train.shape[1], activation='relu'),

    # Hidden layers, each followed by dropout to reduce overfitting.
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),

    # Output layer: a single sigmoid unit, i.e. one score in [0, 1] per sample.
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as the reported metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print the layer-by-layer architecture summary.
model.summary()


In [None]:
# Render a diagram of the model architecture to 'model.png', annotated with
# layer names and tensor shapes. As the last expression of the cell, the
# return value of plot_model is also displayed by the notebook.
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)

In [None]:
# Wrap the compiled Keras model in a scikit-learn compatible estimator.
# `model=` is the current scikeras argument name; the older `build_fn=`
# spelling is deprecated and emits a warning.
# NOTE(review): an already-built model instance is passed here rather than a
# model-building function. Confirm that each CV fold starts from a fresh copy.
estimator = KerasClassifier(model=model, epochs=10, batch_size=60, verbose=0)  # Adjust verbosity as needed

# 4-fold stratified cross-validation; shuffling uses a fixed seed so the fold
# assignment is reproducible.
kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)

# Fit and score the estimator once per fold, on the training split only.
results = cross_val_score(estimator, x_train, y_train, cv=kfold, scoring='accuracy')

# Report the mean fold accuracy with its standard deviation in parentheses.
print(f"Accuracy: {results.mean()} ({results.std()})")


In [None]:
# Train the model on the training split: 10 epochs, mini-batches of 60
# samples, with per-epoch progress output enabled.
model.fit(x=x_train, y=y_train, epochs=10, batch_size=60, verbose=1)

In [None]:
# Score the trained model on the held-out test split. The model was compiled
# with binary cross-entropy loss and the accuracy metric, so those are the
# values reported here.
model.evaluate(x=x_test, y=y_test)

In [42]:
def fraud_detector(input):
  """Return the trained model's prediction for ``input``.

  Thin wrapper around the notebook-level ``model``; the value returned is
  whatever ``model.predict`` produces. The network ends in a single sigmoid
  unit, so each sample yields one score between 0 and 1.

  Args:
    input: Feature array forwarded unchanged to ``model.predict``.
      Presumably shaped (n_samples, n_features) and scaled the same way as
      the training data. TODO confirm against callers.
      Note that this parameter name shadows the builtin ``input`` inside
      the function.

  Returns:
    The output of ``model.predict(input)``.
  """
  prediction = model.predict(input)
  return prediction

In [None]:
# Smoke-test fraud_detector on a single random sample.
# The sample is stored under its own name so the builtin `input` is not
# rebound at notebook scope, and its width is taken from the training matrix
# so it always matches the model's expected number of features.
# Values are drawn uniformly from [0, 1) and are not real transactions, so
# the prediction only shows that the call works end to end.
sample_input = np.random.rand(1, x_train.shape[1])
fraud_detector(sample_input)

In [None]:
# Destination file for the serialized model.
model_filename = 'my_model.pkl'

# Serialize the trained model object to disk with pickle.
# NOTE(review): Keras also provides its own persistence API (model.save);
# whether pickling round-trips cleanly may depend on the installed Keras
# version, so confirm before relying on this file. Only unpickle files from
# trusted sources.
with open(model_filename, 'wb') as model_file:
    pickle.dump(model, model_file)

print(f'Model saved to {model_filename}')
