In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE

In [3]:
data = pd.read_csv('creditcard_2023.csv')
X = data.drop(['Class'], axis=1)
y = data['Class']

In [4]:
data.head()

Unnamed: 0,id,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-0.260648,-0.469648,2.496266,-0.083724,0.129681,0.732898,0.519014,-0.130006,0.727159,...,-0.110552,0.217606,-0.134794,0.165959,0.12628,-0.434824,-0.08123,-0.151045,17982.1,0
1,1,0.9851,-0.356045,0.558056,-0.429654,0.27714,0.428605,0.406466,-0.133118,0.347452,...,-0.194936,-0.605761,0.079469,-0.577395,0.19009,0.296503,-0.248052,-0.064512,6531.37,0
2,2,-0.260272,-0.949385,1.728538,-0.457986,0.074062,1.419481,0.743511,-0.095576,-0.261297,...,-0.00502,0.702906,0.945045,-1.154666,-0.605564,-0.312895,-0.300258,-0.244718,2513.54,0
3,3,-0.152152,-0.508959,1.74684,-1.090178,0.249486,1.143312,0.518269,-0.06513,-0.205698,...,-0.146927,-0.038212,-0.214048,-1.893131,1.003963,-0.51595,-0.165316,0.048424,5384.44,0
4,4,-0.20682,-0.16528,1.527053,-0.448293,0.106125,0.530549,0.658849,-0.21266,1.049921,...,-0.106984,0.729727,-0.161666,0.312561,-0.414116,1.071126,0.023712,0.419117,14278.97,0


In [5]:
# Scale Features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
# Handle Class Imbalance with SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)


In [8]:
logistic_clf = LogisticRegression(max_iter=1000, random_state=42)
random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)
gradient_boost_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
adaboost_clf = AdaBoostClassifier(n_estimators=100, random_state=42)

In [9]:
# Ensemble Voting Classifier
voting_clf = VotingClassifier(estimators=[
    ('logistic', logistic_clf),
    ('random_forest', random_forest_clf),
    ('gradient_boost', gradient_boost_clf),
    ('adaboost', adaboost_clf)
], voting='soft')  # Use 'hard' for majority voting

In [10]:
# Fit Ensemble Model
voting_clf.fit(X_train, y_train)

In [11]:
# Predict
y_pred = voting_clf.predict(X_test)
y_pred_prob = voting_clf.predict_proba(X_test)[:, 1]


In [12]:
# Metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_prob)

In [13]:
# Print Results
print("ML Ensemble Learning Results:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC-AUC: {roc_auc:.4f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

ML Ensemble Learning Results:
Accuracy: 0.9998
Precision: 0.9999
Recall: 0.9997
F1 Score: 0.9998
ROC-AUC: 1.0000

Confusion Matrix:
[[56747     3]
 [   19 56957]]


In [14]:
import pickle

# Save the model
with open('voting_classifier_model.pkl', 'wb') as f:
    pickle.dump(voting_clf, f)

# Optionally, save the scaler
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)


In [15]:
#!pip install tensorflow




In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

In [17]:
data = pd.read_csv('creditcard_2023.csv')

In [18]:
# Step 2: Data Preprocessing
# Separate features and target variable
X = data.drop(['Class'], axis=1)
y = data['Class']

In [19]:
# Scale the features using Min-Max Scaling
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [20]:
# Step 3: Handle Class Imbalance Using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)


In [22]:
# Step 5: Build the Deep Learning Model
model = Sequential()

In [23]:
model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
# Hidden Layers
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))  # Dropout to prevent overfitting
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))

In [25]:
# Output Layer
model.add(Dense(1, activation='sigmoid'))

In [26]:
# Compile the Model
optimizer = SGD(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [27]:
# Step 6: Train the Model
history = model.fit(X_train, y_train, epochs=10, batch_size=100, validation_split=0.2, verbose=1)

Epoch 1/10
[1m3640/3640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.8082 - loss: 0.3860 - val_accuracy: 0.9957 - val_loss: 0.0144
Epoch 2/10
[1m3640/3640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.9957 - loss: 0.0165 - val_accuracy: 0.9966 - val_loss: 0.0121
Epoch 3/10
[1m3640/3640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.9967 - loss: 0.0126 - val_accuracy: 0.9960 - val_loss: 0.0140
Epoch 4/10
[1m3640/3640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.9972 - loss: 0.0115 - val_accuracy: 0.9976 - val_loss: 0.0098
Epoch 5/10
[1m3640/3640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.9974 - loss: 0.0104 - val_accuracy: 0.9966 - val_loss: 0.0123
Epoch 6/10
[1m3640/3640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.9976 - loss: 0.0097 - val_accuracy: 0.9974 - val_loss: 0.0101
Epoch 7/10
[

In [28]:

# Step 7: Evaluate the Model
# Predict probabilities and binary labels
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

[1m3554/3554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step


In [29]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_prob)


In [30]:
print("Model Evaluation Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC-AUC: {roc_auc:.4f}")

Model Evaluation Metrics:
Accuracy: 0.9984
Precision: 0.9996
Recall: 0.9973
F1 Score: 0.9984
ROC-AUC: 0.9997


In [31]:
# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)


Confusion Matrix:
[[56726    24]
 [  155 56821]]


In [32]:
model.save('fraud_detection_model.h5')



In [33]:
from sklearn.linear_model import LogisticRegression

# Train Ensemble Model
voting_clf.fit(X_train, y_train)
ml_probs = voting_clf.predict_proba(X_test)[:, 1]

# Train Deep Learning Model
history = model.fit(X_train, y_train, epochs=10, batch_size=100, verbose=0)
dl_probs = model.predict(X_test).flatten()

# Combine Predictions (Stacking)
combined_probs = np.vstack((ml_probs, dl_probs)).T

# Train Meta-Classifier
meta_clf = LogisticRegression(random_state=42)
meta_clf.fit(combined_probs, y_test)

# Predict with Meta-Classifier
meta_probs = meta_clf.predict_proba(combined_probs)[:, 1]
meta_pred = (meta_probs > 0.5).astype(int)

# Evaluate Combined Model
accuracy = accuracy_score(y_test, meta_pred)
precision = precision_score(y_test, meta_pred)
recall = recall_score(y_test, meta_pred)
f1 = f1_score(y_test, meta_pred)
roc_auc = roc_auc_score(y_test, meta_probs)

print("Combined Model Results:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC-AUC: {roc_auc:.4f}")


[1m3554/3554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Combined Model Results:
Accuracy: 0.9996
Precision: 0.9998
Recall: 0.9994
F1 Score: 0.9996
ROC-AUC: 1.0000
