In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, StackingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

In [2]:
data = pd.read_csv('hypothetical_credit_card_fraud_dataset.csv')

In [3]:
data.head()

Unnamed: 0,Time,Amount,V1,V2,V3,V4,V5,V6,V7,V8,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Class
0,15796,2673.238169,-1.154637,-0.821287,-0.93266,-0.904871,0.369161,0.181057,-1.551648,0.856511,...,1.007726,0.523766,-0.215739,0.422013,-0.784941,0.196971,0.640914,0.848113,-1.278296,0
1,861,3375.974299,-0.880606,1.910436,0.590781,-0.42888,-1.023945,-0.543439,0.770353,-0.987865,...,-0.390747,0.399696,0.45338,0.516466,-1.930938,0.789793,-2.435428,0.153,1.191734,0
2,76821,2087.823971,0.764188,-0.989004,-0.693922,-0.863511,0.746436,1.217293,2.474264,1.038524,...,-1.22136,1.008072,-1.567991,-0.60528,0.589278,0.343764,0.016343,1.014318,0.419039,0
3,54887,3653.664379,1.972525,-1.008268,-0.394,0.956394,0.247423,0.633243,0.682537,-2.596158,...,-0.582964,0.581572,-0.459876,0.254293,0.646933,-1.65792,0.0768,0.29033,-0.629044,0
4,6266,2609.789614,1.382433,-0.70222,0.022491,0.287854,-0.297841,-0.10097,0.184764,0.416518,...,0.797399,-0.678255,1.765494,-0.13233,-1.903325,-0.212728,-0.157566,1.187292,0.717499,0


In [4]:
# Separate features and target variable
X = data.drop(columns = ['Class'])
y = data['Class']

# Splitting for Training and Testing

In [6]:
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Bagging

In [7]:
# Apply Bagging with Decision Trees as the base model
base_model = DecisionTreeClassifier(random_state=42)
bagging_model = BaggingClassifier(base_model, n_estimators=10, random_state=42)
bagging_model.fit(X_train, y_train)

# Stacking

In [9]:
# Apply Stacking with Decision Trees as base models and Logistic Regression as the meta-model
base_models = [
    ('dt1', DecisionTreeClassifier(random_state=42)),
    ('dt2', DecisionTreeClassifier(random_state=42)),
    ('dt3', DecisionTreeClassifier(random_state=42)),
]
meta_model = DecisionTreeClassifier(random_state=42)
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model)
stacking_model.fit(X_train, y_train)

# Boosting

In [10]:
# Apply AdaBoost with Decision Trees as the base model
base_model_boost = DecisionTreeClassifier(random_state=42)
adaboost_model = AdaBoostClassifier(base_model_boost, n_estimators=50, random_state=42)
adaboost_model.fit(X_train, y_train)

# Making Predictions with Bagging , Stacking And Boosting 

In [11]:
# Make predictions on the test set
bagging_preds = bagging_model.predict(X_test)
stacking_preds = stacking_model.predict(X_test)
adaboost_preds = adaboost_model.predict(X_test)

# Calcultaing Their Accuracies

In [12]:
# Calculate accuracy for each model
bagging_accuracy = accuracy_score(y_test, bagging_preds)
stacking_accuracy = accuracy_score(y_test, stacking_preds)
adaboost_accuracy = accuracy_score(y_test, adaboost_preds)

# Comparing Accuracies

In [13]:
print("Bagging Accuracy:", bagging_accuracy)
print("Stacking Accuracy:", stacking_accuracy)
print("AdaBoost Accuracy:", adaboost_accuracy)

Bagging Accuracy: 0.989
Stacking Accuracy: 0.989
AdaBoost Accuracy: 0.976
