In [58]:
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import classification_report

In [59]:
# Load the Credit Card Fraud Detection dataset
df = pd.read_csv('/content/creditcard.csv')

In [60]:
df['Class'].value_counts()

Class
0.0    270805
1.0       481
Name: count, dtype: int64

In [61]:
df = df.dropna()

In [62]:
# Split the dataset into features and target
X = df.drop('Class', axis=1)
y = df['Class']

In [63]:
# train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [64]:
# Handling Imbalanced Data
# Undersample the majority class
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X_train, y_train)

In [65]:
# Oversample the minority class
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

In [66]:
# Handling Missing Values
# Impute missing values using KNN imputation
imputer = KNNImputer()
X_imputed = imputer.fit_transform(X_resampled)

In [67]:
# Pruning Overfit Trees
# Train the decision tree classifier with parameter tuning
param_grid = {'ccp_alpha': [0.001, 0.002, 0.003]} # Adjust the range of alpha values
clf = DecisionTreeClassifier()
grid_search = GridSearchCV(clf, param_grid, cv=5)
grid_search.fit(X_imputed, y_resampled)

In [68]:
# Get the best pruned decision tree classifier
best_clf = grid_search.best_estimator_

In [69]:
# Evaluate the pruned decision tree classifier
X_test_imputed = imputer.transform(X_test) # Impute missing values in the test dataset
y_pred = best_clf.predict(X_test_imputed)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       1.00      0.99      1.00     81239
         1.0       0.14      0.83      0.24       147

    accuracy                           0.99     81386
   macro avg       0.57      0.91      0.62     81386
weighted avg       1.00      0.99      0.99     81386

