In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from imblearn.under_sampling import NearMiss
from imblearn.over_sampling import SMOTE, RandomOverSampler
from imblearn.combine import SMOTETomek
from imblearn.ensemble import EasyEnsembleClassifier

In [None]:
# Read the raw credit-card CSV into a DataFrame.
# NOTE(review): absolute path under /content -- presumably a Colab upload;
# confirm the location before running in another environment.
csv_path = '/content/Credit Card Fraud Detection (1).csv'
data = pd.read_csv(csv_path)

In [None]:
# Display the (rows, columns) dimensions of the loaded frame.
data.shape

(984, 5)

In [None]:
# Print each column's name, non-null count and dtype for the loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 984 entries, 0 to 983
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   credit_card        984 non-null    int64 
 1   city               984 non-null    object
 2   state              984 non-null    object
 3   zipcode            984 non-null    int64 
 4   credit_card_limit  984 non-null    int64 
dtypes: int64(3), object(2)
memory usage: 38.6+ KB


In [None]:
# One-hot encode the text columns. drop='first' omits the first category of
# each column and sparse_output=False yields a dense NumPy array.
# NOTE(review): the encoder is fitted on the full frame, before the train/test
# split performed later in the notebook, so held-out rows influence the encoding.
categorical_columns = ['city', 'state']
encoder = OneHotEncoder(drop='first', sparse_output=False)
categorical_frame = data[categorical_columns]
encoded_cat = encoder.fit_transform(categorical_frame)


In [None]:
# Standardize the integer columns with StandardScaler's default settings.
# NOTE(review): the scaler is fitted on the full frame, before the train/test
# split performed later in the notebook, so held-out rows influence the scaling.
# NOTE(review): zipcode is scaled as if it were a continuous quantity --
# confirm that is intended rather than treating it as a category.
numerical_columns = ['credit_card_limit', 'zipcode']
scaler = StandardScaler()
numerical_frame = data[numerical_columns]
scaled_num = scaler.fit_transform(numerical_frame)

In [None]:
# Assemble the feature matrix: one-hot columns first, scaled numeric columns
# appended to their right (column-wise stacking of the two 2-D arrays).
X = np.hstack((encoded_cat, scaled_num))

In [None]:
# Build the target vector: one random 0/1 label per row of X, made
# reproducible by seeding NumPy's global generator.
# NOTE(review): these labels are drawn at random rather than read from the
# data (data.info() lists only credit_card, city, state, zipcode and
# credit_card_limit), so every model fitted on `y` is learning noise --
# confirm where the real fraud label should come from.
np.random.seed(42)
n_samples = X.shape[0]
y = np.random.randint(low=0, high=2, size=n_samples)

In [None]:
# Use 70% of the rows for training and the remainder for testing; the fixed
# seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=42,
)

In [None]:
# Shuffled 5-fold splitter with a fixed seed.
# NOTE(review): `cv` is not referenced by any of the cells visible here.
cv = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [None]:
# Baseline: a random forest fitted on the untouched training split
# (fit() returns the estimator itself, so it can be chained).
rfc = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = rfc.predict(X_test)

# Report the confusion matrix, overall accuracy and per-class metrics on the
# held-out split, one per line.
print("\nRandomForestClassifier Results:")
print(
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)


RandomForestClassifier Results:
[[72 79]
 [66 79]]
0.5101351351351351
              precision    recall  f1-score   support

           0       0.52      0.48      0.50       151
           1       0.50      0.54      0.52       145

    accuracy                           0.51       296
   macro avg       0.51      0.51      0.51       296
weighted avg       0.51      0.51      0.51       296



In [None]:
# Cost-sensitive variant: samples of class 1 carry 100x the weight of class 0.
# NOTE(review): the 1:100 ratio is hard-coded; confirm it reflects the
# intended misclassification cost.
class_weight = {0: 1, 1: 100}
rfc_weighted = RandomForestClassifier(
    random_state=42,
    class_weight=class_weight,
).fit(X_train, y_train)
y_pred_weighted = rfc_weighted.predict(X_test)

# Same three reports as the other experiments, one per line.
print("\nRandomForestClassifier with class_weight Results:")
print(
    confusion_matrix(y_test, y_pred_weighted),
    accuracy_score(y_test, y_pred_weighted),
    classification_report(y_test, y_pred_weighted),
    sep="\n",
)


RandomForestClassifier with class_weight Results:
[[ 37 114]
 [ 31 114]]
0.5101351351351351
              precision    recall  f1-score   support

           0       0.54      0.25      0.34       151
           1       0.50      0.79      0.61       145

    accuracy                           0.51       296
   macro avg       0.52      0.52      0.47       296
weighted avg       0.52      0.51      0.47       296



In [None]:
# Under-sampling experiment: resample the training split with NearMiss
# (default settings), then fit a random forest on the resampled data.
under_sampler = NearMiss()
X_resampled, y_resampled = under_sampler.fit_resample(X_train, y_train)

# Use a dedicated estimator rather than re-fitting the shared baseline `rfc`,
# so the baseline model is not silently overwritten by this cell. The seed is
# the same, so the fitted forest is the same as a re-fit of `rfc` would be.
rfc_under = RandomForestClassifier(random_state=42)
rfc_under.fit(X_resampled, y_resampled)

# Evaluate on the original (un-resampled) test split.
y_pred_under = rfc_under.predict(X_test)
print("\nUnder Sampling Results:")
print(confusion_matrix(y_test, y_pred_under))
print(accuracy_score(y_test, y_pred_under))
print(classification_report(y_test, y_pred_under))


Under Sampling Results:
[[69 82]
 [69 76]]
0.48986486486486486
              precision    recall  f1-score   support

           0       0.50      0.46      0.48       151
           1       0.48      0.52      0.50       145

    accuracy                           0.49       296
   macro avg       0.49      0.49      0.49       296
weighted avg       0.49      0.49      0.49       296



In [None]:
# Over-sampling experiment: resample the training split with SMOTE (seeded
# for reproducibility), then fit a random forest on the resampled data.
smote = SMOTE(random_state=42)
X_resampled_smote, y_resampled_smote = smote.fit_resample(X_train, y_train)

# Use a dedicated estimator rather than re-fitting the shared baseline `rfc`,
# so the baseline model is not silently overwritten by this cell. The seed is
# the same, so the fitted forest is the same as a re-fit of `rfc` would be.
rfc_smote = RandomForestClassifier(random_state=42)
rfc_smote.fit(X_resampled_smote, y_resampled_smote)

# Evaluate on the original (un-resampled) test split.
y_pred_smote = rfc_smote.predict(X_test)
print("\nOver Sampling (SMOTE) Results:")
print(confusion_matrix(y_test, y_pred_smote))
print(accuracy_score(y_test, y_pred_smote))
print(classification_report(y_test, y_pred_smote))


Over Sampling (SMOTE) Results:
[[74 77]
 [66 79]]
0.5168918918918919
              precision    recall  f1-score   support

           0       0.53      0.49      0.51       151
           1       0.51      0.54      0.52       145

    accuracy                           0.52       296
   macro avg       0.52      0.52      0.52       296
weighted avg       0.52      0.52      0.52       296



In [None]:
# Combined-sampling experiment: resample the training split with SMOTETomek
# (seeded for reproducibility), then fit a random forest on the result.
smote_tomek = SMOTETomek(random_state=42)
X_resampled_st, y_resampled_st = smote_tomek.fit_resample(X_train, y_train)

# Use a dedicated estimator rather than re-fitting the shared baseline `rfc`,
# so the baseline model is not silently overwritten by this cell. The seed is
# the same, so the fitted forest is the same as a re-fit of `rfc` would be.
rfc_st = RandomForestClassifier(random_state=42)
rfc_st.fit(X_resampled_st, y_resampled_st)

# Evaluate on the original (un-resampled) test split.
y_pred_st = rfc_st.predict(X_test)
print("\nSMOTETomek Results:")
print(confusion_matrix(y_test, y_pred_st))
print(accuracy_score(y_test, y_pred_st))
print(classification_report(y_test, y_pred_st))


SMOTETomek Results:
[[74 77]
 [67 78]]
0.5135135135135135
              precision    recall  f1-score   support

           0       0.52      0.49      0.51       151
           1       0.50      0.54      0.52       145

    accuracy                           0.51       296
   macro avg       0.51      0.51      0.51       296
weighted avg       0.51      0.51      0.51       296



In [None]:
# Ensemble experiment: EasyEnsembleClassifier fitted directly on the training
# split (fit() returns the estimator itself, so it can be chained).
eec = EasyEnsembleClassifier(random_state=42).fit(X_train, y_train)
y_pred_eec = eec.predict(X_test)

# Same three reports as the other experiments, one per line.
print("\nEasy Ensemble Results:")
print(
    confusion_matrix(y_test, y_pred_eec),
    accuracy_score(y_test, y_pred_eec),
    classification_report(y_test, y_pred_eec),
    sep="\n",
)


Easy Ensemble Results:
[[79 72]
 [71 74]]
0.5168918918918919
              precision    recall  f1-score   support

           0       0.53      0.52      0.52       151
           1       0.51      0.51      0.51       145

    accuracy                           0.52       296
   macro avg       0.52      0.52      0.52       296
weighted avg       0.52      0.52      0.52       296

