In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

In [2]:
# Load the dataset
df = pd.read_csv(r'C:\Users\abi3c\Desktop\creditcard.csv')

In [3]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df.drop('Class', axis=1), df['Class'], test_size=0.2, random_state=42)

In [4]:
# Train a Gradient Boosting Machine (GBM) model
gbm = GradientBoostingClassifier()
gbm.fit(X_train, y_train)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [5]:
# Make predictions on the test set
y_pred = gbm.predict(X_test)

In [6]:
# Evaluate the performance of the model
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))
print('ROC AUC Score:', roc_auc_score(y_test, y_pred))

Confusion Matrix:
 [[56843    21]
 [   39    59]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.74      0.60      0.66        98

    accuracy                           1.00     56962
   macro avg       0.87      0.80      0.83     56962
weighted avg       1.00      1.00      1.00     56962

ROC AUC Score: 0.8008357570659101


## let's try in balanced datasets

In [7]:
non_fraud = df[df['Class']==0]
fraud = df[df['Class']==1]

In [8]:
legit = non_fraud.sample(n=508)

In [9]:
ndf = pd.concat([legit,fraud], axis = 0)

In [10]:
X = ndf.drop(columns='Class', axis=1)
Y = ndf['Class']

In [11]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [12]:
# Train a Gradient Boosting Machine (GBM) model
gbm = GradientBoostingClassifier()
gbm.fit(X_train, Y_train)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [13]:
# Make predictions on the test set
y_pred = gbm.predict(X_test)

In [18]:
# Evaluate the performance of the model
print('Confusion Matrix:\n', confusion_matrix(Y_test, y_pred))
print('Classification Report:\n', classification_report(Y_test, y_pred))
print('ROC AUC Score:', roc_auc_score(Y_test, y_pred))

Confusion Matrix:
 [[99  3]
 [13 85]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.97      0.93       102
           1       0.97      0.87      0.91        98

    accuracy                           0.92       200
   macro avg       0.92      0.92      0.92       200
weighted avg       0.92      0.92      0.92       200

ROC AUC Score: 0.9189675870348138
