Building a machine learning model to identify fraudulent credit card transactions.
Preprocess and normalize the transaction data, handle class
imbalance issues, and split the dataset into training and testing sets.
Training a classification algorithm, such as logistic regression or random forests, to classify transactions as fraudulent or genuine.
Evaluating the model's performance using metrics like precision, recall and F1-score, and consider techniques like oversampling or undersampling for improving results.

In [None]:
#Import libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.utils import resample
from imblearn.over_sampling import RandomOverSampler
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix


In [None]:
# Load the dataset
df = pd.read_csv('/content/drive/MyDrive/creditcard.csv')

In [None]:
# Preprocess and normalize the data
scaler = StandardScaler()
df[['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21']] = scaler.fit_transform(df[['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21']])

In [None]:
# Handle class imbalance issues
ros = RandomOverSampler(random_state=42)
X_rus, y_rus = ros.fit_resample(df.drop('Class', axis=1), df['Class'])

In [None]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_rus, y_rus, test_size=0.2, random_state=42)

In [None]:
# Train a classification algorithm
model_DT_final = DecisionTreeClassifier(max_depth=6, criterion='entropy')
model_DT_final.fit(X_train, y_train)

In [None]:
# Make predictions
y_pred_DT_final = model_DT_final.predict(X_test)
y_prob_DT_final = model_DT_final.predict_proba(X_test)

In [None]:
# Evaluate the model's performance
print('Evaluation Metrics Report'.center(65) + ('\n') + ('-' * 65))
print(classification_report(y_test, y_pred_DT_final, digits=4) + ('\n') + ('-' * 15))
print('AUC: {:.4f} \n'.format(roc_auc_score(y_test, y_pred_DT_final)) + ('-' * 65))

                    Evaluation Metrics Report                    
-----------------------------------------------------------------
              precision    recall  f1-score   support

           0     0.9637    0.9671    0.9654     56750
           1     0.9671    0.9637    0.9654     56976

    accuracy                         0.9654    113726
   macro avg     0.9654    0.9654    0.9654    113726
weighted avg     0.9654    0.9654    0.9654    113726

---------------
AUC: 0.9654 
-----------------------------------------------------------------
