In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

In [5]:
# Load the credit-card transactions CSV into a DataFrame.
# The path is written as a raw string: in a normal string literal the
# backslashes in '\I' and '\c' are invalid escape sequences, which raise a
# SyntaxWarning on Python 3.12+ and are slated to become an error.
# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory so the notebook runs on other machines.
data = pd.read_csv(r'E:\Internship\creditcard.csv')

In [6]:
# Print the column index so the columns used by the later cells
# ('Amount', 'Time', 'Class') can be verified by eye.
print(data.columns)

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')


In [7]:
# Preprocess the raw frame:
#   * 'Amount' is mandatory -- abort with KeyError when it is absent,
#     otherwise replace it with its standardised values.
#   * 'Time' is optional -- drop it when present, otherwise just report it.
# NOTE(review): the scaler is fit on every row before the train/test split
# performed later, so held-out rows contribute to the scaling statistics.
if 'Amount' not in data.columns:
    raise KeyError("The 'Amount' column is not found in the dataset.")

# StandardScaler works on 2-D input, hence the reshape to a single column.
data['Amount'] = StandardScaler().fit_transform(
    data['Amount'].values.reshape(-1, 1)
)

if 'Time' not in data.columns:
    print("The 'Time' column is not found in the dataset and will be skipped.")
else:
    data = data.drop(columns=['Time'])

In [8]:
# Split the frame into the feature matrix X (every column except 'Class')
# and the label vector y (the 'Class' column). 'Class' is mandatory.
if 'Class' not in data.columns:
    raise KeyError("The 'Class' column is not found in the dataset.")

y = data['Class']
X = data.drop(columns=['Class'])

In [9]:
# Handle class imbalance: try SMOTE oversampling first; if it raises any
# Exception, report the message and fall back to random undersampling.
# Either path leaves the rebalanced data in X_resampled / y_resampled.
# NOTE(review): the broad `except Exception` hides the exception type and
# silently switches strategy (oversampling -> undersampling), which changes
# the size of the dataset used downstream. The recorded output shows SMOTE
# failing with "'NoneType' object has no attribute 'split'"; that looks like
# an environment/library problem rather than a data problem (possibly an
# outdated threadpoolctl) -- TODO confirm and fix the environment.
# NOTE(review): resampling is applied to the full X, y before the
# train/test split in the next cell, so the held-out set is rebalanced too
# (and, with SMOTE, could contain points synthesised from training rows).
# Consider splitting first and resampling only the training portion.
try:
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
except Exception as e:
    print(f"SMOTE encountered an error: {e}")
    print("Using RandomUnderSampler as an alternative.")
    rus = RandomUnderSampler(random_state=42)
    X_resampled, y_resampled = rus.fit_resample(X, y)

SMOTE encountered an error: 'NoneType' object has no attribute 'split'
Using RandomUnderSampler as an alternative.


In [10]:
# Hold out 20% of the resampled rows for evaluation; the fixed seed keeps
# the partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled,
    y_resampled,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Fit a 100-tree random forest on the training split (seeded for
# reproducibility). The fit call stays the last expression of the cell.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train, y_train)

In [12]:
# Predict class labels for the held-out test features with the fitted model.
y_pred = model.predict(X_test)

In [13]:
# Evaluate on the held-out split: print the full per-class report first,
# then the three headline scores individually.
print(classification_report(y_test, y_pred))

# Each scorer is called with the same (y_true, y_pred) pair, in the order
# precision -> recall -> F1.
precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (precision_score, recall_score, f1_score)
)

print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-Score: {f1}')

              precision    recall  f1-score   support

           0       0.90      0.97      0.93        99
           1       0.97      0.89      0.93        98

    accuracy                           0.93       197
   macro avg       0.93      0.93      0.93       197
weighted avg       0.93      0.93      0.93       197

Precision: 0.9666666666666667
Recall: 0.8877551020408163
F1-Score: 0.9255319148936171
