In [19]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report

In [20]:
# Read creditcard.csv into a DataFrame.
# Adjust the path below to wherever your copy of the file lives.
file_path = '/content/creditcard.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [21]:
# Split the frame column-wise: 'Class' is the prediction target,
# every other column is kept as a feature.
y = data.loc[:, 'Class']
X = data.drop(columns='Class')

In [22]:
# Standardize every feature column with StandardScaler.
# NOTE(review): the scaler is fitted on all rows of X, including the rows that
# a later cell holds out for testing.
scaler = StandardScaler().fit(X)
X_normalized = scaler.transform(X)

In [23]:
# Hold out 20% of the rows for testing. The split is done on the raw features
# so that the scaler below can be fitted on the training rows only; fitting it
# on every row (as X_normalized was) lets test-set statistics leak into the
# training data.
# NOTE(review): the positive class is very rare here; consider stratify=y so
# both splits keep the same class ratio.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling parameters from the training rows, then apply that same
# transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [24]:
import numpy as np

In [25]:
def _drop_missing_rows(features, labels):
    """Return ``(features, labels)`` without rows that contain missing values.

    A row is dropped when its label is missing or when any of its feature
    values is missing. When nothing is missing the inputs are returned
    unchanged (no copy is made).

    Parameters
    ----------
    features : 2-D array or DataFrame, one row per sample.
    labels : 1-D array or Series, aligned positionally with ``features``.

    Returns
    -------
    tuple
        The filtered ``(features, labels)`` pair.
    """
    # Work with plain positional boolean arrays so that a shuffled pandas
    # index on `labels` cannot misalign the mask against `features`.
    incomplete = (
        np.asarray(pd.isna(labels))
        | np.asarray(pd.isna(features)).any(axis=1)
    )
    if not incomplete.any():
        return features, labels
    return features[~incomplete], labels[~incomplete]


# Clean both splits: missing values in the training rows would break the
# resampling/fitting steps, and missing values in the test rows would break
# prediction or scoring.
X_train, y_train = _drop_missing_rows(X_train, y_train)
X_test, y_test = _drop_missing_rows(X_test, y_test)

In [26]:
# Balance the classes in the training split by oversampling with SMOTE.
# Only the training split is resampled; the test split is left as it is.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
X_resampled, y_resampled = resampled

In [27]:
# Fit a 100-tree random forest on the resampled training data.
# The seed is fixed so that repeated runs build the same forest.
forest_params = {'n_estimators': 100, 'random_state': 42}
rf_classifier = RandomForestClassifier(**forest_params)
rf_classifier.fit(X_resampled, y_resampled)

In [28]:
# Let the fitted forest label every row of the held-out test split.
y_pred = rf_classifier.predict(X=X_test)

In [29]:
# Summarise per-class precision, recall and F1 on the held-out test split.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.89      0.84      0.86        98

    accuracy                           1.00     56962
   macro avg       0.95      0.92      0.93     56962
weighted avg       1.00      1.00      1.00     56962

