In [3]:
import pandas as pd

# Load the dataset
file_path = "C:/Users/HP/OneDrive/Desktop/creditcard.csv"
df = pd.read_csv(file_path)

# Show the shape and first few rows of the dataset
df.shape, df.head()


((284807, 31),
    Time        V1        V2        V3        V4        V5        V6        V7  \
 0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
 1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
 2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
 3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
 4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   
 
          V8        V9  ...       V21       V22       V23       V24       V25  \
 0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
 1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
 2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
 3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
 4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   
 
   

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

# Step 1: Normalize 'Time' and 'Amount'
df_normalized = df.copy()
scaler = StandardScaler()
df_normalized[['Time', 'Amount']] = scaler.fit_transform(df[['Time', 'Amount']])

# Step 2: Define features and target
X = df_normalized.drop('Class', axis=1)
y = df_normalized['Class']

# Step 3: Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Step 4: Address class imbalance with SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Step 5: Train models
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X_resampled, y_resampled)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_resampled, y_resampled)

# Step 6: Evaluate models
log_reg_preds = log_reg.predict(X_test)
rf_preds = rf.predict(X_test)

log_reg_report = classification_report(y_test, log_reg_preds, output_dict=True)
rf_report = classification_report(y_test, rf_preds, output_dict=True)

log_reg_report, rf_report


({'0': {'precision': 0.99985563215072,
   'recall': 0.9743598761958356,
   'f1-score': 0.9869431233189049,
   'support': 56864.0},
  '1': {'precision': 0.05813953488372093,
   'recall': 0.9183673469387755,
   'f1-score': 0.10935601458080195,
   'support': 98.0},
  'accuracy': 0.9742635441171307,
  'macro avg': {'precision': 0.5289975835172205,
   'recall': 0.9463636115673055,
   'f1-score': 0.5481495689498534,
   'support': 56962.0},
  'weighted avg': {'precision': 0.9982354612028571,
   'recall': 0.9742635441171307,
   'f1-score': 0.9854332827821202,
   'support': 56962.0}},
 {'0': {'precision': 0.9997186318473578,
   'recall': 0.9997362127180641,
   'f1-score': 0.9997274222054181,
   'support': 56864.0},
  '1': {'precision': 0.845360824742268,
   'recall': 0.8367346938775511,
   'f1-score': 0.841025641025641,
   'support': 98.0},
  'accuracy': 0.9994557775359011,
  'macro avg': {'precision': 0.9225397282948129,
   'recall': 0.9182354532978076,
   'f1-score': 0.9203765316155296,
   's