In [9]:
!pip install imbalanced-learn --quiet


[notice] A new release of pip is available: 25.0.1 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline as ImbPipeline
import numpy as np

df = pd.read_csv("credit_card_fraud_dataset.csv") 
df = df.drop(columns=['TransactionID', 'TransactionDate'])

X = df.drop('IsFraud', axis=1)
y = df['IsFraud']

categorical_features = ['TransactionType', 'Location']
numeric_features = ['Amount', 'MerchantID']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ]
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

baseline_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=100, random_state=42))
])

baseline_pipeline.fit(X_train, y_train)
y_pred_baseline = baseline_pipeline.predict(X_test)
report_baseline = classification_report(y_test, y_pred_baseline, output_dict=True)

balanced_pipeline = ImbPipeline(steps=[
    ('preprocessor', preprocessor),
    ('sampler', SMOTE(random_state=42)),
    ('classifier', MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=100, random_state=42))
])

balanced_pipeline.fit(X_train, y_train)
y_pred_balanced = balanced_pipeline.predict(X_test)
report_balanced = classification_report(y_test, y_pred_balanced, output_dict=True)

def extract_metrics(report):
    return {
        "Precision": round(report["1"]["precision"], 3),
        "Recall": round(report["1"]["recall"], 3),
        "F1-Score": round(report["1"]["f1-score"], 3),
    }

comparison_df = pd.DataFrame({
    "Before Balancing": extract_metrics(report_baseline),
    "After Balancing (SMOTE)": extract_metrics(report_balanced)
})

metrics = ["precision", "recall", "f1-score"]
before = [report_baseline["1"][m] for m in metrics]
after = [report_balanced["1"][m] for m in metrics]

comparison_df = pd.DataFrame({
    "Metric": ["Precision", "Recall", "F1-Score"],
    "Before Balancing": np.round(before, 3),
    "After Balancing (SMOTE)": np.round(after, 3)
})

f1_before = report_baseline["1"]["f1-score"]
f1_after = report_balanced["1"]["f1-score"]
improvement = ((f1_after - f1_before) / f1_before) * 100 if f1_before > 0 else 0
note = f"🔹 F1-Score improved by {improvement:.2f}% after applying SMOTE."

print(" Comparison of Precision, Recall, and F1-Score (Class: Fraud)")
display(comparison_df)
print("\n" + note)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


 Comparison of Precision, Recall, and F1-Score (Class: Fraud)




Unnamed: 0,Metric,Before Balancing,After Balancing (SMOTE)
0,Precision,0.0,0.011
1,Recall,0.0,0.28
2,F1-Score,0.0,0.021



🔹 F1-Score improved by 0.00% after applying SMOTE.
