In [1]:
import pandas as pd

In [2]:
# Read the transactions table from the CSV that sits next to the notebook,
# then preview the first five rows as the cell's output.
df = pd.read_csv(filepath_or_buffer="credit_card_fraud_dataset.csv")
df.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [3]:
from sklearn.ensemble import IsolationForest

# Separate the label column from the feature columns.
y = df["Class"]
x = df.drop("Class", axis="columns")

# Only rows labelled Class == 0 are used to fit the detector.
x_train = x.loc[y == 0]

# contamination is set to the observed share of Class == 1 rows;
# random_state pins the forest so reruns give the same scores.
model = IsolationForest(
    n_estimators=200,
    contamination=y.mean(),
    random_state=42,
).fit(x_train)

# score_samples returns lower values for more abnormal points, so the sign is
# flipped to make "higher = more anomalous".
# NOTE(review): x contains the rows used for fitting, so the scores (and any
# metric built on them) are in-sample for the Class == 0 rows.
scores = -model.score_samples(x)

In [4]:
import numpy as np

# Cut-off chosen so that the share of rows at or above it matches the
# observed positive-class rate (the upper y.mean() quantile of the scores).
threshold = np.quantile(scores, q=1 - y.mean())

# Hard 0/1 labels: 1 where the anomaly score reaches the cut-off.
y_pred = np.greater_equal(scores, threshold).astype(int)

In [5]:
from sklearn.metrics import average_precision_score, roc_auc_score

# Threshold-free ranking metrics of the anomaly scores against the labels:
# area under the ROC curve, and average precision (summary of the PR curve).
roc_auc, pr_auc = (
    metric(y, scores) for metric in (roc_auc_score, average_precision_score)
)
roc_auc, pr_auc

(0.9488083325076431, 0.11440378774509713)

In [6]:
import plotly.io as pio

# Make the dark theme the default template for every Plotly figure created
# after this cell runs.
pio.templates.default = "plotly_dark"

In [7]:
import plotly.graph_objects as go
from sklearn.metrics import precision_recall_curve, roc_curve

# Curve coordinates for both diagnostic plots; the thresholds are discarded.
# precision/recall are computed here and plotted by the PR-curve cell.
fpr, tpr, _ = roc_curve(y, scores)
precision, recall, _ = precision_recall_curve(y, scores)

# ROC curve plus the diagonal a random ranking would produce.
fig = go.Figure(
    data=[
        go.Scatter(
            x=fpr,
            y=tpr,
            mode="lines",
            name=f"ROC (AUC={roc_auc:.3f})",
        ),
        go.Scatter(
            x=[0, 1],
            y=[0, 1],
            mode="lines",
            line={"dash": "dash"},
            name="Random",
        ),
    ]
)
fig.update_layout(
    title="ROC Curve",
    xaxis_title="False Positive Rate",
    yaxis_title="True Positive Rate",
)
fig.show()

In [8]:
# Precision-recall curve with a horizontal reference line at the
# positive-class rate (y.mean()).
fig = go.Figure(
    data=[
        go.Scatter(
            x=recall,
            y=precision,
            mode="lines",
            name=f"PR (AUC={pr_auc:.3f})",
        ),
        go.Scatter(
            x=[0, 1],
            y=[y.mean()] * 2,
            mode="lines",
            line={"dash": "dash"},
            name="Baseline",
        ),
    ]
)
fig.update_layout(
    title="Precision–Recall Curve",
    xaxis_title="Recall",
    yaxis_title="Precision",
)
fig.show()

In [9]:
# Distribution of anomaly scores over the rows labelled Class == 0.
fig = go.Figure(
    data=[go.Histogram(x=scores[y == 0], nbinsx=100, name="Normal")]
)
fig.update_layout(
    title="Isolation Forest scores — Normal",
    xaxis_title="Anomaly score",
    yaxis_title="Count",
)
fig.show()

In [10]:
# Distribution of anomaly scores over the rows labelled Class == 1.
fig = go.Figure(
    data=[go.Histogram(x=scores[y == 1], nbinsx=100, name="Fraud")]
)
fig.update_layout(
    title="Isolation Forest scores — Fraud",
    xaxis_title="Anomaly score",
    yaxis_title="Count",
)
fig.show()