In [1]:
!nvidia-smi
!python -V

# Stable pins for Kaggle (Python 3.10) that work with PyCaret 3.3.x
!pip -q install -U pip setuptools wheel
!pip -q install "pycaret==3.3.2" "gradio>=4,<5"

# sanity import
import sys, pycaret
print("Python:", sys.version.split()[0], "| PyCaret:", pycaret.__version__)

Thu Oct 23 19:39:31 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   42C    P8              9W /   70W |       1MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla T4                       Off |   00

In [2]:

import pandas as pd
from sklearn.datasets import make_classification

# Synthetic, heavily imbalanced benchmark: 3 000 rows x 20 features.
# weights=[0.99] requests ~99 % of the rows in class 0, so class 1 plays the
# role of the rare "anomaly". The realised split is not exactly 99/1 (the
# recorded run shows 39 positives out of 3 000) -- presumably because of
# make_classification's default label noise (flip_y); confirm in the sklearn docs.
X, y = make_classification(
    n_samples=3000,
    n_features=20,
    n_informative=5,
    n_redundant=2,
    n_clusters_per_class=1,
    weights=[0.99],
    random_state=42,
)

# Columns are named f1..fN (N taken from the generated matrix) and the
# ground-truth label is appended as the last column.
feature_names = [f"f{idx}" for idx in range(1, X.shape[1] + 1)]
df = pd.DataFrame(X, columns=feature_names).assign(label=y)

# Class balance first, then a preview of the frame as the cell's output.
print(df["label"].value_counts())
df.head()


label
0    2961
1      39
Name: count, dtype: int64


Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,...,f12,f13,f14,f15,f16,f17,f18,f19,f20,label
0,1.875347,0.586757,1.968232,-1.626624,0.58558,0.268618,2.291169,0.958054,-0.102534,1.144365,...,3.849926,0.839006,-1.401662,1.928679,-1.381849,0.31271,0.445882,0.114153,2.095883,0
1,2.061095,-0.433544,0.842837,0.1359,0.460137,0.62193,0.460245,-0.162247,1.081299,0.722114,...,2.639795,0.963455,-0.863897,-0.236649,0.308878,-1.847045,0.967645,0.098742,1.162062,0
2,3.45595,-1.27047,2.382859,-0.001503,0.251193,1.440519,0.619774,1.803122,0.019901,-0.031178,...,4.703807,-1.356619,-0.564021,-0.034518,0.204365,0.965029,1.481747,1.082514,2.677745,0
3,0.543679,0.50676,0.087673,-0.215006,-0.694896,0.280058,0.250717,-1.726858,0.087835,-1.008512,...,1.8086,-0.641737,-1.271171,-0.901821,0.707843,-0.342798,0.085805,-0.529548,1.006992,0
4,3.574631,1.634861,-0.120433,0.054048,1.03088,0.116625,1.629869,0.560022,3.640293,0.990933,...,2.19528,-0.167304,-1.771841,-0.69821,-0.674027,0.944927,-0.120068,-0.496621,-0.911557,0


In [3]:
from pycaret.anomaly import *

# Unsupervised experiment: the ground-truth "label" column is withheld so the
# detector only ever sees the feature columns.
feature_frame = df.drop(columns=["label"])
s = setup(
    data=feature_frame,
    normalize=True,
    session_id=42,
    verbose=False,
)

# Build and fit an Isolation Forest with PyCaret's defaults. The recorded repr
# shows contamination=0.05, i.e. a higher anomaly share than the data was
# generated with.
iforest = create_model("iforest")
iforest


IForest(behaviour='new', bootstrap=False, contamination=0.05,
    max_features=1.0, max_samples='auto', n_estimators=100, n_jobs=-1,
    random_state=42, verbose=0)

In [4]:
# Attach the detector's predictions to the data; the result carries an
# "Anomaly" column, which is summarised below.
labeled = assign_model(iforest)

# Quick anomaly ratio: share of rows per "Anomaly" value, rounded to 3 decimals.
print("Detected anomalies:", labeled["Anomaly"].value_counts(normalize=True).round(3))

# The preview must be the cell's last expression: Jupyter only renders the
# final bare expression, so a head() placed mid-cell is silently discarded.
labeled.head()


Detected anomalies: Anomaly
0    0.95
1    0.05
Name: proportion, dtype: float64


In [6]:
# Visualise the fitted detector with PyCaret's t-SNE plot only.
# The UMAP variant stays disabled to avoid a UMAP/sklearn version mismatch.
plot_model(iforest, plot="tsne")
# Disabled alternative: plot_model(iforest, plot="umap")


In [7]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# pd.concat(axis=1) aligns rows on the index and pads mismatches with NaN,
# so verify up front that predictions and ground truth describe the same rows.
assert labeled.index.equals(df.index), (
    "labeled and df must share the same index to attach the true labels"
)

# Merge true labels back for evaluation (the detector never saw them).
merged = pd.concat([labeled, df["label"]], axis=1)

# labels=[0, 1] pins the matrix to 2x2 in a fixed order (row/col 0 = normal,
# row/col 1 = anomaly) even if one class is absent from the predictions.
cm = confusion_matrix(merged["label"], merged["Anomaly"], labels=[0, 1])
print("Confusion matrix (rows=true, cols=pred):\n", cm)
print(classification_report(merged["label"], merged["Anomaly"], digits=3))


Confusion matrix (rows=true, cols=pred):
 [[2815  146]
 [  35    4]]
              precision    recall  f1-score   support

           0      0.988     0.951     0.969      2961
           1      0.027     0.103     0.042        39

    accuracy                          0.940      3000
   macro avg      0.507     0.527     0.506      3000
weighted avg      0.975     0.940     0.957      3000



In [8]:
import os

# Output locations are declared once and reused for creating directories,
# writing files and reporting, so the printed summary cannot drift from the
# paths that were actually written.
FIGURES_DIR = "/kaggle/working/media/figures"
MODEL_DIR = "/kaggle/working/notebooks"
labels_csv_path = os.path.join(FIGURES_DIR, "anomaly_iforest_labels.csv")
# The model path is passed without an extension; the summary below reports
# it with ".pkl" appended.
model_path_stem = os.path.join(MODEL_DIR, "anomaly_iforest_final")

os.makedirs(FIGURES_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)

# Persist the labelled rows (without the index) and the fitted pipeline/model.
labeled.to_csv(labels_csv_path, index=False)
save_model(iforest, model_path_stem)

print("Saved:")
print(f"- {labels_csv_path}")
print(f"- {model_path_stem}.pkl")


Transformation Pipeline and Model Successfully Saved
Saved:
- /kaggle/working/media/figures/anomaly_iforest_labels.csv
- /kaggle/working/notebooks/anomaly_iforest_final.pkl
