In [6]:
# ==== 1) Install ====
# ==== HARD BLOCK RAPIDS (fixes cudaErrorInsufficientDriver) ====
import os, sys, textwrap, pathlib, shutil

# 1) Ensure no old stubs
for pkg in ("cuml","cudf","cupy","rmm"):
    shutil.rmtree(pkg, ignore_errors=True)

# 2) Create stub packages that raise ImportError on import
for pkg in ("cuml","cudf","cupy","rmm"):
    pathlib.Path(pkg).mkdir(exist_ok=True)
    with open(f"{pkg}/__init__.py","w") as f:
        f.write('raise ImportError("Stub: {pkg} not available")\n')

# 3) Put current dir first on sys.path so stubs shadow any preinstalled RAPIDS
if "" not in sys.path:
    sys.path.insert(0, "")

print("RAPIDS stubs installed & shadowing real packages.")



# 2) Install PyCaret (CPU-safe) + deps
!pip -q install -U pip setuptools wheel
!pip -q install -U pycaret pandas scikit-learn



RAPIDS stubs installed & shadowing real packages.


In [7]:
# ==== 2) Load data (Iris) ====
import pandas as pd

PATH = "/kaggle/input/iris/Iris.csv"
df = pd.read_csv(PATH)
print(df.shape)
df.head()


(150, 6)


Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [8]:
# ==== 3) Prep for anomaly detection ====
# Use only numeric features; drop label column
X = df.drop(columns=["Species"], errors="ignore")
X = X.select_dtypes(include=["number"]).dropna().reset_index(drop=True)
print("Modeling shape:", X.shape)


Modeling shape: (150, 5)


In [10]:
# ==== 4) PyCaret Anomaly (Isolation Forest baseline) ====
from pycaret.anomaly import setup, create_model, assign_model, save_model, plot_model

ano = setup(
    data=X,
    session_id=42,
    normalize=True,
    use_gpu=False,
    verbose=True
)

iforest = create_model("iforest")   # Isolation Forest
labeled = assign_model(iforest)     # adds 'Anomaly' & 'Anomaly_Score'
print(labeled.head())


Unnamed: 0,Description,Value
0,Session id,42
1,Original data shape,"(150, 5)"
2,Transformed data shape,"(150, 5)"
3,Numeric features,5
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,Normalize,True
9,Normalize method,zscore


   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  Anomaly  \
0   1            5.1           3.5            1.4           0.2        0   
1   2            4.9           3.0            1.4           0.2        0   
2   3            4.7           3.2            1.3           0.2        0   
3   4            4.6           3.1            1.5           0.2        0   
4   5            5.0           3.6            1.4           0.2        0   

   Anomaly_Score  
0      -0.075681  
1      -0.081439  
2      -0.074861  
3      -0.064391  
4      -0.099108  


In [12]:
# ==== 5) Save outputs ====
labeled.to_csv("/kaggle/working/iris_anomaly_flags.csv", index=False)
save_model(iforest, "/kaggle/working/iris_iforest_model")

print("✅ Saved:")
!ls -lh /kaggle/working | sed -n '1,200p'


Transformation Pipeline and Model Successfully Saved
✅ Saved:
total 1.3M
drwxr-xr-x 2 root root 4.0K Nov  2 10:11 cudf
drwxr-xr-x 3 root root 4.0K Nov  2 10:12 cuml
drwxr-xr-x 3 root root 4.0K Nov  2 10:12 cupy
-rw-r--r-- 1 root root 6.3K Nov  2 10:13 iris_anomaly_flags.csv
-rw-r--r-- 1 root root 1.2M Nov  2 10:13 iris_iforest_model.pkl
-rw-r--r-- 1 root root  15K Nov  2 10:13 logs.log
drwxr-xr-x 2 root root 4.0K Nov  2 10:11 rmm


In [13]:
# ==== Additional Anomaly Models (KNN & LOF) ====
# KNN (distance-based anomaly detection)
knn = create_model("knn")
knn_labeled = assign_model(knn)
print("KNN model sample:")
print(knn_labeled.head())

# LOF (Local Outlier Factor)
lof = create_model("lof")
lof_labeled = assign_model(lof)
print("\nLOF model sample:")
print(lof_labeled.head())


KNN model sample:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  Anomaly  \
0   1            5.1           3.5            1.4           0.2        0   
1   2            4.9           3.0            1.4           0.2        0   
2   3            4.7           3.2            1.3           0.2        0   
3   4            4.6           3.1            1.5           0.2        0   
4   5            5.0           3.6            1.4           0.2        0   

   Anomaly_Score  
0       0.634739  
1       0.578645  
2       0.548796  
3       0.538036  
4       0.557574  



LOF model sample:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  Anomaly  \
0   1            5.1           3.5            1.4           0.2        0   
1   2            4.9           3.0            1.4           0.2        0   
2   3            4.7           3.2            1.3           0.2        0   
3   4            4.6           3.1            1.5           0.2        0   
4   5            5.0           3.6            1.4           0.2        0   

   Anomaly_Score  
0       0.989666  
1       1.084554  
2       1.018067  
3       1.050109  
4       1.005007  


In [14]:
save_model(knn, "/kaggle/working/iris_knn_model")
save_model(lof, "/kaggle/working/iris_lof_model")
knn_labeled.to_csv("/kaggle/working/iris_knn_anomaly.csv", index=False)
lof_labeled.to_csv("/kaggle/working/iris_lof_anomaly.csv", index=False)

print("✅ Saved KNN & LOF models and results in /kaggle/working/")
!ls -lh /kaggle/working


Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Saved
✅ Saved KNN & LOF models and results in /kaggle/working/
total 1.4M
drwxr-xr-x 2 root root 4.0K Nov  2 10:11 cudf
drwxr-xr-x 3 root root 4.0K Nov  2 10:12 cuml
drwxr-xr-x 3 root root 4.0K Nov  2 10:12 cupy
-rw-r--r-- 1 root root 6.3K Nov  2 10:13 iris_anomaly_flags.csv
-rw-r--r-- 1 root root 1.2M Nov  2 10:13 iris_iforest_model.pkl
-rw-r--r-- 1 root root 6.0K Nov  2 10:13 iris_knn_anomaly.csv
-rw-r--r-- 1 root root  21K Nov  2 10:13 iris_knn_model.pkl
-rw-r--r-- 1 root root 6.0K Nov  2 10:13 iris_lof_anomaly.csv
-rw-r--r-- 1 root root  47K Nov  2 10:13 iris_lof_model.pkl
-rw-r--r-- 1 root root  24K Nov  2 10:13 logs.log
drwxr-xr-x 2 root root 4.0K Nov  2 10:11 rmm
