# Preprocessing


## Hapus outlier metode ABOD

In [4]:
import pandas as pd
from pycaret.anomaly import *
from tabulate import tabulate

# Purpose: flag outliers with PyCaret's ABOD model, drop the flagged rows, and
# print a preview of the cleaned data. Expects a DataFrame named `df` to have
# been created by an earlier cell.

# Set the target column ("class") aside, when present, so it is not fed to the
# detector. The features are bound to a separate name instead of overwriting
# `df`: rebinding `df` would strip "class" for any later cell that repeats this
# check, and the label would then be silently missing from its cleaned output.
# NOTE(review): the preview printed below shows a "Species" column and no
# "class" column -- confirm the intended target column name.
if "class" in df.columns:
    target = df["class"]
    features = df.drop(columns="class")
else:
    target = None
    features = df

# === 2. PyCaret setup (fixed session_id for reproducibility) ===
s = setup(data=features, session_id=123, verbose=False)

# === 3. Create the model: ABOD here ('knn' or 'lof' are alternatives) ===
model = create_model("abod", fraction=0.05)

# === 4. Attach the detection results to the data ===
results = assign_model(model)

# === 5. Keep only the rows not flagged as outliers (Anomaly == 0) ===
inlier_mask = results["Anomaly"] == 0
df_clean = results.loc[inlier_mask].drop(columns=["Anomaly", "Anomaly_Score"])

# Re-attach the target, aligned on the surviving row index.
if target is not None:
    df_clean["class"] = target.loc[df_clean.index]

# === 6. Show the cleaned result ===
print(tabulate(df_clean.head(10), headers="keys", tablefmt="grid"))
print(f"\nJumlah data asli: {len(results)}")
print(f"Jumlah data setelah buang outlier: {len(df_clean)}")


+----+------+-----------------+----------------+-----------------+----------------+-------------+
|    |   Id |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm | Species     |
|  0 |    1 |             5.1 |            3.5 |             1.4 |            0.2 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  1 |    2 |             4.9 |            3   |             1.4 |            0.2 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  2 |    3 |             4.7 |            3.2 |             1.3 |            0.2 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  3 |    4 |             4.6 |            3.1 |             1.5 |            0.2 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  4 |    5 |       

## Hapus outlier metode knn

In [3]:
import pandas as pd
from pycaret.anomaly import *
from tabulate import tabulate

# Purpose: flag outliers with PyCaret's KNN detector, drop the flagged rows, and
# print a preview of the cleaned data. Expects a DataFrame named `df` to have
# been created by an earlier cell.

# Set the target column ("class") aside, when present, so it is not fed to the
# detector. The features are bound to a separate name instead of overwriting
# `df`: rebinding `df` would strip "class" for any later cell that repeats this
# check, and the label would then be silently missing from its cleaned output.
# NOTE(review): the preview printed below shows a "Species" column and no
# "class" column -- confirm the intended target column name.
if "class" in df.columns:
    target = df["class"]
    features = df.drop(columns="class")
else:
    target = None
    features = df

# === 2. PyCaret setup (fixed session_id for reproducibility) ===
s = setup(data=features, session_id=123, verbose=False)

# === 3. Create the model: KNN here ('abod' or 'lof' are alternatives) ===
model = create_model("knn", fraction=0.05)

# === 4. Attach the detection results to the data ===
results = assign_model(model)

# === 5. Keep only the rows not flagged as outliers (Anomaly == 0) ===
inlier_mask = results["Anomaly"] == 0
df_clean = results.loc[inlier_mask].drop(columns=["Anomaly", "Anomaly_Score"])

# Re-attach the target, aligned on the surviving row index.
if target is not None:
    df_clean["class"] = target.loc[df_clean.index]

# === 6. Show the cleaned result ===
print(tabulate(df_clean.head(10), headers="keys", tablefmt="grid"))
print(f"\nJumlah data asli: {len(results)}")
print(f"Jumlah data setelah buang outlier: {len(df_clean)}")


+----+------+-----------------+----------------+-----------------+----------------+-------------+
|    |   Id |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm | Species     |
|  2 |    3 |             4.7 |            3.2 |             1.3 |            0.2 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  3 |    4 |             4.6 |            3.1 |             1.5 |            0.2 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  4 |    5 |             5   |            3.6 |             1.4 |            0.2 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  5 |    6 |             5.4 |            3.9 |             1.7 |            0.4 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  6 |    7 |       

## Hapus outlier metode lof

In [2]:
import pandas as pd
from pycaret.anomaly import *
from tabulate import tabulate

# Purpose: flag outliers with PyCaret's LOF detector, drop the flagged rows, and
# print a preview of the cleaned data. Expects a DataFrame named `df` to have
# been created by an earlier cell.

# Set the target column ("class") aside, when present, so it is not fed to the
# detector. The features are bound to a separate name instead of overwriting
# `df`: rebinding `df` would strip "class" for any later cell that repeats this
# check, and the label would then be silently missing from its cleaned output.
# NOTE(review): the preview printed below shows a "Species" column and no
# "class" column -- confirm the intended target column name.
if "class" in df.columns:
    target = df["class"]
    features = df.drop(columns="class")
else:
    target = None
    features = df

# === 2. PyCaret setup (fixed session_id for reproducibility) ===
s = setup(data=features, session_id=123, verbose=False)

# === 3. Create the model: LOF here ('abod' or 'knn' are alternatives) ===
model = create_model("lof", fraction=0.05)

# === 4. Attach the detection results to the data ===
results = assign_model(model)

# === 5. Keep only the rows not flagged as outliers (Anomaly == 0) ===
inlier_mask = results["Anomaly"] == 0
df_clean = results.loc[inlier_mask].drop(columns=["Anomaly", "Anomaly_Score"])

# Re-attach the target, aligned on the surviving row index.
if target is not None:
    df_clean["class"] = target.loc[df_clean.index]

# === 6. Show the cleaned result ===
print(tabulate(df_clean.head(10), headers="keys", tablefmt="grid"))
print(f"\nJumlah data asli: {len(results)}")
print(f"Jumlah data setelah buang outlier: {len(df_clean)}")


+----+------+-----------------+----------------+-----------------+----------------+-------------+
|    |   Id |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm | Species     |
|  4 |    5 |             5   |            3.6 |             1.4 |            0.2 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  5 |    6 |             5.4 |            3.9 |             1.7 |            0.4 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  6 |    7 |             4.6 |            3.4 |             1.4 |            0.3 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  7 |    8 |             5   |            3.4 |             1.5 |            0.2 | Iris-setosa |
+----+------+-----------------+----------------+-----------------+----------------+-------------+
|  8 |    9 |       