# Isolation Forest

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest

In [2]:
# Data
X = np.random.rand(1000, 2)  # Data normal
outliers = np.random.rand(50, 2) + 3  # Data anomali
data = np.concatenate([X, outliers])

In [3]:
data = pd.DataFrame(data)
data.head()

Unnamed: 0,0,1
0,0.771339,0.666399
1,0.690479,0.126156
2,0.895373,0.169714
3,0.214728,0.425373
4,0.835286,0.654194


In [4]:
# Model Isolation Forest
model = IsolationForest(contamination=0.05, random_state=42)
preds = model.fit_predict(data)

In [5]:
# Filter data
normal_data = data[preds == 1]  # Data normal
anomaly_data = data[preds == -1]  # Data anomali

print("Jumlah data normal:", normal_data.shape[0])
print("Jumlah data anomali:", anomaly_data.shape[0])

Jumlah data normal: 997
Jumlah data anomali: 53


In [6]:
normal_data.head()

Unnamed: 0,0,1
0,0.771339,0.666399
1,0.690479,0.126156
2,0.895373,0.169714
3,0.214728,0.425373
4,0.835286,0.654194


In [7]:
anomaly_data.head()

Unnamed: 0,0,1
41,0.020493,0.991328
315,0.065713,0.002401
781,0.02412,0.004871
1000,3.255975,3.037876
1001,3.662896,3.641989


# Auto Encoder

In [8]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

In [9]:
# Data
X = np.random.rand(1000, 10)  # Data normal
outliers = np.random.rand(50, 10) + 3  # Data anomali
data = np.concatenate([X, outliers])

# Normalisasi
scaler = MinMaxScaler()
data = scaler.fit_transform(data)

In [10]:
data = pd.DataFrame(data)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.080915,0.11307,0.0,0.08483,0.006401,0.216193,0.190933,0.222907,0.246211,0.051854
1,0.06882,0.210612,0.200093,0.024267,0.23821,0.196327,0.000518,0.1931,0.034119,0.090122
2,0.232231,0.13042,0.057637,0.021454,0.184661,0.238155,0.154433,0.190983,0.214,0.126283
3,0.154353,0.249847,0.040954,0.157374,0.023243,0.057549,0.074845,0.174974,0.137132,0.181077
4,0.057609,0.222529,0.163692,0.119702,0.223365,0.033435,0.039452,0.043035,0.042572,0.161598


In [11]:
# Autoencoder
input_dim = data.shape[1]
autoencoder = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_dim),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(input_dim, activation='sigmoid')
])

autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(data, data, epochs=50, batch_size=32, verbose=0)

<keras.callbacks.History at 0x24888d597e0>

In [12]:
# Rekonstruksi dan deteksi anomali
reconstructed = autoencoder.predict(data)
mse = np.mean(np.power(data - reconstructed, 2), axis=1)

# Threshold untuk anomali
threshold = np.percentile(mse, 95)  # Misalnya ambil 5% teratas sebagai anomali
anomalies = mse > threshold

# Filter data
normal_data = data[~anomalies]  # Data normal
anomaly_data = data[anomalies]  # Data anomali

print("Jumlah data normal:", normal_data.shape[0])
print("Jumlah data anomali:", anomaly_data.shape[0])

Jumlah data normal: 997
Jumlah data anomali: 53


In [13]:
normal_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.080915,0.11307,0.0,0.08483,0.006401,0.216193,0.190933,0.222907,0.246211,0.051854
1,0.06882,0.210612,0.200093,0.024267,0.23821,0.196327,0.000518,0.1931,0.034119,0.090122
2,0.232231,0.13042,0.057637,0.021454,0.184661,0.238155,0.154433,0.190983,0.214,0.126283
3,0.154353,0.249847,0.040954,0.157374,0.023243,0.057549,0.074845,0.174974,0.137132,0.181077
4,0.057609,0.222529,0.163692,0.119702,0.223365,0.033435,0.039452,0.043035,0.042572,0.161598


In [14]:
anomaly_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
10,0.036123,0.047968,0.229823,0.247714,0.199125,0.061212,0.022542,0.164448,0.237363,0.225685
57,0.008377,0.13428,0.145379,0.240689,0.034867,0.182693,0.048094,0.217906,0.239301,0.004601
100,0.081146,0.242462,0.231775,0.030984,0.028222,0.248189,0.040195,0.238681,0.173679,0.226518
155,0.243656,0.234846,0.240244,0.2132,0.154697,0.206933,0.245235,0.032564,0.120962,0.000226
168,0.00518,0.039441,0.247315,0.049824,0.074614,0.198693,0.192403,0.244957,0.09092,0.165792


# Local Outlier Factor (LOF)

In [15]:
import numpy as np
from sklearn.neighbors import LocalOutlierFactor

In [16]:
# Data
X = np.random.rand(1000, 2)  # Data normal
outliers = np.random.rand(50, 2) + 3  # Data anomali
data = np.concatenate([X, outliers])

In [17]:
data = pd.DataFrame(data)
data.head()

Unnamed: 0,0,1
0,0.289562,0.922925
1,0.682335,0.209691
2,0.329903,0.527057
3,0.506996,0.741328
4,0.729221,0.550058


In [18]:
# Model LOF
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
preds = lof.fit_predict(data)

# Filter data
normal_data = data[preds == 1]  # Data normal
anomaly_data = data[preds == -1]  # Data anomali

print("Jumlah data normal:", normal_data.shape[0])
print("Jumlah data anomali:", anomaly_data.shape[0])

Jumlah data normal: 997
Jumlah data anomali: 53


In [19]:
normal_data.head()

Unnamed: 0,0,1
0,0.289562,0.922925
1,0.682335,0.209691
2,0.329903,0.527057
3,0.506996,0.741328
4,0.729221,0.550058


In [20]:
anomaly_data.head()

Unnamed: 0,0,1
15,0.979599,0.72446
56,0.961855,0.019484
74,0.009329,0.95383
85,0.535301,0.992544
112,0.779087,0.962791
