In [None]:
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import LocalOutlierFactor

# Read at most the first MAX_ROWS rows of the CSV file into `df`.
DATASET_PATH = "rba-dataset.csv"
MAX_ROWS = 1_000_000
df = pd.read_csv(DATASET_PATH, nrows=MAX_ROWS)


In [6]:
# Benchmark the preprocessing pipeline: every stage (cleaning, encoding,
# normalization, train/test split) is executed five times on `df` and the
# mean wall-clock duration of each stage is reported.
print("▶️ Memulai Preprocessing Data...")
cleaning_times = []
encoding_times = []
normalization_times = []
splitting_times = []

for _ in range(5):  # repeat the pipeline to obtain an average duration per stage
    # Cleaning: drop every row that contains a missing value.
    # time.perf_counter() is used for all measurements because it is monotonic
    # and high-resolution, which time.time() does not guarantee.
    start_cleaning = time.perf_counter()
    df_cleaned = df.dropna()
    end_cleaning = time.perf_counter()
    cleaning_times.append(end_cleaning - start_cleaning)

    # Encoding: convert every object-dtype (categorical) column to integer codes.
    start_encoding = time.perf_counter()
    label_encoders = {}
    object_columns = df_cleaned.select_dtypes(include=['object']).columns
    # Work on an explicit copy so the writes below neither touch `df` nor raise
    # SettingWithCopyWarning on the frame returned by dropna().
    df_cleaned = df_cleaned.copy()
    for col in object_columns:
        le = LabelEncoder()
        # Assign the whole column (not `.loc[:, col] = ...`): whole-column
        # assignment replaces the column so it takes the integer dtype of the
        # codes, whereas `.loc` writes in place and keeps the column as `object`.
        df_cleaned[col] = le.fit_transform(df_cleaned[col])
        label_encoders[col] = le  # keep the fitted encoder of each column
    end_encoding = time.perf_counter()
    encoding_times.append(end_encoding - start_encoding)

    # Normalization: standardize all columns of the encoded frame.
    # NOTE(review): the scaler is fitted on all rows before the split, so
    # test-set statistics influence the scaling of the training rows. Fit on
    # X_train only if these splits are ever used to evaluate model quality.
    start_normalization = time.perf_counter()
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df_cleaned)
    end_normalization = time.perf_counter()
    normalization_times.append(end_normalization - start_normalization)

    # Splitting: 80/20 train-test split with a fixed seed.
    start_splitting = time.perf_counter()
    X_train, X_test = train_test_split(df_scaled, test_size=0.2, random_state=42)
    end_splitting = time.perf_counter()
    splitting_times.append(end_splitting - start_splitting)

# Mean duration of each preprocessing stage over all repetitions.
avg_cleaning_time = np.mean(cleaning_times)
avg_encoding_time = np.mean(encoding_times)
avg_normalization_time = np.mean(normalization_times)
avg_splitting_time = np.mean(splitting_times)

print(f"✅ Rata-rata waktu cleaning: {avg_cleaning_time:.2f} detik")
print(f"✅ Rata-rata waktu encoding: {avg_encoding_time:.2f} detik")
print(f"✅ Rata-rata waktu normalisasi: {avg_normalization_time:.2f} detik")
print(f"✅ Rata-rata waktu splitting: {avg_splitting_time:.2f} detik")

▶️ Memulai Preprocessing Data...
✅ Rata-rata waktu cleaning: 0.20 detik
✅ Rata-rata waktu encoding: 0.17 detik
✅ Rata-rata waktu normalisasi: 0.05 detik
✅ Rata-rata waktu splitting: 0.00 detik


In [7]:
# Benchmark three unsupervised anomaly detectors: each model is fitted on
# X_train and run on X_test five times, and the mean training and inference
# durations are reported per model.
print("\n▶️ Memulai Pelatihan Model...")

model_training_times = {}
model_testing_times = {}

models = {
    "Isolation Forest": IsolationForest(contamination=0.1, random_state=42),
    "One-Class SVM": OneClassSVM(nu=0.1, kernel="rbf"),
    # novelty=True lets LOF be fitted on X_train and then call predict() on
    # unseen data, so it follows the same fit/predict flow as the other models.
    "Local Outlier Factor": LocalOutlierFactor(n_neighbors=20, novelty=True)
}

for model_name, model in models.items():
    training_times = []
    testing_times = []

    for _ in range(5):  # repeat to obtain an average duration
        # Training. All three estimators are fitted the same way, so no
        # per-model branching is needed. time.perf_counter() is monotonic and
        # high-resolution, which makes it the appropriate clock for timing.
        start_train = time.perf_counter()
        model.fit(X_train)
        end_train = time.perf_counter()
        training_times.append(end_train - start_train)

        # Testing (inference) on the held-out split.
        start_test = time.perf_counter()
        predictions = model.predict(X_test)
        end_test = time.perf_counter()
        testing_times.append(end_test - start_test)

    # Mean training and inference duration for this model.
    model_training_times[model_name] = np.mean(training_times)
    model_testing_times[model_name] = np.mean(testing_times)

# Report the averaged timings for every model.
for model_name in models.keys():
    print(f"✅ {model_name} - Rata-rata waktu training: {model_training_times[model_name]:.2f} detik")
    print(f"✅ {model_name} - Rata-rata waktu testing: {model_testing_times[model_name]:.2f} detik")



▶️ Memulai Pelatihan Model...
✅ Isolation Forest - Rata-rata waktu training: 0.26 detik
✅ Isolation Forest - Rata-rata waktu testing: 0.03 detik
✅ One-Class SVM - Rata-rata waktu training: 11.74 detik
✅ One-Class SVM - Rata-rata waktu testing: 1.88 detik
✅ Local Outlier Factor - Rata-rata waktu training: 1.46 detik
✅ Local Outlier Factor - Rata-rata waktu testing: 0.40 detik
