In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OrdinalEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the raw dataset from a CSV expected in the notebook's working directory.
try:
    df = pd.read_csv('online_shoppers_intention.csv')
except FileNotFoundError:
    print("Error: File 'online_shoppers_intention.csv' tidak ditemukan.")
    print("Pastikan Anda sudah mengunduh dataset dan meletakkannya di folder yang sama.")
    # Re-raise rather than calling exit(): in a Jupyter kernel exit() requests a
    # kernel shutdown instead of simply stopping this cell, which hides the real
    # error. Propagating the exception shows the traceback and halts "Run All".
    raise
else:
    # Only report success (and touch df) when the read actually worked.
    print("Dataset berhasil dimuat.")
    print(f"Jumlah baris: {df.shape[0]}, Jumlah kolom: {df.shape[1]}\n")

Dataset berhasil dimuat.
Jumlah baris: 12330, Jumlah kolom: 18



In [None]:
# Name of the label column that is separated from the features later on.
target_col = 'Revenue'

# Columns that are passed to the categorical encoder.
categorical_cols = [
    'Month',
    'OperatingSystems',
    'Browser',
    'Region',
    'TrafficType',
    'VisitorType',
]

In [None]:
# Work on an independent (deep) copy so the raw frame `df` stays untouched.
df_processed = df.copy(deep=True)

In [None]:
# Learn one integer code per distinct value of every categorical column, then
# overwrite those columns with the encoded values.
# NOTE(review): the encoder is fitted on the full frame, i.e. before the
# train/test split further down — confirm this is intended.
encoder = OrdinalEncoder().fit(df_processed[categorical_cols])
df_processed[categorical_cols] = encoder.transform(df_processed[categorical_cols])

In [None]:
# Cast the 'Weekend' column and the target column to integers in one pass
# (presumably both hold boolean flags — verify against the raw CSV).
df_processed = df_processed.astype({'Weekend': int, target_col: int})


In [None]:
# Target vector: the column named by target_col.
y = df_processed[target_col]
# Feature matrix: every column EXCEPT the target.
X = df_processed.drop(columns=target_col)

In [None]:
# Hold out 20% of the rows for testing; stratifying on y keeps the class ratio
# the same in both partitions, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardise features: statistics are learned from the training partition only
# and then applied unchanged to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print(f"Data Latih: {X_train.shape[0]} baris")
print(f"Data Uji: {X_test.shape[0]} baris\n")


Data Latih: 9864 baris
Data Uji: 2466 baris



In [None]:
# Train an untuned random forest as the reference point and time the fit.
print("Memulai pelatihan model Random Forest (Baseline)...")
start_time_baseline = time.time()
model_rf_baseline = RandomForestClassifier(
    n_estimators=100,
    random_state=42,   # fixed seed so the baseline is repeatable
    n_jobs=-1,         # use every available core
)
model_rf_baseline.fit(X_train, y_train)
end_time_baseline = time.time()
print(
    f"Pelatihan model baseline selesai. (Durasi: {end_time_baseline - start_time_baseline:.2f} detik)\n"
)

Memulai pelatihan model Random Forest (Baseline)...
Pelatihan model baseline selesai. (Durasi: 2.29 detik)



In [None]:
# Score the baseline forest on the held-out test partition.
print("--- Hasil Evaluasi Model Baseline (Sebelum Tuning) ---")
y_pred_baseline = model_rf_baseline.predict(X_test)
accuracy_baseline = accuracy_score(y_test, y_pred_baseline)

print(f"Akurasi: {accuracy_baseline:.4f}")

# Per-class precision / recall / F1.
print("\nLaporan Klasifikasi Baseline:")
print(
    classification_report(
        y_test,
        y_pred_baseline,
        target_names=['Tidak Beli (0)', 'Beli (1)'],
    )
)

# Raw counts of true vs. predicted labels.
print("\nConfusion Matrix Baseline:")
print(confusion_matrix(y_test, y_pred_baseline))
print("-" * 60 + "\n")

--- Hasil Evaluasi Model Baseline (Sebelum Tuning) ---
Akurasi: 0.8998

Laporan Klasifikasi Baseline:
                precision    recall  f1-score   support

Tidak Beli (0)       0.92      0.96      0.94      2084
      Beli (1)       0.73      0.56      0.63       382

      accuracy                           0.90      2466
     macro avg       0.83      0.76      0.79      2466
  weighted avg       0.89      0.90      0.89      2466


Confusion Matrix Baseline:
[[2006   78]
 [ 169  213]]
------------------------------------------------------------



In [None]:
# Announce the hyperparameter search and start its wall-clock timer.
# NOTE(review): the timer starts in this cell, so any pause before the search
# itself is executed is included in the reported duration.
print(
    "Memulai Optimasi Hyperparameter (Grid Search)...",
    "Ini mungkin akan memakan waktu beberapa menit...",
    sep="\n",
)
start_time_grid = time.time()

Memulai Optimasi Hyperparameter (Grid Search)...
Ini mungkin akan memakan waktu beberapa menit...


In [None]:
# Hyperparameter search space for the random forest (2 * 3 * 2 * 2 = 24 combinations).
param_grid = dict(
    n_estimators=[100, 200],          # number of trees
    max_depth=[10, 20, None],         # maximum tree depth (None = unlimited)
    min_samples_split=[2, 5],         # minimum samples required to split a node
    criterion=['gini', 'entropy'],    # split-quality function
)

In [None]:
# This cell must be live code: a later cell reads grid_search.best_estimator_,
# so leaving the search commented out makes the notebook fail with a NameError
# on a fresh kernel ("Restart & Run All").

# Base estimator whose hyperparameters are searched.
rf_grid = RandomForestClassifier(random_state=42, n_jobs=-1)

# Exhaustive search over param_grid.
# cv=5 (5-fold cross-validation) is common practice;
# scoring='accuracy' because accuracy is the metric being maximised.
grid_search = GridSearchCV(estimator=rf_grid,
                           param_grid=param_grid,
                           cv=5,
                           scoring='accuracy',
                           verbose=1,
                           n_jobs=-1)

# Fit the search to find the best parameter combination.
grid_search.fit(X_train, y_train)

# start_time_grid was set in the cell that announced the search.
end_time_grid = time.time()
print(f"Optimasi Grid Search selesai. (Durasi: {end_time_grid - start_time_grid:.2f} detik)\n")

# --- Show the grid-search results ---
print("--- Hasil Optimasi Grid Search ---")
print(f"Parameter Terbaik Ditemukan: {grid_search.best_params_}")
print(f"Akurasi Terbaik (Cross-Validation): {grid_search.best_score_:.4f}\n")

Fitting 5 folds for each of 24 candidates, totalling 120 fits
Optimasi Grid Search selesai. (Durasi: 166.11 detik)

--- Hasil Optimasi Grid Search ---
Parameter Terbaik Ditemukan: {'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}
Akurasi Terbaik (Cross-Validation): 0.9060



In [None]:
# Feed-forward neural network for binary classification of the target.
# The input is declared with an explicit Input object rather than the
# deprecated `input_shape=` keyword on the first Dense layer; the network
# itself (64 -> dropout -> 32 -> 1) is unchanged.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # one input per feature column
    Dense(64, activation='relu'),               # hidden layer 1
    Dropout(0.2),                               # randomly drops 20% of activations during training (regularisation)
    Dense(32, activation='relu'),               # hidden layer 2
    Dense(1, activation='sigmoid')              # output: probability of the positive class
])

# Adam optimiser with binary cross-entropy loss; accuracy is tracked as a metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Evaluate the tuned forest on the held-out test partition.
# Requires `grid_search` to have been fitted earlier in the notebook.
print("--- Hasil Evaluasi Model Optimal (Setelah Tuning) ---")

# best_estimator_ is the already-trained model with the winning parameters.
best_model = grid_search.best_estimator_
y_pred_optimal = best_model.predict(X_test)
accuracy_optimal = accuracy_score(y_test, y_pred_optimal)

print(f"Akurasi pada Data Uji: {accuracy_optimal:.4f}")

# Per-class precision / recall / F1.
print("\nLaporan Klasifikasi Model Optimal:")
print(
    classification_report(
        y_test,
        y_pred_optimal,
        target_names=['Tidak Beli (0)', 'Beli (1)'],
    )
)

# Raw counts of true vs. predicted labels.
print("\nConfusion Matrix Model Optimal:")
print(confusion_matrix(y_test, y_pred_optimal))
print("-" * 60 + "\n")

# ===================================================================
# STAGE 6: RESULT ANALYSIS (comparison)
# ===================================================================
# Side-by-side test accuracy of the baseline and the tuned model.
print("--- Perbandingan Performa Model ---")
print(f"Akurasi Model Baseline (Sebelum Tuning): {accuracy_baseline:.4f}")
print(f"Akurasi Model Optimal (Setelah Tuning):   {accuracy_optimal:.4f}")
print(f"Peningkatan Akurasi: {accuracy_optimal - accuracy_baseline:.4f}")

--- Hasil Evaluasi Model Optimal (Setelah Tuning) ---
Akurasi pada Data Uji: 0.9023

Laporan Klasifikasi Model Optimal:
                precision    recall  f1-score   support

Tidak Beli (0)       0.92      0.96      0.94      2084
      Beli (1)       0.74      0.56      0.64       382

      accuracy                           0.90      2466
     macro avg       0.83      0.76      0.79      2466
  weighted avg       0.90      0.90      0.90      2466


Confusion Matrix Model Optimal:
[[2010   74]
 [ 167  215]]
------------------------------------------------------------

--- Perbandingan Performa Model ---
Akurasi Model Baseline (Sebelum Tuning): 0.8998
Akurasi Model Optimal (Setelah Tuning):   0.9023
Peningkatan Akurasi: 0.0024
