In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import os
import sys

# --- Path configuration ---
# Input feature table read by this script, and the two artifacts it writes.
input_csv = "data/voice_features.csv"
model_output = "models/model_voice.joblib"
scaler_output = "models/scaler_voice.joblib"

# Make sure the output folder(s) exist before anything is written.
# The directory is derived from the artifact paths so it cannot drift from
# them, and exist_ok=True removes the check-then-create race of a separate
# os.path.exists() test.
for _artifact_path in (model_output, scaler_output):
    _artifact_dir = os.path.dirname(_artifact_path)
    if _artifact_dir:  # a bare filename has no directory to create
        os.makedirs(_artifact_dir, exist_ok=True)
# ------------------------

print("Memulai pelatihan model...")

# 1. Load the feature dataset
try:
    data = pd.read_csv(input_csv)
except FileNotFoundError:
    # Nothing to train on without the CSV: explain on stderr (two lines)
    # and stop the script with exit status 1.
    print(
        f"Error: File {input_csv} tidak ditemukan.",
        "Pastikan Anda sudah menjalankan '1_extract_features.py' terlebih dahulu.",
        sep="\n",
        file=sys.stderr,
    )
    sys.exit(1)
else:
    # Only reached when the CSV was read successfully.
    print(f"Dataset dimuat. Bentuk data: {data.shape}")

# 2. Separate the target column (y) from the features (X);
#    every column other than 'label' is used as a feature.
y = data['label']
X = data.drop(columns='label')

# 3. Split into training and testing sets: 20% of the rows are held out,
#    the split is stratified on y, and the seed is fixed at 42.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
print(f"Data di-split: {len(X_train)} train, {len(X_test)} test.")

# 4. Feature scaling (important)
# The scaler is fitted on the training rows only; the same fitted scaler is
# then applied to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("Fitur berhasil di-scale.")

# 5. Train the model (Random Forest) on the scaled training features
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
).fit(X_train_scaled, y_train)

# 6. Test the model on the held-out, scaled test features
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

print("\nModel berhasil dilatih.")
print(f"Akurasi di data test: {accuracy * 100:.2f}%")

# Per-class precision / recall / F1 on the test set, framed by a header
# and a footer line.
print("\n--- Classification Report (Test Data) ---")
report = classification_report(y_test, y_pred)
print(report)
print("-----------------------------------------")

# 7. SAVE THE MODEL AND THE SCALER
# Each entry is (display name, fitted object, destination path).
_artifacts = (
    ("Model", model, model_output),
    ("Scaler", scaler, scaler_output),
)

# Write both files first so that no success message is printed unless
# every dump has completed.
for _name, _obj, _path in _artifacts:
    joblib.dump(_obj, _path)

for _name, _obj, _path in _artifacts:
    print(f"Berhasil! {_name} disimpan ke '{_path}'")

Memulai pelatihan model...
Dataset dimuat. Bentuk data: (200, 41)
Data di-split: 160 train, 40 test.
Fitur berhasil di-scale.

Model berhasil dilatih.
Akurasi di data test: 97.50%

--- Classification Report (Test Data) ---
              precision    recall  f1-score   support

        buka       0.95      1.00      0.98        20
       tutup       1.00      0.95      0.97        20

    accuracy                           0.97        40
   macro avg       0.98      0.97      0.97        40
weighted avg       0.98      0.97      0.97        40

-----------------------------------------
Berhasil! Model disimpan ke 'models/model_voice.joblib'
Berhasil! Scaler disimpan ke 'models/scaler_voice.joblib'
