In [None]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [None]:
# === Load dataset ===
df = pd.read_csv("data/voice_dataset.csv")

# === Feature columns ===
# Every column whose name starts with "mfcc" is used as a feature, in the
# order it appears in the CSV. The recorded run of this cell selected
# 37 columns (mfcc0..mfcc36).
# NOTE(review): this step was previously described as "36 columns" -- confirm
# whether 37 is the intended feature count.
feature_cols = [col for col in df.columns if col.startswith("mfcc")]
if not feature_cols:
    # Stop here with a clear message rather than handing a zero-column
    # matrix to the training cell further down.
    raise ValueError(
        "No columns starting with 'mfcc' were found in data/voice_dataset.csv"
    )

X = df[feature_cols].to_numpy()
# Target for the status model; encoding per the original note:
# 0 = "buka", 1 = "tutup".
y = df["status"].to_numpy()

# Print the selected features as a numbered list (1-based) plus the total.
print("=== Daftar Fitur yang Digunakan ===")
for i, col in enumerate(feature_cols, start=1):
    print(f"{i}. {col}")
print(f"\nTotal fitur: {len(feature_cols)} kolom\n")

=== Daftar Fitur yang Digunakan ===
1. mfcc0
2. mfcc1
3. mfcc2
4. mfcc3
5. mfcc4
6. mfcc5
7. mfcc6
8. mfcc7
9. mfcc8
10. mfcc9
11. mfcc10
12. mfcc11
13. mfcc12
14. mfcc13
15. mfcc14
16. mfcc15
17. mfcc16
18. mfcc17
19. mfcc18
20. mfcc19
21. mfcc20
22. mfcc21
23. mfcc22
24. mfcc23
25. mfcc24
26. mfcc25
27. mfcc26
28. mfcc27
29. mfcc28
30. mfcc29
31. mfcc30
32. mfcc31
33. mfcc32
34. mfcc33
35. mfcc34
36. mfcc35
37. mfcc36

Total fitur: 37 kolom



In [None]:
# === Train/test split ===
# 20% of the rows are held out for testing; stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# === Pipeline: StandardScaler followed by a 200-tree RandomForest ===
status_steps = [
    ("scaler", StandardScaler()),
    ("rf", RandomForestClassifier(n_estimators=200, random_state=42)),
]
model_status = Pipeline(steps=status_steps)

# === Fit on the training split, then predict the held-out split ===
# Pipeline.fit returns the fitted pipeline, so the two calls can be chained.
y_pred = model_status.fit(X_train, y_train).predict(X_test)

# === Evaluation on the held-out split ===
status_names = ["buka", "tutup"]  # display names for the two status classes
accuracy = accuracy_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred, target_names=status_names)

# One print call, newline-separated: emits the same text as printing each
# item on its own.
print(
    "=== Hasil Evaluasi Model Status ===",
    f"Akurasi: {accuracy:.4f}",
    "\nConfusion Matrix:",
    conf_mat,
    "\nClassification Report:",
    class_report,
    sep="\n",
)




=== Hasil Evaluasi Model Status ===
Akurasi: 0.9250

Confusion Matrix:
[[37  3]
 [ 3 37]]

Classification Report:
              precision    recall  f1-score   support

        buka       0.93      0.93      0.93        40
       tutup       0.93      0.93      0.93        40

    accuracy                           0.93        80
   macro avg       0.93      0.93      0.93        80
weighted avg       0.93      0.93      0.93        80



In [None]:
# === Persist the trained status pipeline and the feature order ===
# Each entry pairs an object to serialise with its destination path.
status_artifacts = (
    (model_status, "models/status_model.pkl"),
    (feature_cols, "models/feature_cols.pkl"),
)

os.makedirs("models", exist_ok=True)  # no error if the folder already exists
for artifact, destination in status_artifacts:
    joblib.dump(artifact, destination)

print("\n[INFO] Model status berhasil disimpan.")


[INFO] Model status berhasil disimpan.


In [None]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib


In [None]:
# === Load dataset ===
df = pd.read_csv("data/voice_dataset.csv")

# === Feature columns ===
# Every column whose name starts with "mfcc" is used as a feature, in the
# order it appears in the CSV. The recorded run of this cell selected
# 37 columns (mfcc0..mfcc36).
# NOTE(review): this step was previously described as "36 columns" -- confirm
# whether 37 is the intended feature count.
feature_cols = [col for col in df.columns if col.startswith("mfcc")]
if not feature_cols:
    # Stop here with a clear message rather than handing a zero-column
    # matrix to the training cell further down.
    raise ValueError(
        "No columns starting with 'mfcc' were found in data/voice_dataset.csv"
    )

X = df[feature_cols].to_numpy()
# Target for the user model; encoding per the original note:
# 0 = user1, 1 = user2.
y = df["user"].to_numpy()

# === Show the features in use ===
# Numbered list (1-based) followed by the total count.
print("=== Daftar Fitur yang Digunakan ===")
for i, col in enumerate(feature_cols, start=1):
    print(f"{i}. {col}")
print(f"\nTotal fitur: {len(feature_cols)} kolom\n")

=== Daftar Fitur yang Digunakan ===
1. mfcc0
2. mfcc1
3. mfcc2
4. mfcc3
5. mfcc4
6. mfcc5
7. mfcc6
8. mfcc7
9. mfcc8
10. mfcc9
11. mfcc10
12. mfcc11
13. mfcc12
14. mfcc13
15. mfcc14
16. mfcc15
17. mfcc16
18. mfcc17
19. mfcc18
20. mfcc19
21. mfcc20
22. mfcc21
23. mfcc22
24. mfcc23
25. mfcc24
26. mfcc25
27. mfcc26
28. mfcc27
29. mfcc28
30. mfcc29
31. mfcc30
32. mfcc31
33. mfcc32
34. mfcc33
35. mfcc34
36. mfcc35
37. mfcc36

Total fitur: 37 kolom



In [None]:
# === Train/test split ===
# 20% of the rows are held out for testing; stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# === Pipeline: StandardScaler followed by a 200-tree RandomForest ===
user_steps = [
    ("scaler", StandardScaler()),
    ("rf", RandomForestClassifier(n_estimators=200, random_state=42)),
]
model_user = Pipeline(steps=user_steps)

# === Fit on the training split, then predict the held-out split ===
# Pipeline.fit returns the fitted pipeline, so the two calls can be chained.
y_pred = model_user.fit(X_train, y_train).predict(X_test)

# === Evaluation on the held-out split ===
# NOTE(review): the recorded run scored 1.0000 on this split -- worth
# confirming that the test rows are independent of the training rows.
user_names = ["user1", "user2"]  # display names for the two user classes
accuracy = accuracy_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=user_names)

# One print call, newline-separated: emits the same text as printing each
# item on its own.
print(
    "=== Hasil Evaluasi Model User ===",
    f"Akurasi: {accuracy:.4f}",
    "\nConfusion Matrix:",
    conf_mat,
    "\nClassification Report:",
    report,
    sep="\n",
)


=== Hasil Evaluasi Model User ===
Akurasi: 1.0000

Confusion Matrix:
[[40  0]
 [ 0 40]]

Classification Report:
              precision    recall  f1-score   support

       user1       1.00      1.00      1.00        40
       user2       1.00      1.00      1.00        40

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80



In [None]:
# === Persist the trained user pipeline and the feature order ===
# Each entry pairs an object to serialise with its destination path.
user_artifacts = (
    (model_user, "models/user_model.pkl"),
    (feature_cols, "models/feature_cols.pkl"),
)

os.makedirs("models", exist_ok=True)  # no error if the folder already exists
for artifact, destination in user_artifacts:
    joblib.dump(artifact, destination)

print("\n[INFO] Model user berhasil disimpan.")


[INFO] Model user berhasil disimpan.
