In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import BernoulliNB  # <-- PERUBAHAN 1: Import Naive Bayes
from sklearn.metrics import accuracy_score
import pickle

In [19]:
# 1. Load the dataset from the CSV file in the working directory.
try:
    df = pd.read_csv('Data Prediksi.csv')
except FileNotFoundError:
    print("Error: File 'Data Prediksi.csv' tidak ditemukan.")
    # Re-raise instead of calling exit(): inside a notebook exit() asks the
    # kernel to shut down, which discards all state and hides the traceback.
    # Raising stops "Run All" at this cell while keeping the kernel usable.
    raise

print("Dataset berhasil dimuat.")

# 2. Separate the features (X) from the target (Y).
# Every column except 'TARGET' is treated as a feature.
X = df.drop('TARGET', axis=1)
Y = df['TARGET']

# Quick sanity check: number of feature columns and the distinct target labels.
print("Pemeriksaan data:")
print(f"Jumlah Fitur (Gejala): {X.shape[1]}")
print(f"Kelas Penyakit Unik: {Y.unique()}")

Dataset berhasil dimuat.
Pemeriksaan data:
Jumlah Fitur (Gejala): 20
Kelas Penyakit Unik: ['ALLERGY' 'COLD' 'COVID' 'FLU']


In [21]:
# 3. Encode the target (Y): fit the label encoder on the target column first,
# then map every label to its integer code.
encoder = LabelEncoder()
encoder.fit(Y)
Y_encoded = encoder.transform(Y)

# 4. Train/test split: hold out 20% of the rows for testing, stratified on the
# encoded target, with a fixed random_state so the split is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y_encoded,
    test_size=0.2,
    random_state=2,
    stratify=Y_encoded,
)

# Report how many rows landed in each partition.
print(f"Data latih: {X_train.shape[0]} baris, Data tes: {X_test.shape[0]} baris")

Data latih: 35562 baris, Data tes: 8891 baris


In [23]:
# 5. Train the model (Bernoulli Naive Bayes).
# fit() returns the fitted estimator itself, so construction and training
# can be written as a single expression.
classifier = BernoulliNB().fit(X_train, Y_train)

print("Model Naive Bayes berhasil dilatih.")

Model Naive Bayes berhasil dilatih.


In [29]:
# 6. Evaluate the model on the held-out test partition.
# Predict labels for the test features, then compare them with the true labels.
Y_pred = classifier.predict(X_test)
test_accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
# accuracy_score returns a fraction; it is shown as a percentage.
print(f"Akurasi Model pada Data Tes: {test_accuracy * 100:.2f}%")

Akurasi Model pada Data Tes: 93.17%


In [31]:
# 7. Save the model and its metadata for the Streamlit app.
# Each artifact is written inside a `with` block so the file handle is flushed
# and closed deterministically instead of being left to garbage collection.

# Trained classifier.
model_filename = 'prediksi_penyakit_model.sav'
with open(model_filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)
print(f"Model disimpan sebagai: {model_filename}")

# Class labels in encoder order, so predicted integer codes can be mapped back.
class_names_filename = 'class_names.sav'
with open(class_names_filename, 'wb') as class_names_file:
    pickle.dump(encoder.classes_, class_names_file)
print(f"Nama kelas disimpan sebagai: {class_names_filename}")

# Feature column names, in the order of the columns of X.
feature_names_filename = 'feature_names.sav'
with open(feature_names_filename, 'wb') as feature_names_file:
    pickle.dump(list(X.columns), feature_names_file)
print(f"Nama fitur disimpan sebagai: {feature_names_filename}")

print("\n--- Persiapan Deployment Selesai ---")
print("Anda sekarang memiliki 3 file .sav yang siap digunakan oleh Streamlit.")

Model disimpan sebagai: prediksi_penyakit_model.sav
Nama kelas disimpan sebagai: class_names.sav
Nama fitur disimpan sebagai: feature_names.sav

--- Persiapan Deployment Selesai ---
Anda sekarang memiliki 3 file .sav yang siap digunakan oleh Streamlit.
