In [11]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.preprocessing import OneHotEncoder

In [12]:
# Read the source CSV into a DataFrame; the path is relative to the
# notebook's working directory.
dataset_path = '../data/revisi_data.csv'
data = pd.read_csv(dataset_path)

In [13]:
# Preview the first five rows to sanity-check the loaded columns.
data.head(n=5)

Unnamed: 0,NO,Produk_Tahu,aroma,tekstur,cita_rasa,masa_kadaluarsa,Kualitas
0,1,Tahu Putih,Beraroma Gurih,Hancur,Gurih,1 Hari Setelah Produksi,Tinggi
1,2,Tahu Kuning,Beraroma Gurih,Kenyal,Tidak Begitu Gurih,1 Hari Setelah Produksi,Tinggi
2,3,Tahu Matang,Tidak Begitu Beraroma,Lembek,Gurih,2 Hari Setelah Produksi,Rendah
3,4,Tahu Sumedang,Beraroma Gurih,Kenyal,Gurih,1 Hari Setelah Produksi,Tinggi
4,5,Tahu Pletok,Beraroma Gurih,Hancur,Asam,3 Hari Setelah Produksi,Rendah


In [14]:
# Select the categorical predictor columns (X) and the label column (y).
feature_columns = [
    'Produk_Tahu',
    'aroma',
    'tekstur',
    'cita_rasa',
    'masa_kadaluarsa',
]
target_column = 'Kualitas'

X = data[feature_columns]
y = data[target_column]

In [15]:
# One-hot encode the categorical features into a dense array
# (sparse_output=False), one indicator column per observed category.
# NOTE(review): the encoder is fitted on all rows of X, i.e. before any
# train/test split is made -- confirm this is intended.
# NOTE(review): with the default handle_unknown setting, transforming a
# category that was not seen during fit raises an error -- confirm this is
# the desired behaviour wherever the fitted encoder is reused.
encoder = OneHotEncoder(sparse_output=False).fit(X)
X_encoded = encoder.transform(X)

In [16]:
# Split the encoded data into training and test sets: 20% is held out for
# testing, and the fixed random_state makes the split repeatable.
# NOTE(review): the split is not stratified on y; consider stratify=y if the
# class balance of the held-out set matters.
split_parts = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [17]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np

# Assumes X_train, X_test, y_train and y_test have already been defined
# by an earlier cell.

# Build and fit the Naive Bayes model (fit() returns the fitted estimator).
# NOTE(review): GaussianNB models each feature as Gaussian, while the
# features here are one-hot indicator columns -- confirm this variant is the
# intended choice (e.g. versus BernoulliNB / CategoricalNB).
nb_model = GaussianNB().fit(X_train, y_train)

# Predict labels for the held-out test set
y_pred_nb = nb_model.predict(X_test)

# Overall accuracy on the test set
accuracy_nb = accuracy_score(y_test, y_pred_nb)
print(f"Akurasi Naive Bayes: {accuracy_nb:.2f}")

# Per-class precision / recall / F1 summary
report_nb = classification_report(y_test, y_pred_nb)
print("Classification Report:", report_nb, sep="\n")

# Confusion matrix with an explicit label order: row/column k corresponds to
# nb_model.classes_[k] (rows = true class, columns = predicted class), so the
# specificity values below can be reported by class name instead of by a
# bare matrix index.
class_names = nb_model.classes_
conf_matrix = confusion_matrix(y_test, y_pred_nb, labels=class_names)

# Per-class specificity, treating each class in turn as the "positive" one:
#   specificity = TN / (TN + FP)
total_samples = conf_matrix.sum()
specificities = {}  # keyed by class index (position in class_names)
for i in range(len(conf_matrix)):
    # FP: predicted as class i but truly another class
    # (column i without its diagonal entry).
    FP = conf_matrix[:, i].sum() - conf_matrix[i, i]
    # TN: every sample outside both row i and column i.
    TN = total_samples - conf_matrix[i, :].sum() - FP
    # Fall back to 0 when the test set holds no negatives for this class.
    specificities[i] = TN / (TN + FP) if (TN + FP) != 0 else 0

print("Specificities for each class:")
for class_index, specificity in specificities.items():
    # Print the class name so the line can be matched with the
    # classification report rows.
    print(f"Class {class_names[class_index]}: {specificity:.2f}")


Akurasi Naive Bayes: 0.91
Classification Report:
              precision    recall  f1-score   support

      Rendah       0.86      1.00      0.92        12
      Tinggi       1.00      0.80      0.89        10

    accuracy                           0.91        22
   macro avg       0.93      0.90      0.91        22
weighted avg       0.92      0.91      0.91        22

Specificities for each class:
Class 0: 0.80
Class 1: 1.00


In [19]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import numpy as np

# Assumes X_train, X_test, y_train and y_test have already been defined
# by an earlier cell.

# Build and fit the CART (decision tree) model.
# random_state is fixed because scikit-learn permutes the candidate features
# at every split; without a seed, ties between equally good splits can be
# broken differently from run to run, producing a different tree.
cart_model = DecisionTreeClassifier(random_state=42)
cart_model.fit(X_train, y_train)

# Predict labels for the held-out test set
y_pred_cart = cart_model.predict(X_test)

# Overall accuracy on the test set
accuracy_cart = accuracy_score(y_test, y_pred_cart)
print(f"Akurasi CART: {accuracy_cart:.2f}")

# Per-class precision / recall / F1 summary
report_cart = classification_report(y_test, y_pred_cart)
print("Classification Report:")
print(report_cart)

# Confusion matrix with an explicit label order: row/column k corresponds to
# cart_model.classes_[k] (rows = true class, columns = predicted class), so
# the specificity values below can be reported by class name instead of by a
# bare matrix index.
class_names = cart_model.classes_
conf_matrix = confusion_matrix(y_test, y_pred_cart, labels=class_names)

# Per-class specificity, treating each class in turn as the "positive" one:
#   specificity = TN / (TN + FP)
total_samples = conf_matrix.sum()
specificities = {}  # keyed by class index (position in class_names)
for i in range(len(conf_matrix)):
    # FP: predicted as class i but truly another class
    # (column i without its diagonal entry).
    FP = conf_matrix[:, i].sum() - conf_matrix[i, i]
    # TN: every sample outside both row i and column i.
    TN = total_samples - conf_matrix[i, :].sum() - FP
    # Fall back to 0 when the test set holds no negatives for this class.
    specificities[i] = TN / (TN + FP) if (TN + FP) != 0 else 0

print("Specificities for each class:")
for class_index, specificity in specificities.items():
    # Print the class name so the line can be matched with the
    # classification report rows.
    print(f"Class {class_names[class_index]}: {specificity:.2f}")


Akurasi CART: 1.00
Classification Report:
              precision    recall  f1-score   support

      Rendah       1.00      1.00      1.00        12
      Tinggi       1.00      1.00      1.00        10

    accuracy                           1.00        22
   macro avg       1.00      1.00      1.00        22
weighted avg       1.00      1.00      1.00        22

Specificities for each class:
Class 0: 1.00
Class 1: 1.00


In [39]:
# Re-fit the CART model on the training split; this is the `cart_model`
# object that the persistence cell writes to disk.
# random_state is fixed so the persisted tree is reproducible across runs.
# NOTE(review): this re-fit replaces the `cart_model` evaluated earlier in
# the notebook; the two trees are only guaranteed to be identical when both
# fits use the same random_state.
cart_model = DecisionTreeClassifier(random_state=42)
cart_model.fit(X_train, y_train)

In [40]:
import joblib

# Make sure the output directory exists; joblib.dump does not create missing
# parent directories and would fail on a fresh checkout otherwise.
Path('../model').mkdir(parents=True, exist_ok=True)

# NOTE: joblib files are pickle-based -- only load them from trusted
# sources.

# Persist the fitted Naive Bayes model
joblib.dump(nb_model, '../model/naive_bayes_model.pkl')

# Persist the fitted CART model
joblib.dump(cart_model, '../model/cart_model.pkl')

# Persist the fitted one-hot encoder so the same encoding can be applied to
# new inputs; kept as the last expression so the cell displays the written
# path.
joblib.dump(encoder, '../model/onehot_encoder.pkl')


['../model/onehot_encoder.pkl']