# Read Dataset Fruit

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the fruit dataset from the Excel workbook, then echo the frame so the
# notebook renders it as the cell output.
excel_path = 'fruit.xlsx'
df_fruit = pd.read_excel(excel_path)
df_fruit

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
df_fruit.info()

In [None]:
# Show the descriptive statistics computed by DataFrame.describe().
df_fruit.describe()

In [None]:
# List the distinct values of the 'name' column, i.e. the classes/targets/labels.
df_fruit['name'].unique()

In [None]:
# Count how many rows belong to each value of 'name' (orange vs. grapefruit).
df_fruit['name'].value_counts()

# Split Dependent (Target/Label/Class) & Independent (Atribut/Fitur) Variables

In [None]:
# Split by column position: every column except the last is a feature, and
# the last column alone (kept as a one-column DataFrame) is the label.
atribut, label = df_fruit.iloc[:, :-1], df_fruit.iloc[:, -1:]

In [None]:
# Display the feature DataFrame (every column of df_fruit except the last).
atribut

# Transformasi Dataset

In [None]:
# Replace each listed column with the integer codes produced by LabelEncoder.
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
# One shared encoder is refit per column, in this exact order, so after the
# loop `le` holds the classes of 'name' (the column fitted last).
# NOTE(review): LabelEncoder maps each value to its rank among the sorted
# unique values; if the five feature columns are already numeric this discards
# their real magnitudes - confirm that is intended.
for column in ('diameter', 'weight', 'red', 'green', 'blue', 'name'):
    df_fruit[column] = le.fit_transform(df_fruit[column].values)

In [None]:
# Split the encoded frame again, this time into the variables used for modelling.
last_col = df_fruit.shape[1] - 1

x = df_fruit.iloc[:, :last_col]     # all rows, every column before the last one
y = df_fruit.iloc[:, last_col:]     # all rows, only the last column (one-column DataFrame)
print(x)    # swap in y to inspect the label instead

In [None]:
# Persist the encoded dataset to a new CSV file, leaving out the row index.
csv_path = 'FruitTransform.csv'
df_fruit.to_csv(csv_path, index=False)

# Split Train Test

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
split = train_test_split(x, y, test_size=0.2, random_state=0)
x_train, x_test, y_train, y_test = split

# Klasifikasi Decision Tree (kriteria entropy, ala ID3) & Akurasi

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Training / Fit Model

In [None]:
# Build the decision tree model: splits are scored with the entropy criterion
# and the tree is limited to a depth of 4; the seed makes training repeatable.
DTC = DecisionTreeClassifier(
    ccp_alpha=0.0, class_weight=None, criterion='entropy',
    max_depth=4, max_features=None, max_leaf_nodes=None,
    min_impurity_decrease=0.0, min_samples_leaf=1,
    min_samples_split=2, min_weight_fraction_leaf=0.0,
    random_state=42, splitter='best')
Model_ID3 = DTC.fit(x_train, y_train)

# Check accuracy.
# Predict each split exactly once and reuse the result for every metric,
# instead of re-running the model for each report.
train_pred = DTC.predict(x_train)
test_pred = DTC.predict(x_test)

DTC_Akurasi = accuracy_score(y_test, test_pred)
print(f'Akurasi Data Training = {accuracy_score(y_train, train_pred)}')
print(f'Akurasi Data Testing = {DTC_Akurasi} \n')

# Compute the confusion matrix once; it is both printed and kept in `Confusion`.
Confusion = confusion_matrix(y_test, test_pred)
print(f'Confusion Matrix = \n{Confusion}\n')
print(f'classification_report = \n{classification_report(y_test, test_pred)}')

# Simulasi Model

In [None]:
# One sample to classify; the values are assumed to follow the column order
# of the training frame `x` (and to be in the same encoded units).
input_data = (682,5381,39,41,31)

input_data_as_numpy_array = np.array(input_data)    # convert the tuple to a NumPy array

input_data_reshape = input_data_as_numpy_array.reshape(1,-1)    # one row, as many columns as features

# The model was fitted on a DataFrame, so give the sample the same column
# names; predicting on a bare ndarray makes scikit-learn warn that
# "X does not have valid feature names".
input_frame = pd.DataFrame(input_data_reshape, columns=x.columns)

Prediksi = Model_ID3.predict(input_frame)    # predicted (encoded) class for the sample
print(Prediksi)

# Translate the encoded class back to a fruit name; anything else is unknown.
fruit_names = {0: 'grapefruit', 1: 'orange'}
print(fruit_names.get(Prediksi[0], 'Name tidak diketahui'))

# Visualisasi Pohon Keputusan

In [None]:
import matplotlib.pyplot as plt         # used to create the figure
from sklearn.tree import plot_tree      # draws a fitted decision tree

# Draw the fitted tree on a large canvas so the node text stays readable.
fig, ax = plt.subplots(figsize=(25, 20))
plot_tree(
    Model_ID3,
    # Pass a plain list, as the export_text cell does; some scikit-learn
    # releases reject a pandas Index for this argument.
    feature_names=list(x.columns),
    class_names=['grapefruit','orange'],
    filled=True, fontsize=10, rounded=True, ax=ax)
plt.show()

# Aturan (Rules)

In [None]:
from sklearn.tree import export_text

# Render the fitted tree's decision rules as plain text, labelled with the
# feature column names.
feature_labels = list(x.columns)
rules = export_text(Model_ID3, feature_names=feature_labels)
print(rules)    # echo the rules to the cell output / terminal

# Save Model

In [None]:
import pickle

# Serialize the fitted model to disk. The `with` block guarantees the file
# handle is flushed and closed even if pickling fails part-way.
filename = 'ID3_Fruit.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(Model_ID3, model_file)