In [36]:
import math
import json
import pandas as pd
from collections import Counter

In [37]:
# Load the weather dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("../../dataset/weather_data.csv")
# Preview the first rows (head() shows five by default) to check the column layout.
df.head()

Unnamed: 0,No,Outlook,Temperature,Humidity,Wind,Play
0,1,Sunny,Hot,High,Weak,No
1,2,Sunny,Hot,High,Strong,No
2,3,Overcast,Hot,High,Weak,Yes
3,4,Rain,Mild,High,Weak,Yes
4,5,Rain,Cool,Normal,Weak,Yes


In [38]:
def entropy(data):
    """Return the Shannon entropy, in bits, of the labels in ``data``.

    Args:
        data: Sized collection of hashable labels.

    Returns:
        float: ``-sum(p * log2(p))`` over the relative frequency ``p`` of
        each distinct label. An empty collection yields ``0.0``.
    """
    total = len(data)
    bits = 0.0
    # Counter only holds labels that occur, so every share is > 0 and
    # log2() is always defined.
    for _, count in Counter(data).items():
        share = count / total
        bits = bits - share * math.log2(share)
    return bits

In [39]:
def gain(data_input, data_output, fitur):
    """Return the information gain of splitting on feature ``fitur``.

    Args:
        data_input: Iterable of rows; each row is indexed with ``fitur``.
        data_output: Sized collection of labels, paired with the rows by
            position.
        fitur: Index (or key) of the feature to evaluate inside each row.

    Returns:
        float: Entropy of all labels minus the size-weighted entropy of the
        label groups obtained by bucketing rows on their ``fitur`` value.
    """
    total = len(data_output)
    parent_entropy = entropy(data_output)

    # Bucket the labels by the value each row holds for the chosen feature.
    buckets = {}
    for row, target in zip(data_input, data_output):
        value = row[fitur]
        if value not in buckets:
            buckets[value] = []
        buckets[value].append(target)

    # Each bucket contributes its entropy weighted by its share of the data.
    weighted_entropy = 0.0
    for bucket in buckets.values():
        weight = len(bucket) / total
        weighted_entropy += weight * entropy(bucket)

    return parent_entropy - weighted_entropy

In [40]:
def _label_mayoritas(labels):
    """Return the most frequent label; ties go to the label counted first."""
    return Counter(labels).most_common(1)[0][0]


def buat_tree(data_input, data_output, feature_indices, min_gain=1e-12):
    """Recursively build an ID3-style decision tree as nested dicts.

    Args:
        data_input: List of rows; each row is indexed by a feature index.
        data_output: List of labels, paired with the rows by position.
        feature_indices: Feature indices still available for splitting.
        min_gain: Smallest information gain that justifies a split. Gains at
            or below it are treated as zero, which absorbs floating-point
            rounding noise in the entropy sums. Pass ``0`` to split on any
            strictly positive gain.

    Returns:
        Either a leaf (a label) or ``{feature_index: {value: subtree}}``.
        Branch values are sorted when they are mutually comparable, otherwise
        they keep first-seen order, so the result is reproducible.

    Raises:
        ValueError: If ``data_output`` is empty.

    Note:
        Feature values are used as dict keys, so they must be hashable.
    """
    if len(data_output) == 0:
        raise ValueError("buat_tree() needs at least one training example")

    # All labels identical -> leaf.
    if len(set(data_output)) == 1:
        return data_output[0]

    # No feature left to split on -> fall back to the majority label.
    if not feature_indices:
        return _label_mayoritas(data_output)

    # Highest gain wins; on equal gain the larger feature index wins, which is
    # what sorting the (gain, index) tuples in descending order used to pick.
    best_gain, best_fitur = max(
        (gain(data_input, data_output, idx), idx) for idx in feature_indices
    )

    # No feature separates the labels (up to rounding noise) -> majority leaf.
    if best_gain <= min_gain:
        return _label_mayoritas(data_output)

    # Partition rows and labels by the chosen feature's value in a single
    # pass instead of rescanning the whole dataset for every value.
    partitions = {}
    for row, label in zip(data_input, data_output):
        rows, labels = partitions.setdefault(row[best_fitur], ([], []))
        rows.append(row)
        labels.append(label)

    # A stable branch order keeps printed / serialized trees reproducible.
    try:
        ordered_values = sorted(partitions)
    except TypeError:
        # Values of mixed, non-comparable types: keep first-seen order.
        ordered_values = list(partitions)

    # The remaining features are the same for every branch; compute them once.
    sisa_fitur = [i for i in feature_indices if i != best_fitur]

    cabang = {}
    for val in ordered_values:
        rows, labels = partitions[val]
        cabang[val] = buat_tree(rows, labels, sisa_fitur, min_gain)

    return {best_fitur: cabang}

In [41]:
def _cari_teks(kamus, kode, default):
    """Return the first text key of ``kamus`` whose code equals ``kode``, else ``default``."""
    return next((teks for teks, angka in kamus.items() if angka == kode), default)


def konversi_ke_teks(node, feature_names, mapping, target='Play'):
    """Translate an encoded tree into one keyed by feature and category names.

    Turns ``{0: {1: ...}}`` into ``{'Outlook': {'Sunny': ...}}``.

    Args:
        node: A subtree (``{feature_index: {value_code: subtree}}``) or a
            leaf holding a label code.
        feature_names: Sequence mapping a feature index to its column name.
        mapping: ``{column_name: {text: code}}`` dictionaries used to encode
            the data; they are searched in reverse to decode.
        target: Name of the label column inside ``mapping`` used to decode
            leaves. Defaults to ``'Play'``.

    Returns:
        The decoded tree. A leaf whose code has no text is returned
        unchanged. A branch value without a text (including every value of a
        feature that has no entry in ``mapping``) becomes ``str(value)``.
    """
    # Leaf: decode the label code (e.g. 0 -> 'No', 1 -> 'Yes').
    if not isinstance(node, dict):
        return _cari_teks(mapping.get(target, {}), node, node)

    tree_baru = {}
    for fitur_idx, cabang in node.items():
        # Feature index -> column name (e.g. 0 -> 'Outlook').
        nama_fitur = feature_names[fitur_idx]
        # A feature with no mapping falls back to str(value) for each branch,
        # the same way a single unmapped value does.
        kamus_fitur = mapping.get(nama_fitur, {})

        # Value code -> category text (e.g. 1 -> 'Sunny'), then recurse.
        tree_baru[nama_fitur] = {
            _cari_teks(kamus_fitur, nilai_idx, str(nilai_idx)):
                konversi_ke_teks(subtree, feature_names, mapping, target)
            for nilai_idx, subtree in cabang.items()
        }

    return tree_baru

In [42]:
if __name__ == "__main__":
    # Drop the running row number if it is present; errors='ignore' keeps the
    # cell re-runnable once the column is already gone.
    df = df.drop(columns=['No'], errors='ignore')

    # 1. Encoding dictionaries: category text -> integer code, per column
    mapping = {
        'Outlook':     {'Sunny': 1, 'Overcast': 2, 'Rain': 3},
        'Temperature': {'Hot': 1, 'Cool': 2, 'Mild': 3},
        'Humidity':    {'Normal': 1, 'High': 2},
        'Wind':        {'Strong': 1, 'Weak': 2},
        'Play':        {'No': 0, 'Yes': 1}
    }

    # 2. Encode the data (text -> codes)
    for col, map_val in mapping.items():
        if col not in df.columns:
            continue

        # `df` is overwritten with the encoded values, so this cell may see
        # already-encoded data when it is run again. Series.map(dict) would
        # turn those integers into NaN; passing unmapped values through keeps
        # the encoding idempotent.
        df[col] = df[col].map(lambda v, kamus=map_val: kamus.get(v, v))

        # Anything that is not a known code now is an unexpected category (or
        # a missing value); fail loudly instead of training on it.
        kode_valid = set(map_val.values())
        tak_dikenal = [v for v in df[col].unique() if v not in kode_valid]
        if tak_dikenal:
            raise ValueError(
                "Column {!r} contains values missing from mapping: {!r}".format(col, tak_dikenal)
            )

    # The last column is the label; every column before it is a feature.
    X = df[df.columns[:-1]].values.tolist()
    y = df[df.columns[-1]].tolist()
    feature_names = df.columns[:-1].tolist() # ['Outlook', 'Temperature', ...]
    f_ids = list(range(len(feature_names)))

    # 3. Build the tree (keys and leaves are integer codes)
    tree_angka = buat_tree(X, y, f_ids)

    print("--- Hasil Tree (Versi Angka/Raw) ---")
    print(tree_angka)

    # 4. Convert the codes back to text so the tree is human-readable
    tree_teks = konversi_ke_teks(tree_angka, feature_names, mapping)

    print("\n--- Hasil Tree (Versi Teks Dictionary) ---")
    print(tree_teks)

--- Hasil Tree (Versi Angka/Raw) ---
{0: {1: {2: {1: 1, 2: 0}}, 2: 1, 3: {3: {1: 0, 2: 1}}}}

--- Hasil Tree (Versi Teks Dictionary) ---
{'Outlook': {'Sunny': {'Humidity': {'Normal': 'Yes', 'High': 'No'}}, 'Overcast': 'Yes', 'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}}}}


In [43]:
# Serialize the text-labelled tree to a JSON string, indented by 4 spaces.
json_output = json.dumps(tree_teks, indent=4)

# Show the JSON text.
print(json_output)


{
    "Outlook": {
        "Sunny": {
            "Humidity": {
                "Normal": "Yes",
                "High": "No"
            }
        },
        "Overcast": "Yes",
        "Rain": {
            "Wind": {
                "Strong": "No",
                "Weak": "Yes"
            }
        }
    }
}
