In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import silhouette_score, davies_bouldin_score
import time

# === Helper that picks the per-year columns ===
def extract_year_columns(df, keyword):
    """Return the column labels of ``df`` that contain ``keyword`` and end in digits.

    A label qualifies when it is a string, contains ``keyword`` as a
    substring, and its last whitespace-separated token consists only of
    digits (for example ``"Berat 2020"``).

    Args:
        df: Object exposing a ``columns`` iterable of labels.
        keyword: Substring that must occur in the label.

    Returns:
        The matching labels, in their original order (possibly empty).
    """
    selected = []
    for col in df.columns:
        # Labels are not guaranteed to be strings (e.g. a numeric header
        # cell); `keyword in col` would raise TypeError on those.
        if not isinstance(col, str) or keyword not in col:
            continue
        tokens = col.split()
        # A blank label splits to an empty list, so guard before indexing.
        if tokens and tokens[-1].isdigit():
            selected.append(col)
    return selected

# === Clustering evaluation function ===
def evaluasi_klaster_skor(file_path, method="kmeans", is_ekspor=True):
    """Score clusterings of the export dataset for 2 through 7 clusters.

    The Excel file is read with ``pd.read_excel``. The yearly 'Berat' and
    'Value' columns are summed per row into two features, standardized with
    ``StandardScaler`` and clustered once per cluster count.

    Args:
        file_path: Path of the Excel file to read.
        method: ``"kmeans"`` selects ``KMeans`` (``random_state=60``,
            ``init="k-means++"``); any other value selects
            ``AgglomerativeClustering`` with its default settings.
        is_ekspor: Must be true; only the export dataset is supported.

    Returns:
        A DataFrame with one row per cluster count and the columns
        "Jumlah Cluster", "Silhouette", "Davies-Bouldin Index",
        "Waktu Eksekusi (s)" and "Catatan". Scores and time are rounded to
        four decimals. They are ``None`` when a run was skipped or failed,
        in which case "Catatan" holds the reason. A file without data rows
        yields "Jumlah data tidak cukup" for every cluster count.

    Raises:
        NotImplementedError: If ``is_ekspor`` is false.
        ValueError: If no yearly 'Berat' or 'Value' columns are found.
    """
    # Reject the unsupported mode before doing any file I/O.
    if not is_ekspor:
        raise NotImplementedError("Versi ini hanya untuk dataset ekspor.")

    print("📥 Membaca file:", file_path)
    df = pd.read_excel(file_path)

    # Validate the columns and build the features.
    berat_cols = extract_year_columns(df, 'Berat')
    value_cols = extract_year_columns(df, 'Value')
    if not berat_cols or not value_cols:
        raise ValueError("Kolom 'Berat' atau 'Value' tidak ditemukan.")

    fitur = pd.DataFrame({
        'Total_Berat': df[berat_cols].sum(axis=1),
        'Total_Value': df[value_cols].sum(axis=1),
    })
    n_samples = len(fitur)
    # StandardScaler rejects an empty array; with no rows the loop below
    # never touches X_scaled and reports every cluster count as skipped.
    X_scaled = StandardScaler().fit_transform(fitur) if n_samples else None

    def _baris(n_clusters, catatan, silhouette=None, dbi=None, waktu=None):
        # One result row; key order defines the output column order.
        return {
            "Jumlah Cluster": n_clusters,
            "Silhouette": silhouette,
            "Davies-Bouldin Index": dbi,
            "Waktu Eksekusi (s)": waktu,
            "Catatan": catatan,
        }

    hasil = []

    print("🔄 Memulai evaluasi dari cluster 2 sampai 7...\n")

    for n_clusters in range(2, 8):
        # The scores need more samples than clusters.
        if n_samples <= n_clusters:
            hasil.append(_baris(n_clusters, "Jumlah data tidak cukup"))
            continue

        try:
            # perf_counter is the monotonic clock meant for measuring intervals.
            start = time.perf_counter()
            if method == "kmeans":
                model = KMeans(n_clusters=n_clusters, random_state=60, init="k-means++")
            else:
                model = AgglomerativeClustering(n_clusters=n_clusters)

            labels = model.fit_predict(X_scaled)

            # Both scores are undefined when only one label was produced.
            if len(set(labels)) < 2:
                hasil.append(_baris(n_clusters, "Semua data masuk 1 cluster"))
                continue

            silhouette = round(silhouette_score(X_scaled, labels), 4)
            dbi = round(davies_bouldin_score(X_scaled, labels), 4)
            # The measured span covers fitting and both score computations.
            exec_time = round(time.perf_counter() - start, 4)

            hasil.append(_baris(n_clusters, "-", silhouette, dbi, exec_time))

        except Exception as e:
            # Best effort: record the failure and keep evaluating the
            # remaining cluster counts.
            hasil.append(_baris(n_clusters, f"Error: {str(e)}"))

    df_hasil = pd.DataFrame(hasil)
    return df_hasil

# === Call the function and display the result ===
file_path = "../dataset/data ekspor1.xlsx"
hasil_df = evaluasi_klaster_skor(file_path, method="kmeans", is_ekspor=True)
hasil_df  # bare trailing expression so the notebook renders the table


📥 Membaca file: ../dataset/data ekspor1.xlsx
🔄 Memulai evaluasi dari cluster 2 sampai 7...



Unnamed: 0,Jumlah Cluster,Silhouette,Davies-Bouldin Index,Waktu Eksekusi (s),Catatan
0,2,0.7884,0.5525,0.0089,-
1,3,0.7447,0.3518,0.0162,-
2,4,0.6996,0.3641,0.0139,-
3,5,0.6653,0.386,0.0113,-
4,6,0.6396,0.4794,0.0147,-
5,7,0.618,0.4911,0.0116,-


In [14]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score, davies_bouldin_score
import time

# Helper that picks the per-year export columns
def extract_year_columns(df, keyword):
    """Return the column labels of ``df`` that contain ``keyword`` and end in digits.

    A label qualifies when it is a string, contains ``keyword`` as a
    substring, and its last whitespace-separated token consists only of
    digits (for example ``"Value 2020"``).

    Args:
        df: Object exposing a ``columns`` iterable of labels.
        keyword: Substring that must occur in the label.

    Returns:
        The matching labels, in their original order (possibly empty).
    """
    selected = []
    for col in df.columns:
        # Labels are not guaranteed to be strings (e.g. a numeric header
        # cell); `keyword in col` would raise TypeError on those.
        if not isinstance(col, str) or keyword not in col:
            continue
        tokens = col.split()
        # A blank label splits to an empty list, so guard before indexing.
        if tokens and tokens[-1].isdigit():
            selected.append(col)
    return selected

# Export clustering evaluation with Agglomerative clustering
def evaluasi_ekspor_agglomerative(file_path):
    """Score Ward agglomerative clusterings of the export dataset for k = 2..7.

    The Excel file is read with ``pd.read_excel``. The yearly 'Berat' and
    'Value' columns are summed per row into two features, standardized with
    ``StandardScaler`` and clustered once per cluster count.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        A DataFrame with one row per cluster count and the columns
        "Jumlah Cluster", "Silhouette", "Davies-Bouldin Index",
        "Waktu Eksekusi (s)" and "Catatan". Scores and time are rounded to
        four decimals. They are ``None`` when a run was skipped or failed,
        in which case "Catatan" holds the reason. A file without data rows
        yields "Jumlah data tidak cukup" for every cluster count.

    Raises:
        ValueError: If no yearly 'Berat' or 'Value' columns are found.
    """
    print("📥 Membaca file:", file_path)
    df = pd.read_excel(file_path)

    # Validate the columns.
    berat_cols = extract_year_columns(df, 'Berat')
    value_cols = extract_year_columns(df, 'Value')
    if not berat_cols or not value_cols:
        raise ValueError("Kolom 'Berat' atau 'Value' tidak ditemukan atau tidak sesuai format.")

    # Per-row totals used as features.
    fitur = pd.DataFrame({
        'Total_Berat': df[berat_cols].sum(axis=1),
        'Total_Value': df[value_cols].sum(axis=1),
    })
    n_samples = len(fitur)
    # StandardScaler rejects an empty array; with no rows the loop below
    # never touches X_scaled and reports every cluster count as skipped.
    X_scaled = StandardScaler().fit_transform(fitur) if n_samples else None

    def _baris(n_clusters, catatan, silhouette=None, dbi=None, waktu=None):
        # One result row; key order defines the output column order.
        return {
            "Jumlah Cluster": n_clusters,
            "Silhouette": silhouette,
            "Davies-Bouldin Index": dbi,
            "Waktu Eksekusi (s)": waktu,
            "Catatan": catatan,
        }

    hasil = []

    print("🔄 Evaluasi Agglomerative dari cluster 2 sampai 7...\n")

    for n_clusters in range(2, 8):
        # The scores need more samples than clusters.
        if n_samples <= n_clusters:
            hasil.append(_baris(n_clusters, "Jumlah data tidak cukup"))
            continue

        try:
            # perf_counter is the monotonic clock meant for measuring intervals.
            start = time.perf_counter()
            model = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')
            labels = model.fit_predict(X_scaled)

            # Both scores are undefined when only one label was produced.
            if len(set(labels)) < 2:
                hasil.append(_baris(n_clusters, "Semua data masuk 1 cluster"))
                continue

            silhouette = round(silhouette_score(X_scaled, labels), 4)
            dbi = round(davies_bouldin_score(X_scaled, labels), 4)
            # The measured span covers fitting and both score computations.
            exec_time = round(time.perf_counter() - start, 4)

            hasil.append(_baris(n_clusters, "-", silhouette, dbi, exec_time))

        except Exception as e:
            # Best effort: record the failure and keep evaluating the
            # remaining cluster counts.
            hasil.append(_baris(n_clusters, f"Error: {str(e)}"))

    df_hasil = pd.DataFrame(hasil)
    return df_hasil

file_path = "../dataset/data ekspor1.xlsx"  # Change the path if needed
hasil_df = evaluasi_ekspor_agglomerative(file_path)
hasil_df  # REQUIRED so the output is displayed in VSCode/Jupyter


📥 Membaca file: ../dataset/data ekspor1.xlsx
🔄 Evaluasi Agglomerative dari cluster 2 sampai 7...



Unnamed: 0,Jumlah Cluster,Silhouette,Davies-Bouldin Index,Waktu Eksekusi (s),Catatan
0,2,0.7884,0.5525,0.2235,-
1,3,0.7581,0.2137,0.0036,-
2,4,0.6996,0.3641,0.0048,-
3,5,0.663,0.345,0.0039,-
4,6,0.6416,0.4126,0.0051,-
5,7,0.6308,0.4522,0.0042,-


In [19]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score
import time

# Helper that picks the per-year production columns
def extract_year_columns(df, keyword):
    """Return the column labels of ``df`` that contain ``keyword`` and end in digits.

    A label qualifies when it is a string, contains ``keyword`` as a
    substring, and its last whitespace-separated token consists only of
    digits (for example ``"Produksi 2020"``).

    Args:
        df: Object exposing a ``columns`` iterable of labels.
        keyword: Substring that must occur in the label.

    Returns:
        The matching labels, in their original order (possibly empty).
    """
    selected = []
    for col in df.columns:
        # Labels are not guaranteed to be strings (e.g. a numeric header
        # cell); `keyword in col` would raise TypeError on those.
        if not isinstance(col, str) or keyword not in col:
            continue
        tokens = col.split()
        # A blank label splits to an empty list, so guard before indexing.
        if tokens and tokens[-1].isdigit():
            selected.append(col)
    return selected

# Production clustering evaluation with KMeans
def evaluasi_kopi_kmeans(file_path):
    """Score KMeans clusterings of the coffee production dataset for k = 2..7.

    The Excel file is read with ``pd.read_excel``. The yearly 'Produksi',
    'Luas' and 'Produktivitas' columns are summed per row into three
    features, standardized with ``StandardScaler`` and clustered once per
    cluster count with ``KMeans(init="k-means++", random_state=60)``.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        A DataFrame with one row per cluster count and the columns
        "Jumlah Cluster", "Silhouette", "Davies-Bouldin Index",
        "Waktu Eksekusi (s)" and "Catatan". Scores and time are rounded to
        four decimals. They are ``None`` when a run was skipped or failed,
        in which case "Catatan" holds the reason. A file without data rows
        yields "Jumlah data tidak cukup" for every cluster count.

    Raises:
        ValueError: If any of the three yearly column groups is missing.
    """
    print("📥 Membaca file:", file_path)
    df = pd.read_excel(file_path)

    # Validate the columns.
    produksi_cols = extract_year_columns(df, 'Produksi')
    luas_cols = extract_year_columns(df, 'Luas')
    produktivitas_cols = extract_year_columns(df, 'Produktivitas')
    if not produksi_cols or not luas_cols or not produktivitas_cols:
        raise ValueError("Kolom produksi, luas, atau produktivitas tidak ditemukan atau tidak sesuai format.")

    # Per-row totals used as features.
    fitur = pd.DataFrame({
        'Total_Produksi': df[produksi_cols].sum(axis=1),
        'Total_Luas': df[luas_cols].sum(axis=1),
        'Total_Produktivitas': df[produktivitas_cols].sum(axis=1),
    })
    n_samples = len(fitur)
    # StandardScaler rejects an empty array; with no rows the loop below
    # never touches X_scaled and reports every cluster count as skipped.
    X_scaled = StandardScaler().fit_transform(fitur) if n_samples else None

    def _baris(n_clusters, catatan, silhouette=None, dbi=None, waktu=None):
        # One result row; key order defines the output column order.
        return {
            "Jumlah Cluster": n_clusters,
            "Silhouette": silhouette,
            "Davies-Bouldin Index": dbi,
            "Waktu Eksekusi (s)": waktu,
            "Catatan": catatan,
        }

    hasil = []

    print("🔄 Evaluasi KMeans dari cluster 2 sampai 7...\n")

    for n_clusters in range(2, 8):
        # The scores need more samples than clusters.
        if n_samples <= n_clusters:
            hasil.append(_baris(n_clusters, "Jumlah data tidak cukup"))
            continue

        try:
            # perf_counter is the monotonic clock meant for measuring intervals.
            start = time.perf_counter()
            model = KMeans(n_clusters=n_clusters, init="k-means++", random_state=60)
            labels = model.fit_predict(X_scaled)

            # Both scores are undefined when only one label was produced.
            if len(set(labels)) < 2:
                hasil.append(_baris(n_clusters, "Semua data masuk 1 cluster"))
                continue

            silhouette = round(silhouette_score(X_scaled, labels), 4)
            dbi = round(davies_bouldin_score(X_scaled, labels), 4)
            # The measured span covers fitting and both score computations.
            exec_time = round(time.perf_counter() - start, 4)

            hasil.append(_baris(n_clusters, "-", silhouette, dbi, exec_time))

        except Exception as e:
            # Best effort: record the failure and keep evaluating the
            # remaining cluster counts.
            hasil.append(_baris(n_clusters, f"Error: {str(e)}"))

    return pd.DataFrame(hasil)

file_path = "../dataset/data kopi (prov).xlsx"  # Change to match your file path
hasil_df = evaluasi_kopi_kmeans(file_path)
hasil_df  # <- REQUIRED to show the output in Jupyter/VSCode


📥 Membaca file: ../dataset/data kopi (prov).xlsx
🔄 Evaluasi KMeans dari cluster 2 sampai 7...



Unnamed: 0,Jumlah Cluster,Silhouette,Davies-Bouldin Index,Waktu Eksekusi (s),Catatan
0,2,0.7246,0.411,0.0066,-
1,3,0.5041,0.6448,0.0074,-
2,4,0.4332,0.5559,0.0073,-
3,5,0.4829,0.5088,0.0076,-
4,6,0.51,0.381,0.0076,-
5,7,0.4834,0.4013,0.0075,-


In [22]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score, davies_bouldin_score
import time

# Helper function
def extract_year_columns(df, keyword):
    """Return the column labels of ``df`` that contain ``keyword`` and end in digits.

    A label qualifies when it is a string, contains ``keyword`` as a
    substring, and its last whitespace-separated token consists only of
    digits (for example ``"Luas 2020"``).

    Args:
        df: Object exposing a ``columns`` iterable of labels.
        keyword: Substring that must occur in the label.

    Returns:
        The matching labels, in their original order (possibly empty).
    """
    selected = []
    for col in df.columns:
        # Labels are not guaranteed to be strings (e.g. a numeric header
        # cell); `keyword in col` would raise TypeError on those.
        if not isinstance(col, str) or keyword not in col:
            continue
        tokens = col.split()
        # A blank label splits to an empty list, so guard before indexing.
        if tokens and tokens[-1].isdigit():
            selected.append(col)
    return selected

# AHC evaluation for the coffee production dataset
def evaluasi_kopi_agglomerative(file_path):
    """Score Ward agglomerative clusterings of the coffee dataset for k = 2..7.

    The Excel file is read with ``pd.read_excel``. The yearly 'Produksi',
    'Luas' and 'Produktivitas' columns are summed per row into three
    features, standardized with ``StandardScaler`` and clustered once per
    cluster count.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        A DataFrame with one row per cluster count and the columns
        "Jumlah Cluster", "Silhouette", "Davies-Bouldin Index",
        "Waktu Eksekusi (s)" and "Catatan". Scores and time are rounded to
        four decimals. They are ``None`` when a run was skipped or failed,
        in which case "Catatan" holds the reason. A file without data rows
        yields "Jumlah data tidak cukup" for every cluster count.

    Raises:
        ValueError: If any of the three yearly column groups is missing.
    """
    print("📥 Membaca file:", file_path)
    df = pd.read_excel(file_path)

    # Validate the columns.
    produksi_cols = extract_year_columns(df, 'Produksi')
    luas_cols = extract_year_columns(df, 'Luas')
    produktivitas_cols = extract_year_columns(df, 'Produktivitas')
    if not produksi_cols or not luas_cols or not produktivitas_cols:
        raise ValueError("Kolom Produksi, Luas, atau Produktivitas tidak ditemukan.")

    # Per-row totals used as features.
    fitur = pd.DataFrame({
        'Total_Produksi': df[produksi_cols].sum(axis=1),
        'Total_Luas': df[luas_cols].sum(axis=1),
        'Total_Produktivitas': df[produktivitas_cols].sum(axis=1),
    })
    n_samples = len(fitur)
    # StandardScaler rejects an empty array; with no rows the loop below
    # never touches X_scaled and reports every cluster count as skipped.
    X_scaled = StandardScaler().fit_transform(fitur) if n_samples else None

    def _baris(n_clusters, catatan, silhouette=None, dbi=None, waktu=None):
        # One result row; key order defines the output column order.
        return {
            "Jumlah Cluster": n_clusters,
            "Silhouette": silhouette,
            "Davies-Bouldin Index": dbi,
            "Waktu Eksekusi (s)": waktu,
            "Catatan": catatan,
        }

    hasil = []

    print("🔄 Evaluasi Agglomerative dari cluster 2 sampai 7...\n")

    for n_clusters in range(2, 8):
        # The scores need more samples than clusters.
        if n_samples <= n_clusters:
            hasil.append(_baris(n_clusters, "Jumlah data tidak cukup"))
            continue

        try:
            # perf_counter is the monotonic clock meant for measuring intervals.
            start = time.perf_counter()
            model = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')
            labels = model.fit_predict(X_scaled)

            # Both scores are undefined when only one label was produced.
            if len(set(labels)) < 2:
                hasil.append(_baris(n_clusters, "Semua data masuk 1 cluster"))
                continue

            silhouette = round(silhouette_score(X_scaled, labels), 4)
            dbi = round(davies_bouldin_score(X_scaled, labels), 4)
            # The measured span covers fitting and both score computations.
            exec_time = round(time.perf_counter() - start, 4)

            hasil.append(_baris(n_clusters, "-", silhouette, dbi, exec_time))

        except Exception as e:
            # Best effort: record the failure and keep evaluating the
            # remaining cluster counts.
            hasil.append(_baris(n_clusters, f"Error: {str(e)}"))

    return pd.DataFrame(hasil)

file_path = "../dataset/data kopi (prov).xlsx"  # Change to match your path
hasil_df = evaluasi_kopi_agglomerative(file_path)
hasil_df  # bare trailing expression so the notebook renders the table


📥 Membaca file: ../dataset/data kopi (prov).xlsx
🔄 Evaluasi Agglomerative dari cluster 2 sampai 7...



Unnamed: 0,Jumlah Cluster,Silhouette,Davies-Bouldin Index,Waktu Eksekusi (s),Catatan
0,2,0.7246,0.411,0.0044,-
1,3,0.5041,0.6448,0.0041,-
2,4,0.5133,0.5843,0.0025,-
3,5,0.5226,0.5037,0.0056,-
4,6,0.51,0.381,0.0041,-
5,7,0.4834,0.4013,0.0045,-
