In [None]:
# Install library tambahan jika diperlukan
!pip install scikit-fuzzy pandas scikit-learn matplotlib numpy
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np
import skfuzzy as fuzz
import matplotlib.pyplot as plt
from google.colab import files

# Upload the CSV file through the Colab file picker.
uploaded = files.upload()

# Fail early with a clear message instead of an IndexError further down.
# NOTE(review): assumes files.upload() returns an empty mapping when no file
# was selected -- confirm against the Colab documentation.
if not uploaded:
    raise ValueError("No file was uploaded; please select a CSV file.")

# Read the first uploaded file into a DataFrame.
file_name = next(iter(uploaded))
data = pd.read_csv(file_name)

Collecting scikit-fuzzy
  Downloading scikit_fuzzy-0.5.0-py2.py3-none-any.whl.metadata (2.6 kB)
Downloading scikit_fuzzy-0.5.0-py2.py3-none-any.whl (920 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m920.8/920.8 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-fuzzy
Successfully installed scikit-fuzzy-0.5.0


Saving exported_columns_sungaibatanghari2.csv to exported_columns_sungaibatanghari2.csv


In [None]:
# Restrict the frame to the target columns used for clustering
# (selecting a column that does not exist raises KeyError).
target_columns = ['TSS', 'pH', 'EC', 'TDS', 'CHLA']
data = data.loc[:, target_columns]

# Replace missing values with the median of their own column.
data = data.fillna(value=data.median())

# Standardize the features; the fitted scaler is kept in `scaler`.
scaler = StandardScaler()
scaled_data = scaler.fit(data).transform(data)


# Function to compute the WL index
def calculate_wl_index(u, cntr, data_points):
    """
    Compute the WL validity index of a fuzzy partition.

    The result is a compactness term divided by a separation term:
    - compactness: for each cluster, the sum over points of
      (membership ** 2) * (squared Euclidean distance to the centroid),
      divided by the cluster's total membership; then summed over clusters.
      A cluster whose total membership is 0 contributes nothing.
    - separation: minimum plus median of the squared Euclidean distances
      between every pair of centroids.

    Parameters:
    - u: fuzzy partition matrix, indexed as u[cluster] -> memberships per point
    - cntr: centroids, one row per cluster
    - data_points: data array, one row per point

    Returns:
    - compactness / separation, or float('inf') when the separation is 0
      (which includes the case of fewer than two centroids).
    """
    compactness = 0
    for k, center in enumerate(cntr):
        memberships = u[k]
        sq_dist_to_center = np.linalg.norm(data_points - center, axis=1) ** 2
        weighted_sq_dist = np.sum((memberships ** 2) * sq_dist_to_center)
        membership_total = np.sum(memberships)
        if membership_total != 0:
            compactness += weighted_sq_dist / membership_total

    # Squared distance for every unordered pair of centroids.
    n_centers = len(cntr)
    pairwise_sq_dist = []
    for first in range(n_centers):
        for second in range(first + 1, n_centers):
            pairwise_sq_dist.append(np.linalg.norm(cntr[first] - cntr[second]) ** 2)

    separation = 0
    if pairwise_sq_dist:
        separation = np.min(pairwise_sq_dist) + np.median(pairwise_sq_dist)

    if separation == 0:
        return float('inf')
    return compactness / separation

# Function to compute the Fuzzy Silhouette Index
def calculate_fsi(u, cntr, data_points, alpha=1):
    """
    Compute a centroid-based Fuzzy Silhouette Index (FSI).

    For every point, p is the cluster with the highest membership and q the
    cluster with the second highest. The point's silhouette is
    s = (b - a) / max(a, b), where a and b are the Euclidean distances from
    the point to the centroids of p and q (s = 0 when both distances are 0).
    Note that this uses centroid distances, not average pairwise distances
    to cluster members. Each silhouette is weighted by
    (u[p] - u[q]) ** alpha and the index is the weighted mean.

    Parameters:
    - u: fuzzy partition matrix (shape: [n_cluster, n_data])
    - cntr: centroid array (shape: [n_cluster, n_features])
    - data_points: data array (shape: [n_data, n_features])
    - alpha: exponent applied to the membership difference (default = 1)

    Returns:
    - FSI (float); 0.0 when the weights sum to zero (e.g. every point has
      equal top-two memberships, or there are no points), where the weighted
      mean is undefined.

    Raises:
    - ValueError: if fewer than two clusters are given, or if the shape of
      `u` does not match (n_cluster, n_data).
    """
    u = np.asarray(u, dtype=float)
    cntr = np.asarray(cntr, dtype=float)
    data_points = np.asarray(data_points, dtype=float)

    n_cluster = cntr.shape[0]
    n_data = data_points.shape[0]
    if n_cluster < 2:
        raise ValueError("calculate_fsi needs at least two clusters")
    if u.shape != (n_cluster, n_data):
        raise ValueError(
            f"u has shape {u.shape}, expected ({n_cluster}, {n_data})"
        )

    # Ascending argsort per column: the last row holds the cluster with the
    # highest membership, the second-to-last row the runner-up.
    order = np.argsort(u, axis=0)
    best = order[-1]
    runner_up = order[-2]
    cols = np.arange(n_data)

    # Distance from every point to every centroid, shape (n_cluster, n_data).
    # Built one centroid at a time to avoid a 3-D temporary.
    dist = np.stack([np.linalg.norm(data_points - center, axis=1) for center in cntr])
    a = dist[best, cols]        # distance to the point's own centroid
    b = dist[runner_up, cols]   # distance to the neighbouring centroid

    # Silhouette per point; entries stay 0 where both distances are 0.
    larger = np.maximum(a, b)
    silhouette = np.zeros(n_data)
    np.divide(b - a, larger, out=silhouette, where=larger != 0)

    # Weight by how clearly the point belongs to its best cluster.
    weights = (u[best, cols] - u[runner_up, cols]) ** alpha

    total_weight = np.sum(weights)
    if total_weight == 0:
        return 0.0
    return float(np.sum(silhouette * weights) / total_weight)

# Fuzzy C-Means
FUZZINESS = 2     # fuzzifier m: passed to cmeans and reused as the membership exponent in XB
RANDOM_SEED = 42  # fixed seed so the random initialisation of cmeans is reproducible

for n_clusters in [2, 3, 4, 5]:
    # Fuzzy C-Means clustering (skfuzzy expects the data as features x samples).
    cntr, u, _, _, _, _, _ = fuzz.cluster.cmeans(
        scaled_data.T, n_clusters, FUZZINESS, error=0.005, maxiter=1000,
        init=None, seed=RANDOM_SEED
    )
    # Hard assignment: each point goes to the cluster with the highest membership.
    cluster_labels = np.argmax(u, axis=0)

    # Store the (0-based) hard labels as a new column.
    data[f'Cluster_{n_clusters}'] = cluster_labels

    # Count the points per cluster (printed with 1-based cluster numbers).
    print(f"Jumlah data per klaster untuk {n_clusters} klaster:")
    for i in range(n_clusters):
        cluster_size = np.sum(cluster_labels == i)
        print(f"  Cluster {i+1}: {cluster_size}")

    # Fuzzy C-Means evaluation
    # Partition Coefficient (PC)
    pc = np.mean(np.sum(u**2, axis=0))
    # Partition Entropy (PE); memberships are clipped away from 0 inside the
    # log so that an exact zero contributes 0 instead of NaN (0 * -inf).
    u_safe = np.clip(u, np.finfo(float).eps, None)
    pe = -np.mean(np.sum(u * np.log(u_safe), axis=0))
    # Xie-Beni Index (XB): sum of u**m * squared distance, divided by
    # n * (minimum squared centroid separation).
    min_dist = np.min([np.linalg.norm(cntr[i] - cntr[j]) for i in range(len(cntr)) for j in range(i + 1, len(cntr))])
    # Squared distance between every centroid and every point, shape (n_clusters, n_data).
    distances = np.linalg.norm(scaled_data - cntr[:, None], axis=2)**2
    xb = np.sum((u ** FUZZINESS) * distances) / (len(data) * min_dist**2)
    # WL Index (replaces MDI)
    wl_index = calculate_wl_index(u, cntr, scaled_data)
    # Fuzzy Silhouette Index (FSI)
    fsi = calculate_fsi(u, cntr, scaled_data, alpha=1)

    # Print the evaluation results
    print(f"Evaluasi untuk {n_clusters} Klaster:")
    print(f"  Partition Coefficient (PC): {pc:.4f}")
    print(f"  Partition Entropy (PE): {pe:.4f}")
    print(f"  Xie-Beni Index (XB): {xb:.4f}")
    print(f"  WL Index: {wl_index:.4f}")
    print(f"  Fuzzy Silhouette Index (FSI): {fsi:.4f}")
    print("-" * 50)


Jumlah data per klaster untuk 2 klaster:
  Cluster 1: 81148
  Cluster 2: 48614
Evaluasi untuk 2 Klaster:
  Partition Coefficient (PC): 0.8305
  Partition Entropy (PE): 0.2867
  Xie-Beni Index (XB): 0.1666
  WL Index: 0.0853
  Fuzzy Silhouette Index (FSI): 0.7421
--------------------------------------------------
Jumlah data per klaster untuk 3 klaster:
  Cluster 1: 32016
  Cluster 2: 53894
  Cluster 3: 43852
Evaluasi untuk 3 Klaster:
  Partition Coefficient (PC): 0.7192
  Partition Entropy (PE): 0.5049
  Xie-Beni Index (XB): 0.4051
  WL Index: 0.1493
  Fuzzy Silhouette Index (FSI): 0.6801
--------------------------------------------------
Jumlah data per klaster untuk 4 klaster:
  Cluster 1: 40193
  Cluster 2: 29454
  Cluster 3: 37457
  Cluster 4: 22658
Evaluasi untuk 4 Klaster:
  Partition Coefficient (PC): 0.6408
  Partition Entropy (PE): 0.6745
  Xie-Beni Index (XB): 0.7344
  WL Index: 0.1563
  Fuzzy Silhouette Index (FSI): 0.6396
--------------------------------------------------
J

In [None]:
# Example: show the fuzzy memberships of the first 5 data points.
# `u` is the membership matrix left over from the clustering loop, i.e. from
# its last iteration -- NOTE(review): confirm no later cell reassigns `u`.
# The preview is capped at the number of available points so that a data set
# with fewer than 5 rows does not raise IndexError.
n_preview = min(5, u.shape[1])
for i in range(n_preview):
    print(f"Data ke-{i+1}: ", u[:, i])

Data ke-1:  [0.86366321 0.00534141 0.0259492  0.09494113 0.01010505]
Data ke-2:  [0.82971696 0.00594553 0.03052078 0.12236259 0.01145415]
Data ke-3:  [0.84160228 0.00581989 0.02892443 0.11258295 0.01107045]
Data ke-4:  [0.82777992 0.00664287 0.03218866 0.12085689 0.01253165]
Data ke-5:  [0.88406564 0.00410666 0.02087247 0.08307989 0.00787534]


In [None]:
# Write the frame (including the added cluster-label columns) to a CSV file
# without the index column.
output_file = 'fcm_hasil_bobotsama.csv'
data.to_csv(path_or_buf=output_file, index=False)
print(f"Hasil klasterisasi disimpan di {output_file}.")

# Hand the written CSV file to the Colab download helper.
files.download(output_file)

Hasil klasterisasi disimpan di fcm_hasil_bobotsama.csv.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>