### Import Library

In [20]:
import pandas as pd
import numpy as np
import random
import operator
import math

### Read the input CSV file

In [21]:
# Load the dataset. The last column holds the class label; every column before
# it is treated as a feature.
df_full = pd.read_csv("Data.csv")
columns = list(df_full.columns)
features = columns[:-1]
class_labels = list(df_full.iloc[:, -1])
df = df_full[features]

In [22]:
# Number of attributes (feature columns). `df` already excludes the class-label
# column, so nothing further is subtracted here.
num_attr = len(df.columns)

In [23]:
# Number of clusters
k = 2

In [24]:
# Maximum number of iterations
MAX_ITER = 100

In [25]:
# Number of data points (rows of the feature frame)
n = df.shape[0]

In [26]:
# Fuzzy parameter: the exponent applied to membership values when computing
# cluster centers; it also sets the 2/(m-1) power used in the membership update.
m = 2.00

#### Fungsi untuk menghitung akurasi, presisi, dan recall dari hasil clustering

In [27]:
def accuracy(cluster_labels, class_labels):
    """Score a two-cluster labelling against "Yes"/"No" ground truth.

    Cluster ids carry no inherent meaning, so both possible assignments are
    scored. Index 0 of every result treats cluster 1 as "Yes" and cluster 0 as
    "No"; index 1 treats cluster 0 as "Yes" and cluster 1 as "No".

    Parameters
    ----------
    cluster_labels : sequence of int
        Predicted cluster id (0 or 1) for each data point.
    class_labels : sequence of str
        True class ("Yes" or "No") for each data point, aligned with
        ``cluster_labels``.

    Returns
    -------
    tuple of (list, list, list)
        ``(accuracy, precision, recall)``. Each is a two-element list of
        percentages, one per assignment. A metric whose denominator is zero
        is reported as 0.0.

    Raises
    ------
    ValueError
        If the two label sequences differ in length.
    """
    if len(cluster_labels) != len(class_labels):
        raise ValueError("cluster_labels and class_labels must have the same length")

    def _percent(numerator, denominator):
        # An empty denominator (e.g. no point predicted "Yes") would otherwise
        # raise ZeroDivisionError; report the metric as 0.0 instead.
        return float(numerator) / denominator * 100 if denominator else 0.0

    tp = [0, 0]
    tn = [0, 0]
    fp = [0, 0]
    fn = [0, 0]

    # The second assignment is the exact mirror of the first, so each pair
    # updates one counter per assignment in a single pass. Pairs that match
    # none of the four cases are ignored.
    for cluster, actual in zip(cluster_labels, class_labels):
        if cluster == 1 and actual == "Yes":
            tp[0] += 1
            fn[1] += 1
        elif cluster == 0 and actual == "No":
            tn[0] += 1
            fp[1] += 1
        elif cluster == 1 and actual == "No":
            fp[0] += 1
            tn[1] += 1
        elif cluster == 0 and actual == "Yes":
            fn[0] += 1
            tp[1] += 1

    accuracy_pct = []
    precision_pct = []
    recall_pct = []
    for idx in range(2):
        counted = tp[idx] + tn[idx] + fn[idx] + fp[idx]
        accuracy_pct.append(_percent(tp[idx] + tn[idx], counted))
        precision_pct.append(_percent(tp[idx], tp[idx] + fp[idx]))
        recall_pct.append(_percent(tp[idx], tp[idx] + fn[idx]))

    return accuracy_pct, precision_pct, recall_pct

### Inisialisasi matriks keanggotaan dengan nilai acak

In [28]:
def initializeMembershipMatrix():
    """Create the initial membership matrix from random weights.

    Builds ``n`` rows of ``k`` values (both read from module scope). Each row
    is a set of ``random.random()`` draws divided by the row's own total, so
    the entries of a row add up to one (up to floating-point rounding).

    Returns
    -------
    list of list of float
        One row per data point, one column per cluster.
    """
    rows = []
    for _point in range(n):
        draws = []
        for _cluster in range(k):
            draws.append(random.random())
        total = sum(draws)
        # Normalise so the row can be read as degrees of membership.
        rows.append([draw / total for draw in draws])
    return rows

### Menghitung pusat cluster pada setiap iterasi

In [29]:
def calculateClusterCenter(membership_mat):
    """Compute one center per cluster as a membership-weighted mean of the data.

    For cluster ``j`` the center is ``sum_i(u_ij**m * x_i) / sum_i(u_ij**m)``,
    where ``u_ij`` is ``membership_mat[i][j]`` and ``x_i`` is row ``i`` of the
    module-level frame ``df``. ``k``, ``n`` and ``m`` are read from module scope.

    Parameters
    ----------
    membership_mat : list of list of float
        Membership values, one row per data point and one column per cluster.

    Returns
    -------
    list of list
        ``k`` centers, each a list with one coordinate per column of ``df``.
    """
    # Pulling a row out of the DataFrame does not depend on the cluster, so it
    # is done once here instead of once per cluster inside the loop below.
    data_points = [list(df.iloc[i]) for i in range(n)]

    # Transpose so that cluster_mem_val[j] holds every point's membership in cluster j.
    cluster_mem_val = list(zip(*membership_mat))
    cluster_centers = list()
    for j in range(k):
        weights = [u ** m for u in cluster_mem_val[j]]
        denominator = sum(weights)
        weighted_points = [
            [weights[i] * val for val in data_points[i]] for i in range(n)
        ]
        # Column-wise sum of the weighted points gives the numerator per coordinate.
        numerator = map(sum, zip(*weighted_points))
        cluster_centers.append([z / denominator for z in numerator])
    return cluster_centers

### Memperbarui nilai keanggotaan berdasarkan pusat cluster

In [30]:
def updateMembershipValue(membership_mat, cluster_centers):
    """Recompute every membership value from the current cluster centers.

    For point ``i`` and cluster ``j`` the new value is
    ``1 / sum_c((d_ij / d_ic) ** (2 / (m - 1)))``, where ``d_ij`` is the
    Euclidean distance between row ``i`` of the module-level frame ``df`` and
    ``cluster_centers[j]``. ``n``, ``k`` and ``m`` are read from module scope.

    Parameters
    ----------
    membership_mat : list of list of float
        Membership matrix to overwrite; it is modified in place.
    cluster_centers : list of list
        One center per cluster.

    Returns
    -------
    list of list of float
        The same ``membership_mat`` object, updated.
    """
    p = float(2/(m-1))
    for i in range(n):
        x = list(df.iloc[i])
        distances = [np.linalg.norm(list(map(operator.sub, x, cluster_centers[j]))) for j in range(k)]

        # A point sitting exactly on a center has distance 0, which would turn
        # the ratios below into 0/0 (NaN) or x/0 (inf). Give such a point full
        # membership in the coinciding center(s), shared equally, and none elsewhere.
        coincident = [j for j in range(k) if distances[j] == 0]
        if coincident:
            share = 1.0 / len(coincident)
            for j in range(k):
                membership_mat[i][j] = share if distances[j] == 0 else 0.0
            continue

        for j in range(k):
            den = sum([math.pow(float(distances[j]/ distances[c]), p) for c in range(k)])
            membership_mat[i][j] = float(1 / den)
    return membership_mat

def getClusters(membership_mat):
    """Turn fuzzy memberships into one hard cluster index per data point.

    For each of the first ``n`` rows (``n`` read from module scope) the index
    of the largest membership value is chosen. Comparison is done on
    ``(value, index)`` pairs, so equal values resolve to the larger index.

    Returns
    -------
    list of int
        Chosen cluster index for each data point.
    """
    cluster_labels = []
    for row_no in range(n):
        row = membership_mat[row_no]
        winner = max(zip(row, range(len(row))))
        cluster_labels.append(winner[1])
    return cluster_labels

### final fungsi FCM

In [31]:
def fuzzyCMeansClustering():
    """Run fuzzy c-means for ``MAX_ITER`` iterations and return the result.

    Starts from a random membership matrix, then alternates between
    recomputing the cluster centers and updating the memberships. The final
    membership matrix is printed before returning.

    Returns
    -------
    tuple
        ``(cluster_labels, cluster_centers)``: the hard cluster index of every
        data point and the centers used for the last membership update.
    """
    # membership matrix
    membership_mat = initializeMembershipMatrix()
    cluster_centers = None

    # Exactly MAX_ITER passes (the former `curr <= MAX_ITER` loop ran one extra).
    for _ in range(MAX_ITER):
        cluster_centers = calculateClusterCenter(membership_mat)
        membership_mat = updateMembershipValue(membership_mat, cluster_centers)

    if cluster_centers is None:
        # No iteration ran (MAX_ITER <= 0): still return centers that match
        # the initial memberships instead of failing on an unset variable.
        cluster_centers = calculateClusterCenter(membership_mat)

    # Hard labels are only needed for the final memberships, so derive them once.
    cluster_labels = getClusters(membership_mat)

    print(membership_mat)
    return cluster_labels, cluster_centers

### Memanggil fungsi utama dan menyimpan hasilnya sebagai label dan pusat cluster

In [32]:
# The banner is printed before the call because fuzzyCMeansClustering() itself
# prints the final membership matrix while it runs.
print("printing the final membership matrix")

# labels: hard cluster index per data point; centers: one center per cluster.
labels, centers = fuzzyCMeansClustering()

print("printing cluster centers")
print(centers)

printing the final membership matrix
[[0.49713553994914, 0.5028644600508599], [0.5016284632112363, 0.4983715367887637], [0.5002596462368533, 0.4997403537631467], [0.5018221790854075, 0.4981778209145925], [0.49867687191880694, 0.5013231280811931], [0.49639711902920375, 0.5036028809707963], [0.4978978714375035, 0.5021021285624965], [0.4972078906989582, 0.5027921093010417], [0.4990773682882467, 0.5009226317117534], [0.4962012665796543, 0.5037987334203456], [0.4979701200698051, 0.5020298799301948], [0.4982240780021051, 0.5017759219978949], [0.5006191912774172, 0.49938080872258284], [0.4973335274880465, 0.5026664725119535], [0.49704813410337967, 0.5029518658966203], [0.498377684370484, 0.501622315629516], [0.49591885145568704, 0.504081148544313], [0.49894645734094306, 0.5010535426590569], [0.4992936447993235, 0.5007063552006765], [0.5028774302671215, 0.49712256973287855], [0.49692290606944545, 0.5030770939305546], [0.4966410851374045, 0.5033589148625954], [0.5026652845231515, 0.497334715476

### menghitung hasil akurasi

In [33]:
# a, p, r are the accuracy, precision and recall lists returned by accuracy();
# each holds two percentages, one per cluster-to-class assignment.
a,p,r = accuracy(labels, class_labels)

### Cetak nilai akurasi, presisi, dan recall

In [34]:
# Show each metric as a two-element list, one value per cluster-to-class assignment.
print("Accuracy = {}".format(a))
print("Precision = {}".format(p))
print("Recall = {}".format(r))

Accuracy = [25.454545454545453, 74.54545454545455]
Precision = [32.0, 88.57142857142857]
Recall = [43.63636363636363, 56.36363636363636]
