In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import operator
import math
import random

from scipy.linalg import norm
from scipy.spatial.distance import cdist
from collections import defaultdict

from sklearn import datasets
from sklearn.datasets import make_blobs

In [2]:
# ignoring warnings
import warnings
warnings.simplefilter("ignore")
np.random.seed(42)

In [3]:
#Load Dataset Iris
iris=datasets.load_iris()
iris

X = pd.DataFrame(iris.data,columns=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'])
y = pd.DataFrame(iris.target,columns=['Classes'])
X = X.values
y = y.values
data = X

In [4]:

# Mendefinisikan fungsi jarak euclidean
def euclidean_distance(point1,point2):
    dis=0
    for i in range(len(point1)):
        dis+=(point1[i]-point2[i])**2
    return dis**0.5

# Initializing the Membership Matrix
def initialize_membership_matrix(n,c):
    member_mat=list()
    for i in range(n):
        random_list=[random.random() for x in range(c)]
        summation=sum(random_list)
        for i in range(len(random_list)):
            random_list[i]=random_list[i]/summation
        member_mat.append(random_list)
    return member_mat

def update_centroid(member_mat):
    centroids={}
    for j in range(c):
        temp=[]
        for k in range(p):      
            add=0
            for i in range(n):
                add+=member_mat[i][j]**m
            x=0
            for i in range(n):
                x+=(member_mat[i][j]**m)*(data[i][k])
            val=x/add
            temp.append(val)
        centroids[j]=temp
    return centroids

def update_membership_matrix(member_mat,centroids):
    ratio=float(2/(m-1))
    for i in range(n):
        distances=list()
        for j in range(c):
            distances.append(euclidean_distance(data[i],centroids[j]))
        for j in range(c):
            den = sum([math.pow(float(distances[j]/distances[q]), ratio) for q in range(c)])
            member_mat[i][j] = float(1/den)         
            
    return member_mat
        
def find_cluster(member_mat):
    clusters=list()
    for i in range(n):
        max_val, idx = max((val, idx) for (idx, val) in enumerate(member_mat[i]))
        clusters.append(idx)
    return clusters

def check(old_member_mat,member_mat):
    diff=0
    for i in range(n):
        for j in range(c):
            diff+=old_member_mat[i][j]-member_mat[i][j]
    if(diff<0.01):
        return True
    return False

def fuzzy_c_mean(max_iter):
    member_mat=initialize_membership_matrix(n,c)
    for i in range(max_iter):
        centroids=update_centroid(member_mat)
        old_member_mat=member_mat
        member_mat=update_membership_matrix(member_mat,centroids)
        cluster=find_cluster(member_mat)
        if(check(old_member_mat,member_mat))<0.01:
            print(i)
            break    
    
    return cluster,centroids

# labeling the clusters
def label_clusters(clusters):
    z=0
    o=0
    t=0
    dict=defaultdict(int)

    for i in range(50):
        if(clusters[i]==0):
            z=z+1
        elif(clusters[i]==1):
            o=o+1
        else:
            t=t+1
    dict[z]=0
    dict[o]=1
    dict[t]=2
    classes=[]
    fin1=max(z,max(o,t))
    
    classes.append(dict[fin1])
    z=0
    o=0
    t=0
    for i in range(50,100):
        if(clusters[i]==0):
            z=z+1
        elif(clusters[i]==1):
            o=o+1
        else:
            t=t+1
    dict[z]=0
    dict[o]=1
    dict[t]=2

    fin1=max(z,max(o,t))
    
    classes.append(dict[fin1])
    z=0
    o=0
    t=0
    for i in range(100,150):
        if(clusters[i]==0):
            z=z+1
        elif(clusters[i]==1):
            o=o+1
        else:
            t=t+1
    dict[z]=0
    dict[o]=1
    dict[t]=2
    fin1=max(z,max(o,t))
    classes.append(dict[fin1])
    
    return classes

def confusion_matrix(clusters,classes,labels):
    c_matrix=[[0 for i in range(labels)] for j in range(labels)]
    m=0
    n=0
    q=0
    for i in range(50):
        if clusters[i]==classes[0]:
            m=m+1
        elif clusters[i]==classes[1]:
            n=n+1
        else:
            q=q+1
    c_matrix[0][0]=m
    c_matrix[0][1]=n
    c_matrix[0][2]=q
    m=0
    n=0
    q=0
    for i in range(50,100):
        if clusters[i]==classes[0]:
            m=m+1
        elif clusters[i]==classes[1]:
            n=n+1
        else:
            q=q+1
    c_matrix[1][0]=m
    c_matrix[1][1]=n
    c_matrix[1][2]=q
    m=0
    n=0
    q=0

    for i in range(100,150):
        if clusters[i]==classes[0]:
            m=m+1
        elif clusters[i]==classes[1]:
            n=n+1
        else:
            q=q+1
    c_matrix[2][0]=m
    c_matrix[2][1]=n
    c_matrix[2][2]=q
    
    return c_matrix

# Performance Metric
class Metrics:
    def __init__(self,confusion_m):
        self.confusion_m=confusion_m
        self.total=np.sum(confusion_m)
        self.diagonal=np.sum(np.diag(confusion_m))
    
    def accuracy(self):
        accuracy=(self.diagonal/self.total)
        return accuracy
    
    def recall(self):
        recall=np.diag(self.confusion_m)/np.sum(self.confusion_m,axis=1)
        recall=np.mean(recall)
        return recall
    
    def precision(self):
        precision=np.diag(self.confusion_m)/np.sum(self.confusion_m,axis=0)
        precision=np.mean(precision)
        return precision
    
    def f1_score(self,precision,recall):
        f1_score=(2*precision*recall)/(precision+recall)
        return f1_score



In [5]:
# utility values
m = 2
n = len(data)
c = 3
p = len(data[0])
max_iter = 100

# Prediksi menggunakan FCM Clustering
clusters,centroids=fuzzy_c_mean(max_iter)
print("Final Centroid points are:")
print(centroids)

classes=label_clusters(clusters)

Final Centroid points are:
{0: [5.888932360609777, 2.761069363204441, 4.363951643133037, 1.397315040707886], 1: [5.00396596061125, 3.4140888587783076, 1.482815532605614, 0.2535463174787418], 2: [6.7750112237526245, 3.052382271043554, 5.646781781918657, 2.053546658540863]}


In [6]:
# Performance
matrix=confusion_matrix(clusters,classes,3)
performance=Metrics(matrix)

accuracy=performance.accuracy()
recall=performance.recall()
precision=performance.precision()
f1_score=performance.f1_score(precision,recall)

print('confusion matrix is:',end='\n')
print(np.array(matrix),end='\n')

print("Accuracy of the model is {}".format(accuracy*100))
print("Recall of the model is {}".format(recall*100))
print("Precision of the model is {}".format(precision*100))
print("F1-Score of the model is {}".format(f1_score*100))


confusion matrix is:
[[50  0  0]
 [ 0 47  3]
 [ 0 13 37]]
Accuracy of the model is 89.33333333333333
Recall of the model is 89.33333333333331
Precision of the model is 90.27777777777777
F1-Score of the model is 89.80307248169913
