# Implementing Fuzzy C-means clustering algorithm using IRIS data to evaluate performance of the algorithm

**Results**:
*   Accuracy using own approach --> 87.88
*   Accuracy using fuzzy-c-means --> 81.79

In [243]:
pip install fuzzy-c-means



In [244]:
import numpy as np
import math
import random
from sklearn.datasets import load_iris 
from fcmeans import FCM

In [245]:
iris = load_iris()
data = iris.data
target = iris.target

In [246]:
def update_mat_num(distance_list, m):
  update_val = []
  for i in range(len(distance)):
    sum = 0.0
    for j in range(len(distance[i])):
      sum += distance[i][j]**2/(distance[i][j])**2
  update_val.append((sum**(1/m-1))**-1)
  return update_val

In [247]:
k = 3
max_epoch = 15
m = 2

mem_mat = []
for i in range(len(data)):
  random_list = [random.random() for i in range(k)]
  sum = np.sum(random_list)
  temp_list = [x / sum for x in random_list]  #First normalization
  mem_mat.append(temp_list) 

for iter in range(max_epoch):
  centroids = []
  updated_mem = []
  denom = []
  numer = []
  distance = []

  for i in range(k):
    sumd = 0.0
    for j in range(len(mem_mat)):
      sumd = sumd + (mem_mat[j][i])**m
    denom.append(sumd)
    temp1 = []
    for col in range(len(data[0])):
      sumn = 0.0
      for j in range(len(mem_mat)):
        sumn = sumn + (mem_mat[j][i])**m * data[j][col]
      temp1.append(sumn)
    numer.append(temp1)

  for i in range(len(numer)):
    temp2 = []
    for j in range(len(numer[0])):
      temp2.append(numer[i][j]/denom[i])
    centroids.append(temp2)
  
  for i in range(len(data)):
    temp3 = []
    for j in range(len(centroids)):
      temp3.append(np.linalg.norm(data[i] - centroids[j]))
    distance.append(temp3)

  for i in range(len(distance)):
    temp_list = []
    for j in range(len(distance[i])):
      sum = 0.0
      for x in range(len(distance[i])):
        sum += distance[i][j]**2/(distance[i][x])**2
      temp_list.append((sum**(1/m-1))**-1)
    updated_mem.append(temp_list)
  mem_mat = updated_mem
print("Cluster centers:\n", centroids[0],"\n", centroids[1], "\n", centroids[2])

Cluster centers:
 [6.197068149173729, 2.912740748925745, 4.782195223504566, 1.6451799833700966] 
 [5.043571859203614, 3.40006072817463, 1.5665887040879574, 0.3101327091532451] 
 [6.207404358053732, 2.907614917536784, 4.812203595119024, 1.6575236082812925]


In [248]:
pred = [[0 for i in range(0)] for j in range(k)]

for i in range(len(data)):
  d1 = np.linalg.norm(data[i] - centroids[0])
  d2 = np.linalg.norm(data[i] - centroids[1])
  d3 = np.linalg.norm(data[i] - centroids[2])
  point = min(d1,d2,d3)

  if point == d1:
    pred[0].append(i)
  if point == d2:
    pred[1].append(i)
  if point == d3:
    pred[2].append(i)

In [249]:
labels = [[0 for i in range(0)] for j in range(k)]
for i in range(len(target)):
  labels[target[i]].append(i)

In [250]:
jaccard = [0,0,0]

for i in range(len(pred)):
  sim = [0, 0, 0]
  for j in range(len(pred[i])):
    # finds the no of similar elements and updates the sim matrix accordingly
    for l in range(3):
      if pred[i][j] in labels[l]:
        sim[l] += 1
  # for finding out the most similar set between target and predicted labels
  max_sim = sim.index(np.amax(sim))
  u_list = list(set(pred[i]) | set(labels[max_sim]))
  i_list = list(set(pred[i]) & set(labels[max_sim]))

  jaccard[i] = len(i_list)/len(u_list)
accuracy = (jaccard[0]*len(pred[0]))/len(data) + (jaccard[1]*len(pred[1]))/len(data) + (jaccard[2]*len(pred[2]))/len(data)

In [251]:
acc = 0
X, y = load_iris(return_X_y=True)
mod = FCM(n_clusters=3, max_iter=15, m=2)
mod.fit(X)
labels2 = np.array(mod.predict(X))
print("Cluster centers:\n", mod.centers)

Cluster centers:
 [[5.8870544  2.7603922  4.361212   1.3959042 ]
 [6.7727475  3.051721   5.6439543  2.0524507 ]
 [5.0039616  3.414181   1.4826653  0.25347885]]


In [252]:
fcm_labels = [[0 for i in range(0)] for j in range(k)]
for i in range(len(labels2)):
  fcm_labels[labels2[i]].append(i)

jaccard2 = [0,0,0]

for i in range(len(fcm_labels)):
  sim = [0, 0, 0]
  for j in range(len(fcm_labels[i])):
    # finds the no of similar elements and updates the sim matrix accordingly
    for l in range(3):
      if fcm_labels[i][j] in labels[l]:
        sim[l] += 1
  # for finding out the most similar set between target and predicted labels
  max_sim = sim.index(np.amax(sim))
  u_list = list(set(fcm_labels[i]) | set(labels[max_sim]))
  i_list = list(set(fcm_labels[i]) & set(labels[max_sim]))

  jaccard2[i] = len(i_list)/len(u_list)

accuracy2 = (jaccard2[0]*len(fcm_labels[0]))/len(data) + (jaccard2[1]*len(fcm_labels[1]))/len(data) + (jaccard2[2]*len(fcm_labels[2]))/len(data)

In [254]:
print("Accuracy using own approach -->",round(accuracy*100,2))
print("Accuracy using fuzzy-c-means -->",round(accuracy2*100,2))

Accuracy using own approach --> 87.88
Accuracy using fuzzy-c-means --> 81.79
