In [1]:
import pandas as pd
import numpy as np
import random
import math
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import preprocessing
import time

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

wine = datasets.load_iris()
df = pd.DataFrame(wine.data, columns=wine.feature_names)
df['target'] = wine.target
mat=df.to_numpy()
print(wine.DESCR)
print("Shape of matrix loaded:",mat.shape)
print("Number of Features:",mat.shape[1]-1)
print("Number of Patterns:",mat.shape[0])

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

1.

In [2]:
def EuclideanDistance(x, y):
    S = 0 
    for i in range(x.shape[0]):
        S += math.pow(x[i] - y[i], 2)
    return S



In [3]:
partitionMatrix=[]

np.random.shuffle(mat)
X=mat[:,:-1]
X = preprocessing.normalize(X)

for i in range(0,X.shape[0]):
    partitionMatrix.append(np.random.dirichlet(np.ones(3),size=1)[0])
numItr=30
m=2
c = np.zeros(shape=(3,X[0].shape[0]))
for itr in range(0,numItr):
    cDiv=[0,0,0]
    for row in range(len(partitionMatrix)):
        cDiv+=partitionMatrix[row]**m
        for clust in range(len(partitionMatrix[0])):
            c[clust]=c[clust]+(partitionMatrix[row][clust]**m)*X[row]
    for clust in range(len(partitionMatrix[0])):
        c[clust]=c[clust]/cDiv[clust]
    for row in range(len(partitionMatrix)):
        for clust in range(len(partitionMatrix[0])):
            partitionMatrix[row][clust]=0
            for clustItr in range(len(partitionMatrix[0])):
                den=EuclideanDistance(X[row],c[clustItr])
                if den!=0:
                    partitionMatrix[row][clust]+=EuclideanDistance(X[row],c[clust])/den
                
print("Partition Matrix:")
for row in partitionMatrix:
    print(row)
Y=np.zeros(shape=(X.shape[0],1))
for sample in range(X.shape[0]):
    hp_min=min(partitionMatrix[sample])
    best, = np.where(np.isclose(partitionMatrix[sample], hp_min))
    Y[sample]= best[0]
SSE=0
for row in range(X.shape[0]):
    SSE+=EuclideanDistance(X[row,:],c[int(Y[row])])

print("SSE Fuzzy clustering : ",SSE)
print(f'Silhouette Score fuzzy clustering(n=3): {silhouette_score(X, Y)}')
            
    

Partition Matrix:
[ 2.02547749 79.50067651  2.02547749]
[  2.01645906 122.51360662   2.01645906]
[231.16774217   1.00872723 231.16774217]
[ 2.02218108 91.16695151  2.02218108]
[ 2.03889034 52.42664775  2.03889034]
[  2.00576197 348.10328068   2.00576197]
[112.18183878   1.01815181 112.18183878]
[12.94588918  1.182717   12.94588918]
[21.42495345  1.10296035 21.42495345]
[14.81770181  1.15603421 14.81770181]
[ 2.02301269 87.90858612  2.02301269]
[18.54330032  1.12089486 18.54330032]
[ 2.03792544 53.7350466   2.03792544]
[22.60958997  1.0970422  22.60958997]
[20.58854365  1.10759315 20.58854365]
[38.39800527  1.05494807 38.39800527]
[ 2.02386801 84.79417884  2.02386801]
[55.88534878  1.03711584 55.88534878]
[25.10150453  1.08657445 25.10150453]
[590.90394141   1.00339614 590.90394141]
[  2.00755778 265.62789659   2.00755778]
[15.55861524  1.14750769 15.55861524]
[  2.01194041 168.498454     2.01194041]
[83.48902219  1.02454318 83.48902219]
[  2.0024107  830.63615028   2.0024107 ]
[20.6111

  y = column_or_1d(y, warn=True)


2.

In [4]:
print("-"*100 + "\nKMeans using Library function")
KMean= KMeans(
    init="random",
n_clusters=3,
n_init=30,
max_iter=500,
)

X=mat[:,:-1]
X = preprocessing.normalize(X)
KMean.fit(X)
label=KMean.predict(X)
print(f'Silhouette Score(n=3): {silhouette_score(X, label)}')

----------------------------------------------------------------------------------------------------
KMeans using Library function
Silhouette Score(n=3): 0.5763264768222824
