In [None]:
%matplotlib inline

# Fuzzy C-Means

Hi class, let's explore the vanilla fuzzy c-means (FCM) algorithm.

First, let's read in the Iris data set and look at a histogram of each feature.

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
import numpy as np

# Load the Iris data set; iris.data has one row per sample and one column per feature.
iris = datasets.load_iris()
print( "Shape of iris:", np.shape(iris.data) )
# N = number of samples, D = number of features (both are reused by later cells).
N, D = iris.data.shape

# Histogram of every individual feature, one panel per feature.
fig, axs = plt.subplots(1, D, sharey=True, tight_layout=True)
for i in range(D):
    X = iris.data[:, i]
    # One bin per sample keeps the original (very fine) resolution.
    axs[i].hist(X, bins=N)
    # Name the panel so the figure can be read on its own.
    axs[i].set_title(iris.feature_names[i], fontsize=8)

Now, plot all the feature pairs (2-tuples), colored by the true class label.

In [None]:
# Scatter matrix: panel (i, j) plots feature i on the x axis against feature j
# on the y axis, colored by the true class label.
n_features = iris.data.shape[1]
fig, axs = plt.subplots(n_features, n_features, sharey=True, figsize=(10, 9))
y = iris.target
for i in range(n_features):
    X1 = iris.data[:, i]
    for j in range(n_features):
        X2 = iris.data[:, j]
        axs[i, j].scatter(X1, X2, c=y, cmap=plt.cm.Set1, edgecolor='k')
        # plt.xticks()/plt.yticks() only act on the *current* axes, not on
        # axs[i, j]; hide the ticks through the panel's own Axes object instead.
        axs[i, j].set_xticks(())
        axs[i, j].set_yticks(())

Now, the triples (three features at a time, in 3-D)

In [None]:
# 3-D scatter plots of three feature triples, colored by the true class label.
# Indices are 0-based: (0,1,2), (0,1,3), (1,2,3) are features (1,2,3), (1,2,4), (2,3,4).
feature_triples = [(0, 1, 2), (0, 1, 3), (1, 2, 3)]
for fx, fy, fz in feature_triples:
    fig = plt.figure(figsize=(8, 6))
    # Create the 3-D axes through the figure so they are attached to it: recent
    # matplotlib releases no longer add a directly constructed Axes3D(fig) to the
    # figure, which leaves the plot empty.
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(elev=-150, azim=110)
    # Color by iris.target directly instead of relying on a `y` left over from
    # an earlier cell.
    ax.scatter(iris.data[:, fx], iris.data[:, fy], iris.data[:, fz],
               c=iris.target, cmap=plt.cm.Set1, edgecolor='k', s=40)
    ax.set_xlabel(iris.feature_names[fx])
    ax.set_ylabel(iris.feature_names[fy])
    ax.set_zlabel(iris.feature_names[fz])
    plt.show()

Now, run the simple version of the FCM

In [None]:
from sklearn.metrics.pairwise import euclidean_distances
from IPython import display

# Data matrix: one row per sample, one column per feature.
X = iris.data
N, D = X.shape

# Number of clusters.
C = 3

# Seeded generator so that "Restart & Run All" reproduces the same clustering.
rng = np.random.default_rng(0)

# Pick the initial prototypes as C *distinct* samples. Sampling with replacement
# could select the same sample twice; two identical prototypes receive identical
# memberships and therefore identical updates, so they would never separate.
Prototypes = X[rng.choice(N, size=C, replace=False), :].copy()
print( "Prototypes start" )
print( Prototypes )

# Membership matrix: MembMatrix[n, i] is the membership of sample n in cluster i.
MembMatrix = np.zeros((N,C))

# Fuzzifier (must be > 1); larger values give softer memberships.
M = 1.5

# Number of FCM iterations.
T = 300

# Small constant that guards every division against a zero denominator
# (e.g. a sample that coincides with a prototype).
EPS = np.finfo(float).eps

for t in range(T):

    # Pairwise (unsquared) Euclidean distances, shape (C, N):
    # X_P_DistMatrix[i, n] = || Prototypes[i] - X[n] ||
    X_P_DistMatrix = euclidean_distances(Prototypes, X)

    # Membership update:
    #   u[n, i] = 1 / sum_m ( d(i, n) / d(m, n) ) ** (2 / (M - 1))
    # The exponent is 2/(M-1) because the distances are not squared; this is the
    # update that matches the weighted-mean prototype update below.
    # Ratios[i, m, n] = d(i, n) / d(m, n)
    Ratios = X_P_DistMatrix[:, None, :] / (X_P_DistMatrix[None, :, :] + EPS)
    RatioSums = np.power(Ratios, 2.0 / (M - 1.0)).sum(axis=1)      # shape (C, N)
    MembMatrix = np.clip(1.0 / (RatioSums + EPS), 0.0, 1.0).T      # shape (N, C)

    # Prototype update: mean of the samples weighted by membership ** M.
    Weights = np.power(MembMatrix, M)                               # shape (N, C)
    Prototypes = (Weights.T @ X) / (Weights.sum(axis=0)[:, None] + EPS)

print( "Prototypes end" )
print( Prototypes )

Plot it (well, in a few dims)

In [None]:
import pylab as pl

# Scatter two neighbouring features and overlay the learned prototypes.
Dim1 = 2  # feature index on the x axis; the y axis shows feature Dim1+1
X1 = iris.data[:,Dim1]
X2 = iris.data[:,Dim1+1]
pl.scatter(X1, X2, edgecolor='b', label='samples')
pl.plot(Prototypes[:,Dim1], Prototypes[:,Dim1+1], 'xr', label='prototypes')
# Label the axes so the figure can be read on its own.
pl.xlabel(iris.feature_names[Dim1])
pl.ylabel(iris.feature_names[Dim1+1])
pl.legend()
# Render explicitly so the cell does not echo the Line2D repr as its output.
pl.show()

Show the membership matrix as an "image"

In [None]:
# One filled curve per cluster: membership of every sample (x axis = sample index).
colors = ["skyblue","red","green"]

plt.figure( figsize=(10, 4) )

# The x coordinates are the same for every cluster, so build them once.
b = range(0, N)
for i in range(C):
    # Cycle through the palette so a larger C does not raise an IndexError.
    plt.fill_between( b, MembMatrix[:,i], color=colors[i % len(colors)], alpha=0.4,
                      label="cluster %d" % i )

plt.xlabel("sample index")
plt.ylabel("membership")
plt.legend()
plt.show()

Harden the matrix

In [None]:
# Defuzzify: for each sample (row) take the index of the cluster (column)
# with the largest membership.
Harden = MembMatrix.argmax(axis=1)
print(Harden)

Show the results

In [None]:
# Compare the hardened FCM assignment with the true class labels on the same
# pair of features.
i = 1
j = 2
X1 = iris.data[:,i]
X2 = iris.data[:,j]

# First figure: what we got; second figure: the real labels.
# NOTE: the hardened values are column indices of the membership matrix, so the
# colors of the two figures are not guaranteed to correspond to each other.
for title, y in (("FCM clusters (hardened)", Harden), ("True labels", iris.target)):
    plt.figure( figsize=(10, 9) )
    plt.scatter(X1, X2, c=y, cmap=plt.cm.Set1, edgecolor='k')
    # Title each figure; without it the two plots cannot be told apart.
    plt.title(title)
    plt.xticks(())
    plt.yticks(())

# Render explicitly so the cell does not echo the tick tuple as its output.
plt.show()