<a href="https://colab.research.google.com/github/OmarK920/Data-Science-Projects/blob/main/FCM_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

In [None]:
#Given data points and initial clusters
DataPoints = np.array([[3, 5], [4, 6], [2, 8], [2, 3], [1, 4]])
Clusters = np.array([[2, 4], [1, 7]])

In [None]:
# Compute the "Euclidean distance"
def distance(x, y):
    return np.sqrt(np.sum(pow((x-y), 2)))

In [None]:
from scipy.spatial.distance import cdist
#This implementation uses the cdist function from SciPy's spatial.distance module to compute the Euclidiean distances between each point in data and each point in clusters.
#The resulting array contains the Euclidean distances between each pair of points.
#i chose this method because it is more efficient than using nested loops to compute the same outcome and it gave me the same results in both implementations

def calculateDistancescipy(data, clusters):
    return cdist(data, clusters)


In [None]:
distances = np.array(calculateDistancescipy(DataPoints,Clusters))

In [None]:
#Calculate the membership for each point for each cluster
#Note ===> "q" represents how much each data point can belong to multiple clusters
#Function steps:
#1) initialize an array of 'membership'
#2)iterates through each element in the 'distances' array which i created above.
#3)Calculate the membership for each point for each cluster

def calculateMembership(distances, q):
    membership = np.zeros((len(distances), len(distances[0])))
    for i in range(len(distances)):
        for j in range(len(distances[0])):
            if distances[i][j] == 0:
                membership[i][j] = 1
            else:
                membership[i][j] = 1 / np.sum(pow((distances[i] / distances[i][j]), (2/(q-1))))
    return membership


In [None]:
calculateMembership(distances,2)

array([[0.2       , 0.8       ],
       [0.44444444, 0.55555556],
       [0.88888889, 0.11111111],
       [0.05555556, 0.94444444],
       [0.1       , 0.9       ]])

In [None]:
membership = np.array(calculateMembership(distances,2))

In [None]:
# this function is used to update the cluster centers
def updateClusterMean(data, memberships, q):

    n_features = data.shape[1]
    K = memberships.shape[1]
    means = np.zeros((K, n_features))
    Um = memberships ** q

    # Loop over all clusters
    for j in range(K):

        uj = Um[:, j].reshape(-1, 1)
        means[j, :] = (data * uj).sum(axis=0) / uj.sum()

    return means


In [None]:
updateClusterMean(DataPoints,membership ,2 )

array([[2.4084223 , 7.45183867],
       [2.16796477, 4.15572554]])

In [None]:
#Calculate the objective function value
def calculateObjectiveFunc(data, membership, clusters, q, lamda):
    J = 0
    for i in range(len(data)):
        for j in range(len(clusters)):
            J += pow(membership[i][j], q) * pow(distance(data[i], clusters[j]), 2)#sums up each data point and cluster pair to the objective function value.
    J *= lamda# scales the objective function
    for i in range(len(data)):
        for j in range(len(clusters)):
            J += pow(membership[i][j], q)#adds the membership value of each data point in each cluster to the objective function value.
    return J

In [None]:
calculateObjectiveFunc(DataPoints ,membership , Clusters , 2 ,1 )

48.70370370370371

In [None]:
# putting all the functions together and repeat till convergence
def reachConvergence(data, clusters, q, lamda):
    # Initialize the memberships and objective function
    membership = np.zeros((len(data), len(clusters)))
    prev_obj = calculateObjectiveFunc(data, membership, clusters, q, lamda)

    # Keep iterating until convergence
    while True:
        # Calculate the distance between data points and cluster centers
        distances = calculateDistancescipy(data, clusters)

        # Calculate the membership for each point for each cluster
        membership = calculateMembership(distances, q)

        # Update the cluster centers
        new_clusters = updateClusterMean(data, membership, q)

        # Calculate the new objective function value
        new_obj = calculateObjectiveFunc(data, membership, new_clusters, q, lamda)

        # Check for convergence
        if abs(new_obj - prev_obj) < 0.01:
            return membership, new_clusters

        # Update the objective function and cluster centers for the next iteration
        prev_obj = new_obj
        clusters = new_clusters


In [None]:
reachConvergence(DataPoints,Clusters,2,1)

(array([[0.48895406, 0.51104594],
        [0.84985091, 0.15014909],
        [0.88759019, 0.11240981],
        [0.03958363, 0.96041637],
        [0.05019534, 0.94980466]]),
 [[2.9588325693135946, 6.756786238891717],
  [1.7190448119462898, 3.733318693008251]])