# Importing libraries

In [None]:
!pip install scikit-fuzzy

Collecting scikit-fuzzy
  Downloading scikit-fuzzy-0.4.2.tar.gz (993 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m994.0/994.0 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-fuzzy
  Building wheel for scikit-fuzzy (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-fuzzy: filename=scikit_fuzzy-0.4.2-py3-none-any.whl size=894075 sha256=3061752e5599fdf7e4dc89208042246b5dcf05b7256eb7c79631aee9d80475c2
  Stored in directory: /root/.cache/pip/wheels/4f/86/1b/dfd97134a2c8313e519bcebd95d3fedc7be7944db022094bc8
Successfully built scikit-fuzzy
Installing collected packages: scikit-fuzzy
Successfully installed scikit-fuzzy-0.4.2


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import skfuzzy as fuzz
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import plotly.graph_objects as go
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.cluster import AffinityPropagation
from sklearn.metrics import silhouette_score, davies_bouldin_score

In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be
# read by the loading cell further down.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Loading & viewing dataset

In [None]:
# Read the mall-customers CSV from the mounted Drive into a DataFrame.
df_full = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Data sets/Mall_Customers.csv",
)

In [None]:
# Preview the first five rows of the raw data.
df_full.head(n=5)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [None]:
# (rows, columns) of the raw data.
df_full.shape

(200, 5)

# Simple necessary preprocessing

In [None]:
# Keep only the feature columns: everything except the first column
# (CustomerID) and the last column (Spending Score (1-100)).
columns = list(df_full.columns)
features = columns[1:-1]
# Take an explicit copy so that later column assignments act on an independent
# frame instead of a slice of df_full (which raises SettingWithCopyWarning).
df = df_full[features].copy()

In [None]:
# Preview the selected feature columns; a few rows are enough to check the
# selection without rendering the whole frame.
df.head()

Unnamed: 0,Genre,Age,Annual Income (k$)
0,Male,19,15
1,Male,21,15
2,Female,20,16
3,Female,23,16
4,Female,31,17
...,...,...,...
195,Female,35,120
196,Female,45,126
197,Male,32,126
198,Male,32,137


In [None]:
# Encode the categorical 'Genre' column as integer codes.
le = LabelEncoder()
# Build a new frame with assign() instead of writing into `df` in place:
# `df` may be a slice of df_full, and in-place assignment on a slice raises
# SettingWithCopyWarning. assign() keeps the column in its original position.
df = df.assign(Genre=le.fit_transform(df['Genre']))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Genre'] = le.fit_transform(df['Genre'])


In [None]:
# Standardize the features: fit the scaler on df, then transform the same data.
scaler = StandardScaler()
scaler.fit(df)
X_normalized = scaler.transform(df)

In [None]:
# Swap the axes of the standardized matrix; the transposed array is what gets
# passed to the FCM call.
X_transposed = np.transpose(X_normalized)
X_transposed.shape

(3, 200)

# Applying fuzzy c-means clustering

In [None]:
# Apply Fuzzy C-Means with 3 clusters and fuzzifier m=1.5.
# With init=None the initial partition is drawn at random, so a fixed seed is
# passed to make the clustering reproducible across kernel restarts.
# Of the returned values, later cells use `u` (membership matrix) and `fpc`.
cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
    X_transposed, c=3, m=1.5, error=0.005, maxiter=1000, init=None, seed=42
)


In [None]:
# Hard-assign each sample to the cluster in which its membership is largest.
cluster_membership = u.argmax(axis=0)
cluster_membership

array([0, 0, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 0, 2, 0, 1, 2, 0, 0,
       2, 0, 1, 0, 2, 0, 2, 2, 1, 2, 1, 0, 2, 2, 2, 2, 2, 2, 1, 0, 1, 2,
       2, 2, 2, 2, 2, 2, 2, 0, 2, 1, 2, 1, 2, 1, 2, 1, 1, 0, 1, 2, 1, 0,
       2, 1, 0, 2, 1, 2, 1, 1, 1, 0, 2, 0, 2, 2, 1, 0, 1, 2, 2, 1, 2, 2,
       2, 2, 1, 0, 1, 2, 2, 0, 2, 2, 1, 0, 2, 2, 1, 0, 1, 2, 1, 1, 1, 1,
       1, 2, 2, 0, 2, 2, 1, 2, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 1, 0, 0, 0,
       2, 2, 0, 2, 2, 0, 0, 2, 2, 0, 2, 2, 0, 0, 0, 2, 2, 0, 0, 0, 2, 2,
       2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 0, 0, 0, 2, 2,
       1, 0, 1, 0, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0,
       0, 0])

# Visualizing the clusters with plotly

In [None]:
# Plot the distribution of Spending Score
fig1 = px.histogram(df_full, x='Spending Score (1-100)', title='Distribution of Spending Score')

# Plot the FCM clustering results (axes show the standardized feature values)
df_clustered = pd.DataFrame(X_normalized, columns=['Genre' ,'Age', 'Annual Income (k$)'])
# Cluster ids are categories, not quantities: store them as strings so plotly
# draws discrete colors with a legend instead of a continuous color bar.
df_clustered['Cluster'] = cluster_membership.astype(str)
fig2 = px.scatter(df_clustered, x='Age', y='Annual Income (k$)', color='Cluster', title='Fuzzy C-Means Clustering Results')

# Display the plots
fig1.show()
fig2.show()

In [None]:
# Report the Fuzzy Partition Coefficient returned by cmeans, to 4 decimals.
print('Fuzzy Partition Coefficient (FPC): {:.4f}'.format(fpc))


Fuzzy Partition Coefficient (FPC): 0.7692


# Applying PCA for better cluster visualization

In [None]:
# Project the standardized features onto two principal components so the
# clusters can be drawn in a 2-D scatter plot.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_normalized)

In [None]:
# Scatter the samples in PCA space, colored by their FCM hard assignment.
df_pca = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
# Cluster ids are categories, not quantities: store them as strings so plotly
# draws discrete colors with a legend instead of a continuous color bar.
df_pca['Cluster'] = cluster_membership.astype(str)
fig = px.scatter(df_pca, x='PC1', y='PC2', color='Cluster', title='Fuzzy C-Means Clustering Results')
fig.show()

# Applying affinity propagation clustering

In [None]:
# Fit affinity propagation on the standardized features.
af = AffinityPropagation(
    damping=0.9,
    max_iter=2000,
    convergence_iter=10,
    preference=-50,
    random_state=40,
).fit(X_normalized)

In [None]:
# Read the per-sample cluster labels stored on the fitted model
cluster_labels = af.labels_
cluster_labels

array([0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 1, 0, 2, 3, 0, 3, 2, 0, 3, 3,
       1, 3, 1, 3, 1, 3, 0, 0, 2, 0, 2, 3, 1, 0, 1, 0, 0, 0, 1, 3, 2, 0,
       1, 0, 1, 0, 0, 0, 1, 3, 0, 2, 1, 2, 1, 2, 0, 2, 2, 3, 1, 1, 2, 3,
       1, 1, 3, 0, 2, 1, 1, 1, 2, 3, 1, 3, 0, 1, 2, 3, 2, 1, 0, 2, 1, 0,
       0, 1, 1, 3, 2, 4, 0, 3, 1, 0, 2, 3, 0, 1, 2, 3, 2, 0, 1, 2, 2, 2,
       2, 0, 4, 3, 0, 0, 1, 1, 1, 1, 3, 4, 4, 3, 4, 4, 3, 3, 2, 3, 2, 3,
       4, 4, 3, 4, 4, 3, 3, 4, 1, 3, 4, 4, 3, 3, 2, 4, 4, 3, 3, 3, 4, 4,
       4, 4, 3, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, 4, 3, 3, 3, 3, 3, 4, 4,
       2, 3, 2, 3, 4, 4, 3, 4, 4, 3, 4, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4, 3,
       3, 3])

In [None]:
# Score the affinity-propagation labels ('cluster_labels') against the
# standardized features with two internal clustering metrics.
silhouette = silhouette_score(X_normalized, cluster_labels)
davies_bouldin = davies_bouldin_score(X_normalized, cluster_labels)

for metric_name, metric_value in (
    ("Silhouette Score:", silhouette),
    ("Davies-Bouldin Index:", davies_bouldin),
):
    print(metric_name, metric_value)

Silhouette Score: 0.3906970748351351
Davies-Bouldin Index: 0.8505959171356032


# Visualizing the affinity propagation clusters with plotly

In [None]:
# Scatter the samples in PCA space, colored by their affinity-propagation label.
df_pca = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
# Cluster ids are categories, not quantities: store them as strings so plotly
# draws discrete colors with a legend instead of a continuous color bar.
df_pca['Cluster'] = af.labels_.astype(str)
fig = px.scatter(df_pca, x='PC1', y='PC2', color='Cluster', title='Affinity propagation clustering results')
fig.show()