In [27]:
import pandas as pd
import numpy as np
import skfuzzy as fuzz
from sklearn.preprocessing import StandardScaler

In [28]:
# Directory holding the ml-latest-small dataset, relative to this notebook.
pathSmall = '../Datasets/ml-latest-small/'

# Read the ratings table; later cells use its userId, movieId and rating columns.
ratings = pd.read_csv(filepath_or_buffer=pathSmall + 'ratings.csv')

In [29]:
# Build the user x movie rating matrix: one row per userId, one column per
# movieId. Movies a user has not rated come out as NaN.
user_item = pd.pivot_table(
    ratings,
    values='rating',
    index='userId',
    columns='movieId',
)

# Missing ratings have to be imputed before clustering. Zeros would be one
# option; the user's own mean is used instead so that each user's relative
# preferences are preserved.
def fill_with_user_mean(row):
    """Return a copy of ``row`` with its NaN entries replaced by the row mean.

    Parameters
    ----------
    row : pandas.Series
        One user's ratings, with NaN where no rating exists.

    Returns
    -------
    pandas.Series
        A new Series of the same index; observed values are untouched.
    """
    row_average = row.mean()  # Series.mean skips NaN by default
    return row.fillna(row_average)

# Impute row by row, so each user's gaps are filled from that user's own row.
user_item_filled = user_item.apply(fill_with_user_mean, axis='columns')

In [30]:
# Standardize each movie column (feature) across users.
scaler = StandardScaler()
scaler.fit(user_item_filled)
user_item_scaled = scaler.transform(user_item_filled)

# Flip to features (movies) x samples (users), the layout passed to cmeans.
data = user_item_scaled.T

In [33]:
# Hyperparameters for fuzzy c-means
n_clusters = 5    # how many clusters to fit
m = 2.0           # fuzzifier exponent
error = 0.005     # convergence tolerance (stopping criterion)
maxiter = 1000    # upper bound on iterations
seed = 42         # fixed seed so the run is reproducible

# Fit the model. Of the returned values, later cells use only `u` (the final
# membership matrix) and `fpc` (the fuzzy partition coefficient).
cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
    data,
    c=n_clusters,
    m=m,
    error=error,
    maxiter=maxiter,
    init=None,
    seed=seed,
)

In [34]:
# `u` is the final membership matrix, laid out as n_clusters x n_samples.
# Hard-assign every user to the cluster where their membership is largest.
cluster_labels = u.argmax(axis=0)

# One row per user: the membership degree for each cluster, then the hard
# label and the user's id taken from the user-item matrix index.
user_clusters = pd.DataFrame(
    u.T,
    columns=[f"Cluster_{i}" for i in range(n_clusters)],
)
user_clusters['AssignedCluster'] = cluster_labels
user_clusters['UserId'] = user_item_filled.index.to_numpy()

# Attach the hard cluster label to the filled user-item matrix by matching
# its index against the UserId column.
user_item_clusters = user_item_filled.merge(
    user_clusters.loc[:, ['UserId', 'AssignedCluster']],
    left_index=True,
    right_on='UserId',
)

# Report the Fuzzy Partition Coefficient (FPC) as a clustering-quality measure.
print(f"Fuzzy Partition Coefficient (FPC): {fpc:.4f}")

# Preview the first few users with their memberships and assignments.
print(user_clusters.head())

Fuzzy Partition Coefficient (FPC): 0.6219
   Cluster_0  Cluster_1  Cluster_2  Cluster_3  Cluster_4  AssignedCluster  \
0   0.009325   0.905891   0.019841   0.061026   0.003918                1   
1   0.003539   0.011186   0.014679   0.969592   0.001004                3   
2   0.078244   0.015093   0.038928   0.023751   0.843984                4   
3   0.185318   0.034478   0.635012   0.119111   0.026080                2   
4   0.023345   0.006865   0.932487   0.033819   0.003484                2   

   UserId  
0       1  
1       2  
2       3  
3       4  
4       5  
