In [4]:
!pip install scikit-fuzzy


Collecting scikit-fuzzy
  Downloading scikit_fuzzy-0.5.0-py2.py3-none-any.whl.metadata (2.6 kB)
Downloading scikit_fuzzy-0.5.0-py2.py3-none-any.whl (920 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m920.8/920.8 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hInstalling collected packages: scikit-fuzzy
Successfully installed scikit-fuzzy-0.5.0


In [2]:
import numpy as np
import pandas as pd
import skfuzzy as fuzz
from sklearn.metrics.pairwise import cosine_similarity

class FuzzyClusteringRecommender:
    """
    A recommender system that uses fuzzy c-means clustering to
    partition movies by average rating, then performs user-based
    collaborative filtering within high-rated clusters.
    """
    def __init__(self, ratings_df, n_clusters=3, m=2.0, error=0.005, maxiter=1000):
        """
        :param ratings_df: DataFrame with columns ['userId', 'movieId', 'rating']
        :param n_clusters: Number of fuzzy clusters
        :param m: Fuzziness parameter
        :param error: Stopping criterion for c-means
        :param maxiter: Maximum iterations for c-means
        """
        self.ratings = ratings_df
        self.n_clusters = n_clusters
        self.m = m
        self.error = error
        self.maxiter = maxiter
        self.movie_ids = None
        self.centers = None
        self.u = None

    def preprocess(self):
        """
        Compute average rating per movie and prepare data for clustering.
        """
        # Compute mean rating per movie
        avg_rating = self.ratings.groupby('movieId')['rating'].mean().reset_index()
        avg_rating.columns = ['movieId', 'avg_rating']
        self.movie_ids = avg_rating['movieId'].values
        # Feature matrix: one feature = avg_rating
        self.features = np.vstack([avg_rating['avg_rating'].values])

    def cluster_movies(self):
        """
        Perform fuzzy c-means clustering on average ratings.
        """
        # Fuzzy C-Means
        cntr, u, _, _, _, _, _ = fuzz.cluster.cmeans(
            self.features, self.n_clusters, self.m,
            error=self.error, maxiter=self.maxiter, init=None
        )
        self.centers = cntr
        self.u = u

    def filter_movies(self, cluster_idx, threshold=0.5):
        """
        Return movies whose membership in cluster cluster_idx >= threshold.
        """
        if self.u is None:
            raise ValueError("Must run cluster_movies() before filtering.")
        membership = self.u[cluster_idx]
        selected = self.movie_ids[membership >= threshold]
        return set(selected)

    def recommend(self, user_id, top_n=10, cluster_idx=None, threshold=0.5):
        """
        Recommend top_n movies for the given user_id.
        Optionally restrict to a fuzzy cluster.
        """
        # Filter movies by cluster if specified
        if cluster_idx is not None:
            candidates = self.filter_movies(cluster_idx, threshold)
        else:
            candidates = set(self.movie_ids)
        
        # Build user-item matrix
        pivot = self.ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
        # Similarity between users
        sim_matrix = cosine_similarity(pivot)
        users = pivot.index.tolist()
        user_idx = users.index(user_id)
        user_sims = sim_matrix[user_idx]

        # Weighted sum of other users' ratings
        ratings_array = pivot.values
        weighted_ratings = user_sims.dot(ratings_array) / (np.abs(user_sims).sum() + 1e-9)

        # Create recommendations for candidate movies not yet rated
        user_rated = set(self.ratings[self.ratings['userId'] == user_id]['movieId'])
        unrated = [m for m in candidates if m not in user_rated]
        scores = {m: weighted_ratings[pivot.columns.get_loc(m)] for m in unrated}

        # Return top_n
        recommended = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:top_n]
        return recommended

# Example usage:
if __name__ == '__main__':
    # Load MovieLens 100K
    pathSmall = '../Datasets/ml-latest-small/'
    ratings = pd.read_csv(pathSmall + 'ratings.csv')
    rec = FuzzyClusteringRecommender(ratings, n_clusters=4)
    rec.preprocess()
    rec.cluster_movies()
    # Recommend from highest-rated cluster (e.g., cluster 3)
    suggestions = rec.recommend(user_id=1, top_n=10, cluster_idx=3, threshold=0.6)
    print("Top 10 recommendations for User 1:")
    for movieId, score in suggestions:
        print(f"Movie {movieId}: score {score:.4f}")

Top 10 recommendations for User 1:
Movie 1882: score 0.1838
Movie 1499: score 0.1458
Movie 2053: score 0.1281
Movie 546: score 0.1279
Movie 2412: score 0.1076
Movie 3248: score 0.0933
Movie 2643: score 0.0878
Movie 1381: score 0.0842
Movie 1556: score 0.0823
Movie 2404: score 0.0784
