<a href="https://colab.research.google.com/github/Mohibl-6568/Clothing-Recommender-System/blob/main/ClothingRecommenderSystem(K_means).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd
from collections import defaultdict
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
import time
import math
import numpy as np

In [9]:
# Load and Preprocess Data
GENDER_COLUMN = 'For (Male, Female, both)'
# Lower-cased spelling found in the sheet -> canonical label used by the filters.
GENDER_LABELS = {'male': 'Male', 'female': 'Female', 'both': 'Both', 'unisex': 'Both'}

df = pd.read_csv('Clothify-fyp-data - main.csv')
# Keep only rows that have a non-blank title.
df = df[df['Title'].notna() & df['Title'].str.strip().ne('')]
df = df.drop('Owner', axis=1)

# Standardize gender labels.
# Match on a stripped, lower-cased key so variants such as 'MALE', 'Unisex' or
# 'female ' are normalised too; an exact-match replace would leave them as-is
# and the later exact-match gender filter would silently drop those items.
# Values that are not recognised (including missing ones) are left untouched.
gender_key = df[GENDER_COLUMN].astype(str).str.strip().str.lower()
canonical_gender = gender_key.map(GENDER_LABELS)
df[GENDER_COLUMN] = canonical_gender.where(canonical_gender.notna(), df[GENDER_COLUMN])

# Every column whose name contains 'Tag' contributes to the item's tag text.
tag_columns = [col for col in df.columns if 'Tag' in col]

def combine_tags(df, tag_columns):
    """Join each row's tag cells into a single space-separated string.

    Args:
        df: DataFrame holding the tag columns.
        tag_columns: Names of the columns to join, in output order.

    Returns:
        One joined string per row, indexed like ``df``. Missing cells are
        replaced by empty strings before joining, so gaps still leave their
        separating spaces in the result.
    """
    tag_cells = df[tag_columns].fillna('')

    def join_row(row):
        # Cells may hold non-string values, so stringify each one first.
        return ' '.join(str(cell) for cell in row)

    return tag_cells.apply(join_row, axis=1)

# Store the joined tag text in lower case so later matching is case-insensitive.
joined_tags = combine_tags(df, tag_columns)
df['combined_tags'] = joined_tags.str.lower()

In [10]:
# Login + Gender-Based Filtering
print("Welcome to Clothify Interactive Recommender!")
username = input("Enter your username: ").strip()
gender = input("Enter your gender (Male/Female): ").strip().capitalize()

# Catalogue labels each recognised gender may be shown; 'Both' items suit everyone.
audience_by_gender = {
    'Male': ['Male', 'Both'],
    'Female': ['Female', 'Both'],
}
allowed_audiences = audience_by_gender.get(gender)

if allowed_audiences is None:
    # Unrecognised answer: fall back to the whole catalogue.
    print("Invalid gender input. Showing unfiltered recommendations.")
    filtered_df = df.copy()
else:
    audience_mask = df['For (Male, Female, both)'].isin(allowed_audiences)
    filtered_df = df[audience_mask].copy()

# Rebuild the lower-cased tag text on the filtered copy.
filtered_df['combined_tags'] = combine_tags(filtered_df, tag_columns).str.lower()

Welcome to Clothify Interactive Recommender!
Enter your username: Mohib
Enter your gender (Male/Female): Female


In [11]:
# TF-IDF Tag Vectorization and Clustering
MAX_CLUSTERS = 20              # Upper bound on the number of clusters
TARGET_ITEMS_PER_CLUSTER = 10  # Average cluster size to aim for (not a guaranteed minimum)

if filtered_df.empty:
    # Fail with a clear message instead of an obscure vectorizer/KMeans error.
    raise ValueError("No items left after filtering; cannot build clusters.")

vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(filtered_df['combined_tags'])

# Choose the number of clusters (tune this heuristic as needed): roughly one
# cluster per TARGET_ITEMS_PER_CLUSTER items, capped at MAX_CLUSTERS.
# Catalogues smaller than TARGET_ITEMS_PER_CLUSTER floor-divide to 0, which
# KMeans rejects, so always keep at least one cluster.
NUM_CLUSTERS = max(1, min(MAX_CLUSTERS, len(filtered_df) // TARGET_ITEMS_PER_CLUSTER))
kmeans = KMeans(n_clusters=NUM_CLUSTERS, random_state=42)
item_clusters = kmeans.fit_predict(tfidf_matrix)
# Attach each item's cluster id so candidates can be looked up by cluster.
filtered_df['cluster'] = item_clusters

In [12]:
# User Profile and Cluster Management
class UserProfile:
    """Track one user's tag preferences and the item cluster they map to.

    Feedback is stored per tag as ``(weight, timestamp)`` pairs (+1 for a
    like, -1 otherwise) and aggregated with exponential time decay. Cluster
    assignment relies on the module-level ``vectorizer``, ``kmeans`` and
    ``NUM_CLUSTERS`` objects created in the clustering cell.
    """

    def __init__(self, username):
        """Create an empty profile for ``username``."""
        self.username = username
        self.tag_history = defaultdict(list)  # tag -> [(weight, timestamp), ...]
        self.current_cluster = None           # assigned lazily on first use
        self.cluster_history = []             # [(cluster, timestamp), ...]
        # Decay rate per unit of timestamp (per second when time.time() is used).
        self.DECAY_LAMBDA = 0.01

    def update_profile(self, tags, feedback, timestamp):
        """Update user profile based on item interaction.

        Args:
            tags: Iterable of tag strings attached to the item.
            feedback: 'L' records a like (+1); any other value records -1.
            timestamp: Time of the interaction, also used for the
                cluster re-assignment that follows.
        """
        weight = 1 if feedback == 'L' else -1

        for tag in tags:
            self.tag_history[tag].append((weight, timestamp))

        # Update cluster assignment
        self.update_cluster_assignment(timestamp)

    def get_weighted_tags(self, timestamp=None):
        """Get weighted tags with time decay.

        Args:
            timestamp: Reference time for the decay; defaults to ``time.time()``.

        Returns:
            defaultdict(float) mapping each tag to the sum of its feedback
            weights, each scaled by ``exp(-DECAY_LAMBDA * age)``.
        """
        if timestamp is None:
            timestamp = time.time()

        weighted_tags = defaultdict(float)

        for tag, entries in self.tag_history.items():
            total_weight = 0
            for weight, entry_time in entries:
                age = timestamp - entry_time
                total_weight += weight * math.exp(-self.DECAY_LAMBDA * age)
            weighted_tags[tag] = total_weight

        return weighted_tags

    @staticmethod
    def _build_profile_text(weighted_tags):
        """Turn net-positive tag weights into text for the TF-IDF vectorizer.

        Each tag is repeated ``round(weight)`` times. Tags whose weight is
        negative or rounds to zero are left out: they are disliked (or
        faded) and must not pull the user towards clusters containing them.
        """
        words = []
        for tag, weight in weighted_tags.items():
            repeats = int(round(weight))
            if repeats > 0:
                words.extend([tag] * repeats)
        return ' '.join(words)

    def update_cluster_assignment(self, timestamp=None):
        """Update user's cluster assignment based on current profile.

        With no history a random cluster is chosen. With history but no
        net-positive tag the current cluster is kept (or a random one is
        chosen if none was assigned yet). Otherwise the cluster whose
        centroid is closest to the liked-tag vector is selected.
        """
        if timestamp is None:
            timestamp = time.time()

        weighted_tags = self.get_weighted_tags(timestamp)

        if not weighted_tags:
            # No history yet, assign to random cluster
            self.current_cluster = int(np.random.choice(NUM_CLUSTERS))
            return

        weighted_text = self._build_profile_text(weighted_tags)

        if weighted_text:
            # Create user vector and find the closest cluster centroid
            user_vector = vectorizer.transform([weighted_text])
            distances = kmeans.transform(user_vector)
            self.current_cluster = int(np.argmin(distances))
        elif self.current_cluster is None:
            # Only dislikes so far and nothing assigned: nothing to steer by.
            self.current_cluster = int(np.random.choice(NUM_CLUSTERS))
        # else: no positive signal, so keep the current cluster rather than
        # querying the model with an empty (all-zero) vector.

        # Record cluster history
        self.cluster_history.append((self.current_cluster, timestamp))

    def get_recommendation_candidates(self, filtered_df, seen_items):
        """Get items from current cluster that haven't been seen.

        Args:
            filtered_df: DataFrame with a 'cluster' column.
            seen_items: Collection of index labels already shown to the user.

        Returns:
            List of index labels of unseen items in the current cluster.
        """
        if self.current_cluster is None:
            self.update_cluster_assignment()

        cluster_items = filtered_df[filtered_df['cluster'] == self.current_cluster]
        unseen_items = cluster_items[~cluster_items.index.isin(seen_items)]
        return unseen_items.index.tolist()

In [13]:
# Recommendation Loop
NUM_ROUNDS = 3        # Number of recommendation rounds
ITEMS_PER_ROUND = 5   # Maximum number of items shown in one round
NEARBY_CLUSTERS = 3   # Closest clusters searched when the current one runs short

user_profile = UserProfile(username)
seen_items = set()

print(f"\nHello, {username}! Let's get started with your personalized recommendations.\n")

for round_num in range(NUM_ROUNDS):
    print(f"\n🔁 Round {round_num + 1}")

    # Build the user vector from net-positive tags only. Repeating a tag
    # abs(weight) times would make disliked tags attract the user exactly
    # like liked ones, so tags with a non-positive rounded weight are skipped.
    weighted_tags = user_profile.get_weighted_tags()
    weighted_text = ' '.join(
        tag
        for tag, weight in weighted_tags.items()
        for _ in range(max(0, int(round(weight))))
    )
    user_vector = vectorizer.transform([weighted_text]) if weighted_text else None

    # Get candidates from current cluster
    candidate_indices = user_profile.get_recommendation_candidates(filtered_df, seen_items)

    # If not enough in current cluster, look in nearby clusters
    if len(candidate_indices) < ITEMS_PER_ROUND:
        if user_vector is not None:
            query = user_vector
        else:
            # No positive profile yet: rank neighbours by distance from the
            # current cluster's own centroid so the round is not left short.
            query = kmeans.cluster_centers_[[user_profile.current_cluster]]
        distances = kmeans.transform(query)
        closest_clusters = np.argsort(distances[0])[:NEARBY_CLUSTERS]
        for cluster in closest_clusters:
            if cluster == user_profile.current_cluster:
                continue
            additional_items = filtered_df[(filtered_df['cluster'] == cluster) &
                                          (~filtered_df.index.isin(seen_items))].index.tolist()
            candidate_indices.extend(additional_items)
            if len(candidate_indices) >= ITEMS_PER_ROUND:
                break

    # Always cap the batch; previously the cap only applied after a top-up,
    # so a large cluster was shown in full within a single round.
    candidate_indices = candidate_indices[:ITEMS_PER_ROUND]

    for idx in candidate_indices:
        item = filtered_df.loc[idx]
        print(f"\nItem {idx}: {item['Title']}")
        print("Tags:", item['combined_tags'])
        feedback = input("Do you like this item? (L = Like / D = Dislike): ").strip().upper()
        # The item counts as seen even when the answer is invalid.
        seen_items.add(idx)

        tags = item['combined_tags'].split()
        timestamp = time.time()

        if feedback in ['L', 'D']:
            user_profile.update_profile(tags, feedback, timestamp)
        else:
            print("Invalid input. Skipping this item.")

    print("\nCurrent cluster:", user_profile.current_cluster)
    print("Seen items:", len(seen_items))


Hello, Mohib! Let's get started with your personalized recommendations.


🔁 Round 1

Item 7: Dripzora Sweater
Tags: dripzora sweater unisex cotton white tee unique design custom made custom design hilly design mountains winter collection casual clothing casual style round neck comfy breahtable
Do you like this item? (L = Like / D = Dislike): L

Item 8: Running shoes
Tags: electric design unisex sporty  spotswear unique design electrify  flash hiking shoes running shoes sporty shoes washable indoor  outdoor  
Do you like this item? (L = Like / D = Dislike): D

Item 9: Casual Sneaker
Tags: casual sneaks  casual shoes outdoor indoor sporty unique design electrify flash unisex fiber washable water proof party wear  
Do you like this item? (L = Like / D = Dislike): D

Item 12: Electric High Top Shoes
Tags: electric design outdoor indoor comfy hiking shoe trekking shoe comfortable electrics style custom made unisex washable water proof sporty look  
Do you like this item? (L = Like / D = Di