In [34]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# from sklearn.neighbors import NearestNeighbors
from IPython.display import Image, display

In [35]:
# --- Load data ---
# styles.csv holds the product attributes; images.csv holds one image
# filename/link per product (see the merged preview below).
# on_bad_lines='skip' makes pandas drop rows it cannot parse instead of
# raising, so malformed lines in styles.csv are silently omitted.
styles = pd.read_csv("styles.csv", on_bad_lines='skip')
images = pd.read_csv("images.csv")

In [36]:
# Derive the integer product id from the image filename
# (e.g. "15970.jpg" -> 15970) so it can serve as the join key with styles.
images['id'] = (
    images['filename']
    .str.replace('.jpg', '', regex=False)
    .astype(int)
)

In [37]:
# Merge the datasets: inner-join product attributes with image info on the
# shared 'id', keeping only products present in both tables.
df = styles.merge(images, how='inner', on='id')

In [38]:
# Show the first row of the merged frame.
df.head(1)

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,filename,link
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt,15970.jpg,http://assets.myntassets.com/v1/images/style/p...


In [39]:
# Remove rows with a null in any column used later: the text attributes that
# build_tfidf concatenates, plus 'link', which becomes the displayed image URL.
# inplace=True mutates df itself, so every later cell sees the filtered frame.
df.dropna(subset=[
    'productDisplayName', 'gender', 'masterCategory', 'subCategory',
    'articleType', 'baseColour', 'season', 'usage', 'link'
], inplace=True)

In [40]:
# Copy 'link' into 'image_url', the column recommend() reads when it displays
# and returns results.
df['image_url'] = df['link']

In [41]:
# Show the first row again, now including the image_url column.
df.head(1)

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,filename,link,image_url
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt,15970.jpg,http://assets.myntassets.com/v1/images/style/p...,http://assets.myntassets.com/v1/images/style/p...


In [42]:
def build_tfidf(df, max_features=3000):
    """Fit TF-IDF over each row's combined, lower-cased text attributes.

    NOTE: a later cell (In [58]) defines ``build_tfidf`` again with the
    parameter named ``data``; on a top-to-bottom run that later definition is
    the one in effect.

    Side effect: writes (or overwrites) a ``combined`` column on ``df`` holding
    the space-joined text that was vectorised.

    Args:
        df: DataFrame containing the string columns gender, masterCategory,
            subCategory, articleType, baseColour, season, usage and
            productDisplayName.
        max_features: Vocabulary size cap passed to ``TfidfVectorizer``.

    Returns:
        The matrix produced by ``TfidfVectorizer.fit_transform``, one row per
        row of ``df``.
    """
    text_columns = [
        'gender', 'masterCategory', 'subCategory', 'articleType',
        'baseColour', 'season', 'usage', 'productDisplayName',
    ]

    # A missing attribute is treated as empty text. Without this, one NaN
    # makes the whole concatenation NaN, which the vectorizer rejects.
    parts = [df[col].str.lower().fillna('') for col in text_columns]
    combined = parts[0]
    for part in parts[1:]:
        combined = combined + ' ' + part
    df['combined'] = combined

    tfidf = TfidfVectorizer(stop_words='english', max_features=max_features)
    return tfidf.fit_transform(df['combined'])

In [57]:
def find_product(name, data=None):
    """Return the first product whose display name contains ``name``.

    The match is a case-insensitive, literal substring test, so characters
    such as ``+``, ``(`` or ``.`` in the query are matched as written rather
    than interpreted as regular-expression syntax.

    Args:
        name: Text to look for in ``productDisplayName``.
        data: DataFrame to search; defaults to the notebook-level ``df``.

    Returns:
        The first matching row as a Series, or None when the query is blank
        or nothing matches.
    """
    if data is None:
        data = df

    needle = name.lower()
    # An empty query is a substring of every name; treat it as "no match"
    # instead of arbitrarily returning the first row.
    if not needle.strip():
        return None

    display_names = data['productDisplayName'].str.lower()
    # na=False: rows without a display name simply do not match.
    hits = data[display_names.str.contains(needle, regex=False, na=False)]
    if hits.empty:
        return None
    return hits.iloc[0]

In [58]:
def build_tfidf(data, max_features=3000):
    """Fit TF-IDF over each row's combined, lower-cased text attributes.

    Side effect: writes (or overwrites) a ``combined`` column on ``data``
    holding the space-joined text that was vectorised. Pass a copy if the
    caller's frame must stay untouched.

    Args:
        data: DataFrame containing the string columns gender, masterCategory,
            subCategory, articleType, baseColour, season, usage and
            productDisplayName.
        max_features: Vocabulary size cap passed to ``TfidfVectorizer``.

    Returns:
        The matrix produced by ``TfidfVectorizer.fit_transform``, one row per
        row of ``data``.
    """
    text_columns = [
        'gender', 'masterCategory', 'subCategory', 'articleType',
        'baseColour', 'season', 'usage', 'productDisplayName',
    ]

    # A missing attribute is treated as empty text. Without this, one NaN
    # makes the whole concatenation NaN, which the vectorizer rejects.
    parts = [data[col].str.lower().fillna('') for col in text_columns]
    combined = parts[0]
    for part in parts[1:]:
        combined = combined + ' ' + part
    data['combined'] = combined

    vectorizer = TfidfVectorizer(stop_words='english', max_features=max_features)
    return vectorizer.fit_transform(data['combined'])

In [59]:
def recommend(name, max_features=3000, top_n=5):
    """Print, display and return the items most similar to a named product.

    The query product is looked up with ``find_product(name)``. Candidates are
    the rows of the notebook-level ``df`` that share its ``gender`` and
    ``articleType``; they are ranked by cosine similarity of the TF-IDF
    vectors that ``build_tfidf`` fits on that subset.

    Args:
        name: Text to search for in product display names.
        max_features: Vocabulary size cap forwarded to ``build_tfidf``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A DataFrame with the columns productDisplayName, articleType,
        baseColour and image_url for the recommended rows, most similar
        first. None when no product matches ``name`` or when the product's
        category contains at most one item.

    Side effects:
        Prints the matched product and each recommendation, and displays each
        recommendation's image.
    """
    product = find_product(name)
    if product is None:
        print("❌ Product not found.")
        return None

    print(f"🔍 Closest match: {product['productDisplayName']}")

    # 🧽 Filter by same gender + articleType. The copy keeps build_tfidf's
    # added 'combined' column off the shared df.
    same_category = (
        (df['gender'] == product['gender']) &
        (df['articleType'] == product['articleType'])
    )
    filtered = df[same_category].copy()

    if len(filtered) <= 1:
        print("⚠️ Not enough items in this category to recommend.")
        return None

    tfidf_matrix = build_tfidf(filtered, max_features=max_features)

    # Cosine similarity of the query row against every candidate.
    # product.name is the row's index label; get_loc turns it into the row
    # position that matches the TF-IDF matrix.
    query_pos = filtered.index.get_loc(product.name)
    cosine_sim = cosine_similarity(tfidf_matrix[query_pos], tfidf_matrix).flatten()

    # Top results: rank by descending similarity (stable sort, so ties keep
    # catalogue order) and drop the query row by position. Assuming the query
    # always sorts last is wrong when exact duplicates tie with it or when its
    # vector is all zeros.
    ranked = (-cosine_sim).argsort(kind='stable')
    similar_indices = ranked[ranked != query_pos][:top_n]
    results = filtered.iloc[similar_indices]

    for _, row in results.iterrows():
        print(f"\n🛍️ {row['productDisplayName']} ({row['articleType']} - {row['baseColour']})")
        display(Image(url=row['image_url'], width=200))

    return results[['productDisplayName', 'articleType', 'baseColour', 'image_url']]


In [87]:
# Example query, raising the TF-IDF vocabulary cap from the default 3000 to 5000.
recommend("white shirt", max_features=5000)

🔍 Closest match: Scullers Men Scul Purple White Shirt

🛍️ Scullers Men Scul Purple White Shirts (Shirts - Purple)



🛍️ Scullers Men Scul Purple Shirt (Shirts - Purple)



🛍️ Scullers Men's Scul Purple Shirt (Shirts - Purple)



🛍️ Scullers Men Scul Purple Shirts (Shirts - Purple)



🛍️ Scullers Men Scul Purple Shirts (Shirts - Purple)


Unnamed: 0,productDisplayName,articleType,baseColour,image_url
32775,Scullers Men Scul Purple White Shirts,Shirts,Purple,http://assets.myntassets.com/v1/images/style/p...
28857,Scullers Men Scul Purple Shirt,Shirts,Purple,http://assets.myntassets.com/v1/images/style/p...
9538,Scullers Men's Scul Purple Shirt,Shirts,Purple,http://assets.myntassets.com/v1/images/style/p...
36108,Scullers Men Scul Purple Shirts,Shirts,Purple,http://assets.myntassets.com/v1/images/style/p...
27435,Scullers Men Scul Purple Shirts,Shirts,Purple,http://assets.myntassets.com/v1/images/style/p...
