<a href="https://colab.research.google.com/github/Maheshdudala/Ai_Stylist__Infosys_Internship_Oct2024_Team2/blob/Collab_Branch/Another_copy_of_HybridModel2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Hybrid Recommendation Model Using Content-Based and Collaborative Filtering**

In [None]:
# from google.colab import drive
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from IPython.display import Image, display

In [None]:
# Dataset source: the CSV is fetched straight from the project's GitHub
# repository, so the notebook runs without mounting Google Drive.
# Legacy Drive-based setup, kept for reference only:
#   drive.mount('/content/drive')
#   data_path = 'Final Fashion Dataset.csv'
#   image_folder_path = '/content/drive/MyDrive/Images'
DATASET_URL = 'https://raw.githubusercontent.com/Maheshdudala/Ai_Stylist__Infosys_Internship_Oct2024_Team2/refs/heads/Ayush/Final%20Fashion%20Dataset.csv'

# One row per (user, product) interaction; preview the first rows.
df = pd.read_csv(DATASET_URL)
df.head()

Unnamed: 0,user_id,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,filename,link,ratings,review,Month,Price (USD)
0,93810,8493,Women,Apparel,Bottomwear,Skirts,Black,Fall,2011.0,Casual,Forever New Women Black Skirts,8493.jpg,http://assets.myntassets.com/v1/images/style/p...,5,Amazing quality! Definitely recommend. Would r...,November,46
1,24592,30757,Men,Apparel,Topwear,Kurtas,Grey,Summer,2012.0,Ethnic,Fabindia Men Grey Mangalgiri Kurta,30757.jpg,http://assets.myntassets.com/v1/images/style/p...,4,"Perfect for any occasion, great buy..",September,29
2,13278,14881,Women,Accessories,Bags,Handbags,Black,Summer,2011.0,Casual,United Colors of Benetton Women Solid Black Ha...,14881.jpg,http://assets.myntassets.com/v1/images/style/p...,3,"It's okay, nothing special.!",October,39
3,46048,48449,Men,Apparel,Topwear,Tshirts,Blue,Summer,2012.0,Casual,French Connection Men Blue T-shirt,48449.jpg,http://assets.myntassets.com/v1/images/style/p...,5,"Comfortable and stylish, worth the price. and ...",August,39
4,42098,4697,Unisex,Accessories,Watches,Watches,Black,Winter,2016.0,Sports,ADIDAS Unisex Digital Duramo Black Watch,4697.jpg,http://assets.myntassets.com/v1/images/style/p...,4,"Perfect for any occasion, great buy.!",March,33


In [None]:
# ----------- Content-Based Filtering Features ------------
# Each row of `df` gets one dense feature vector built from three parts:
# one-hot categorical attributes, standardised numeric attributes, and a
# TF-IDF encoding of the product display name.
categorical_features = ['gender', 'baseColour', 'subCategory', 'articleType']
numerical_features = ['ratings', 'Price (USD)', 'year']
# NOTE(review): 'ratings' is also the target the DNN cell later trains on, so
# it reaches that model as an input feature as well - confirm this is intended.

# One-hot encode the categorical columns (dense output so it can be stacked).
encoder = OneHotEncoder(sparse_output=False)
categorical_frame = df[categorical_features]
encoded_categorical = encoder.fit_transform(categorical_frame)

# Standardise the numeric columns to zero mean / unit variance.
scaler = StandardScaler()
numerical_frame = df[numerical_features]
scaled_numerical = scaler.fit_transform(numerical_frame)

# TF-IDF over product names; astype(str) turns missing names into text.
tfidf = TfidfVectorizer(stop_words='english')
product_names = df['productDisplayName'].astype(str)
tfidf_matrix = tfidf.fit_transform(product_names)

# Stack the three parts column-wise; the TF-IDF matrix is densified here,
# so memory grows with (rows x vocabulary size).
content_features = np.concatenate(
    [encoded_categorical, scaled_numerical, tfidf_matrix.toarray()],
    axis=1,
)

In [None]:
# ----------- Collaborative Filtering Features ------------
# Give every distinct user / product a contiguous 0-based index, numbered in
# order of first appearance in `df`.
user_mapping = {uid: position for position, uid in enumerate(df['user_id'].unique())}
item_mapping = {pid: position for position, pid in enumerate(df['id'].unique())}
df['user_index'] = df['user_id'].map(user_mapping)
df['item_index'] = df['id'].map(item_mapping)

# Sparse users x items matrix holding the observed ratings; every pair that
# was never rated is an implicit zero.
num_users = len(user_mapping)
num_items = len(item_mapping)
rating_coordinates = (df['user_index'], df['item_index'])
user_item_matrix = csr_matrix(
    (df['ratings'], rating_coordinates),
    shape=(num_users, num_items),
    dtype=np.float64,
)

# Truncated SVD keeping k latent factors.
k = 50  # Latent features
u, sigma, vt = svds(user_item_matrix, k=k)
sigma = np.diag(sigma)

# Low-rank reconstruction: row = user index, column = item index.
predicted_ratings = (u @ sigma) @ vt


In [None]:
# Complementary-item lookup: maps an input item's `articleType` to the article
# types that should be recommended alongside it, in priority order.
# The recommenders compare these labels to df['articleType'] with exact,
# case-sensitive equality, so a label that is misspelled never matches.
# Labels are therefore written the way the dictionary's own keys spell them
# ('Jeans', 'Tshirts', 'Robe', 'Bangle').
# NOTE(review): labels such as 'Sneakers', 'Slippers', 'Shoes', 'Nightgowns',
# 'Night Tshirts', 'Boots' and 'Beach Bags' do not appear as keys here -
# verify against df['articleType'].unique() that they exist in the dataset.
complementary_mapping = {
    'Skirts': ['Tshirts', 'Watches', 'Handbags', 'Heels', 'Necklace and Chains'],
    'Tshirts': ['Jeans', 'Belts', 'Casual Shoes', 'Backpacks'],
    'Jeans': ['Shirts', 'Belts', 'Watches', 'Casual Shoes'],
    'Dresses': ['Heels', 'Clutches', 'Earrings', 'Watches'],
    'Shirts': ['Trousers', 'Formal Shoes', 'Belts', 'Watches'],
    'Kurtas': ['Leggings', 'Dupatta', 'Earrings', 'Sandals'],
    'Handbags': ['Tshirts', 'Jeans', 'Watches', 'Heels'],
    'Formal Shoes': ['Shirts', 'Trousers', 'Belts', 'Watches'],
    'Flip Flops': ['Shorts', 'Tshirts', 'Sunglasses'],
    'Heels': ['Dresses', 'Clutches', 'Necklace and Chains', 'Earrings'],
    'Leggings': ['Tops', 'Kurtas', 'Dupatta', 'Sandals'],
    'Backpacks': ['Tshirts', 'Jeans', 'Sneakers', 'Watches'],
    'Earrings': ['Dresses', 'Clutches', 'Heels', 'Watches'],
    'Clutches': ['Heels', 'Dresses', 'Earrings', 'Necklace and Chains'],
    'Casual Shoes': ['Jeans', 'Tshirts', 'Belts', 'Backpacks'],
    'Dupatta': ['Kurtas', 'Leggings', 'Sandals', 'Earrings'],
    'Tops': ['Skirts', 'Watches', 'Handbags', 'Heels'],
    'Ring': ['Dresses', 'Clutches', 'Necklace and Chains', 'Earrings'],
    'Flats': ['Shorts', 'Tops', 'Handbags', 'Sunglasses'],
    'Suspenders': ['Shirts', 'Trousers', 'Formal Shoes', 'Belts'],
    'Sports Shoes': ['Track Pants', 'Tshirts', 'Watches', 'Backpacks'],
    'Sunglasses': ['Tshirts', 'Jeans', 'Casual Shoes', 'Backpacks', 'Watches'],
    'Shorts': ['Tshirts', 'Flip Flops', 'Sunglasses', 'Backpacks'],
    'Belts': ['Jeans', 'Tshirts', 'Casual Shoes', 'Watches'],
    'Sarees': ['Handbags', 'Earrings', 'Necklace and Chains', 'Heels', 'Watches'],
    'Nightdress': ['Lounge Pants', 'Robe', 'Socks', 'Slippers'],
    'Jackets': ['Jeans', 'Tshirts', 'Sneakers', 'Backpacks'],
    'Trousers': ['Shirts', 'Formal Shoes', 'Belts', 'Watches'],
    'Messenger Bag': ['Tshirts', 'Jeans', 'Casual Shoes', 'Sunglasses'],
    'Track Pants': ['Sports Shoes', 'Tshirts', 'Watches', 'Backpacks'],
    'Kurtis': ['Leggings', 'Dupatta', 'Sandals', 'Earrings'],
    'Capris': ['Tshirts', 'Flip Flops', 'Sunglasses', 'Backpacks'],
    'Sweatshirts': ['Jeans', 'Sneakers', 'Backpacks', 'Watches'],
    'Wallets': ['Jeans', 'Shirts', 'Belts', 'Watches', 'Trousers'],
    'Socks': ['Sports Shoes', 'Track Pants', 'Tshirts', 'Backpacks'],
    'Necklace and Chains': ['Dresses', 'Earrings', 'Clutches', 'Heels', 'Kurtis', 'Dupatta', 'Leggings'],
    'Stockings': ['Skirts', 'Heels', 'Handbags', 'Tops'],
    'Waist Pouch': ['Tshirts', 'Jeans', 'Sunglasses', 'Sneakers', 'Watches'],
    'Tunics': ['Leggings', 'Dupatta', 'Sandals', 'Earrings'],
    'Booties': ['Dresses', 'Leggings', 'Clutches', 'Necklace and Chains'],
    'Lounge Pants': ['Night Tshirts', 'Slippers', 'Robe', 'Socks'],
    'Sweaters': ['Jeans', 'Boots', 'Backpacks', 'Watches'],
    'Pendant': ['Tshirts', 'Jeans', 'Watches', 'Dresses', 'Necklace and Chains', 'Clutches'],
    'Duffel Bag': ['Sports Shoes', 'Track Pants', 'Tshirts', 'Watches'],
    'Tracksuits': ['Sports Shoes', 'Duffel Bag', 'Watches', 'Socks'],
    'Baby Dolls': ['Robe', 'Slippers', 'Socks', 'Nightgowns'],
    'Laptop Bag': ['Tshirts', 'Jeans', 'Sneakers', 'Watches'],
    'Night suits': ['Lounge Pants', 'Slippers', 'Robe', 'Socks'],
    'Tights': ['Tunics', 'Sweatshirts', 'Sneakers', 'Backpacks'],
    'Jeggings': ['Tops', 'Heels', 'Handbags', 'Sunglasses'],
    'Mobile Pouch': ['Jeans', 'Tshirts', 'Casual Shoes', 'Belts'],
    'Jewellery Set': ['Dresses', 'Earrings', 'Clutches', 'Heels'],
    'Lounge Shorts': ['Night Tshirts', 'Slippers', 'Robe', 'Socks', 'Sneakers'],
    'Swimwear': ['Sunglasses', 'Beach Bags', 'Swimwear'],
    'Bangle': ['Sarees', 'Necklace and Chains', 'Earrings', 'Heels', 'Tshirts', 'Jeans', 'Belts', 'Watches', 'Shoes'],
    'Churidar': ['Kurtas', 'Dupatta', 'Sandals', 'Earrings'],
    'Salwar': ['Kurtas', 'Dupatta', 'Sandals', 'Earrings'],
    'Shrug': ['Tops', 'Jeans', 'Sneakers', 'Backpacks'],
    'Bracelet': ['Shirts', 'Jeans', 'Casual Shoes', 'Belts', 'Dresses', 'Necklace and Chains', 'Earrings', 'Watches'],
    'Waistcoat': ['Shirts', 'Trousers', 'Formal Shoes', 'Belts'],
    'Rucksacks': ['Tshirts', 'Jeans', 'Sneakers', 'Sunglasses'],
    'Blazers': ['Shirts', 'Trousers', 'Formal Shoes', 'Watches'],
    'Rompers': ['Sandals', 'Clutches', 'Earrings', 'Watches'],
    'Robe': ['Nightdress', 'Slippers', 'Socks', 'Night Tshirts'],
    'Salwar and Dupatta': ['Kurtas', 'Sandals', 'Earrings', 'Bangle'],
    'Tablet Sleeve': ['Tshirts', 'Jeans', 'Sneakers', 'Watches'],
    'Rain Jacket': ['Track Pants', 'Sports Shoes', 'Backpacks'],
    'Lounge Tshirts': ['Lounge Pants', 'Slippers', 'Robe', 'Socks'],
    'Lehenga Choli': ['Earrings', 'Necklace and Chains', 'Bangle', 'Sandals'],
    'Sandals': ['Skirts', 'Tops', 'Handbags', 'Sunglasses'],
    'Suits': ['Shirts', 'Trousers', 'Formal Shoes', 'Watches'],
    'Travel Accessory': ['Tshirts', 'Jeans', 'Watches', 'Wallets', 'Sneakers', 'Sunglasses'],
    'Trolley Bag': ['Casual Shoes', 'Jeans', 'Tshirts', 'Watches'],
    'Watches': ['Shirts', 'Jeans', 'Wallets', 'Formal Shoes', 'Belts'],
}


In [None]:
# Colour pairing lookup: maps an input item's `baseColour` to the base colours
# considered a good match for it. hybrid_recommend() keeps only candidates
# whose `baseColour` appears in the list for the input item's colour.
color_contrast_map = {
    "Black": ["White", "Grey", "Beige", "Silver", "Cream"],
    "Grey": ["Black", "White", "Red", "Navy Blue"],
    "Blue": ["White", "Cream", "Yellow", "Pink", "Silver"],
    "Pink": ["Navy Blue", "White", "Beige", "Grey"],
    "Brown": ["Cream", "White", "Beige", "Olive", "Mustard"],
    "Cream": ["Black", "Blue", "Brown", "Navy Blue", "Purple"],
    "Green": ["White", "Black", "Navy Blue", "Yellow", "Brown"],
    "White": ["Black", "Blue", "Red", "Navy Blue", "Grey"],
    "Navy Blue": ["White", "Yellow", "Cream", "Pink", "Beige"],
    "Yellow": ["Black", "Navy Blue", "Grey", "Green", "Purple"],
    "Silver": ["Black", "Purple", "Pink", "Red"],
    "Red": ["White", "Black", "Grey", "Beige"],
    "Beige": ["Navy Blue", "Black", "Red", "Olive", "Cream"],
    "Maroon": ["Cream", "Beige", "White", "Olive"],
    "Gold": ["Black", "White", "Cream", "Navy Blue"],
    "Magenta": ["Brown", "Blue", "Pink", "Black"],
    "Lavender": ["Navy Blue", "White", "Grey"],
    "Multi": ["White", "Black"],
    "Purple": ["Yellow", "White", "Cream"],
    "Charcoal": ["White", "Cream", "Blue"],
    "Orange": ["White", "Black", "Navy Blue"],
    "Tan": ["Navy Blue", "White", "Black"],
    "Olive": ["White", "Yellow", "Cream", "Tan"],
    "Off White": ["Black", "Grey", "Navy Blue"],
    "Grey Melange": ["Black", "White", "Red", "Blue"],
    "Rust": ["White", "Cream", "Beige", "Navy Blue"],
    "Turquoise Blue": ["White", "Black", "Yellow"],
    "Mustard": ["White", "Black", "Brown", "Navy Blue"],
    "Khaki": ["White", "Navy Blue", "Brown"],
    "Lime Green": ["White", "Black", "Grey"],
    "Peach": ["Blue", "Black", "Navy Blue"],
    "Sea Green": ["White", "Black", "Navy Blue"],
    "Teal": ["White", "Cream", "Navy Blue"],
    "Mauve": ["White", "Grey", "Blue"],
    "Copper": ["White", "Black", "Navy Blue"],
    "Steel": ["Black", "White", "Beige"],
    "Bronze": ["White", "Black"],
    "Nude": ["White", "Black", "Beige"],
    "Metallic": ["White", "Black", "Silver"],
    "Taupe": ["White", "Black", "Beige"],
    "Fluorescent Green": ["White", "Black"],
    "Burgundy": ["White", "Beige", "Grey"],
    "Mushroom Brown": ["White", "Grey", "Black"],
    "Coffee Brown": ["White", "Beige", "Cream"],
}

In [None]:
# ----------- Hybrid Recommendation Function ------------
def get_hybrid_recommendations(user_id, item_id, df, content_features, complementary_mapping, num_recommendations=10):
    """Recommend complementary items by blending content similarity with SVD scores.

    For every article type that `complementary_mapping` lists for the input
    item's article type, candidates of the same gender are scored as
    ``0.5 * cosine similarity to the input item + 0.5 * SVD-predicted rating``
    for the user, and the single best candidate of that type is kept. The
    input item and the picks are printed and their images displayed.

    Reads the notebook-level globals `user_mapping`, `item_mapping` and
    `predicted_ratings` (the users x items SVD reconstruction).

    Parameters:
        user_id: User ID as found in df['user_id'].
        item_id: Product ID as found in df['id'].
        df (pd.DataFrame): Interaction/product table; its rows must be in the
            same order as the rows of `content_features`.
        content_features (np.ndarray): One feature row per row of `df`.
        complementary_mapping (dict): articleType -> list of complementary
            article types.
        num_recommendations (int): Maximum number of items to return.

    Returns:
        list[pd.Series]: One row of `df` per recommendation, extended with
        'content_similarity', 'collab_score' and 'hybrid_score'. Empty when
        the user/item is unknown or no complementary mapping exists.

    Raises:
        RuntimeError: If `predicted_ratings` is no longer the 2-D SVD matrix.
    """
    # Validate user ID and item ID
    if user_id not in user_mapping or item_id not in item_mapping:
        print("User or Item ID not found in dataset.")
        return []

    # The evaluation cell further down rebinds `predicted_ratings` to a 1-D
    # array; fail with a clear message instead of an opaque indexing error.
    if np.ndim(predicted_ratings) != 2:
        raise RuntimeError(
            "predicted_ratings is not the users x items SVD matrix; "
            "re-run the collaborative filtering cell before calling this function."
        )

    user_idx = user_mapping[user_id]

    # Work with row *positions*: content_features is aligned with df by
    # position, which only equals the index label for a default RangeIndex.
    item_rows = np.flatnonzero((df['id'] == item_id).to_numpy())
    if item_rows.size == 0:
        print("User or Item ID not found in dataset.")
        return []
    item_pos = item_rows[0]
    input_item = df.iloc[item_pos]

    # Display input item details
    print("\nInput Item Details:")
    print(f"Product ID: {input_item['id']}")
    print(f"Article Type: {input_item['articleType']}")
    print(f"SubCategory: {input_item['subCategory']}")
    print(f"Gender: {input_item['gender']}")
    print(f"Base Colour: {input_item['baseColour']}")

    # Display input image from the link
    input_image_url = input_item['link']
    display(Image(url=input_image_url, width=100, height=100))  # Reduced size

    # Collaborative Filtering - predicted ratings of this user for all items
    user_ratings = predicted_ratings[user_idx]

    # Content-Based Filtering - find complementary items
    input_article_type = input_item['articleType']
    input_gender = input_item['gender']
    complementary_article_types = complementary_mapping.get(input_article_type, [])

    if not complementary_article_types:
        print("No complementary mapping found for the input item.")
        return []

    # The query vector is the same for every article type, so build it once.
    input_features = content_features[item_pos].reshape(1, -1)
    recommendations = []

    for article_type in complementary_article_types:
        # Filter items by articleType and gender
        candidate_mask = (df['articleType'] == article_type) & (df['gender'] == input_gender)
        candidate_pos = np.flatnonzero(candidate_mask.to_numpy())
        if candidate_pos.size == 0:
            continue

        # Content similarity between the input item and every candidate
        content_similarities = cosine_similarity(
            input_features, content_features[candidate_pos]
        ).flatten()

        # Combine content similarity and collaborative filtering score.
        # Columns of the SVD matrix are item indices from item_mapping,
        # not DataFrame row numbers.
        filtered_items = df.iloc[candidate_pos].copy()
        filtered_items['content_similarity'] = content_similarities
        filtered_items['item_index'] = filtered_items['id'].map(item_mapping)
        filtered_items['collab_score'] = user_ratings[filtered_items['item_index'].to_numpy()]
        filtered_items['hybrid_score'] = 0.5 * filtered_items['content_similarity'] + 0.5 * filtered_items['collab_score']

        # Pick top recommendation for the article type
        top_item = filtered_items.sort_values(by='hybrid_score', ascending=False).iloc[0]
        recommendations.append(top_item)

        # Stop if we reach the required number of recommendations
        if len(recommendations) >= num_recommendations:
            break

    # Display recommendations
    print("\nHybrid Recommendations:")

    for rec in recommendations:
        rec_id = rec['id']
        rec_article_type = rec['articleType']
        rec_sub_category = rec['subCategory']

        print(f"Product ID: {rec_id}, Article Type: {rec_article_type}, SubCategory: {rec_sub_category}")

        # Display image from the link with reduced size
        rec_image_url = rec['link']
        display(Image(url=rec_image_url, width=100, height=100))  # Reduced size

    # Callers assign the result (and the early exits return a list), so hand
    # the picks back instead of implicitly returning None.
    return recommendations

In [None]:
# just for reference: every interaction recorded for user 45601
df.loc[df['user_id'] == 45601]

Unnamed: 0,user_id,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,filename,link,ratings,review,Month,Price (USD),user_index,item_index
3292,45601,23043,Women,Apparel,Bottomwear,Leggings,Blue,Fall,2011.0,Casual,Arrow Woman Blue Jeggings,23043.jpg,http://assets.myntassets.com/v1/images/style/p...,3,"Decent product, but could be better. Would not...",July,17,2451,3292
8209,45601,34990,Women,Apparel,Saree,Sarees,Grey,Summer,2012.0,Ethnic,Fabindia H& Printed Grey Cotton Mull Sari,34990.jpg,http://assets.myntassets.com/v1/images/style/p...,3,Satisfactory for the price. but could be better.,November,18,2451,8209
18311,45601,35033,Women,Apparel,Bottomwear,Churidar,Green,Summer,2012.0,Ethnic,Fabindia Women Green Churidar,35033.jpg,http://assets.myntassets.com/v1/images/style/p...,2,"Not as described, low-quality product. Would n...",May,24,2451,18311
28534,45601,53417,Women,Apparel,Topwear,Kurtas,Teal,Fall,2012.0,Ethnic,Alma Women Teal Kurta,53417.jpg,http://assets.myntassets.com/v1/images/style/p...,5,"Exceeded expectations, love the fit! for speci...",April,31,2451,28534
29881,45601,21418,Women,Apparel,Topwear,Tops,Pink,Winter,2012.0,Casual,Roxy Women Pink Top,21418.jpg,http://assets.myntassets.com/v1/images/style/p...,4,"Comfortable and stylish, worth the price. for ...",October,31,2451,29881
31196,45601,25145,Women,Accessories,Bags,Handbags,Beige,Winter,2015.0,Casual,Lino Perros Women Beige Handbag,25145.jpg,http://assets.myntassets.com/v1/images/style/p...,4,"High quality fabric, very satisfied.!",June,21,2451,31196
34951,45601,28575,Women,Apparel,Topwear,Tshirts,Pink,Summer,2012.0,Casual,Nike Women Try Me Pink T-shirt,28575.jpg,http://assets.myntassets.com/v1/images/style/p...,4,"Perfect for any occasion, great buy. for casua...",May,29,2451,34951


In [None]:
# Example usage: complementary picks for product 34990 as seen by user 45601.
user_id_example, item_id_example = 45601, 34990
get_hybrid_recommendations(
    user_id=user_id_example,
    item_id=item_id_example,
    df=df,
    content_features=content_features,
    complementary_mapping=complementary_mapping,
);



Input Item Details:
Product ID: 34990
Article Type: Sarees
SubCategory: Saree
Gender: Women
Base Colour: Grey



Hybrid Recommendations:
Product ID: 46979, Article Type: Handbags, SubCategory: Bags


Product ID: 43863, Article Type: Earrings, SubCategory: Jewellery


Product ID: 54158, Article Type: Necklace and Chains, SubCategory: Jewellery


Product ID: 10272, Article Type: Heels, SubCategory: Shoes


Product ID: 22939, Article Type: Watches, SubCategory: Watches


In [None]:
# Example usage: complementary picks for product 19339 as seen by user 17019.
user_id_example, item_id_example = 17019, 19339
get_hybrid_recommendations(
    user_id=user_id_example,
    item_id=item_id_example,
    df=df,
    content_features=content_features,
    complementary_mapping=complementary_mapping,
);


Input Item Details:
Product ID: 19339
Article Type: Jackets
SubCategory: Topwear
Gender: Men
Base Colour: Black



Hybrid Recommendations:
Product ID: 21583, Article Type: Jeans, SubCategory: Bottomwear


Product ID: 45534, Article Type: Tshirts, SubCategory: Topwear


Product ID: 15215, Article Type: Backpacks, SubCategory: Bags


In [None]:
# List every "Travel Accessory" product together with its gender.
item = df[df['articleType'] == "Travel Accessory"]

# Keep only the columns that get printed: id and gender
item_details = item[['id', 'gender']]

for product_id, gender in zip(item_details['id'], item_details['gender']):
    print(f"ID: {product_id}, Gender: {gender}")



ID: 7124, Gender: Unisex


In [None]:
# List every "Waist Pouch" interaction: who rated it, which product, which gender.
item = df[df['articleType'] == "Waist Pouch"]

# Keep only the columns that get printed: user_id, id and gender
item_details = item[['user_id', 'id', 'gender']]

for owner_id, product_id, gender in zip(
    item_details['user_id'], item_details['id'], item_details['gender']
):
    print(f"User_ID:{owner_id}, ID: {product_id}, Gender: {gender}")


User_ID:39451, ID: 4612, Gender: Unisex
User_ID:24316, ID: 38757, Gender: Men
User_ID:47880, ID: 26693, Gender: Unisex
User_ID:55982, ID: 13177, Gender: Unisex
User_ID:51879, ID: 22685, Gender: Unisex
User_ID:54869, ID: 38967, Gender: Unisex
User_ID:57235, ID: 5226, Gender: Unisex
User_ID:98835, ID: 22665, Gender: Unisex
User_ID:33416, ID: 19915, Gender: Unisex
User_ID:54252, ID: 37261, Gender: Unisex
User_ID:24707, ID: 22670, Gender: Unisex
User_ID:67438, ID: 4611, Gender: Unisex
User_ID:78742, ID: 19914, Gender: Unisex
User_ID:75911, ID: 37260, Gender: Unisex
User_ID:95344, ID: 5223, Gender: Unisex
User_ID:61987, ID: 5225, Gender: Unisex
User_ID:24371, ID: 38742, Gender: Unisex


In [None]:
# List every "Messenger Bag" interaction: who rated it, which product, which gender.
item = df[df['articleType'] == "Messenger Bag"]

# Keep only the columns that get printed: user_id, id and gender
item_details = item[['user_id', 'id', 'gender']]

for owner_id, product_id, gender in zip(
    item_details['user_id'], item_details['id'], item_details['gender']
):
    print(f"User_ID:{owner_id}, ID: {product_id}, Gender: {gender}")


User_ID:40512, ID: 53377, Gender: Unisex
User_ID:58311, ID: 5224, Gender: Unisex
User_ID:72739, ID: 5264, Gender: Unisex
User_ID:26059, ID: 12745, Gender: Unisex
User_ID:46434, ID: 22678, Gender: Unisex
User_ID:68447, ID: 7610, Gender: Men
User_ID:38551, ID: 7616, Gender: Men
User_ID:19634, ID: 19911, Gender: Unisex
User_ID:89471, ID: 42937, Gender: Men
User_ID:65026, ID: 25427, Gender: Unisex
User_ID:65444, ID: 18796, Gender: Unisex
User_ID:49387, ID: 7612, Gender: Men
User_ID:52753, ID: 22679, Gender: Unisex
User_ID:52753, ID: 5263, Gender: Unisex
User_ID:84085, ID: 5260, Gender: Unisex
User_ID:16839, ID: 22656, Gender: Unisex
User_ID:65392, ID: 39521, Gender: Unisex
User_ID:92616, ID: 8246, Gender: Unisex
User_ID:65176, ID: 12743, Gender: Unisex
User_ID:16839, ID: 36058, Gender: Unisex
User_ID:98452, ID: 36055, Gender: Unisex
User_ID:20696, ID: 39520, Gender: Unisex
User_ID:60181, ID: 7614, Gender: Men
User_ID:51207, ID: 41847, Gender: Unisex
User_ID:55957, ID: 5265, Gender: Unisex


In [None]:
# List every "Jeans" interaction: who rated it, which product, which gender.
item = df[df['articleType'] == "Jeans"]

# Keep only the columns that get printed: user_id, id and gender
item_details = item[['user_id', 'id', 'gender']]

for owner_id, product_id, gender in zip(
    item_details['user_id'], item_details['id'], item_details['gender']
):
    print(f"User_ID:{owner_id}, ID: {product_id}, Gender: {gender}")


User_ID:46463, ID: 27926, Gender: Women
User_ID:93886, ID: 27935, Gender: Women
User_ID:50306, ID: 11290, Gender: Men
User_ID:69638, ID: 50958, Gender: Women
User_ID:63225, ID: 40371, Gender: Women
User_ID:96900, ID: 16820, Gender: Men
User_ID:59692, ID: 50956, Gender: Women
User_ID:42177, ID: 18180, Gender: Boys
User_ID:19150, ID: 14127, Gender: Men
User_ID:12875, ID: 14124, Gender: Men
User_ID:25129, ID: 41010, Gender: Girls
User_ID:71797, ID: 36742, Gender: Women
User_ID:14877, ID: 46893, Gender: Men
User_ID:32838, ID: 11334, Gender: Men
User_ID:50730, ID: 23400, Gender: Women
User_ID:35856, ID: 27937, Gender: Women
User_ID:69028, ID: 26991, Gender: Women
User_ID:70686, ID: 11240, Gender: Women
User_ID:90599, ID: 39401, Gender: Men
User_ID:64568, ID: 36732, Gender: Women
User_ID:67939, ID: 40933, Gender: Boys
User_ID:73469, ID: 9145, Gender: Women
User_ID:18804, ID: 27938, Gender: Women
User_ID:34176, ID: 51367, Gender: Women
User_ID:69942, ID: 27001, Gender: Women
User_ID:36633, ID

In [None]:

# Example usage: complementary picks for product 22670 as seen by user 24707.
user_id_example, item_id_example = 24707, 22670
get_hybrid_recommendations(
    user_id=user_id_example,
    item_id=item_id_example,
    df=df,
    content_features=content_features,
    complementary_mapping=complementary_mapping,
);


Input Item Details:
Product ID: 22670
Article Type: Waist Pouch
SubCategory: Bags
Gender: Unisex
Base Colour: Grey



Hybrid Recommendations:
Product ID: 4305, Article Type: Tshirts, SubCategory: Topwear


Product ID: 51717, Article Type: Sunglasses, SubCategory: Eyewear


Product ID: 22929, Article Type: Watches, SubCategory: Watches


In [None]:
# Example usage: complementary picks for product 7608 as seen by user 44735.
user_id_example, item_id_example = 44735, 7608
get_hybrid_recommendations(
    user_id=user_id_example,
    item_id=item_id_example,
    df=df,
    content_features=content_features,
    complementary_mapping=complementary_mapping,
);


Input Item Details:
Product ID: 7608
Article Type: Messenger Bag
SubCategory: Bags
Gender: Men
Base Colour: Brown



Hybrid Recommendations:
Product ID: 3276, Article Type: Tshirts, SubCategory: Topwear


Product ID: 28818, Article Type: Jeans, SubCategory: Bottomwear


Product ID: 22161, Article Type: Casual Shoes, SubCategory: Shoes


Product ID: 30569, Article Type: Sunglasses, SubCategory: Eyewear


**Hybrid Model Extended with a Deep Neural Network (DNN)**


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, concatenate
from tensorflow.keras.utils import plot_model

In [None]:
# --------- DNN Model for Hybrid Recommendations ---------
# Architecture: a learned user embedding and a learned item embedding are
# concatenated with the row's content feature vector and passed through a
# three-layer MLP that ends in a single sigmoid unit.
# NOTE(review): `content_features` includes the standardised 'ratings' column
# (see `numerical_features`), and the training target below is that same
# rating, so the network can read its label from its input. Confirm whether
# this is intended before trusting the reported errors.

# Input layers for user and item (one integer index each)
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')

# User embedding: one 50-dimensional vector per user index
user_embedding = Embedding(input_dim=num_users, output_dim=50, name='user_embedding')(user_input)
user_embedding = Flatten()(user_embedding)

# Item embedding: one 50-dimensional vector per item index
item_embedding = Embedding(input_dim=num_items, output_dim=50, name='item_embedding')(item_input)
item_embedding = Flatten()(item_embedding)

# Combine content-based features and collaborative embeddings
content_input = Input(shape=(content_features.shape[1],), name='content_input')
merged = concatenate([user_embedding, item_embedding, content_input])

# Deep layers
dense1 = Dense(128, activation='relu')(merged)
dense2 = Dense(64, activation='relu')(dense1)
dense3 = Dense(32, activation='relu')(dense2)

# Output layer: sigmoid keeps predictions in [0, 1], matching the normalised target
output = Dense(1, activation='sigmoid', name='output')(dense3)

# Define the hybrid model.
# The target is a fractional rating (rating / max rating), not a class label,
# so classification accuracy carries no information here; track the mean
# absolute error on the normalised scale instead.
hybrid_model = Model(inputs=[user_input, item_input, content_input], outputs=output)
hybrid_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['mae'])

# Display the model architecture
hybrid_model.summary()

# --------- Training the Hybrid Model ---------
# Prepare inputs for training.
# Indices are numbered in order of first appearance, the same scheme the
# collaborative-filtering cell uses for user_mapping / item_mapping.

user_ids = df['user_id'].unique()
user_to_index = {user_id: index for index, user_id in enumerate(user_ids)}
df['user_index'] = df['user_id'].map(user_to_index)

item_ids = df['id'].unique()
item_to_index = {item_id: index for index, item_id in enumerate(item_ids)}
df['item_index'] = df['id'].map(item_to_index)

user_indices = df['user_index'].values
item_indices = df['item_index'].values
content_inputs = content_features
ratings = df['ratings'].values / df['ratings'].max()  # Normalize ratings to 0-1

# Train the model on every row of df; Keras holds out the last 10% of the
# rows (validation_split) for validation.
hybrid_model.fit(
    [user_indices, item_indices, content_inputs],
    ratings,
    epochs=10,
    batch_size=64,
    validation_split=0.1
)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_input (InputLayer)     [(None, 1)]                  0         []                            
                                                                                                  
 item_input (InputLayer)     [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 50)                244550    ['user_input[0][0]']          
                                                                                                  
 item_embedding (Embedding)  (None, 1, 50)                1824200   ['item_input[0][0]']          
                                                                                              

<keras.src.callbacks.History at 0x7bc0440c9840>

In [None]:
def hybrid_recommend(
    user_id,
    input_item_id,
    df,
    hybrid_model,
    content_features,
    complementary_mapping,
    color_contrast_map,
    top_n=5
):
    """
    Generate DNN-ranked complementary recommendations for an input item.

    Candidates are the rows of `df` that share the input item's gender, whose
    article type is listed for the input item in `complementary_mapping`, and
    whose base colour is listed for the input item's colour in
    `color_contrast_map`. Only those rows are scored by `hybrid_model`, so an
    item that fails the filter can never be used to pad the result. At most
    one item per article type is kept. The input item and the picks are
    printed and their images displayed.

    Reads the notebook-level globals `user_mapping` and `item_mapping`.

    Parameters:
        user_id (int): The ID of the user for whom recommendations are generated.
        input_item_id (int): The ID of the input item for recommendations.
        df (pd.DataFrame): The dataset containing item information; its rows
            must be in the same order as the rows of `content_features`.
        hybrid_model (Model): The trained hybrid recommendation model.
        content_features (np.array): Preprocessed content-based features.
        complementary_mapping (dict): Mapping for complementary `articleType` recommendations.
        color_contrast_map (dict): Mapping for color contrasts.
        top_n (int): The maximum number of recommendations to return.

    Returns:
        list[pd.Series]: One row of `df` per recommendation (best first),
        extended with a 'predicted_score' field. Empty when the user or item
        is unknown or no candidate passes the filters. This is the shape
        `displayRecomImages` and `get_hybrid_recommendations` work with.
    """
    # Validate IDs up front instead of failing with KeyError / IndexError below.
    is_input_item = df['id'] == input_item_id
    if user_id not in user_mapping or input_item_id not in item_mapping or not is_input_item.any():
        print("User or Item ID not found in dataset.")
        return []

    # Extract input item details
    input_item = df[is_input_item].iloc[0]
    input_gender = input_item['gender']
    input_articleType = input_item['articleType']
    input_baseColour = input_item['baseColour']

    # Get allowed article types based on complementary mapping
    allowed_article_types = complementary_mapping.get(input_articleType, [])

    # Get allowed colors based on contrast map
    allowed_colors = color_contrast_map.get(input_baseColour, [])

    # Filter items based on gender, complementary mapping, and color contrast;
    # the input item itself is never a candidate.
    candidate_mask = (
        (df['gender'] == input_gender)
        & df['articleType'].isin(allowed_article_types)
        & df['baseColour'].isin(allowed_colors)
        & ~is_input_item
    )
    # Row positions (not index labels): content_features is aligned by position.
    candidate_pos = np.flatnonzero(candidate_mask.to_numpy())

    if candidate_pos.size > 0:
        candidates = df.iloc[candidate_pos]
        user_idx = user_mapping[user_id]
        # One model input per candidate row: the user's index, the row's item
        # index (from item_mapping) and the row's content features.
        predictions = hybrid_model.predict([
            np.full(candidate_pos.size, user_idx),
            candidates['id'].map(item_mapping).to_numpy(),
            content_features[candidate_pos],
        ]).flatten()

        # Best score first, keep the best row of each article type, cap at top_n.
        ranked = (
            candidates
            .assign(predicted_score=predictions)
            .sort_values('predicted_score', ascending=False)
            .drop_duplicates(subset='articleType', keep='first')
            .head(top_n)
        )
        recommendations = [row for _, row in ranked.iterrows()]
    else:
        recommendations = []

    # Prepare input item details for display
    input_item_details = {
        'Product ID': input_item_id,
        'Article Type': input_articleType,
        'SubCategory': input_item['subCategory'],
        'Gender': input_gender,
        'Base Colour': input_baseColour
    }

    print("Input Item Details:")
    for key, value in input_item_details.items():
        print(f"{key}: {value}")

    print("\nRecommended Items:")

    for rec_item in recommendations:
        print(f"User ID: {rec_item['user_id']}, Product ID: {rec_item['id']}, Gender: {rec_item['gender']}, Article Type: {rec_item['articleType']}, SubCategory: {rec_item['subCategory']}")
        rec_image_url = rec_item['link']
        display(Image(url=rec_image_url, width=100, height=100))

    return recommendations


In [None]:
def displayRecomImages(recommendations):
    """Print and render every recommended item.

    Parameters:
        recommendations: The recommendations to show. Accepts a list (or other
            iterable) of row-like objects indexable by 'id', 'articleType',
            'subCategory' and 'link', a pd.DataFrame with those columns, or
            None. None and empty inputs print nothing.

    Returns:
        None
    """
    if recommendations is None:
        return

    # A DataFrame has no unambiguous truth value, so turn it into its rows
    # before testing for emptiness.
    if isinstance(recommendations, pd.DataFrame):
        rows = [row for _, row in recommendations.iterrows()]
    else:
        rows = list(recommendations)

    if not rows:
        return

    print("\nRecommendations Retrieved:")
    for rec in rows:
        print(f"Product ID: {rec['id']}, Article Type: {rec['articleType']}, SubCategory: {rec['subCategory']}")
        rec_image_url = rec['link']
        display(Image(url=rec_image_url, width=100, height=100))

In [None]:

# SVD-based hybrid recommender: top 5 complementary picks for product 34990
# as seen by user 45601, then rendered through displayRecomImages.
user_id_example, item_id_example = 45601, 34990

recommendations = get_hybrid_recommendations(
    user_id_example,
    item_id_example,
    df,
    content_features,
    complementary_mapping,
    num_recommendations=5,
)

displayRecomImages(recommendations)



Input Item Details:
Product ID: 34990
Article Type: Sarees
SubCategory: Saree
Gender: Women
Base Colour: Grey



Hybrid Recommendations:
Product ID: 46979, Article Type: Handbags, SubCategory: Bags


Product ID: 43863, Article Type: Earrings, SubCategory: Jewellery


Product ID: 54158, Article Type: Necklace and Chains, SubCategory: Jewellery


Product ID: 10272, Article Type: Heels, SubCategory: Shoes


Product ID: 22939, Article Type: Watches, SubCategory: Watches


In [None]:
# DNN-based hybrid recommender: top 5 picks for product 19339 as seen by user 17019.
user_id_example, item_id_example = 17019, 19339

recommendations = hybrid_recommend(
    user_id_example,
    item_id_example,
    df,
    hybrid_model,
    content_features,
    complementary_mapping,
    color_contrast_map,
    top_n=5,
)
displayRecomImages(recommendations)

Input Item Details:
Product ID: 19339
Article Type: Jackets
SubCategory: Topwear
Gender: Men
Base Colour: Black

Recommended Items:
User ID: 97207, Product ID: 3272, Gender: Men, Article Type: Tshirts, SubCategory: Topwear


User ID: 40947, Product ID: 29065, Gender: Men, Article Type: Backpacks, SubCategory: Bags


User ID: 81819, Product ID: 9176, Gender: Men, Article Type: Jeans, SubCategory: Bottomwear


User ID: 99181, Product ID: 39076, Gender: Men, Article Type: Shirts, SubCategory: Topwear


User ID: 42447, Product ID: 36789, Gender: Men, Article Type: Watches, SubCategory: Watches


In [None]:

# DNN-based hybrid recommender: top 5 picks for product 52816 as seen by user 39256.
user_id_example, item_id_example = 39256, 52816

recommendations = hybrid_recommend(
    user_id_example,
    item_id_example,
    df,
    hybrid_model,
    content_features,
    complementary_mapping,
    color_contrast_map,
    top_n=5,
)
displayRecomImages(recommendations)

Input Item Details:
Product ID: 52816
Article Type: Watches
SubCategory: Watches
Gender: Men
Base Colour: Black

Recommended Items:
User ID: 31542, Product ID: 27154, Gender: Men, Article Type: Shirts, SubCategory: Topwear


User ID: 46559, Product ID: 25755, Gender: Men, Article Type: Belts, SubCategory: Belts


User ID: 35265, Product ID: 18787, Gender: Men, Article Type: Wallets, SubCategory: Wallets


User ID: 39937, Product ID: 7237, Gender: Men, Article Type: Formal Shoes, SubCategory: Shoes


User ID: 81819, Product ID: 9176, Gender: Men, Article Type: Jeans, SubCategory: Bottomwear


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
# NOTE(review): if hybrid_model was fit on all of df before this split, the
# held-out rows are not unseen data -- confirm against the training cell.
train_df, test_df = train_test_split(df, random_state=42, test_size=0.2)

# Model inputs and targets for the held-out rows.
test_ratings = test_df['ratings'].values
# NOTE(review): content_features is indexed with test_df's index labels; this
# presumably relies on df having a default 0..n-1 index -- confirm.
test_content_inputs = content_features[test_df.index]
test_user_indices, test_item_indices = (
    test_df[column].values for column in ('user_index', 'item_index')
)


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Score every held-out row in one batched call; the model output is flattened
# to a 1-D array so it can be compared element-wise with test_ratings.
predicted_ratings = hybrid_model.predict(
    [test_user_indices, test_item_indices, test_content_inputs]
).flatten()

# Error of the predicted ratings against the observed ratings.
# NOTE(review): the recorded MAE (~2.87) is large for ratings that appear to
# be on a 1-5 scale -- check whether the model predicts on a rescaled range.
rmse = np.sqrt(mean_squared_error(test_ratings, predicted_ratings))
mae = mean_absolute_error(test_ratings, predicted_ratings)

print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")


Mean Absolute Error (MAE): 2.8690
Root Mean Squared Error (RMSE): 3.0491


In [None]:
def precision_at_k(predictions, ground_truth, k, relevance_threshold=0):
    """
    Precision@K: Measures the proportion of recommended items in the top K that are relevant.

    Args:
        predictions: Predicted scores, one per candidate item (array-like).
        ground_truth: True ratings aligned position-by-position with
            `predictions` (array-like).
        k: Number of top-ranked items to inspect.
        relevance_threshold: An item is relevant when its true rating is
            strictly greater than this value. The default of 0 keeps the
            original behaviour; pass a higher cut-off to make the metric
            discriminative when every rating is positive.

    Returns:
        Number of relevant items among the k highest-scored ones, divided by k.
        Returns 0.0 when k <= 0 instead of dividing by zero.
    """
    if k <= 0:
        return 0.0

    # Accept plain lists as well as arrays.
    predictions = np.asarray(predictions)
    ground_truth = np.asarray(ground_truth)

    # Positions of the k highest predicted scores, best first.
    top_k_indices = np.argsort(predictions)[::-1][:k]
    hits = np.count_nonzero(ground_truth[top_k_indices] > relevance_threshold)

    # The denominator is always k, even when fewer than k items were scored.
    return hits / k


def recall_at_k(predictions, ground_truth, k, relevance_threshold=0):
    """
    Recall@K: Measures the proportion of relevant items captured in the top K recommendations.

    Args:
        predictions: Predicted scores, one per candidate item (array-like).
        ground_truth: True ratings aligned position-by-position with
            `predictions` (array-like).
        k: Number of top-ranked items to inspect.
        relevance_threshold: An item is relevant when its true rating is
            strictly greater than this value. The default of 0 keeps the
            original behaviour.

    Returns:
        Number of relevant items among the k highest-scored ones, divided by
        the total number of relevant items. Returns 0.0 when k <= 0 or when
        no item is relevant (the original raised ZeroDivisionError then).
    """
    is_relevant = np.asarray(ground_truth) > relevance_threshold
    total_relevant = np.count_nonzero(is_relevant)
    if k <= 0 or total_relevant == 0:
        return 0.0

    # Positions of the k highest predicted scores, best first.
    top_k_indices = np.argsort(np.asarray(predictions))[::-1][:k]
    return np.count_nonzero(is_relevant[top_k_indices]) / total_relevant


# Example usage for Precision@K and Recall@K, averaged over the users that
# appear in the test set.
k = 10
precision_scores = []
recall_scores = []

# Score every test row with a single batched predict call instead of one call
# per user.
# Assumes every row of a given user_id carries the same user_index (the
# previous per-user loop fed the first row's value for all of that user's
# rows) -- TODO confirm.
test_predictions = hybrid_model.predict(
    [
        test_df['user_index'].values,
        test_df['item_index'].values,
        content_features[test_df.index],
    ]
).flatten()
all_true_ratings = test_df['ratings'].values

# GroupBy.indices maps each user_id to the integer row positions of that
# user's rows inside test_df, so each user's slice is obtained without
# re-filtering the whole frame. Rows with a missing user_id are left out by
# groupby's default dropna behaviour.
for user_id, row_positions in test_df.groupby('user_id').indices.items():
    predictions = test_predictions[row_positions]
    true_ratings = all_true_ratings[row_positions]

    # Compute precision and recall for this user
    precision_scores.append(precision_at_k(predictions, true_ratings, k))
    recall_scores.append(recall_at_k(predictions, true_ratings, k))

# Average precision and recall across users
avg_precision = np.mean(precision_scores)
avg_recall = np.mean(recall_scores)

print(f"Precision@{k}: {avg_precision:.4f}")
print(f"Recall@{k}: {avg_recall:.4f}")


Precision@10: 0.1881
Recall@10: 1.0000


In [None]:
def evaluate_model(hybrid_model, test_df, content_features, k=10):
    """
    Evaluate the hybrid recommendation model on the test dataset, including F1 Score.

    Args:
        hybrid_model: Object whose `predict` accepts the list
            [user indices, item indices, content features] and returns an
            array that can be flattened to one value per test row.
        test_df: DataFrame with 'user_id', 'user_index', 'item_index' and
            'ratings' columns.
        content_features: Feature matrix indexed with `test_df.index`.
        k: Cut-off used for Precision@K, Recall@K and F1@K.

    Returns:
        dict with keys "mae", "rmse", "precision_at_k", "recall_at_k" and
        "f1_score_at_k". The ranking metrics are computed per user and then
        averaged over users. The same values are also printed.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    import numpy as np

    # Prepare test inputs
    test_user_indices = test_df['user_index'].values
    test_item_indices = test_df['item_index'].values
    # NOTE(review): index labels are used as row positions here; presumably
    # the source frame has a default 0..n-1 index -- confirm.
    test_content_inputs = content_features[test_df.index]
    test_ratings = test_df['ratings'].values

    # Predict ratings for every test row in one batched call. The per-user
    # metrics below reuse these predictions instead of calling predict again
    # for each user.
    predicted_ratings = hybrid_model.predict(
        [test_user_indices, test_item_indices, test_content_inputs]
    ).flatten()

    # Calculate MAE and RMSE
    mae = mean_absolute_error(test_ratings, predicted_ratings)
    rmse = np.sqrt(mean_squared_error(test_ratings, predicted_ratings))

    # Precision@K, Recall@K, and F1 Score
    precision_scores = []
    recall_scores = []
    f1_scores = []

    # GroupBy.indices maps each user_id to the integer row positions of that
    # user's rows, so no per-user re-filtering of test_df is needed. Rows with
    # a missing user_id are left out by groupby's default dropna behaviour.
    # Assumes all rows of a user share one user_index (the previous per-user
    # predict used the first row's value for every row) -- TODO confirm.
    for row_positions in test_df.groupby('user_id').indices.values():
        predictions = predicted_ratings[row_positions]
        true_ratings = test_ratings[row_positions]

        # Compute precision and recall
        precision = precision_at_k(predictions, true_ratings, k)
        recall = recall_at_k(predictions, true_ratings, k)

        # Compute F1 Score (harmonic mean; defined as 0 when both are 0)
        if precision + recall > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0

        precision_scores.append(precision)
        recall_scores.append(recall)
        f1_scores.append(f1)

    avg_precision = np.mean(precision_scores)
    avg_recall = np.mean(recall_scores)
    avg_f1 = np.mean(f1_scores)

    # Print results
    print("Evaluation Metrics:")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Precision@{k}: {avg_precision:.4f}")
    print(f"Recall@{k}: {avg_recall:.4f}")
    print(f"F1 Score@{k}: {avg_f1:.4f}")

    return {
        "mae": mae,
        "rmse": rmse,
        "precision_at_k": avg_precision,
        "recall_at_k": avg_recall,
        "f1_score_at_k": avg_f1
    }


In [None]:
def precision_at_k(predictions, true_ratings, k, relevance_threshold=0.5):
    """
    Compute precision@k for a single user.

    Args:
        predictions: Predicted scores, one per candidate item (array-like).
        true_ratings: True ratings aligned position-by-position with
            `predictions` (array-like; plain lists are accepted).
        k: Number of top-ranked items to inspect.
        relevance_threshold: An item is relevant when its true rating is
            strictly greater than this value. Defaults to the previously
            hard-coded 0.5; pass a cut-off that matches the rating scale.

    Returns:
        Number of relevant items among the k highest-scored ones, divided by
        k, or 0 when k <= 0.
    """
    if k <= 0:
        return 0

    # Convert up front so list inputs support the fancy indexing below.
    predictions = np.asarray(predictions)
    true_ratings = np.asarray(true_ratings)

    # Get the indices of the top-k predicted ratings, best first
    top_k_indices = np.argsort(predictions)[-k:][::-1]

    # Count how many of those items have a true rating above the threshold
    relevant_items = np.count_nonzero(true_ratings[top_k_indices] > relevance_threshold)
    return relevant_items / k

def recall_at_k(predictions, true_ratings, k, relevance_threshold=0.5):
    """
    Compute recall@k for a single user.

    Args:
        predictions: Predicted scores, one per candidate item (array-like).
        true_ratings: True ratings aligned position-by-position with
            `predictions` (array-like; plain lists are accepted).
        k: Number of top-ranked items to inspect.
        relevance_threshold: An item is relevant when its true rating is
            strictly greater than this value. Defaults to the previously
            hard-coded 0.5.

    Returns:
        Number of relevant items among the k highest-scored ones, divided by
        the total number of relevant items. Returns 0 when k <= 0 or when no
        item is relevant.
    """
    # Relevance mask over all of this user's items
    is_relevant = np.asarray(true_ratings) > relevance_threshold
    total_relevant_items = np.count_nonzero(is_relevant)

    # k <= 0 must select nothing. Without this guard `[-0:]` slices the whole
    # ranking, so recall@0 would be reported as 1.0.
    if k <= 0 or total_relevant_items == 0:
        return 0

    # Get the indices of the top-k predicted ratings, best first
    top_k_indices = np.argsort(np.asarray(predictions))[-k:][::-1]

    # Get the number of relevant items in the top-k predictions
    relevant_items = np.count_nonzero(is_relevant[top_k_indices])

    return relevant_items / total_relevant_items


In [None]:
def evaluate_model(hybrid_model, test_df, content_features, k=10):
    """
    Evaluate the hybrid recommendation model on the test dataset, including F1 Score.

    Args:
        hybrid_model: Object whose `predict` accepts the list
            [user indices, item indices, content features] and returns an
            array that can be flattened to one value per test row.
        test_df: DataFrame with 'user_id', 'user_index', 'item_index' and
            'ratings' columns.
        content_features: Feature matrix indexed with `test_df.index`.
        k: Cut-off used for Precision@K, Recall@K and F1@K.

    Returns:
        dict with keys "mae", "rmse", "precision_at_k", "recall_at_k" and
        "f1_score_at_k". The ranking metrics are computed per user and then
        averaged over users. The same values are also printed.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    import numpy as np

    # Prepare test inputs
    test_user_indices = test_df['user_index'].values
    test_item_indices = test_df['item_index'].values
    # NOTE(review): index labels are used as row positions here; presumably
    # the source frame has a default 0..n-1 index -- confirm.
    test_content_inputs = content_features[test_df.index]
    test_ratings = test_df['ratings'].values

    # Predict ratings for every test row in one batched call. The per-user
    # metrics below reuse these predictions instead of calling predict again
    # for each user.
    predicted_ratings = hybrid_model.predict(
        [test_user_indices, test_item_indices, test_content_inputs]
    ).flatten()

    # Calculate MAE and RMSE
    mae = mean_absolute_error(test_ratings, predicted_ratings)
    rmse = np.sqrt(mean_squared_error(test_ratings, predicted_ratings))

    # Precision@K, Recall@K, and F1 Score
    precision_scores = []
    recall_scores = []
    f1_scores = []

    # GroupBy.indices maps each user_id to the integer row positions of that
    # user's rows, so no per-user re-filtering of test_df is needed. Rows with
    # a missing user_id are left out by groupby's default dropna behaviour.
    # Assumes all rows of a user share one user_index (the previous per-user
    # predict used the first row's value for every row) -- TODO confirm.
    for row_positions in test_df.groupby('user_id').indices.values():
        predictions = predicted_ratings[row_positions]
        true_ratings = test_ratings[row_positions]

        # Compute precision and recall
        precision = precision_at_k(predictions, true_ratings, k)
        recall = recall_at_k(predictions, true_ratings, k)

        # Compute F1 Score (harmonic mean; defined as 0 when both are 0)
        if precision + recall > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0

        precision_scores.append(precision)
        recall_scores.append(recall)
        f1_scores.append(f1)

    avg_precision = np.mean(precision_scores)
    avg_recall = np.mean(recall_scores)
    avg_f1 = np.mean(f1_scores)

    # Print results
    print("Evaluation Metrics:")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Precision@{k}: {avg_precision:.4f}")
    print(f"Recall@{k}: {avg_recall:.4f}")
    print(f"F1 Score@{k}: {avg_f1:.4f}")

    return {
        "mae": mae,
        "rmse": rmse,
        "precision_at_k": avg_precision,
        "recall_at_k": avg_recall,
        "f1_score_at_k": avg_f1
    }


In [None]:
# Run the full evaluation on the held-out rows; evaluate_model prints each
# metric itself and returns them as a dict, which is echoed below.
metrics = evaluate_model(
    hybrid_model=hybrid_model,
    test_df=test_df,
    content_features=content_features,
    k=10,
)
print(metrics)


Evaluation Metrics:
Mean Absolute Error (MAE): 2.8690
Root Mean Squared Error (RMSE): 3.0491
Precision@10: 0.1881
Recall@10: 1.0000
F1 Score@10: 0.3063
{'mae': 2.868992751896406, 'rmse': 3.049107837039162, 'precision_at_k': 0.18806701030927836, 'recall_at_k': 1.0, 'f1_score_at_k': 0.3062766751138491}
