In [49]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

# Load product data (attributes)
products_df = pd.read_csv('MyntraDataset4.csv')

# Load user interaction data (e.g., ratings or swipes)
interactions_df = pd.read_csv('DummyMyntraDataset.csv')

In [50]:
interactions_df.columns

Index(['p_id', 'name', 'price', 'colour', 'brand', 'img', 'ratingCount',
       'avg_rating', 'Body Shape ID', 'Body or Garment Size', 'Bottom Closure',
       'Bottom Fabric', 'Bottom Pattern', 'Bottom Type', 'Dupatta',
       'Dupatta Border', 'Dupatta Fabric', 'Dupatta Pattern', 'Main Trend',
       'Neck', 'Number of Pockets', 'Occasion', 'Pattern Coverage',
       'Print or Pattern Type', 'Sleeve Length', 'Sleeve Styling',
       'Slit Detail', 'Stitch', 'Sustainable', 'Top Design Styling',
       'Top Fabric', 'Top Hemline', 'Top Length', 'Top Pattern', 'Top Shape',
       'Top Type', 'Waistband', 'Wash Care', 'Weave Pattern', 'Weave Type',
       'Ornamentation'],
      dtype='object')

In [51]:
# Combine relevant product attributes into a single string for TF-IDF
products_df['combined_features'] = products_df[[
    'colour', 'brand', 'Bottom Pattern', 'Bottom Type', 'Dupatta', 'Dupatta Border', 'Dupatta Fabric',
    'Dupatta Pattern', 'Main Trend', 'Neck', 'Occasion', 'Print or Pattern Type', 'Sleeve Length',
    'Sleeve Styling', 'Stitch', 'Top Design Styling', 'Top Fabric', 'Top Hemline', 'Top Pattern',
    'Top Shape', 'Top Type', 'Waistband', 'Wash Care', 'Weave Pattern', 'Weave Type', 'Ornamentation'
]].fillna('').agg(' '.join, axis=1)

# Apply TF-IDF vectorization
vectorizer = TfidfVectorizer(max_features=500)  # Adjust max_features as needed
tfidf_matrix = vectorizer.fit_transform(products_df['combined_features'])


In [52]:
# Fit Nearest Neighbors model
content_model = NearestNeighbors(n_neighbors=5, algorithm='auto')
content_model.fit(tfidf_matrix)


In [53]:
pip install scikit-surprise

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [54]:
from surprise import Reader, Dataset, SVD
from surprise.model_selection import train_test_split as surprise_train_test_split

In [81]:
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split as surprise_train_test_split
from surprise import SVD

# Assuming interactions_df has columns: ['p_id', 'avg_rating']
# Adding a constant user_id column for all interactions
interactions_df['user_id'] = 0

# Define the reader and load data
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(interactions_df[['user_id', 'p_id', 'avg_rating']], reader)

# Split the data
trainset, testset = surprise_train_test_split(data, test_size=0.2)

# Train SVD model
collaborative_model = SVD()
collaborative_model.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1e0308fed30>

In [76]:
def get_content_recommendations(product_id, model, n_recommendations=5):
    product_index = products_df[products_df['p_id'] == product_id].index[0]
    distances, indices = model.kneighbors(tfidf_matrix[product_index], n_neighbors=n_recommendations + 1)
    recommended_product_indices = indices.flatten()[1:]  # Exclude the product itself
    return products_df.iloc[recommended_product_indices]


In [77]:
def get_collaborative_recommendations(user_id, model, n_recommendations=5):
    all_product_ids = products_df['p_id'].unique()
    user_interacted_products = interactions_df[interactions_df['user_id'] == user_id]['p_id'].unique()
    non_interacted_products = list(set(all_product_ids) - set(user_interacted_products))
    
    predictions = [model.predict(user_id, product_id) for product_id in non_interacted_products]
    predictions.sort(key=lambda x: x.est, reverse=True)
    
    top_predictions = predictions[:n_recommendations]
    top_product_ids = [pred.iid for pred in top_predictions]
    
    return products_df[products_df['p_id'].isin(top_product_ids)]


In [78]:
def hybrid_recommendations(user_id, p_id, content_model, collaborative_model, alpha=0.5, n_recommendations=5):
    content_recs = get_content_recommendations(product_id, content_model, n_recommendations)
    collaborative_recs = get_collaborative_recommendations(user_id, collaborative_model, n_recommendations)
    
    # Assuming a simple blend where alpha controls the weight of each model
    combined_recs = pd.concat([content_recs, collaborative_recs]).drop_duplicates().head(n_recommendations)
    
    return combined_recs


In [86]:
# Example of getting hybrid recommendations
user_id = 1  # Example user_id
product_id = products_df['p_id'].iloc[87]  # Example product_id

recommendations = hybrid_recommendations(user_id, p_id, content_model, collaborative_model, alpha=0.5)
print(recommendations)


     Unnamed: 0        p_id  \
909         909  13078244.0   
450         450  13913182.0   
906         906  18686412.0   
154         154  13940072.0   
749         749  13369436.0   

                                                  name   price     colour  \
909                  W Women Pink Solid Straight Kurta  1599.0       Pink   
450  Biba Women Off White & Brown Pure Cotton Strip...  1599.0  Off White   
906  BARARA ETHNIC Women Green Floral Embroidered C...  3999.0      Green   
154              W Women Green Geometric Printed Kurta  3599.0      Green   
749  YASH GALLERY Women Navy Blue & White Cotton Pr...  3299.0  Navy Blue   

             brand                                                img  \
909              W  http://assets.myntassets.com/assets/images/130...   
450           Biba  http://assets.myntassets.com/assets/images/139...   
906  BARARA ETHNIC  http://assets.myntassets.com/assets/images/186...   
154              W  http://assets.myntassets.com/assets/im

In [87]:
# Example of getting hybrid recommendations
user_id = 0 # Example user_id
product_id = products_df['p_id'].iloc[56]  # Example product_id

recommendations = hybrid_recommendations(user_id, product_id, content_model, collaborative_model, alpha=0.5)
print(recommendations)


     Unnamed: 0        p_id  \
677         677  16825614.0   
640         640  19145026.0   
737         737  16585626.0   
974         974  14138768.0   
936         936  17127072.0   

                                                  name   price colour  \
677  Stylum Women Green Embroidered Pure Cotton Kur...  3599.0  Green   
640  Baisacrafts Women Red Ethnic Motifs Printed Pl...  5999.0    Red   
737  Biba Women White Embroidered Pure Cotton Kurta...  3799.0  White   
974  Ishin Women Red & Gold-Toned Embroidered Kurta...  6199.0    Red   
936  Biba Women Red Floral Printed Pleated Pure Cot...  3599.0    Red   

           brand                                                img  \
677       Stylum  http://assets.myntassets.com/assets/images/168...   
640  Baisacrafts  http://assets.myntassets.com/assets/images/191...   
737         Biba  http://assets.myntassets.com/assets/images/165...   
974        Ishin  http://assets.myntassets.com/assets/images/pro...   
936         Biba  h