## myntra_products_catalog Analysis (Time Series)

In [2]:
# importing libaries

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder

data = pd.read_csv('myntra_products_catalog.csv')  

## Exploring the dataset

In [4]:
print(data.head())
print(f"Shape: {data.shape}")
print(data.describe(include='all'))

   ProductID                                        ProductName ProductBrand  \
0   10017413  DKNY Unisex Black & Grey Printed Medium Trolle...         DKNY   
1   10016283  EthnoVogue Women Beige & Grey Made to Measure ...   EthnoVogue   
2   10009781  SPYKAR Women Pink Alexa Super Skinny Fit High-...       SPYKAR   
3   10015921  Raymond Men Blue Self-Design Single-Breasted B...      Raymond   
4   10017833  Parx Men Brown & Off-White Slim Fit Printed Ca...         Parx   

   Gender  Price (INR)  NumImages  \
0  Unisex        11745          7   
1   Women         5810          7   
2   Women          899          7   
3     Men         5599          5   
4     Men          759          5   

                                         Description PrimaryColor  
0  Black and grey printed medium trolley bag, sec...        Black  
1  Beige & Grey made to measure kurta with churid...        Beige  
2  Pink coloured wash 5-pocket high-rise cropped ...         Pink  
3  Blue self-design band

In [5]:
# most expensive product by gender ( because we do not have a cateogory coloumn)

most_expensive = data.loc[data.groupby('Gender')['Price (INR)'].idxmax()]
print(most_expensive[['Gender', 'ProductName', 'Price (INR)']])

           Gender                                        ProductName  \
9111         Boys  ADIDAS Boys Blue X 19.3 Firm Ground J Football...   
787         Girls            Pink Cow Girls Blue Fit and Flare Dress   
1568          Men             MOVADO Men Black Analogue Watch 607243   
7416       Unisex  Garmin Unisex Blue Forerunner 945 Smartwatch 0...   
3351  Unisex Kids  FirstClap Kids Brown Camouflage Printed Regula...   
1376        Women            MOVADO Women Blue Analogue Watch 607268   

      Price (INR)  
9111         3999  
787          3800  
1568        58854  
7416        63090  
3351         1799  
1376        56192  


In [6]:
features = data[['Gender', 'Price (INR)', 'PrimaryColor', 'NumImages']].copy()
features.fillna('Unknown', inplace=True) # handinling misssing values

# encoding catetogriccal variables (One-Hot Encoding)

encoder = OneHotEncoder()
encoded = encoder.fit_transform(features[['Gender', 'PrimaryColor']])

numerical = features[['Price (INR)', 'NumImages']].values

# Combineing Encoded Categorical Data with Numerical Data

combined_features = np.hstack((encoded.toarray(), numerical))

print(combined_features)


[[0.0000e+00 0.0000e+00 0.0000e+00 ... 0.0000e+00 1.1745e+04 7.0000e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 ... 0.0000e+00 5.8100e+03 7.0000e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 ... 0.0000e+00 8.9900e+02 7.0000e+00]
 ...
 [0.0000e+00 1.0000e+00 0.0000e+00 ... 0.0000e+00 6.0200e+02 4.0000e+00]
 [0.0000e+00 0.0000e+00 1.0000e+00 ... 1.0000e+00 8.9500e+03 2.0000e+00]
 [0.0000e+00 0.0000e+00 1.0000e+00 ... 0.0000e+00 7.9900e+02 5.0000e+00]]


In [7]:
# cosine similarity

similarity_matrix = cosine_similarity(combined_features)

In [8]:
# recommending similar products

def recommend(product_index, top_n=5):
    scores = list(enumerate(similarity_matrix[product_index]))
    scores = sorted(scores, key=lambda x: x[1], reverse=True)
    top_indices = [i[0] for i in scores[1:top_n+1]]
    return data.iloc[top_indices][['ProductName', 'Gender', 'Price (INR)', 'PrimaryColor']]

# ex: Recommending similar products to the first item
recommend(0)

Unnamed: 0,ProductName,Gender,Price (INR),PrimaryColor
2535,Calvin Klein Unisex Black Large Trolley Bag,Unisex,11880,Black
273,DKNY Unisex Black & Grey Printed Cabin Trolley...,Unisex,10800,Black
4906,Calvin Klein Unisex Black & White Printed Larg...,Unisex,12690,Black
7161,SPRAY GROUND Unisex Black & Grey Geometric Bac...,Unisex,8104,Black
7359,SPRAY GROUND Unisex Brown & Black Geometric Ba...,Unisex,8104,Black


In [9]:
# color VS price analysis

color_price = data.groupby('PrimaryColor')['Price (INR)'].mean().sort_values(ascending=False)
most_expensive_color = color_price.idxmax()
print(f"Most expensive color: {most_expensive_color}")

Most expensive color:  Silver


## Conclusion :