In [1]:
import pandas as pd
import pickle
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

In [2]:
# Read the product catalogue and the ratings table into DataFrames
MYNTRA_CSV = 'processed_data/clean_myntra.csv'
RATINGS_CSV = 'ratings.csv'

myntra = pd.read_csv(MYNTRA_CSV)
ratings = pd.read_csv(RATINGS_CSV)

In [3]:
# Preview the first rows of the ratings table
ratings.head()

Unnamed: 0.1,Unnamed: 0,userId,product_id,Rating
0,0,3269,0,4.3
1,1,4575,0,3.95
2,2,3844,0,3.72
3,3,1260,0,3.98
4,4,5298,0,4.37


In [4]:
# Drop the unnecessary 'Unnamed: 0' column.
# The result is rebound instead of mutated in place, and errors='ignore' makes
# the cell safe to re-run: a second execution no longer raises KeyError
# because the column is already gone.
ratings = ratings.drop(columns='Unnamed: 0', errors='ignore')

In [5]:
# Show the ratings table again now that 'Unnamed: 0' has been dropped
ratings

Unnamed: 0,userId,product_id,Rating
0,3269,0,4.30
1,4575,0,3.95
2,3844,0,3.72
3,1260,0,3.98
4,5298,0,4.37
...,...,...,...
2419073,4732,8770,4.48
2419074,4117,8770,4.33
2419075,5459,8770,4.19
2419076,818,8770,4.78


In [6]:
# Attach the product attributes to every rating row. Rows are matched on
# product_id using an inner join (the merge default).
myntra_merge_ratings = pd.merge(ratings, myntra, how="inner", on="product_id")
myntra_merge_ratings.head(n=1)

Unnamed: 0.1,userId,product_id,Rating,Unnamed: 0,size,brand,dominant_material,title,actual_color,dominant_color,...,variant_compare_at_price,ideal_for,inventory,Pre_Inventory,preprocess_body,img1,img2,img3,img4,corpusData
0,3269,0,4.3,0,XL,IMARA,Polyester,IMARA Women Black Solid Top,Black,Black,...,1599,Women,,,black solid woven regular top has a v neck thr...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,IMARA Women Black Solid Top Black solid woven ...


In [7]:
# Remove the left-over 'Unnamed: 0' column from the merged frame.
# Rebinding with errors='ignore' keeps the cell re-runnable: executing it a
# second time no longer raises KeyError for the already-removed column.
myntra_merge_ratings = myntra_merge_ratings.drop(columns='Unnamed: 0', errors='ignore')

In [8]:
# Check the first merged row after the 'Unnamed: 0' column was dropped
myntra_merge_ratings.head(1)

Unnamed: 0,userId,product_id,Rating,size,brand,dominant_material,title,actual_color,dominant_color,product_type,...,variant_compare_at_price,ideal_for,inventory,Pre_Inventory,preprocess_body,img1,img2,img3,img4,corpusData
0,3269,0,4.3,XL,IMARA,Polyester,IMARA Women Black Solid Top,Black,Black,Top,...,1599,Women,,,black solid woven regular top has a v neck thr...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,IMARA Women Black Solid Top Black solid woven ...


In [9]:

# Number of (non-null) ratings recorded for each product title
ratingCount = (
    myntra_merge_ratings
    .groupby('title')['Rating']
    .count()
    .rename('TotalCountRating')
    .reset_index()
)

In [10]:
# Inspect the per-title rating counts
ratingCount

Unnamed: 0,title,TotalCountRating
0,Jaipur Kurti Women Blue Printed Maxi Flared S...,270
1,20Dresses Women Black & Blue Printed Maxi Dress,292
2,A.T.U.N All Things Uber Nice Girls Mustard Yel...,269
3,AASI - HOUSE OF NAYO Women Blue & Rust Orange ...,284
4,AASI - HOUSE OF NAYO Women Navy Blue Solid Fit...,301
...,...,...
5570,studio rasa Black & Gold-Coloured Hand Block-P...,283
5571,studio rasa Navy Blue & Off-White Handblock Pr...,278
5572,studio rasa Women Gold-Coloured Embroidered Fl...,309
5573,studio rasa Women Navy-Blue & Green Flared Max...,282


In [11]:

# Mean rating for each product title
averageRating = (
    myntra_merge_ratings
    .groupby('title')['Rating']
    .mean()
    .rename('AverageRatingCount')
    .reset_index()
)

In [12]:
# Inspect the per-title mean ratings
averageRating

Unnamed: 0,title,AverageRatingCount
0,Jaipur Kurti Women Blue Printed Maxi Flared S...,4.266667
1,20Dresses Women Black & Blue Printed Maxi Dress,4.226096
2,A.T.U.N All Things Uber Nice Girls Mustard Yel...,4.258141
3,AASI - HOUSE OF NAYO Women Blue & Rust Orange ...,4.241092
4,AASI - HOUSE OF NAYO Women Navy Blue Solid Fit...,4.213821
...,...,...
5570,studio rasa Black & Gold-Coloured Hand Block-P...,4.258481
5571,studio rasa Navy Blue & Off-White Handblock Pr...,4.266259
5572,studio rasa Women Gold-Coloured Embroidered Fl...,4.231942
5573,studio rasa Women Navy-Blue & Green Flared Max...,4.247092


In [13]:
# Find, for every rated title, the index label of its first row in `myntra`.
# One pass over the catalogue builds a title -> first-row lookup, replacing the
# previous approach of re-filtering the whole DataFrame once per title.
titles = ratingCount['title']

# keep='first' retains the earliest row of each title together with its
# original index label.
first_rows = myntra.drop_duplicates(subset='title', keep='first')
first_idx_by_title = dict(zip(first_rows['title'], first_rows.index))

# A title that is missing from the catalogue raises KeyError here instead of
# being skipped silently.
prod_idx = [first_idx_by_title[title] for title in titles]
len(prod_idx)

5575

In [14]:
# Keep only the catalogue rows selected by prod_idx (the products that have
# ratings), discard the 'Unnamed: 0' column and renumber the rows from 0
myntra = (
    myntra
    .iloc[prod_idx]
    .drop(columns='Unnamed: 0')
    .reset_index(drop=True)
)
myntra

Unnamed: 0,product_id,size,brand,dominant_material,title,actual_color,dominant_color,product_type,body,product_details,...,variant_compare_at_price,ideal_for,inventory,Pre_Inventory,preprocess_body,img1,img2,img3,img4,corpusData
0,525,34,Jaipur Kurti,cotton,Jaipur Kurti Women Blue Printed Maxi Flared S...,Blue,Blue,Maxi Flared Skirt,"Blue woven printed maxi flared skirt, has an e...","Blue woven printed maxi flared skirt, has an e...",...,2299,Women,15,15,blue woven printed maxi flared skirt has an el...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,Jaipur Kurti Women Blue Printed Maxi Flared S...
1,4769,M,20Dresses,Viscose Rayon,20Dresses Women Black & Blue Printed Maxi Dress,Black | Blue,Black,Maxi Dress,"Black and blue printed woven maxi dress, has a...","Black and blue printed woven maxi dress, has a...",...,1895,Women,Knit or Woven : Woven | Transparency : Opaque ...,knit or woven woven transparency opaque shape ...,black and blue printed woven maxi dress has a ...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,20Dresses Women Black & Blue Printed Maxi Dres...
2,4147,1-2Y,A.T.U.N All Things Uber Nice,polyester,A.T.U.N All Things Uber Nice Girls Mustard Yel...,Yellow | Maroon | Mustard,Mustard,Lehenga Choli with Dupatta,Mustard yellow and maroon lehenga choli with d...,Mustard yellow and maroon lehenga choli with d...,...,2999,Girls,Dupatta Fabric : Net | Occasion : Festive | Nu...,dupatta fabric net occasion festive number of ...,mustard yellow and maroon lehenga choli with d...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,A.T.U.N All Things Uber Nice Girls Mustard Yel...
3,2782,S,AASI - HOUSE OF NAYO,Cotton,AASI - HOUSE OF NAYO Women Blue & Rust Orange ...,Blue | Rust | Orange,Blue,Maxi Dress,"Blue, rust orange and white printed woven maxi...","Blue, rust orange and white printed woven maxi...",...,1699,Women,Knit or Woven : Woven | Transparency : Opaque ...,knit or woven woven transparency opaque shape ...,blue rust orange and white printed woven maxi ...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,AASI - HOUSE OF NAYO Women Blue & Rust Orange ...
4,6202,L,AASI - HOUSE OF NAYO,Cotton,AASI - HOUSE OF NAYO Women Navy Blue Solid Fit...,Blue | Navy,Navy,Fit & Flare Dress,Navy Blue and mustard yellow solid woven fit a...,Navy Blue and mustard yellow solid woven fit a...,...,1299,Women,Knit or Woven : Woven | Transparency : Opaque ...,knit or woven woven transparency opaque shape ...,navy blue and mustard yellow solid woven fit a...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,AASI - HOUSE OF NAYO Women Navy Blue Solid Fit...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5570,1740,M,studio rasa,Chanderi,studio rasa Black & Gold-Coloured Hand Block-P...,Black,Black,Ready to Wear Lehenga with Blouse,Black and gold-toned embroidered lehenga choli...,Black and gold-toned embroidered lehenga choli...,...,4999,Women,Occasion : Festive | Number of Components : 2 ...,occasion festive number of components 2 blouse...,black and gold toned embroidered lehenga choli...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,studio rasa Black & Gold-Coloured Hand Block-P...
5571,1455,S,studio rasa,Dupion,studio rasa Navy Blue & Off-White Handblock Pr...,Blue | Navy,Navy,Ready to Wear Lehenga with Blouse,Navy Blue and off-white handblock print leheng...,Navy Blue and off-white handblock print leheng...,...,4599,Women,Occasion : Festive | Number of Components : 2 ...,occasion festive number of components 2 blouse...,navy blue and off white handblock print leheng...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,studio rasa Navy Blue & Off-White Handblock Pr...
5572,7407,Onesize,studio rasa,Dupion,studio rasa Women Gold-Coloured Embroidered Fl...,,,Maxi Skirt,Gold-coloured woven maxi skirt with embroidere...,Gold-coloured woven maxi skirt with embroidere...,...,4299,Women,Knit or Woven : Woven | Transparency : Opaque ...,knit or woven woven transparency opaque closur...,gold coloured woven maxi skirt with embroidere...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,studio rasa Women Gold-Coloured Embroidered Fl...
5573,5797,Onesize,studio rasa,Silk,studio rasa Women Navy-Blue & Green Flared Max...,Green,Green,Maxi Skirt,Navy blue and green woven flared maxi skirt wi...,Navy blue and green woven flared maxi skirt wi...,...,2999,Women,10,10,navy blue and green woven flared maxi skirt wi...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,studio rasa Women Navy-Blue & Green Flared Max...


In [15]:
# Add each title's mean rating to the catalogue and save the result to disk.
# NOTE(review): to_csv writes the row index by default, so the saved file gets
# an extra unnamed leading column. Confirm whether downstream readers expect it.
myntra_with_ratings = pd.merge(myntra, averageRating, on='title')
myntra_with_ratings.to_csv('processed_data/myntra_with_ratings.csv')

In [16]:
# Combine the per-title rating count and mean rating into a single table
rating_df = pd.merge(ratingCount, averageRating, on="title")
rating_df

Unnamed: 0,title,TotalCountRating,AverageRatingCount
0,Jaipur Kurti Women Blue Printed Maxi Flared S...,270,4.266667
1,20Dresses Women Black & Blue Printed Maxi Dress,292,4.226096
2,A.T.U.N All Things Uber Nice Girls Mustard Yel...,269,4.258141
3,AASI - HOUSE OF NAYO Women Blue & Rust Orange ...,284,4.241092
4,AASI - HOUSE OF NAYO Women Navy Blue Solid Fit...,301,4.213821
...,...,...,...
5570,studio rasa Black & Gold-Coloured Hand Block-P...,283,4.258481
5571,studio rasa Navy Blue & Off-White Handblock Pr...,278,4.266259
5572,studio rasa Women Gold-Coloured Embroidered Fl...,309,4.231942
5573,studio rasa Women Navy-Blue & Green Flared Max...,282,4.247092


In [17]:
# Attach the per-title count and mean columns to every individual rating row
main_data = pd.merge(rating_df, myntra_merge_ratings, on="title")
main_data.head(n=2)

Unnamed: 0,title,TotalCountRating,AverageRatingCount,userId,product_id,Rating,size,brand,dominant_material,actual_color,...,variant_compare_at_price,ideal_for,inventory,Pre_Inventory,preprocess_body,img1,img2,img3,img4,corpusData
0,Jaipur Kurti Women Blue Printed Maxi Flared S...,270,4.266667,3321,525,3.62,34,Jaipur Kurti,cotton,Blue,...,2299,Women,15,15,blue woven printed maxi flared skirt has an el...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,Jaipur Kurti Women Blue Printed Maxi Flared S...
1,Jaipur Kurti Women Blue Printed Maxi Flared S...,270,4.266667,3233,525,4.66,34,Jaipur Kurti,cotton,Blue,...,2299,Women,15,15,blue woven printed maxi flared skirt has an el...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,http://assets.myntassets.com/v1/assets/images/...,Jaipur Kurti Women Blue Printed Maxi Flared S...


In [18]:
# Build the title x userId rating table: one row per title, one column per
# user. pivot_table averages the ratings when a (title, user) pair repeats,
# and cells with no rating are filled with 0.
pivot_Matrix = pd.pivot_table(
    main_data,
    values='Rating',
    index='title',
    columns='userId',
).fillna(0)
pivot_Matrix

userId,1,2,3,4,5,6,7,8,9,10,...,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Jaipur Kurti Women Blue Printed Maxi Flared Skirt,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,...,4.87,0.0,0.00,0.0,0.0,0.00,0.00,0.0,0.00,0.00
20Dresses Women Black & Blue Printed Maxi Dress,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,...,0.00,0.0,0.00,0.0,0.0,0.00,0.00,0.0,0.00,0.00
A.T.U.N All Things Uber Nice Girls Mustard Yellow & Maroon Lehenga Choli with Dupatta,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,...,0.00,0.0,0.00,0.0,0.0,3.63,0.00,0.0,0.00,3.87
AASI - HOUSE OF NAYO Women Blue & Rust Orange Printed Maxi Dress,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,...,0.00,0.0,0.00,0.0,0.0,0.00,0.00,0.0,0.00,0.00
AASI - HOUSE OF NAYO Women Navy Blue Solid Fit & Flare Dress,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,...,0.00,0.0,0.00,0.0,0.0,0.00,0.00,4.2,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
studio rasa Black & Gold-Coloured Hand Block-Printed Ready to Wear Lehenga with Blouse,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,...,0.00,0.0,0.00,0.0,0.0,0.00,3.79,0.0,0.00,0.00
studio rasa Navy Blue & Off-White Handblock Print Ready to Wear Lehenga with Blouse,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,...,0.00,0.0,3.93,0.0,0.0,0.00,0.00,0.0,0.00,0.00
studio rasa Women Gold-Coloured Embroidered Flared Maxi Skirt,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,...,0.00,0.0,0.00,0.0,0.0,0.00,0.00,0.0,0.00,0.00
studio rasa Women Navy-Blue & Green Flared Maxi Skirt,0.0,0.0,0.0,0.0,0.0,0.0,3.85,0.0,4.28,0.0,...,0.00,0.0,0.00,0.0,0.0,0.00,0.00,0.0,3.93,0.00


In [19]:
# Brute-force neighbour search that compares rows by cosine distance
model_knn = NearestNeighbors(algorithm="brute", metric="cosine")

# Sparse (CSR) version of the pivot table's values, used to fit the model
main_matrix = csr_matrix(pivot_Matrix.values)
model_knn.fit(main_matrix)

In [20]:
# Query the fitted model with every row of the pivot table and keep the
# N_NEIGHBORS closest rows (distances and row positions) for each product
N_NEIGHBORS = 7
distances, indices = model_knn.kneighbors(pivot_Matrix.values, n_neighbors=N_NEIGHBORS)

In [21]:
# Inspect the neighbour row positions returned by kneighbors
indices

array([[   0, 3107, 1581, ..., 2785, 2509,  405],
       [   1, 5469, 3161, ..., 4181,  589, 3415],
       [   2, 4960, 2563, ..., 4250, 3061, 2900],
       ...,
       [5572, 4589, 1400, ..., 3429, 4094, 2168],
       [5573, 3483, 3145, ..., 1796, 5280, 4030],
       [5574, 4045,  393, ..., 3634,  555, 5477]], dtype=int64)

In [22]:
# Dump pickle files.
# Note: 'pivot_matrix.pkl' stores only the pivot table's row index (the product
# titles), not the rating values themselves.
# Each file is opened in a `with` block so its handle is flushed and closed
# deterministically instead of being left open until garbage collection.
with open('pickle_files/pivot_matrix.pkl', 'wb') as pivot_index_file:
    pickle.dump(pivot_Matrix.index, pivot_index_file)

with open('pickle_files/filtered_indices.pkl', 'wb') as indices_file:
    pickle.dump(indices, indices_file)