# Fashion Product Recommendation using Multimodal Data

In [11]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np 
import pandas as pd
import seaborn as sn
import ast
import os
import cv2
import warnings
warnings.filterwarnings(action = 'ignore')

In [12]:
# Root folder of the dataset on the local machine (absolute path).
DATASET_PATH = "/home/arj/TriSem3/CaseStudy/DatasetCaseStudy/myntradataset"
# Sanity check: list the folder contents (the metadata CSV and the image folder).
print(os.listdir(DATASET_PATH))

['styles.csv', 'images']


In [13]:
# Load the product metadata. Only the first 5000 rows are read, and
# malformed CSV lines are skipped instead of aborting the load.
DATASET_PATH = "/home/arj/TriSem3/CaseStudy/DatasetCaseStudy/myntradataset"
styles_csv = DATASET_PATH + "/styles.csv"
df = pd.read_csv(
    styles_csv,
    on_bad_lines = "skip",
    nrows = 5000,
)
df.head()

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011,Casual,Turtle Check Men Navy Blue Shirt
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,Peter England Men Party Blue Jeans
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016,Casual,Titan Women Silver Watch
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011,Casual,Manchester United Men Solid Black Track Pants
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012,Casual,Puma Men Grey T-shirt


In [14]:
# Derive each product's image file name from its id, e.g. 15970 -> "15970.jpg".
image_names = df['id'].astype(str) + ".jpg"
df['image'] = image_names.reset_index(drop = True)
df.head()

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,image
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011,Casual,Turtle Check Men Navy Blue Shirt,15970.jpg
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,Peter England Men Party Blue Jeans,39386.jpg
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016,Casual,Titan Women Silver Watch,59263.jpg
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011,Casual,Manchester United Men Solid Black Track Pants,21379.jpg
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012,Casual,Puma Men Grey T-shirt,53759.jpg


In [15]:
def plot_figures(figures,nrows = 1,ncols = 1,figsize = (8,8)):
    """Draw a grid of images, one subplot per entry of ``figures``.

    Parameters
    ----------
    figures : dict
        Maps a title to an image array. Each image is converted from BGR to
        RGB before display, so BGR (OpenCV-style) input is expected.
    nrows, ncols : int
        Grid dimensions. ``nrows * ncols`` must be at least ``len(figures)``,
        otherwise indexing the axes raises ``IndexError``.
    figsize : tuple
        Figure size forwarded to ``plt.subplots``.
    """
    # squeeze=False makes plt.subplots always return a 2-D array of Axes.
    # Without it a 1x1 grid (the default arguments) yields a bare Axes object
    # that has no .ravel(), and the function crashes.
    fig,axeslist = plt.subplots(ncols = ncols,nrows = nrows,figsize = figsize,squeeze = False)
    axes = axeslist.ravel()
    for index,title in enumerate(figures):
        ax = axes[index]
        ax.imshow(cv2.cvtColor(figures[title], cv2.COLOR_BGR2RGB))
        ax.set_title(title)
        ax.set_axis_off()

    plt.tight_layout()
    
def img_path(img):
    """Return the full path of image file ``img`` inside the dataset's images folder."""
    dataset_root = '/home/arj/TriSem3/CaseStudy/DatasetCaseStudy/myntradataset'
    return "".join((dataset_root, "/images/", img))

def load_image(img, resized_fac = 0.1):
    """Read an image from the dataset and return a scaled-down copy.

    Parameters
    ----------
    img : str
        Image file name; resolved to a full path with ``img_path``.
    resized_fac : float
        Scale factor applied to both width and height.

    Returns
    -------
    The resized image as returned by ``cv2.resize`` (BGR channel order,
    since it comes from ``cv2.imread``).

    Raises
    ------
    FileNotFoundError
        If the file is missing or cannot be decoded.
    """
    path = img_path(img)
    image_bgr = cv2.imread(path)
    # cv2.imread signals failure by returning None rather than raising;
    # fail here with a clear message instead of an AttributeError on .shape.
    if image_bgr is None:
        raise FileNotFoundError(f"Could not read image file: {path}")
    # shape is (rows, cols, channels), i.e. (height, width, channels).
    height, width = image_bgr.shape[:2]
    # cv2.resize expects the target size as (width, height).
    target_size = (int(width*resized_fac), int(height*resized_fac))
    resized = cv2.resize(image_bgr, target_size, interpolation = cv2.INTER_AREA)
    return resized

Feature extraction

In [16]:
import tensorflow as tf
import keras
from keras import Model
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions
from tensorflow.keras.applications.vgg19 import preprocess_input, decode_predictions
from tensorflow.keras.layers import GlobalMaxPooling2D
tf.__version__

'2.16.1'

In [17]:
# Input size fed to the network.
img_width, img_height, _ = 224, 224, 3

# ImageNet-pretrained ResNet50 without its top (include_top=False), frozen.
resnet_backbone = ResNet50(
    include_top = False,
    weights = 'imagenet',
    input_shape = (img_width, img_height, 3),
)
resnet_backbone.trainable = False

# Global max pooling turns the backbone output into one embedding vector
# per image.
model_1 = tf.keras.models.Sequential([
    resnet_backbone,
    GlobalMaxPooling2D(),
])

model_1.summary()

In [18]:
def get_embedding(model,img_name):
    """Compute the embedding vector of one dataset image.

    Parameters
    ----------
    model
        Model exposing a Keras-style ``predict``; called on a batch holding
        the single preprocessed image.
    img_name : str
        Image file name; resolved to a full path with ``img_path``.

    Returns
    -------
    The model output flattened to one dimension.
    """
    # NOTE(review): `preprocess_input` is imported three times in this file
    # (resnet50, vgg16, vgg19); the last import is the one bound here.
    # Confirm that it is the right preprocessing for every model passed in.
    img = image.load_img(img_path(img_name),target_size = (224,224))
    x = image.img_to_array(img)
    x = np.expand_dims(x,axis = 0)   # add the batch dimension
    x = preprocess_input(x)
    # verbose=0: this function is called once per image, and the default
    # progress bar would print one line per call.
    return model.predict(x, verbose = 0).reshape(-1)

In [19]:
# Smoke test: embed the first image and inspect the embedding size.
emb = get_embedding(model_1, df['image'].iloc[0])
emb.shape

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


(2048,)

In [20]:
%%time

# Compute one ResNet50 embedding per image. This is the slow step of the notebook.
df_sample = df.copy()
map_embeddings = df_sample['image'].apply(lambda img:get_embedding(model_1,img))
# Stack the per-image vectors into a single 2-D array in one step.
# `.apply(pd.Series)` would build one Series object per row, which is far
# slower for thousands of wide vectors.
df_embds = pd.DataFrame(np.vstack(map_embeddings.to_list()), index = map_embeddings.index)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 271ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 249ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 289ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [None]:
# Preview the first rows of the embedding table (one row per image).
df_embds.head()

NameError: name 'df_embds' is not defined

Cosine similarity and other metrics

In [None]:
from sklearn.metrics.pairwise import pairwise_distances

# pairwise_distances returns cosine *distance*; similarity is 1 - distance.
cosine_sim = 1.0 - pairwise_distances(df_embds, metric = 'cosine')
# Peek at the top-left 4x4 corner of the similarity matrix.
cosine_sim[0:4, 0:4]

In [None]:
%%time
def get_recommendations(indices,cosine_sim,index,df,top_n = 5):
    """Return the ``top_n`` items most similar to the item labelled ``index``.

    Parameters
    ----------
    indices : pd.Series
        Maps an item label (the Series index) to its row position in
        ``cosine_sim``.
    cosine_sim : 2-D array
        Pairwise similarity matrix; row ``p`` holds the similarity of item
        ``p`` to every item.
    index
        Label of the query item, looked up in ``indices``.
    df
        Unused; kept so existing callers keep working.
    top_n : int
        Number of recommendations to return.

    Returns
    -------
    (pd.Index, list)
        Labels of the recommended items (most similar first) and their
        similarity scores.
    """
    sim_index = indices[index]
    row = np.asarray(cosine_sim[sim_index])
    # Stable descending sort: tied scores keep ascending-position order.
    order = np.argsort(-row, kind = 'stable')
    # Exclude the query item explicitly. Dropping "whatever sorted first"
    # assumes the query always ranks first, which fails when another item has
    # an identical embedding: the query would then be recommended to itself.
    order = order[order != sim_index][:max(top_n, 0)]
    index_rec = order.tolist()
    index_sim = [row[i] for i in index_rec]

    return indices.iloc[index_rec].index,index_sim


# Similarity matrix over the ResNet50 embeddings, plus a label -> row-position map.
cosine_sim = 1-pairwise_distances(df_embds, metric='cosine')
indices = pd.Series(range(len(df)),index = df.index)

# One entry per row of df, in order:
mean_sim_scores = []    # mean similarity of that row's recommendations
complete_indices = []   # labels of the recommended items
complete_scores = []    # similarity score of each recommended item
for row_pos in range(df.shape[0]):
    try:
        # Separate names for the results so the loop variable is not overwritten.
        rec_index,sim_array = get_recommendations(indices,cosine_sim,row_pos,df,top_n = 5)
    except Exception as ex:
        print(f'Following exception : {ex} occured at the index : {row_pos}')
        # Keep the three lists aligned with df even when a row fails;
        # later cells index them by row position.
        rec_index,sim_array = pd.Index([]),[]
    mean_score = np.mean(sim_array) if len(sim_array) else np.nan
    mean_sim_scores.append(mean_score)
    complete_indices.append(rec_index)
    complete_scores.append(sim_array)

In [None]:
# One list of recommended index labels per query row (converted once, reused below).
recommended_labels = [list(i) for i in complete_indices]
# The recommendations are index *labels* of df, so look them up with .loc
# (label-based) rather than .iloc (position-based).
sub_cat = [df['subCategory'].loc[labels].to_list() for labels in recommended_labels]
mast_cat = [df['masterCategory'].loc[labels].to_list() for labels in recommended_labels]

# Results table for the ResNet50 embeddings: each product with its
# recommendations and their categories.
resnet_50 = pd.DataFrame()
resnet_50['id'] = df['id']
resnet_50['recommended_index'] = recommended_labels
resnet_50['recommended_scores'] = complete_scores
resnet_50['masterCategory'] = df['masterCategory']
resnet_50['subCategory'] = df['subCategory']
resnet_50['Recommended_master_category'] = mast_cat
resnet_50['Recommended_sub_category'] = sub_cat
resnet_50['mean_recommended_score'] = mean_sim_scores
resnet_50.head(2)

In [None]:
def getmatch_score(resnet_50,col,col1):
    """Percentage of recommended items whose category equals the item's own.

    Parameters
    ----------
    resnet_50 : pd.DataFrame
        Results table; one row per query item.
    col : str
        Column holding the item's actual category.
    col1 : str
        Column holding the list of categories of the recommended items.

    Returns
    -------
    list of float
        One score in [0, 100] per row, in row order. Rows that cannot be
        scored (empty or non-iterable recommendation list) get NaN so the
        result always has the same length as the frame.
    """
    match_list = []
    pairs = zip(resnet_50[col], resnet_50[col1])
    for index, (actual_match, predicted_match) in enumerate(pairs):
        try:
            predicted_items = list(predicted_match)
            count = sum(1 for item in predicted_items if item==actual_match)
            # Divide by the real number of recommendations rather than a
            # hard-coded 5, so the score stays correct for any top_n.
            match_list.append((count/len(predicted_items))*100)
        except Exception as ex:
            print(f'Following exception : {ex} occured at index : {index}')
            # Placeholder keeps the output aligned with the frame's rows.
            match_list.append(float('nan'))

    return match_list

# Category agreement between each product and its recommendations,
# at master-category and sub-category level.
score_columns = {
    'Match_Master_Score': ('masterCategory', 'Recommended_master_category'),
    'Match_Sub_Score': ('subCategory', 'Recommended_sub_category'),
}
for score_col, (actual_col, recommended_col) in score_columns.items():
    resnet_50[score_col] = getmatch_score(resnet_50, actual_col, recommended_col)
resnet_50.head(2)

In [None]:
# Show the first two rows of the ResNet50 results table again.
resnet_50.head(2)

In [None]:
# Distribution of the mean similarity score of each product's recommendations.
# `distplot` is deprecated in seaborn; histplot with a KDE overlay on a
# density scale is the documented replacement.
sn.histplot(resnet_50['mean_recommended_score'], kde = True, stat = 'density', kde_kws = dict(cut = 3));

VGG-16


In [None]:
# Input size fed to the network.
img_width, img_height, _ = 224, 224, 3

# ImageNet-pretrained VGG16 without its top (include_top=False), frozen.
vgg16_backbone = VGG16(
    include_top = False,
    weights = 'imagenet',
    input_shape = (img_width, img_height, 3),
)
vgg16_backbone.trainable = False

# Global max pooling turns the backbone output into one embedding vector
# per image.
model_2 = tf.keras.models.Sequential([
    vgg16_backbone,
    GlobalMaxPooling2D(),
])

model_2.summary()

In [None]:
# Smoke test: embed the first image with VGG16 and inspect the embedding size.
vgg_emb = get_embedding(model_2, df['image'].iloc[0])
vgg_emb.shape

In [None]:
%%time

# Compute one VGG16 embedding per image. This is a slow step.
df_sample = df.copy()
map_embeddings = df_sample['image'].apply(lambda img:get_embedding(model_2,img))
# Stack the per-image vectors into a single 2-D array in one step.
# `.apply(pd.Series)` would build one Series object per row, which is far
# slower for thousands of wide vectors.
df_embds_vgg16 = pd.DataFrame(np.vstack(map_embeddings.to_list()), index = map_embeddings.index)

Recommendation using VGG-16

In [None]:
%%time
def get_recommendations(indices,cosine_sim,index,df,top_n = 5):
    """Return the ``top_n`` items most similar to the item labelled ``index``.

    NOTE: this notebook defines a function with this name twice; the
    definition executed last is the one in effect.

    Parameters
    ----------
    indices : pd.Series
        Maps an item label (the Series index) to its row position in
        ``cosine_sim``.
    cosine_sim : 2-D array
        Pairwise similarity matrix; row ``p`` holds the similarity of item
        ``p`` to every item.
    index
        Label of the query item, looked up in ``indices``.
    df
        Unused; kept so existing callers keep working.
    top_n : int
        Number of recommendations to return.

    Returns
    -------
    (pd.Index, list)
        Labels of the recommended items (most similar first) and their
        similarity scores.
    """
    sim_index = indices[index]
    row = np.asarray(cosine_sim[sim_index])
    # Stable descending sort: tied scores keep ascending-position order.
    order = np.argsort(-row, kind = 'stable')
    # Exclude the query item explicitly. Dropping "whatever sorted first"
    # assumes the query always ranks first, which fails when another item has
    # an identical embedding: the query would then be recommended to itself.
    order = order[order != sim_index][:max(top_n, 0)]
    index_rec = order.tolist()
    index_sim = [row[i] for i in index_rec]

    return indices.iloc[index_rec].index,index_sim


# Similarity matrix over the VGG16 embeddings, plus a label -> row-position map.
cosine_sim = 1-pairwise_distances(df_embds_vgg16, metric='cosine')
indices = pd.Series(range(len(df)),index = df.index)

# One entry per row of df, in order:
mean_sim_scores_vgg = []    # mean similarity of that row's recommendations
complete_indices_vgg = []   # labels of the recommended items
complete_scores_vgg = []    # similarity score of each recommended item
for row_pos in range(df.shape[0]):
    try:
        # Separate names for the results so the loop variable is not overwritten.
        rec_index,sim_array = get_recommendations(indices,cosine_sim,row_pos,df,top_n = 5)
    except Exception as ex:
        print(f'Following exception : {ex} occured at the index : {row_pos}')
        # Keep the three lists aligned with df even when a row fails;
        # later cells index them by row position.
        rec_index,sim_array = pd.Index([]),[]
    mean_score = np.mean(sim_array) if len(sim_array) else np.nan
    mean_sim_scores_vgg.append(mean_score)
    complete_indices_vgg.append(rec_index)
    complete_scores_vgg.append(sim_array)

In [None]:
# One list of recommended index labels per query row (converted once, reused below).
recommended_labels_vgg = [list(i) for i in complete_indices_vgg]
# The recommendations are index *labels* of df, so look them up with .loc
# (label-based) rather than .iloc (position-based).
sub_cat = [df['subCategory'].loc[labels].to_list() for labels in recommended_labels_vgg]
mast_cat = [df['masterCategory'].loc[labels].to_list() for labels in recommended_labels_vgg]

# Results table for the VGG16 embeddings: each product with its
# recommendations and their categories.
vgg_16 = pd.DataFrame()
vgg_16['id'] = df['id']
vgg_16['recommended_index'] = recommended_labels_vgg
vgg_16['recommended_scores'] = complete_scores_vgg
vgg_16['masterCategory'] = df['masterCategory']
vgg_16['subCategory'] = df['subCategory']
vgg_16['Recommended_master_category'] = mast_cat
vgg_16['Recommended_sub_category'] = sub_cat
vgg_16['mean_recommended_score'] = mean_sim_scores_vgg
vgg_16.head(2)

In [None]:
def getmatch_score(data,col,col1):
    """Percentage of recommended items whose category equals the item's own.

    NOTE: this notebook defines a function with this name twice; the
    definition executed last is the one in effect.

    Parameters
    ----------
    data : pd.DataFrame
        Results table; one row per query item.
    col : str
        Column holding the item's actual category.
    col1 : str
        Column holding the list of categories of the recommended items.

    Returns
    -------
    list of float
        One score in [0, 100] per row, in row order. Rows that cannot be
        scored (empty or non-iterable recommendation list) get NaN so the
        result always has the same length as the frame.
    """
    match_list = []
    pairs = zip(data[col], data[col1])
    for index, (actual_match, predicted_match) in enumerate(pairs):
        try:
            predicted_items = list(predicted_match)
            count = sum(1 for item in predicted_items if item==actual_match)
            # Divide by the real number of recommendations rather than a
            # hard-coded 5, so the score stays correct for any top_n.
            match_list.append((count/len(predicted_items))*100)
        except Exception as ex:
            print(f'Following exception : {ex} occured at index : {index}')
            # Placeholder keeps the output aligned with the frame's rows.
            match_list.append(float('nan'))

    return match_list

# Category agreement between each product and its recommendations,
# at master-category and sub-category level.
score_columns_vgg = {
    'Match_Master_Score': ('masterCategory', 'Recommended_master_category'),
    'Match_Sub_Score': ('subCategory', 'Recommended_sub_category'),
}
for score_col, (actual_col, recommended_col) in score_columns_vgg.items():
    vgg_16[score_col] = getmatch_score(vgg_16, actual_col, recommended_col)
vgg_16.head(2)

In [None]:
# Distribution of the mean similarity score of each product's recommendations.
# `distplot` is deprecated in seaborn; histplot with a KDE overlay on a
# density scale is the documented replacement.
sn.histplot(vgg_16['mean_recommended_score'], kde = True, stat = 'density', kde_kws = dict(cut = 3));