In [None]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, Dropout, Flatten, Dense, Input, Layer
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
import matplotlib.pyplot as plt
import seaborn as sns

# Use a larger default font for every matplotlib figure drawn in this notebook.
plt.rcParams['font.size'] = 16

In [None]:
# `path` is the image folder; it is reused later for listing files, for the
# Keras generator and for `read_img`.
path = '/kaggle/input/fashion-product-images-dataset/fashion-dataset/images'
images_df = pd.read_csv("/kaggle/input/fashion-product-images-dataset/fashion-dataset/images.csv")
# on_bad_lines='skip' drops rows pandas cannot parse instead of raising, so
# styles_df may hold fewer rows than the raw CSV.
styles_df = pd.read_csv("/kaggle/input/fashion-product-images-dataset/fashion-dataset/styles.csv", on_bad_lines='skip')

In [None]:
# Preview the first rows of the image metadata table.
images_df.head()

In [None]:
# Preview the first rows of the product style table.
styles_df.head()

In [None]:
# Build the image file name for each product as "<id>.jpg".
styles_df['filename'] = styles_df['id'].astype(str) + '.jpg'

In [None]:
# Display the frame to check the new `filename` column.
styles_df

In [None]:
# Names of all entries in the image folder, used to check which products have an image.
image_files = os.listdir(path)

In [None]:
# Flag the products whose image file exists in the image folder.
# `isin` hashes `image_files` once, instead of scanning the whole list for
# every row as `x in image_files` inside `apply` did.
styles_df['present'] = styles_df['filename'].isin(image_files)

In [None]:
# Display the frame to check the new `present` flag.
styles_df

In [None]:
# Keep only the products that have an image file, and renumber the rows from 0.
styles_df = styles_df[styles_df['present']].reset_index(drop=True)

In [None]:
# Work on a random subset of at most 10000 products to keep feature extraction fast.
# - random_state makes the subset (and every downstream result) reproducible
#   under Restart & Run All.
# - min(...) avoids the ValueError `sample` raises when fewer than 10000 rows
#   are available.
styles_df = styles_df.sample(n=min(10000, len(styles_df)), random_state=42)

In [None]:
# Display the sampled subset (the index is not reset here, so it keeps the pre-sampling labels).
styles_df

In [None]:
# Side length (pixels) of the square images fed to the network.
img_size = 224
# VGG16's ImageNet weights expect the Keras `vgg16.preprocess_input` transform
# (RGB -> BGR plus per-channel ImageNet mean subtraction, no scaling), not a
# plain 1/255 rescale, so apply it to every image the generator yields.
datagen = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input)
# class_mode=None yields image batches only (no labels). shuffle=False keeps
# the batches in dataframe row order, which the later positional alignment
# between predictions and `styles_df` rows relies on.
generator = datagen.flow_from_dataframe(dataframe=styles_df,
                                        directory=path,
                                        target_size=(img_size,img_size),
                                        x_col='filename',
                                        class_mode=None,
                                        batch_size=32,
                                        shuffle=False,
                                        classes=None)

In [None]:
# VGG16 without its classification head, used as a fixed feature extractor.
base_model = VGG16(include_top=False, input_shape=(img_size,img_size,3))

# Mark every layer as non-trainable; the model is only used for inference below.
for layer in base_model.layers:
    layer.trainable = False
    
# Wrap the base model so each image maps to one vector: global average
# pooling collapses the spatial dimensions of the convolutional output.
input_layer = Input(shape=(img_size,img_size,3))
x = base_model(input_layer)
output = GlobalAveragePooling2D()(x)

embeddings = Model(inputs=input_layer, outputs=output)
embeddings.summary()

In [None]:
# Run every image from `generator` through the embedding model.
# Presumably X has one row per image, in generator order — TODO confirm.
X = embeddings.predict(generator, verbose=1)

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Project the embeddings onto their first two principal components for plotting.
pca = PCA(2)
X_pca = pca.fit_transform(X)

In [None]:
# Store the two PCA coordinates as new columns. The array is assigned
# positionally, so X_pca needs one row per styles_df row, in the same order.
# NOTE(review): this assumes the generator did not drop any files — confirm.
styles_df[['pc1','pc2']] = X_pca

In [None]:
# 2-D PCA view of the embeddings: one point per product, coloured by master category.
pca_fig, pca_ax = plt.subplots(figsize=(20, 8))
sns.scatterplot(data=styles_df, x='pc1', y='pc2', hue='masterCategory', ax=pca_ax)
plt.show()

In [None]:
def read_img(image_path):
    """Load one product image for display.

    Args:
        image_path: File name of the image, relative to the module-level
            `path` directory.

    Returns:
        The array produced by `img_to_array` for the image resized to
        `img_size` x `img_size`, with every value divided by 255.
    """
    # `load_img` documents target_size as a (height, width) pair, so no
    # channel entry belongs in the tuple.
    image = load_img(os.path.join(path, image_path), target_size=(img_size, img_size))
    image = img_to_array(image)
    return image / 255.

In [None]:
import random
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Product ids, passed as the labels when fitting the k-NN model below.
y = styles_df['id']

In [None]:
# Fit a 7-neighbour k-NN model on the image embeddings.
# NOTE(review): only `kneighbors` is called on this model in the visible
# cells, so the labels `y` appear to be needed only because the classifier
# API requires them; an unsupervised `NearestNeighbors` index may fit better — confirm.
nearest_neighbours = KNeighborsClassifier(n_neighbors=7)
nearest_neighbours.fit(X,y)

In [None]:
# Renumber rows 0..n-1 so index labels match row positions; the display loop
# below uses the same integer for `.loc` lookups and for rows of `X`.
styles_df = styles_df.reset_index(drop=True)

In [None]:
# Check the dimensions of the embedding matrix.
X.shape

In [None]:
# Show 10 random query products, each followed by 5 of its nearest neighbours
# in embedding space.
for _ in range(10):
    # randrange excludes its upper bound. randint(0, len(styles_df)) is
    # inclusive and could return len(styles_df), one past the last valid row.
    i = random.randrange(len(styles_df))
    # Rows of X are positional, so look file names up by position as well.
    img1 = read_img(styles_df['filename'].iloc[i])
    dist, index = nearest_neighbours.kneighbors(X=X[i,:].reshape(1,-1))
    plt.figure(figsize = (4 , 4))
    plt.imshow(img1)
    plt.title("Input Image")
    plt.axis('off')

    plt.figure(figsize = (20 , 20))
    # Neighbour 0 is skipped: presumably it is the query image itself
    # (distance 0) — confirm if the data contains duplicate images.
    # A separate loop variable keeps the query index `i` from being overwritten.
    for rank in range(1,6):
        plt.subplot(1 , 5, rank)
        image = read_img(styles_df['filename'].iloc[index[0][rank]])
        plt.imshow(image)
        plt.title(f'Similar Product #{rank}')
        plt.axis('off')
    # Adjust the spacing once, after all five panels exist.
    plt.subplots_adjust(hspace = 0.5 , wspace = 0.3)
    plt.show()