In [None]:
import cv2
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as tfk
import tensorflow_datasets as tfds
import keras
from keras import layers
from keras.preprocessing import image
tfkl = tfk.layers

In [None]:
# Root folder of the dataset under /kaggle/input; later cells build file paths from it.
path = "/kaggle/input/fashion-product-images-dataset/fashion-dataset/fashion-dataset/"
# List the folder contents as a quick check that the dataset is available.
print(os.listdir(path))

In [None]:
# Load the first 5000 rows of product metadata. Malformed lines are dropped
# with a warning: `on_bad_lines` replaces `error_bad_lines=False`, which was
# deprecated in pandas 1.3 and removed in pandas 2.0.
df = pd.read_csv(path + "styles.csv", nrows=5000, on_bad_lines="warn")
# Image file name of each product: "<id>.jpg" (vectorised instead of a row-wise apply).
df['image'] = df['id'].astype(str) + ".jpg"
df = df.reset_index(drop=True)
df.head(10)

In [None]:
# (number of rows, number of columns) of the loaded metadata frame.
df.shape

### **We will use the product image 15970.jpg as an example**

In [None]:
# Product id of the query image used in the first walk-through.
test_id = 15970

## Step1. Retrieval

Here I directly retrieve the products that share the query's sub-category and gender; in our real use case the retrieval step would be different.

In [None]:
def retrieval(df,image_id):
    """Return the products sharing sub-category and gender with a query product.

    Parameters
    ----------
    df : pandas.DataFrame
        Product metadata; the columns ``image``, ``subCategory`` and
        ``gender`` are read.
    image_id : int or str
        Product id; matched against the ``image`` column as ``"<image_id>.jpg"``.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` (original index labels kept) whose ``subCategory``
        and ``gender`` equal those of the query. The query row is included.

    Raises
    ------
    IndexError
        If no row of ``df`` has the image ``"<image_id>.jpg"``.
    """
    image_name = str(image_id) + '.jpg'
    query_rows = df.loc[df['image'] == image_name]
    if query_rows.empty:
        # Same exception type the positional lookup would raise on an empty
        # selection, but with a message that names the missing image.
        raise IndexError(f"image {image_name!r} not found in df")
    # If several rows match, the first one defines the query attributes.
    query = query_rows.iloc[0]
    same_group = (df['subCategory'] == query['subCategory']) & (df['gender'] == query['gender'])
    return df.loc[same_group]

In [None]:
# Candidate set for the query: same sub-category and gender as test_id.
retr_dt = retrieval(df,test_id)
retr_dt.head(5)

## Step2. Ranking based on similarity of product images

### read in images and make plot

In [None]:
def read_image(image_id):
    """Load the product image ``<image_id>.jpg`` as a (2400, 1800, 3) RGB float array.

    Images that already have that size are decoded with OpenCV; any other
    size is reloaded and resized through the Keras image loader.

    Parameters
    ----------
    image_id : int or str
        Product id; the file ``path + "images/<image_id>.jpg"`` is read.

    Returns
    -------
    numpy.ndarray
        Array of shape (2400, 1800, 3) in RGB channel order.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file.
    """
    img_path = path + "images/" + str(image_id) + '.jpg'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising, which would otherwise surface as an
        # AttributeError on `.shape`.
        raise FileNotFoundError(f"could not read image: {img_path}")
    if img.shape != (2400,1800,3):
        # Off-size image: let Keras reload and resize it.
        # target_size is (height, width); the channel count is not part of it.
        img = image.load_img(img_path, target_size=(2400,1800))
        return image.img_to_array(img)
    # OpenCV decodes to BGR uint8; convert to RGB float32 so that both
    # branches hand the model the same layout as the Keras loader.
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype("float32")

In [None]:
def plot_image(image_id):
    """Show the product image ``<image_id>.jpg`` with matplotlib.

    Parameters
    ----------
    image_id : int or str
        Product id; the file ``path + "images/<image_id>.jpg"`` is read.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file.
    """
    img_path = path + "images/" + str(image_id) + '.jpg'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for a missing/unreadable file
        raise FileNotFoundError(f"could not read image: {img_path}")
    # OpenCV decodes to BGR while matplotlib expects RGB; showing the array
    # directly would swap the red and blue channels.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

In [None]:
# Display the query product image.
plot_image(test_id)

#### instance segmentation
Some pictures show the product on a model while others do not, and some photos do not have a clean background. Therefore, it is important to apply instance segmentation to identify the part of the image that contains the product.

Unfortunately, the pre-trained detector (RetinaNet with a ResNet-50 backbone) was trained on the COCO dataset, which covers common objects (e.g. person, car) rather than clothing, and we lack a labelled training set for segmenting the clothes/products themselves.

Therefore, the rough solution here is to use the detector to check whether a person is present. If the query image shows a model, we recommend similar products whose photos also show models, and add some product images without models as a supplement.

**image-processing difficulties & Future Work:**

(1) semantic segmentation *(eliminate the background/models, only keep products)*

(2) resize picture matrices

(3) cope with image distortion / different filming angles

(4) compute similarities

In [None]:
#from imageai.Detection import ObjectDetection

In [None]:
def with_without_model(test_id):
    """Report whether a product photo contains a person (i.e. a fashion model).

    Runs a RetinaNet detector on ``<test_id>.jpg`` and writes the annotated
    image to the current working directory under the same file name.

    Parameters
    ----------
    test_id : int or str
        Product id of the image to analyse.

    Returns
    -------
    int
        1 if any detection is named ``'person'`` with a
        ``percentage_probability`` above 50, otherwise 0.
    """
    # NOTE(review): ObjectDetection is not imported in the visible notebook
    # (the imageai import cell is commented out); it must be in scope before
    # this function is called.
    image_file = str(test_id)+".jpg"
    execution_path = "/kaggle/input/fashion-product-images-dataset/fashion-dataset/images/"

    detector = ObjectDetection()
    detector.setModelTypeAsRetinaNet()
    detector.setModelPath("/kaggle/input/imageai/resnet50_coco_best_v2.0.1.h5")
    detector.loadModel()

    detections = detector.detectObjectsFromImage(
        input_image=os.path.join(execution_path, image_file),
        output_image_path=os.path.join(os.getcwd(), image_file),
    )

    # Stop at the first sufficiently confident 'person' detection
    person_found = any(
        found["name"]=='person' and found["percentage_probability"]>50
        for found in detections
    )
    return 1 if person_found else 0

There's a dataset ***model_dat*** which is generated from the process of mapping the function ***with_without_model*** to every image.

In [None]:
#with_without_model(test_id)

### image embedding

In [None]:
from keras.applications.resnet50 import ResNet50

tfkl = tfk.layers

In [None]:
#remember the input_shape set for this model is in_shape, which is a tuple, so the image should be resized
def build_model(in_shape,high_d=True):
    """Build a frozen, ImageNet-pretrained ResNet50 image-embedding model.

    Parameters
    ----------
    in_shape : tuple
        Input shape passed to ResNet50 as ``input_shape``; images must be
        resized to it before prediction.
    high_d : bool, default True
        If true, the embedding is the globally max-pooled ResNet50 feature
        map. If false, a 100-unit ReLU Dense layer is appended after the
        pooling to produce a lower-dimensional embedding.

    Returns
    -------
    tfk.Sequential
        The embedding model (its summary is printed as a side effect).
    """
    resnet_base = ResNet50(weights='imagenet',
                      include_top=False,
                      input_shape = in_shape)
    # Feature extractor only: the pretrained weights are never updated
    resnet_base.trainable = False

    model = tfk.Sequential()
    model.add(resnet_base)
    # Collapse the spatial feature map to a single vector per image. This is
    # needed in both modes: a Dense layer applied straight to the 4-D feature
    # map would keep the spatial axes instead of yielding one embedding vector.
    model.add(tfkl.GlobalMaxPooling2D())
    if not high_d:
        # NOTE(review): this projection is randomly initialised and is not
        # trained anywhere in the visible notebook — confirm that is intended.
        model.add(tfkl.Dense(100, activation=tf.nn.relu))

    # summary() prints the table itself and returns None, so it is not wrapped in print()
    model.summary()
    return model

# No training step: `build_model` returns a frozen, ImageNet-pretrained feature
# extractor that is only used through `model.predict`. The former
# `model.compile(...)` / `model.fit(ds_train, ...)` calls were removed because
# they ran before `model` is created and referenced `ds_train`, which is not
# defined anywhere in the visible notebook, so this cell failed on a fresh kernel.

In [None]:
# Input size fed to the network; read_image returns arrays of this shape.
# Kept as a list because later cells concatenate it with [1] to add a batch axis.
in_shape = [2400,1800,3]
# build_model passes the shape on to ResNet50 as input_shape, hence the tuple.
model = build_model(tuple(in_shape))

We have to resize the images to a common shape, because not every image has the same dimensions.

In [None]:
# Embed the query image on its own to inspect what the model returns
img = read_image(test_id)
batch = img.reshape((1, *in_shape))  # add the batch axis -> (1, 2400, 1800, 3)
emb = model.predict(batch).reshape(-1)  # flatten (1, 2048) to (2048,)
emb

In [None]:
def get_embedding(mod, image_name, in_shape):
    """Compute the flattened embedding of one image file.

    Parameters
    ----------
    mod : model
        Object with a ``predict`` method, e.g. the result of ``build_model``.
    image_name : str
        File name inside ``path + "images/"``.
    in_shape : list or tuple
        Model input shape ``(height, width, channels)``; the image is resized
        to ``(height, width)`` on load.

    Returns
    -------
    numpy.ndarray
        1-D array holding the model output for this image.
    """
    # Accept lists as well as tuples (``[1] + in_shape`` only works for lists)
    in_shape = tuple(in_shape)
    # target_size is (height, width); the channel count is not part of it
    img = image.load_img(path+"images/"+str(image_name), target_size=in_shape[:2])
    img = image.img_to_array(img)
    # NOTE(review): the pixel values are fed to the model as loaded. Keras
    # documents a `preprocess_input` step for ResNet50 — confirm whether it
    # should be applied here (it would change every stored embedding).
    batch = img.reshape((1, *in_shape))
    return mod.predict(batch).reshape(-1)

Attach a column *embedding* to store image embedding for every photo. 

In [None]:
numRows = df.shape[0]
numCols = 2048 #representing dimensions for embedding, see the output dim of model
# Pre-allocate one NaN-filled row per product. An explicit float dtype is
# given because a frame built from index/columns alone gets object columns,
# which are slow and memory-hungry for numeric data.
emb_matrix = pd.DataFrame(index=range(numRows),columns=range(numCols),dtype="float32")

The output embedding for every image is 2048, is there a curse of dimensionality?

No problem arises, and 2048 dimension works better than 100 dimension.

In [None]:
%%time
# Compute every image's embedding in df, and attach it as a column
for r in range(0,df.shape[0]):
    im = df['image'][r]
    emb = get_embedding(model,im,in_shape)
    emb_matrix.iloc[r,:]=emb

In [None]:
# Preview the first embeddings (one row per product, one column per dimension).
emb_matrix.head(5)

In [None]:
# Save emb_matrix to a CSV file instead of keeping it only in memory
emb_matrix.to_csv("emb_matrix.csv",index=False)

In [None]:
# Put the image name and product id next to each embedding row (column-wise concat, aligned on the row index).
emb_store = pd.concat([emb_matrix, df[["image","id"]]],axis=1,ignore_index=False)

In [None]:
# Save the embeddings together with their image name and id; the row index is written as well.
emb_store.to_csv("emb_store.csv",index=True)

### compute similarity for all retrieved images

In [None]:
# Embedding rows of the retrieved candidates (emb_store and df share row labels)
dt = emb_store.loc[retr_dt.index]
dt.head(5)

In [None]:
def compute_similarity(dt,test_id):
    """Rank products by cosine similarity of their embedding to a query product.

    Parameters
    ----------
    dt : pandas.DataFrame
        One row per product. Must contain an ``id`` column; an ``image``
        column and a previous ``sim`` column are ignored if present. Every
        other column is treated as one embedding dimension.
    test_id : int or str
        Id of the query product; it must occur in ``dt["id"]``.

    Returns
    -------
    pandas.DataFrame
        The embedding columns plus a ``sim`` column (cosine similarity to the
        query, rounded to 3 decimals), indexed by the product id as ``str``
        and sorted by ``sim`` in descending order. ``dt`` itself is left
        unmodified.

    Raises
    ------
    IndexError
        If ``test_id`` does not occur in ``dt["id"]``.
    """
    ids = dt["id"].map(str)
    # drop() returns a new frame, so the caller's data is never touched
    features = dt.drop(columns=["image", "id", "sim"], errors="ignore")
    features.index = ids

    matches = np.flatnonzero(features.index == str(test_id))
    if matches.size == 0:
        raise IndexError(f"test_id {test_id!r} not found in dt['id']")

    # Use every remaining column as the embedding instead of a fixed 2048
    matrix = features.to_numpy(dtype=float)
    target = matrix[matches[0]]  # first match if the id is duplicated

    # Cosine similarity of all rows against the query in one pass
    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(target)
    with np.errstate(divide="ignore", invalid="ignore"):
        # Zero-length vectors give NaN; the clip only absorbs rounding error
        sims = np.clip(matrix @ target / norms, -1.0, 1.0)

    features["sim"] = np.round(sims, 3)
    return features.sort_values('sim',ascending=False)

In [None]:
# Rank the retrieved candidates by similarity to the query image, most similar first.
sorted_dat = compute_similarity(dt, test_id)
sorted_dat.head(5)

In [None]:
#plot the top 10 recommendations for test_id
fig=plt.figure(figsize=(10, 10))
columns = 5
rows = 2
# sorted_dat is indexed by product id, most similar first, so the first ten
# index labels are the ten best matches; `i` is only the 1-based subplot slot.
for i, img in enumerate(sorted_dat.index[:10], start=1):
    img_path = path+"images/"+str(img)+".jpg"
    im = cv2.imread(img_path)
    if im is None:
        # cv2.imread returns None for a missing/unreadable file
        raise FileNotFoundError(f"could not read image: {img_path}")
    fig.add_subplot(rows, columns, i)
    # OpenCV decodes to BGR; matplotlib expects RGB
    plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
plt.show()

**Additional note (optional):**

https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise_distances.html
```python
from sklearn.metrics.pairwise import pairwise_distances

# Calculate the distance matrix
cosine_sim = 1-pairwise_distances(dt.loc[:,dt.columns!="sim"], metric='cosine')
cosine_sim[:4, :4]
```

## Try another test_id

In [None]:
# Product id of the second query image.
test_id_2 = 58183

In [None]:
# Candidate set for the second query: same sub-category and gender as test_id_2.
retr_dt_2 = retrieval(df,test_id_2)
retr_dt_2.head(5)

In [None]:
# Display the second query product image.
plot_image(test_id_2)

In [None]:
# Reuse the extractor built for the first example: it is frozen and the input
# size is the same, so building a second identical copy would only cost time
# and memory.
in_shape_2 = list(in_shape)
model_2 = model

In [None]:
# Embed the second query image on its own to inspect what the model returns
img_2 = read_image(test_id_2)
batch_2 = img_2.reshape((1, *in_shape_2))  # add the batch axis -> (1, 2400, 1800, 3)
emb_2 = model_2.predict(batch_2).reshape(-1)  # flatten (1, 2048) to (2048,)
emb_2

In [None]:
# Embedding rows of the candidates retrieved for the second query
dt_2 = emb_store.loc[retr_dt_2.index]
dt_2.head(5)

In [None]:
# Rank the candidates of the second query by similarity, most similar first.
sorted_dat_2 = compute_similarity(dt_2, test_id_2)
sorted_dat_2.head(10)

In [None]:
#plot the top 10 recommendations for test_id_2
fig=plt.figure(figsize=(10, 10))
columns = 5
rows = 2
# sorted_dat_2 is indexed by product id, most similar first, so the first ten
# index labels are the ten best matches; `i` is only the 1-based subplot slot.
for i, img in enumerate(sorted_dat_2.index[:10], start=1):
    img_path = path+"images/"+str(img)+".jpg"
    im = cv2.imread(img_path)
    if im is None:
        # cv2.imread returns None for a missing/unreadable file
        raise FileNotFoundError(f"could not read image: {img_path}")
    fig.add_subplot(rows, columns, i)
    # OpenCV decodes to BGR; matplotlib expects RGB
    plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
plt.show()