![](https://miro.medium.com/max/1400/1*gYe2FMr9lKys2Wmo1v-s7A.jpeg)

# 1. Importing Libraries

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelBinarizer
from keras.applications.xception import Xception,preprocess_input
import tensorflow as tf
from keras.preprocessing import image
from keras.layers import Input
from keras.backend import reshape
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

In [2]:
import pandas as pd
# Read only the three transaction columns this notebook uses, all with pandas' 'string' dtype.
df = pd.read_csv(
    '../input/h-and-m-personalized-fashion-recommendations/transactions_train.csv',
    usecols=['t_dat', 'customer_id', 'article_id'],
    dtype=dict.fromkeys(['article_id', 't_dat', 'customer_id'], 'string'),
)

In [3]:
from datetime import datetime
# Weeks are counted backwards from 2020-09-22: transactions 0-6 days before that
# date get week 104, 7-13 days before get week 103, and so on.
df['t_dat'] = pd.to_datetime(df['t_dat'])
df['week'] = 104 - ((datetime(2020, 9, 22) - df['t_dat']).dt.days / 7).astype('int')

In [4]:
# Earliest transaction date that falls in week 102.
df.loc[df['week'] == 102, 't_dat'].min()

In [5]:
# Latest transaction date that falls in week 102.
df.loc[df['week'] == 102, 't_dat'].max()

In [7]:
# Up to 1000 article ids with the most transactions in week 102 (value_counts sorts by count, descending).
training_articles = df.loc[df['week'] == 102, 'article_id'].value_counts().index[:1000]

In [8]:
# Display the article ids selected above.
training_articles

In [5]:
# For every article id, the smallest `week` value in which it has a transaction.
predicting_df = df.groupby('article_id')['week'].agg('min')

In [24]:
# Article ids whose earliest transaction week is 103.
predicting_df.loc[predicting_df == 103].index

In [6]:
# Article ids whose earliest transaction week is 102.
training_articles = predicting_df.loc[predicting_df == 102].index

In [7]:
# Article ids whose earliest transaction week is 103.
predicting_articles = predicting_df.loc[predicting_df == 103].index

In [30]:
# Display the article ids first seen in week 103.
predicting_articles

# 2. Loading and Preprocessing of Data

In [8]:
# Root folder of the article images; load_data passes it to getImagePaths, which walks it recursively.
images_dir = '../input/h-and-m-personalized-fashion-recommendations/images'

In [9]:
# NOTE: this cell used to run `image_list = getImagePaths(images_dir)`. That call
# cannot succeed on Restart & Run All: `getImagePaths` is only defined in the next
# cell, and it requires a second `training_articles` argument. `image_list` is not
# used anywhere below; the image paths are built by `load_data(...)` in section 5.

In [9]:
def getImagePaths(path, training_articles):
    """
    Collect the full paths of the images under `path` that belong to the given articles.

    The directory tree is walked recursively. A file is kept when the part of its
    name before '.jpg' is one of `training_articles`.

    parameters: path(string) - Root directory to search
                training_articles(iterable of string) - Article ids to keep
    returns: image_names(list of string) - Matching full image paths, sorted so
             that positions in the list are the same on every run
    """
    # Materialise the ids once: membership tests become O(1), and one-shot
    # iterables are not exhausted by the first `in` check.
    wanted_articles = set(training_articles)
    image_names = []
    for dirname, _, filenames in os.walk(path):
        for filename in filenames:
            if filename.split('.jpg')[0] in wanted_articles:
                image_names.append(os.path.join(dirname, filename))
    # os.walk yields entries in arbitrary filesystem order; sorting keeps
    # positional lookups into the returned list reproducible.
    return sorted(image_names)

def preprocess_img(img_path):
    """
    Load one image from disk and turn it into a single-image batch for Xception.

    parameters: img_path(string) - Path of the image file
    returns: new_image(numpy array) - The image resized to 225x225, with a leading
             batch axis added and Xception's `preprocess_input` applied
    raises: FileNotFoundError - If OpenCV cannot read an image from `img_path`
    """
    dsize = (225,225)
    new_image=cv2.imread(img_path)
    if new_image is None:
        # cv2.imread does not raise on a missing or unreadable file; it returns None,
        # which would otherwise surface as an obscure error inside cv2.resize.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # OpenCV decodes images in BGR channel order, while the ImageNet-pretrained
    # Xception weights expect RGB input.
    new_image=cv2.cvtColor(new_image,cv2.COLOR_BGR2RGB)
    new_image=cv2.resize(new_image,dsize,interpolation=cv2.INTER_NEAREST)
    new_image=np.expand_dims(new_image,axis=0)
    new_image=preprocess_input(new_image)
    return new_image

def load_data(training_articles):
    """
    Return the image paths under the notebook-level `images_dir` for the given articles.

    parameters: training_articles - Article ids whose images should be kept
                (passed straight to getImagePaths)
    returns: list of full image paths, as produced by getImagePaths
    """
    return getImagePaths(images_dir, training_articles)

# 3. Defining the model and extracting features for all the training data

In [10]:
def model():
    """
    Build the Xception network used as a fixed feature extractor.

    returns: Xception model created with weights='imagenet' and include_top=False,
             with every layer marked as non-trainable
    """
    backbone = Xception(include_top=False, weights='imagenet')
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False
    return backbone

def feature_extraction(image_data,model):
    """
    Run `model.predict` on `image_data` and return the output as one flat vector.

    parameters: image_data - Input passed unchanged to `model.predict`
                model - Any object exposing a `predict` method
    returns: 1-D numpy array holding every value of the prediction
    """
    return np.array(model.predict(image_data)).flatten()

# 4. Finding similar images through nearest-neighbour search (cosine distance)

In [11]:
def result_vector_cosine(model,feature_vector,new_img,n_results=12):
    """
    Find the indexed feature vectors closest to a query image under the cosine metric.

    parameters: model - Feature extractor exposing `predict` (see feature_extraction)
                feature_vector - 2-D array-like, one flattened feature vector per row
                new_img - Preprocessed query image batch passed to `model.predict`
                n_results(int) - Maximum number of neighbours to return (default 12)
    returns: (distances, indices) as returned by NearestNeighbors.kneighbors for the
             single query; with metric="cosine" a smaller distance means more similar
    raises: ValueError - If `feature_vector` contains no rows
    """
    if len(feature_vector) == 0:
        raise ValueError("feature_vector is empty; there is nothing to search")
    # Same extraction path as the indexed images, so query and index are comparable.
    new_feature = feature_extraction(new_img, model)
    # kneighbors rejects n_neighbors larger than the number of fitted samples,
    # so cap the request for small indexes.
    n_neighbors = min(n_results, len(feature_vector))
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric="cosine").fit(feature_vector)
    distances, indices = nbrs.kneighbors([new_feature])
    return (distances, indices)

In [42]:
# NOTE(review): `inputs` is only assigned in the feature-extraction cell of section 5,
# so this peek fails on a fresh Restart & Run All — confirm whether it should move below that cell.
inputs[0:2]

# 5. Result
## Vector Cosine

In [12]:
def input_show(data):
    """
    Draw the query image on the current matplotlib axes with the title "Query Image".

    parameters: data - Image array handed to plt.imshow as-is
    """
    plt.imshow(data)
    plt.title("Query Image")
  
def show_result(data, result, scores, target_week):
    """
    Print and plot the neighbours retrieved for one query.

    For every neighbour this prints its rank, its article id, the number of rows of
    the notebook-level `df` for that article in `target_week`, and its score. The
    neighbour images are then drawn on a grid four columns wide.

    parameters: data(list of string) - Image paths that were indexed; `result`
                holds positions in this list
                result - Neighbour indices from result_vector_cosine (row 0 is used)
                scores - Matching distances from result_vector_cosine (row 0 is used)
                target_week(int) - Value of df['week'] used for the transaction count
    """
    temp = df[df['week'] == target_week]
    neighbours = result[0]
    # Size the grid from the actual number of neighbours instead of assuming 12;
    # twelve neighbours still give the original 3x4 layout.
    n_cols = 4
    n_rows = max(1, -(-len(neighbours) // n_cols))
    plt.figure(figsize=(12,8))
    for i, index_result in enumerate(neighbours):
        article = data[index_result].split('/')[-1].split('.jpg')[0]
        print(i, article,temp[temp['article_id'] == article].shape[0], scores[0][i] )
        plt.subplot(n_rows,n_cols,i+1)
        # cv2.imread returns BGR while plt.imshow interprets 3-channel data as RGB.
        plt.imshow(cv2.cvtColor(cv2.imread(data[index_result]), cv2.COLOR_BGR2RGB))
    plt.show()

  

In [57]:
# NOTE(review): `result` is first assigned by the query cells further down,
# so this peek fails on a fresh Restart & Run All — confirm whether it should move below them.
result.shape

In [16]:
  
# Indexed side: images of the articles in `predicting_articles`.
inputs = load_data(predicting_articles)
# Query side: images of the articles in `training_articles`.
outputs = load_data(training_articles)
main_model = model()

# One flattened feature vector per indexed image, in the same order as `inputs`.
features = [
    feature_extraction(preprocess_img(image_path), main_model)
    for image_path in inputs
]
feature_vec = np.array(features)


In [19]:
# Display the article ids whose images were indexed above.
predicting_articles

In [20]:
# Training articles ranked by their number of week-102 transactions (at most 1000 shown).
df.loc[(df['week'] == 102) & df['article_id'].isin(training_articles), 'article_id'].value_counts().index[:1000]

In [26]:
# Print the position in `outputs` of every path that contains article id 0933989001.
for position, image_path in enumerate(outputs):
    if "0933989001" not in image_path:
        continue
    print(position)

In [22]:
# Query with the image at position 157 of `outputs` and list its neighbours,
# with per-article transaction counts taken from week 103.
i = 157
scores, result=result_vector_cosine(main_model,feature_vec,preprocess_img(outputs[i]))
# cv2.imread returns BGR; convert so matplotlib shows the query in its true colours.
input_show(cv2.cvtColor(cv2.imread(outputs[i]), cv2.COLOR_BGR2RGB))
show_result(inputs, result, scores, 103)

In [25]:
# Query with the image at position 233 of `outputs` and list its neighbours,
# with per-article transaction counts taken from week 103.
i = 233
scores, result=result_vector_cosine(main_model,feature_vec,preprocess_img(outputs[i]))
# cv2.imread returns BGR; convert so matplotlib shows the query in its true colours.
input_show(cv2.cvtColor(cv2.imread(outputs[i]), cv2.COLOR_BGR2RGB))
show_result(inputs, result, scores, 103)

In [27]:
# Query with the image at position 716 of `outputs` and list its neighbours,
# with per-article transaction counts taken from week 103.
i = 716
scores, result=result_vector_cosine(main_model,feature_vec,preprocess_img(outputs[i]))
# cv2.imread returns BGR; convert so matplotlib shows the query in its true colours.
input_show(cv2.cvtColor(cv2.imread(outputs[i]), cv2.COLOR_BGR2RGB))
show_result(inputs, result, scores, 103)