In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline
from skimage.io import imread

# Prepare Data

In [20]:
data = pd.read_excel('exercise.xlsx',head=True)

In [21]:
data = data.drop(columns="id")
data

Unnamed: 0,gender,short,color
0,1,1,3
1,1,0,5
2,1,1,4
3,0,1,0
4,0,0,1


In [4]:
product = np.array(data)

# Cosine Similarity

In [5]:
from math import sqrt
def cosine_similarity(v1,v2):
    sumxx,sumxy,sumyy = 0,0,0
    for i in range(len(v1)):
        x = v1[i]
        y = v2[i]
        
        sumxx += x*x
        sumyy += y*y
        sumxy += x*y
    return round(sumxy/math.sqrt(sumxx*sumyy),2)

In [6]:
# Test of cosine_similarity function
cosine_similarity(product[1],product[2])

0.97

# KNN 

In [7]:
def knn(product,k):
    neighbors = {}
    for i in range(len(product)):
        dist = {}
        for j in range(len(product)):
            dist[j] = cosine_similarity(product[i],product[j])
        neighbors[i] = sorted(dist.items(),key=lambda x:x[1],reverse=True)[1:k+1]
        print('Product ' + str(i) +'s neighbors is: ' + str(neighbors[i] ))

In [18]:
product

array([[1, 1, 3],
       [1, 0, 5],
       [1, 1, 4],
       [0, 1, 0],
       [0, 0, 1]], dtype=int64)

In [22]:
knn(product,3)

Product 0s neighbors is: [(2, 0.99), (1, 0.95), (4, 0.9)]
Product 1s neighbors is: [(4, 0.98), (2, 0.97), (0, 0.95)]
Product 2s neighbors is: [(0, 0.99), (1, 0.97), (4, 0.94)]
Product 3s neighbors is: [(0, 0.3), (2, 0.24), (1, 0.0)]
Product 4s neighbors is: [(1, 0.98), (2, 0.94), (0, 0.9)]


# Read picture features

## Load Images

In [9]:
for i in range(5):
    file_name = 'Shirt\\'+str(i)+'.png'

    image = imread(file_name)
    print(image.shape)

(177, 118, 4)
(215, 178, 4)
(215, 143, 4)
(196, 157, 4)
(196, 111, 4)


## Drop Alpha Chanel 

In [10]:
for i in range(5):
    file_name = 'Shirt\\'+str(i)+'.png'

    image = imread(file_name)[:,:,:3]
    print(image.shape)

(177, 118, 3)
(215, 178, 3)
(215, 143, 3)
(196, 157, 3)
(196, 111, 3)


## Resize 

In [11]:
min_height = 177
min_width = 111

In [12]:
images = {}
for i in range(5):
    file_name = 'Shirt\\'+str(i)+'.png'
    image = imread(file_name)[:,:,:3]
    width = len(image[0])
    start = (width-min_width)//2
    image = image[:min_height,start:start+min_width,:]
    images[i] = image
    print(image.shape)

(177, 111, 3)
(177, 111, 3)
(177, 111, 3)
(177, 111, 3)
(177, 111, 3)


##  Mean Pixel Value Features

In [13]:
products_image = []
for key in images:
    feature_matrix = np.zeros((177,111))
    image = images[key]

    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            feature_matrix[i][j] = int((int(image[i,j,0]) + int(image[i,j,1]) + int(image[i,j,2]))/3)
    images[key] = np.reshape(feature_matrix,(177,111))
    products_image.append(images[key][0])

In [14]:
products_image

[array([202., 202., 203., 203., 203., 202., 202., 203., 203., 203., 204.,
        204., 204., 206., 206., 206., 206., 207., 207., 208., 208., 209.,
        209., 209., 209., 209., 209., 209., 209., 212., 212., 212., 212.,
        213., 213., 215., 215., 137., 105., 126., 141., 136., 115.,  97.,
        124., 116., 135., 155., 167., 172., 168., 150., 130., 132., 104.,
         69., 113., 124., 152., 158., 151., 154.,  82.,  49., 207., 219.,
        207., 216., 202., 210., 209., 209., 210., 210., 211., 212., 212.,
        210., 210., 209., 209., 209., 208., 208., 208., 209., 209., 208.,
        208., 208., 207., 207., 207., 206., 206., 206., 206., 205., 205.,
        205., 204., 201., 201., 202., 202., 202., 202., 203., 203., 201.,
        201.]),
 array([218., 218., 218., 218., 218., 218., 218., 218., 218., 218., 218.,
        218., 218., 218., 218., 218., 218., 218., 218., 218., 218., 218.,
        218., 218., 218., 218., 218., 218., 218., 218., 218., 224., 219.,
        214., 213., 21

# Products_image KNN 

In [23]:
knn(products_image,3)

Product 0s neighbors is: [(1, 0.98), (4, 0.98), (2, 0.97)]
Product 1s neighbors is: [(4, 1.0), (0, 0.98), (3, 0.97)]
Product 2s neighbors is: [(0, 0.97), (4, 0.96), (1, 0.95)]
Product 3s neighbors is: [(1, 0.97), (4, 0.97), (0, 0.95)]
Product 4s neighbors is: [(4, 1.0), (0, 0.98), (3, 0.97)]


# Combined 

In [16]:
products_combined = np.concatenate((product,products_image),axis=1)
products_combined

array([[  1.,   1.,   3., 202., 202., 203., 203., 203., 202., 202., 203.,
        203., 203., 204., 204., 204., 206., 206., 206., 206., 207., 207.,
        208., 208., 209., 209., 209., 209., 209., 209., 209., 209., 212.,
        212., 212., 212., 213., 213., 215., 215., 137., 105., 126., 141.,
        136., 115.,  97., 124., 116., 135., 155., 167., 172., 168., 150.,
        130., 132., 104.,  69., 113., 124., 152., 158., 151., 154.,  82.,
         49., 207., 219., 207., 216., 202., 210., 209., 209., 210., 210.,
        211., 212., 212., 210., 210., 209., 209., 209., 208., 208., 208.,
        209., 209., 208., 208., 208., 207., 207., 207., 206., 206., 206.,
        206., 205., 205., 205., 204., 201., 201., 202., 202., 202., 202.,
        203., 203., 201., 201.],
       [  1.,   0.,   5., 218., 218., 218., 218., 218., 218., 218., 218.,
        218., 218., 218., 218., 218., 218., 218., 218., 218., 218., 218.,
        218., 218., 218., 218., 218., 218., 218., 218., 218., 218., 218.,
     

In [17]:
knn(products_combined,2)

Product 0s neighbors is: [(1, 0.98), (4, 0.98)]
Product 1s neighbors is: [(4, 1.0), (0, 0.98)]
Product 2s neighbors is: [(0, 0.97), (4, 0.96)]
Product 3s neighbors is: [(1, 0.97), (4, 0.97)]
Product 4s neighbors is: [(4, 1.0), (0, 0.98)]


# Add Scalar to Combine 

In [24]:
from sklearn import preprocessing
combine_scaled = preprocessing.scale(products_combined)

In [25]:
knn(combine_scaled,2)

Product 0s neighbors is: [(1, 0.64), (2, -0.26)]
Product 1s neighbors is: [(0, 0.64), (4, -0.36)]
Product 2s neighbors is: [(4, 0.06), (3, -0.1)]
Product 3s neighbors is: [(4, 0.17), (2, -0.1)]
Product 4s neighbors is: [(3, 0.17), (2, 0.06)]
