In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.neighbors import NearestNeighbors

%matplotlib inline

### Read the shoe data

In [2]:
# Load the shoe catalogue, decoding the file as ISO-8859-1 (Latin-1)
# instead of pandas' default UTF-8.
shoe_data = pd.read_csv(filepath_or_buffer='Shoes.csv', encoding="ISO-8859-1")

In [3]:
# Tally how many catalogue rows carry each Gender label.
x = shoe_data.loc[:, 'Gender']
x.value_counts()

Women     11100
Men        7253
Girls      2659
Boys       2546
Womens     1008
Mens        595
Unisex       96
Name: Gender, dtype: int64

### Filters the Data by Women, Men, Girls, Boys and Unisex

In [4]:
# Split the catalogue into one frame per gender group and keep each group's
# 'Row' column (the shoe numbers) for later lookups.
# The raw labels are inconsistent ('Women'/'Womens', 'Men'/'Mens'), so both
# spellings are folded into the same group via isin().

# Women
Women_Data = shoe_data[shoe_data['Gender'].isin(['Women', 'Womens'])]
Women_Shoes = Women_Data['Row']  # number for the shoes

# Men
Men_Data = shoe_data[shoe_data['Gender'].isin(['Men', 'Mens'])]
Men_Shoes = Men_Data['Row']

# Girls
Girls_Data = shoe_data[shoe_data['Gender'] == 'Girls']
Girls_Shoes = Girls_Data['Row']

# Boys
Boys_Data = shoe_data[shoe_data['Gender'] == 'Boys']
Boys_Shoes = Boys_Data['Row']

# Unisex
Unisex_Data = shoe_data[shoe_data['Gender'] == 'Unisex']
Unisex_Shoes = Unisex_Data['Row']

### Read the Feature Data

In [5]:
# Load the per-shoe feature table and preview its last five rows.
df = pd.read_csv(filepath_or_buffer='features.csv')
df.tail(n=5)

Unnamed: 0,shoe_number,0,1,2,3,4,5,6,7,8,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
25253,25253,0.0,0.0,2.822442,0.0,0.0,1.100513,0.0,3.466933,0.0,...,0.0,2.238948,0.0,2.618712,0.0,0.0,0.0,2.934636,0.0,1.678181
25254,25254,0.0,1.06385,0.183473,1.444844,0.0,0.563876,0.0,0.558887,0.0,...,0.0,0.492249,0.0,3.365807,0.0,0.0,0.0,2.263286,0.0,0.0
25255,25255,0.050072,0.0,2.465157,0.0,0.0,0.282674,0.0,0.0,0.0,...,0.0,0.197933,0.0,3.513182,0.0,2.358734,0.0,5.324767,0.0,0.630338
25256,25256,0.0,0.0,3.47569,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.531141,0.0,5.769824,0.0,0.0,0.0,4.421978,0.0,2.824965
25257,25257,0.0,0.0,0.173585,0.0,0.0,0.0,0.0,5.833847,0.0,...,0.0,2.430494,0.0,5.79744,0.0,1.900185,0.0,2.878265,0.0,0.0


In [6]:
# Use the shoe number as the index so rows can be looked up by shoe number.
# The guard keeps this cell safe to re-run: once 'shoe_number' has become the
# index it is no longer a column, and a second set_index call on it would
# raise KeyError.
if df.index.name != 'shoe_number':
    df = df.set_index('shoe_number')

In [7]:
# Select each group's feature rows from `df`, using that group's shoe numbers
# (its 'Row' values) as index labels.
Women_Features, Men_Features, Girls_Features, Boys_Features, Unisex_Features = (
    df.loc[shoe_numbers]
    for shoe_numbers in (Women_Shoes, Men_Shoes, Girls_Shoes, Boys_Shoes, Unisex_Shoes)
)

### Build the Model

In [8]:
# One Euclidean nearest-neighbour model per gender group, each fitted on that
# group's feature rows and configured to return 4 neighbours per query.
eucl_W, eucl_M, eucl_G, eucl_B, eucl_U = (
    NearestNeighbors(n_neighbors=4, metric='euclidean').fit(group_features)
    for group_features in (
        Women_Features,
        Men_Features,
        Girls_Features,
        Boys_Features,
        Unisex_Features,
    )
)

### Query the Models for Each Shoe's Nearest Neighbors

In [9]:
# Query the women's model with the same rows it was fitted on.
# indices_W holds row positions into Women_Features; distances holds the
# matching distances.
distances, indices_W = eucl_W.kneighbors(X=Women_Features, return_distance=True)

In [10]:
# Query the men's model with the same rows it was fitted on.
# indices_M holds row positions into Men_Features; `distances` is rebound to
# this call's distances.
distances, indices_M = eucl_M.kneighbors(X=Men_Features, return_distance=True)

In [11]:
# Query the girls', boys' and unisex models with the rows each was fitted on.
# `distances` is rebound on every call, so after this cell it holds only the
# unisex distances; each indices_* array holds row positions into the
# matching *_Features frame.
distances, indices_G = eucl_G.kneighbors(X=Girls_Features, return_distance=True)
distances, indices_B = eucl_B.kneighbors(X=Boys_Features, return_distance=True)
distances, indices_U = eucl_U.kneighbors(X=Unisex_Features, return_distance=True)

### Converts the Indices to Shoe Number

In [12]:
# Translate the positional neighbour indices from kneighbors() into shoe
# numbers: each indices_* array holds row positions within the matching
# *_Features frame, whose index carries the shoe numbers.
#
# The index is converted to a NumPy array (.values) BEFORE the 2-D fancy
# indexing. Indexing a pandas Index directly with a 2-D array was deprecated
# in pandas 1.x and raises ValueError in pandas >= 2.0.
#
# NOTE(review): column 0 is presumably the query shoe itself (its own nearest
# neighbour at distance 0). Exact duplicate feature vectors could put a
# different shoe there -- confirm before relying on column 0 as the shoe id.
Data_Wom = pd.DataFrame(Women_Features.index.values[indices_W])
Data_Men = pd.DataFrame(Men_Features.index.values[indices_M])
Data_Gir = pd.DataFrame(Girls_Features.index.values[indices_G])
Data_Boy = pd.DataFrame(Boys_Features.index.values[indices_B])
Data_Uni = pd.DataFrame(Unisex_Features.index.values[indices_U])

### Concatenates the Data and Saves the csv

In [13]:
# Stack the five per-group neighbour tables into one frame, discarding the
# per-group row labels in favour of a fresh 0..n-1 index.
Final_Data = pd.concat(
    objs=[Data_Wom, Data_Men, Data_Gir, Data_Boy, Data_Uni],
    ignore_index=True,
)

In [14]:
# Order the rows by column 0 and renumber them 0..n-1.
# reset_index(drop=True) discards the old row labels directly, instead of
# materialising them as an 'index' column and deleting it afterwards. That
# round trip also turned the integer column labels into an object-dtype Index.
Final_Data = Final_Data.sort_values(by=0).reset_index(drop=True)
Final_Data.head()

Unnamed: 0,0,1,2,3
0,1,15789,12704,517
1,2,5385,9,3695
2,3,12865,3376,3130
3,4,21493,25038,3124
4,5,1349,16917,1338


In [15]:
# Persist the neighbour table. index=True is the default, spelled out here
# because it means the row index is written as an unnamed first column.
Final_Data.to_csv('Final_Output.csv', index=True)