In [1]:
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [3]:
# Load the eyewear dataset from a CSV file given by a relative path.
eyewear_csv = 'eyewear_data_full_v1.csv'
df = pd.read_csv(eyewear_csv)

In [4]:
# Show the loaded DataFrame as this cell's output.
df

Unnamed: 0,id,image_src,shape,lens_width,gender,color,face_shape,skin,width
0,7.146340e+12,https://cdn.shopify.com/s/files/1/0387/1417/96...,rectangle,55,male,gold,oval,dark,54
1,7.146340e+12,https://cdn.shopify.com/s/files/1/0387/1417/96...,rectangle,55,female,black,oval,dark,54
2,7.146340e+12,https://cdn.shopify.com/s/files/1/0387/1417/96...,rectangle,55,male,black,oval,dark,54
3,7.147830e+12,https://cdn.shopify.com/s/files/1/0387/1417/96...,rectangle,53,male,black,oval,dark,52
4,6.975850e+12,https://cdn.shopify.com/s/files/1/0387/1417/96...,rectangle,52,female,pink,oval,light,51
...,...,...,...,...,...,...,...,...,...
533,5.310250e+12,https://cdn.shopify.com/s/files/1/0387/1417/96...,aviator,51,male,black,round,light,52
534,5.310250e+12,https://cdn.shopify.com/s/files/1/0387/1417/96...,aviator,53,male,black,round,light,54
535,5.310260e+12,https://cdn.shopify.com/s/files/1/0387/1417/96...,aviator,56,male,gold,round,light,57
536,5.320210e+12,https://cdn.shopify.com/s/files/1/0387/1417/96...,aviator,56,male,gold,round,light,57


In [5]:
# Attributes used for matching: three categorical columns plus the numeric
# 'width' column.
feature_columns = ['gender', 'face_shape', 'skin', 'width']
features = df[feature_columns]

In [6]:
# One-hot encode the categorical attributes. The fitted encoder is kept so the
# same transformation can be applied to query items later.
categorical_features = ['gender', 'face_shape', 'skin']
encoder = OneHotEncoder()
encoded_sparse = encoder.fit_transform(features[categorical_features])
# Convert the encoder output into a dense array.
encoded_features = encoded_sparse.toarray()

In [7]:
# Standardise the single numeric attribute. The fitted scaler is kept so query
# items can be scaled the same way.
scaler = StandardScaler()
width_frame = features[['width']]
numerical_features = scaler.fit_transform(width_frame)

In [8]:
# Join the one-hot block and the scaled width column side by side, giving one
# feature row per DataFrame row.
all_features = np.concatenate((encoded_features, numerical_features), axis=1)

In [20]:
# Index every row of the feature matrix; each query will return this many
# neighbours.
n_recommendations = 5
model = NearestNeighbors(n_neighbors=n_recommendations)
model.fit(all_features)

In [21]:
def preprocess_item(item, encoder, scaler, categorical_cols=None, numerical_cols=None):
    """Turn one raw item into a feature row compatible with the fitted model.

    Parameters
    ----------
    item : mapping
        Raw attributes of a single item/user, keyed by column name.
    encoder : fitted transformer
        Object with a ``transform`` method for the categorical columns.
    scaler : fitted transformer
        Object with a ``transform`` method for the numerical columns.
    categorical_cols, numerical_cols : sequence of str, optional
        Columns passed to ``encoder`` / ``scaler``. When omitted they are read
        from the transformer's ``feature_names_in_`` attribute, so the
        function also works with transformers reloaded from disk, where the
        notebook's global column lists are not defined. If that attribute is
        absent, the notebook defaults are used (the global
        ``categorical_features`` and ``['width']``).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, n_encoded + n_numerical)``: encoded categorical
        values followed by the scaled numerical values.

    Raises
    ------
    KeyError
        If ``item`` lacks any of the required columns.
    """
    if categorical_cols is None:
        categorical_cols = getattr(encoder, 'feature_names_in_', None)
        if categorical_cols is None:
            categorical_cols = categorical_features
    if numerical_cols is None:
        numerical_cols = getattr(scaler, 'feature_names_in_', None)
        if numerical_cols is None:
            numerical_cols = ['width']
    categorical_cols = list(categorical_cols)
    numerical_cols = list(numerical_cols)

    item_df = pd.DataFrame([item])
    missing = [
        col for col in categorical_cols + numerical_cols
        if col not in item_df.columns
    ]
    if missing:
        raise KeyError(f"item is missing required feature(s): {missing}")

    encoded_item = encoder.transform(item_df[categorical_cols])
    # Encoders may return a sparse matrix or a dense array depending on how
    # they were configured; only densify when needed.
    if hasattr(encoded_item, 'toarray'):
        encoded_item = encoded_item.toarray()
    scaled_numerical_item = scaler.transform(item_df[numerical_cols])
    return np.hstack([encoded_item, scaled_numerical_item])

In [43]:
# Example query: the attributes of the person we want recommendations for.
new_user = dict(gender='female', face_shape='heart', skin='light', width=54)

In [44]:
# Encode the query with the same fitted encoder and scaler used for the
# training rows, then force a (1, n_features) shape for the neighbour lookup.
query_vector = preprocess_item(new_user, encoder, scaler)
new_user_features = query_vector.reshape(1, -1)

# Look up the nearest rows of the fitted model for this single query.
distances, indices = model.kneighbors(new_user_features)

In [45]:
# `indices` has one row per query; only one query was issued, so row 0 holds
# the positions of ALL returned neighbours (not just a single best one).
best_match_index = indices[0, :]

# Select the corresponding DataFrame rows by position.
best_match = df.iloc[best_match_index, :]

In [46]:
# List the image URL of every recommended row, in the order returned by the
# neighbour lookup.
print('Best matching glasses:')
for image_url in best_match['image_src']:
    print(f"Image URL: {image_url}")

Best matching glasses:
Image URL: https://cdn.shopify.com/s/files/1/0387/1417/9628/files/ORP1900849_1.jpg?v=1705277330
Image URL: https://cdn.shopify.com/s/files/1/0387/1417/9628/products/LSO1926615_1_9a43ecd1-cb4c-4af3-b9bb-d32eba384b83.jpg?v=1603958540
Image URL: https://cdn.shopify.com/s/files/1/0387/1417/9628/files/GG1476OK001_1.jpg?v=1689328302
Image URL: https://cdn.shopify.com/s/files/1/0387/1417/9628/products/ORP2100863_1.jpg?v=1627363005
Image URL: https://cdn.shopify.com/s/files/1/0387/1417/9628/files/ORP1800833_1.jpg?v=1705277234


In [47]:
# Persist the fitted neighbour model together with the encoder and scaler:
# a query must be transformed by the same fitted encoder and scaler before it
# can be passed to the model, so all three artifacts are needed at load time.
joblib.dump(model, 'nearest_neighbors_model.pkl')
joblib.dump(encoder, 'encoder.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']