In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
from numpy.linalg import norm
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.decomposition import PCA
from sentence_transformers import SentenceTransformer
from sentence_transformers import util


import skillsnetwork

sns.set_context('notebook')
sns.set_style('white')

In [2]:
# This function will allow us to easily plot data taking in x values, y values, and a title
def plotter(x, y, title):
    plt.plot(x, y)
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title(title)
    plt.show()

In [3]:
df = pd.read_csv("final_perfume_data.csv", encoding="unicode_escape")
df.head()

Unnamed: 0,Name,Brand,Description,Notes,Image URL
0,Tihota Eau de Parfum,Indult,"Rapa Nui for sugar, Tihota is, quite simply, ...","Vanilla bean, musks",https://static.luckyscent.com/images/products/...
1,Sola Parfum,Di Ser,A tribute to the expanse of space extending f...,"Lavender, Yuzu, Lemongrass, Magnolia, Geraniu...",https://static.luckyscent.com/images/products/...
2,Kagiroi Parfum,Di Ser,An aromatic ode to the ancient beauty of Japa...,"Green yuzu, green shikuwasa, sansho seed, cor...",https://static.luckyscent.com/images/products/...
3,Velvet Fantasy Eau de Parfum,Montale,Velvet Fantasy is a solar fragrance where cit...,"tangerine, pink pepper, black coffee, leat...",https://static.luckyscent.com/images/products/...
4,A Blvd. Called Sunset Eau de Parfum,A Lab on Fire,There's no way A Lab On Fire could relocate t...,"Bergamot, almond, violet, jasmine, leather, s...",https://static.luckyscent.com/images/products/...


In [4]:
list(df.Notes[0:10])

[' Vanilla bean, musks',
 ' Lavender, Yuzu, Lemongrass, Magnolia, Geranium, Jasmine, Frankincense, Myrrh',
 ' Green yuzu, green shikuwasa, sansho seed, coriander, ylang-ylang, shiso, rosewood, vetiver, hinoki, cypriol, patchouli, agarwood',
 ' tangerine,  pink pepper,  black coffee,  leather,  violet,  jasmine,  lily of the valley,  heliotrope powder,  vanilla,  amber, sandalwood,  toffee,  musk,  oakmoss',
 ' Bergamot, almond, violet, jasmine, leather, sandalwood, vanilla, tonka',
 ' Orange flower, neroli, honeysuckle, warm milk, pastry, salicylates, sandalwood, vanilla bean, heliotrope',
 ' Timur JE, Soap Foam Accord (Aldehydes & Musk), Pink Pepper, Jasmine e-pure, Rose Superessence, Lily-of-the-valley Accord, Patchouli, Moss Absolute, Sandalwood Accord & Orcanox',
 ' Tobacco, hay, elemi, copaiba, olibanum, nutmeg, black pepper, castoreum, atlas cedar, oakmoss, cognac, ambroxan, norlimbanol, cinnamon, cumin seed, ash',
 ' Saffron, champaca, fir balsam, beeswax, amber, damask rose, ro

In [5]:
df.rename(columns={"ï»¿Name": "Name"}, inplace=True)
df['Name'] = df['Brand'] + " - " + df['Name']
df.drop(labels=['Description', 'Image URL', 'Brand'], axis=1, inplace=True)
df.head()

Unnamed: 0,Name,Notes
0,Indult - Tihota Eau de Parfum,"Vanilla bean, musks"
1,Di Ser - Sola Parfum,"Lavender, Yuzu, Lemongrass, Magnolia, Geraniu..."
2,Di Ser - Kagiroi Parfum,"Green yuzu, green shikuwasa, sansho seed, cor..."
3,Montale - Velvet Fantasy Eau de Parfum,"tangerine, pink pepper, black coffee, leat..."
4,A Lab on Fire - A Blvd. Called Sunset Eau de P...,"Bergamot, almond, violet, jasmine, leather, s..."


In [6]:
df.Notes.isnull().sum()

80

In [7]:
df.dropna(inplace = True)
df.reset_index(inplace=True, drop = True)
df.shape

(2111, 2)

In [8]:
words = ["Perfume Oil", "Extrait", "Travel", "Hair", "Body", "Hand", "Intense", "Intensivo", "Oil"] # check for these words in perfume names

index_to_drop = []
for index, name in enumerate(df.Name):
    if any(word.lower() in name.lower() for word in words):
        index_to_drop.append(index)

In [9]:
df.drop(index_to_drop, axis=0, inplace=True)
df.reset_index(inplace=True, drop = True)
df.shape

(1612, 2)

In [10]:
df.Notes = df.Notes.apply(lambda x: str(x))
notes = df.Notes.to_list()
len(notes)

1612

In [11]:
model = SentenceTransformer('all-MiniLM-L6-v2')

note_embeddings = model.encode(notes, show_progress_bar=True, batch_size=64)

Batches:   0%|          | 0/26 [00:00<?, ?it/s]

In [12]:
print(note_embeddings.shape)

print(note_embeddings[0][:50]) # first 50 values in the embedding of "Vanilla bean, musks"

(1612, 384)
[-0.00921524 -0.00588333  0.07965603  0.01108184  0.0920339  -0.06118878
  0.06980839  0.04193391  0.01312284 -0.00108816  0.06511044 -0.08008223
  0.01984981 -0.13829458 -0.02251236 -0.00748578  0.12410549  0.06448846
  0.00684044 -0.01724513  0.04117164  0.01706347  0.00854874  0.06719033
 -0.05611396  0.01932547  0.02498643 -0.02417995 -0.03028038 -0.12058594
 -0.01737692  0.03183731  0.01314924  0.02147919 -0.10208161  0.0320526
 -0.01535685 -0.02603945  0.06542445 -0.01513855  0.01592387 -0.06788438
  0.01501888 -0.01736868 -0.06789885 -0.01907123  0.01838118 -0.0566425
 -0.00130135 -0.01895572]


In [13]:
cosine_scores = util.cos_sim(note_embeddings, note_embeddings)
cosine_scores.shape

torch.Size([1612, 1612])

In [14]:
pairs = []

for i in range(len(cosine_scores)-1):
    for j in range(i+1, len(cosine_scores)):
        pairs.append({"index": [i,j], "score": cosine_scores[i][j]})

len(pairs)

1298466

In [15]:
sorted_pairs = sorted(pairs, key=lambda x: x['score'], reverse=True)

for pair in sorted_pairs[0:10]:
    i, j = pair['index']
    print(f"{df.iloc[i, 0]} | {df.iloc[j, 0]} \n Score: {pair['score']:.2f} \n")

Carthusia - Fiori di Capri Parfum | Carthusia - Fiori di Capri Eau de Parfum 
 Score: 1.00 

Carthusia - Mediterraneo Parfum | Carthusia - Mediterraneo Eau de Parfum 
 Score: 1.00 

Maison Francis Kurkdjian - Gentle fluidity Silver Eau de Parfum | Maison Francis Kurkdjian - gentle Fluidity Gold Eau de Parfum 
 Score: 1.00 

Comme des Garcons - 2 Candle | Comme des Garcons - 2 Eau de Parfum 
 Score: 1.00 

Roja Parfums - Elysium Parfum Cologne | Roja Parfums - Vetiver Parfum Cologne 
 Score: 1.00 

Juliette Has a Gun - Not A Perfume Superdose Eau de Parfum | Juliette Has a Gun - Not a Perfume Eau de Parfum 
 Score: 1.00 

Ormonde Jayne - Ormonde Elixir Parfum | Ormonde Jayne - Ormonde Woman Eau de Parfum 
 Score: 0.98 

PARFUMS DE NICOLAI - Incense Oud Eau de Parfum | PARFUMS DE NICOLAI - Oud Sublime Elixir de Parfum 
 Score: 0.97 

Ormonde Jayne - Ta'if Elixir Parfum | Ormonde Jayne - Ta'if Eau de Parfum 
 Score: 0.96 

J-Scent - Hisui (Jade) Eau de Parfum | J-Scent - Shaft of Light Ea

In [16]:
my_perfumes = pd.DataFrame([['Jo Malone - English Pear & Freesia', 'Pear, Melon, Freesia, Rose, Musk, Patchouli, Rhuburb, Amber'], 
                      ['Guerlain - Aqua Allegoria Nerolia Vetiver Eau de Toilette', 'Basil, Vetiver, Fig Accord, Neroli'],
                      ['Chloe Eau de Parfum', 'Peony, Litchi, Freesia, Rose, Lily-of-the-Valley, Magnolia, Virginia Cedar, Amber.']                     
                     ],
                   columns=df.columns)

my_perfumes

Unnamed: 0,Name,Notes
0,Jo Malone - English Pear & Freesia,"Pear, Melon, Freesia, Rose, Musk, Patchouli, R..."
1,Guerlain - Aqua Allegoria Nerolia Vetiver Eau ...,"Basil, Vetiver, Fig Accord, Neroli"
2,Chloe Eau de Parfum,"Peony, Litchi, Freesia, Rose, Lily-of-the-Vall..."


In [17]:
notes = list(my_perfumes.Notes)

model = SentenceTransformer('all-MiniLM-L6-v2')
my_embeddings = model.encode(notes, show_progress_bar=True)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [18]:
cosine_scores = util.cos_sim(my_embeddings, note_embeddings)

In [19]:
my_pairs=[]

for i in range(cosine_scores.shape[0]):
    for j in range(cosine_scores.shape[1]):
        my_pairs.append({"index": [i,j], "score": cosine_scores[i][j]})
        
        
my_sorted_pairs = sorted(my_pairs, key=lambda x: x['score'], reverse=True)

In [20]:
for i in range(cosine_scores.shape[0]):

    print(f"Recommended for {my_perfumes.iloc[i, 0]}:")
    my_pairs = []
    for j in range(cosine_scores.shape[1]):
        my_pairs.append({"index": j, "score": cosine_scores[i][j]})
        my_sorted_pairs = sorted(my_pairs, key=lambda x: x['score'], reverse=True)
        
    for no, pair in enumerate(my_sorted_pairs[:5]):
        print(f" {no+1}. {df.iloc[pair['index'], 0]} (Score: {pair['score']:.2f})")
    print("\n")

Recommended for Jo Malone - English Pear & Freesia:
 1. Alexandre. J - Silver Ombre Eau de Parfum (Score: 0.80)
 2. Montale - Starry Nights Eau de Parfum (Score: 0.79)
 3. BDK Parfums - Bouquet de Hongrie Eau de Parfum (Score: 0.79)
 4. Jovoy Paris - Psychedelique Eau de Parfum (Score: 0.79)
 5. L'Artisan Parfumeur - Champ de Fleurs Eau de Cologne (Score: 0.78)


Recommended for Guerlain - Aqua Allegoria Nerolia Vetiver Eau de Toilette:
 1. Liquides Imaginaires - Tellus Eau de Parfum (Score: 0.61)
 2. BYREDO - Oud Immortel Eau de Parfum (Score: 0.61)
 3. Vilhelm Parfumerie - Basilico & Fellini Eau de Parfum (Score: 0.60)
 4. Nanadebary - Green Eau de Parfum (Score: 0.60)
 5. Reims Parfums - L'Eau de Reims (Score: 0.58)


Recommended for Chloe Eau de Parfum:
 1. Caron - French Cancan - Eau de Parfum (Score: 0.77)
 2. Ormonde Jayne - Frangipani Eau de Parfum (Score: 0.76)
 3. Floris London - 1962 Eau de Parfum (Score: 0.74)
 4. Parfums MDCI - Rose de Siwa Eau de Parfum (Score: 0.73)
 5. 

In [21]:
import pickle
with open('perfume_recommendation.sav', 'wb') as f:
    pickle.dump(model, f)
with open('perfume_recommendation.sav', 'rb') as f:
    model = pickle.load(f)


In [22]:
import pickle
from sentence_transformers import SentenceTransformer, util

# Load the saved model
with open('perfume_recommendation.sav', 'rb') as f:
    model = pickle.load(f)

# Get user input
user_perfume = 'Jo Malone - English Pear & Freesia'
user_notes = 'Pear, Melon, Freesia, Rose, Musk, Patchouli, Rhuburb, Amber'

# Encode user notes
user_embeddings = model.encode([user_notes], show_progress_bar=True)

# Calculate cosine similarity scores between user embeddings and dataset embeddings
cosine_scores = util.cos_sim(user_embeddings, note_embeddings)

# Sort the scores in descending order and recommend top 5 perfumes
my_pairs=[]
for j in range(cosine_scores.shape[1]):
    my_pairs.append({"index": j, "score": cosine_scores[0][j]})
my_sorted_pairs = sorted(my_pairs, key=lambda x: x['score'], reverse=True)

print(f"Recommended for {user_perfume}:")
for no, pair in enumerate(my_sorted_pairs[:5]):
    print(f" {no+1}. {df.iloc[pair['index'], 0]} (Score: {pair['score']:.2f})")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Recommended for Jo Malone - English Pear & Freesia:
 1. Alexandre. J - Silver Ombre Eau de Parfum (Score: 0.80)
 2. Montale - Starry Nights Eau de Parfum (Score: 0.79)
 3. BDK Parfums - Bouquet de Hongrie Eau de Parfum (Score: 0.79)
 4. Jovoy Paris - Psychedelique Eau de Parfum (Score: 0.79)
 5. L'Artisan Parfumeur - Champ de Fleurs Eau de Cologne (Score: 0.78)
