In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
# Read the Sephora catalogue export and preview its first three rows.
# NOTE(review): the path below is absolute and machine-specific; the file must
# exist at exactly this location for the cell to run.
sephora_df = pd.read_csv(
    r"D:\Gorian\Data science basics\Portfolio\sephora_website_dataset.csv",
    low_memory=False,
)
sephora_df.head(3)

Unnamed: 0,id,brand,category,name,size,rating,number_of_reviews,love,price,value_price,...,MarketingFlags,MarketingFlags_content,options,details,how_to_use,ingredients,online_only,exclusive,limited_edition,limited_time_offer
0,2218774,Acqua Di Parma,Fragrance,Blu Mediterraneo MINIATURE Set,5 x 0.16oz/5mL,4.0,4,3002,66.0,75.0,...,True,online only,no options,This enchanting set comes in a specially handc...,Suggested Usage:-Fragrance is intensified by t...,Arancia di Capri Eau de Toilette: Alcohol Dena...,1,0,0,0
1,2044816,Acqua Di Parma,Cologne,Colonia,0.7 oz/ 20 mL,4.5,76,2700,66.0,66.0,...,True,online only,- 0.7 oz/ 20 mL Spray - 1.7 oz/ 50 mL Eau d...,An elegant timeless scent filled with a fresh-...,no instructions,unknown,1,0,0,0
2,1417567,Acqua Di Parma,Perfume,Arancia di Capri,5 oz/ 148 mL,4.5,26,2600,180.0,180.0,...,True,online only,- 1oz/30mL Eau de Toilette - 2.5 oz/ 74 mL E...,Fragrance Family: Fresh Scent Type: Fresh Citr...,no instructions,Alcohol Denat.- Water- Fragrance- Limonene- Li...,1,0,0,0


###### Selecting features

In [5]:
# Show every column of the loaded catalogue before picking the ones to use as features.
sephora_df.columns

Index(['id', 'brand', 'category', 'name', 'size', 'rating',
       'number_of_reviews', 'love', 'price', 'value_price', 'URL',
       'MarketingFlags', 'MarketingFlags_content', 'options', 'details',
       'how_to_use', 'ingredients', 'online_only', 'exclusive',
       'limited_edition', 'limited_time_offer'],
      dtype='object')

In [6]:
# Keep only the text columns that describe a product. The explicit .copy()
# makes `features` an independent frame, so adding or replacing columns on it
# later neither triggers SettingWithCopyWarning nor depends on whether pandas
# handed back a view or a copy of `sephora_df`.
features = sephora_df[['brand', 'category', 'name']].copy()

In [7]:
def combine_features(row):
    """Return the brand, category and name of ``row`` joined by single spaces.

    ``row`` is anything indexable by the keys 'brand', 'category' and 'name'
    (for example one row passed in by ``DataFrame.apply(..., axis=1)``).
    All three values must be strings; anything else raises ``TypeError``.
    """
    parts = (row['brand'], row['category'], row['name'])
    return ' '.join(parts)

In [8]:
# Build one text string per product (brand + category + name) for the vectorizer.
# assign() returns a new frame instead of writing a column into a frame that was
# sliced out of `sephora_df`, which is what produced SettingWithCopyWarning.
features = features.assign(
    combined_features=features.apply(combine_features, axis=1)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [9]:
# Narrow the frame to the product name and its combined text.
features = features.loc[:, ['name', 'combined_features']]

In [51]:
# Display the frame that will be fed to the vectorizer (name + combined text).
features

Unnamed: 0_level_0,name,combined_features
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Blu Mediterraneo MINIATURE Set,Blu Mediterraneo MINIATURE Set,Acqua Di Parma Fragrance Blu Mediterraneo MINI...
Colonia,Colonia,Acqua Di Parma Cologne Colonia
Arancia di Capri,Arancia di Capri,Acqua Di Parma Perfume Arancia di Capri
Mirto di Panarea,Mirto di Panarea,Acqua Di Parma Perfume Mirto di Panarea
Colonia Miniature Set,Colonia Miniature Set,Acqua Di Parma Fragrance Colonia Miniature Set
...,...,...
The Rose Gold Mask,The Rose Gold Mask,SEPHORA COLLECTION Face Masks The Rose Gold Mask
Give Me Some Sugar Colorful Gloss Balm Set,Give Me Some Sugar Colorful Gloss Balm Set,SEPHORA COLLECTION Lip Sets Give Me Some Sugar...
Weekend Warrior Tone Up Cream,Weekend Warrior Tone Up Cream,SEPHORA COLLECTION Tinted Moisturizer Weekend ...
Gift Card,Gift Card,SEPHORA COLLECTION no category Gift Card


In [36]:
# Label every row with its product name; the 'name' column itself is kept.
features = features.set_index('name', drop=False)

In [49]:
# Check that the row labels are now the product names.
features.index

Index(['Blu Mediterraneo MINIATURE Set', 'Colonia', 'Arancia di Capri',
       'Mirto di Panarea', 'Colonia Miniature Set', 'Fico di Amalfi',
       'Rosa Nobile', 'Colonia Essenza', 'Peonia Nobile Hair Mist',
       'Rosa Nobile Hair Mist',
       ...
       'The Sculptor - Airbrush Sponge',
       'Moisturizing Body Milk - Beach Vibes',
       'Fizzing Bath Star Mini - Surprise Flowers',
       'Face the Day: Full Face Brush Set',
       'Exfoliating Body Granita Scrub - Surprise Flowers',
       'The Rose Gold Mask', 'Give Me Some Sugar Colorful Gloss Balm Set',
       'Weekend Warrior Tone Up Cream', 'Gift Card',
       'Happy Birthday Gift Card'],
      dtype='object', name='name', length=9168)

###### Modeling

In [41]:
# Turn each product's combined text into a vector of token counts.
count = CountVectorizer()
count_matrix = count.fit_transform(features['combined_features'])

# Cosine similarity of every product against every other product; with a
# single argument the matrix is compared with itself.
cosine_sim = cosine_similarity(count_matrix)

In [42]:
# Inspect the similarity matrix; each diagonal entry compares a product with itself.
cosine_sim

array([[1.        , 0.47434165, 0.47140452, ..., 0.        , 0.        ,
        0.        ],
       [0.47434165, 1.        , 0.59628479, ..., 0.        , 0.        ,
        0.        ],
       [0.47140452, 0.59628479, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.27216553,
        0.23570226],
       [0.        , 0.        , 0.        , ..., 0.27216553, 1.        ,
        0.8660254 ],
       [0.        , 0.        , 0.        , ..., 0.23570226, 0.8660254 ,
        1.        ]])

In [45]:
# Map row positions (0, 1, 2, ...) to product names, so a position in the
# similarity matrix can be translated to a name and back inside the
# recommendation function.
indices = features.index.to_series().reset_index(drop=True)

# defining the function that takes in a product name as input
# and returns the most similar products (the top 10 by default)
def recommendations(name, cosine_sim = cosine_sim, top_n = 10):
    """Return the names of the products most similar to ``name``.

    Parameters
    ----------
    name : str
        Product name to look up in the module-level ``indices`` Series.
        If several rows carry the same name, the first one is used.
    cosine_sim : array-like, optional
        Similarity matrix indexed by row position. Row ``i`` is assumed to
        hold the scores of the product at ``indices.iloc[i]`` against every
        product, in the same order as ``indices``. The default value is
        captured when this cell is executed, so re-run the cell after
        recomputing the matrix.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    list
        Up to ``top_n`` product names, most similar first. The queried row
        itself is never included.

    Raises
    ------
    IndexError
        If no product is called ``name``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative, got {!r}".format(top_n))

    # getting the row position of the first product that matches the name
    matches = indices[indices == name].index
    if len(matches) == 0:
        raise IndexError("no product named {!r} was found".format(name))
    idx = matches[0]

    # similarity of every product to the queried one, keyed by row position
    score_series = pd.Series(cosine_sim[idx])

    # Drop the queried row by position rather than assuming it sorts first:
    # another product with identical text also scores 1.0 and could take the
    # top slot, which would leave the queried product in its own results.
    # A stable sort keeps tied scores in their original row order.
    ranked = score_series.drop(idx).sort_values(ascending = False, kind = 'mergesort')

    # translate the best-scoring row positions back into product names
    top_indexes = list(ranked.index[:top_n])
    return indices.iloc[top_indexes].tolist()

## Testing the model

In [48]:
# Spot-check the recommender on a primer product.
recommendations('Pore Corrector Anti-Aging Primer')

['GENIUS Ultimate Anti-Aging Vitamin C+ Serum',
 'Anti-Aging Primer',
 'Lifting Primer Anti-Aging Serum',
 'Premier Cru Anti-Aging Serum',
 'Advanced Anti-Aging Repairing Oil',
 'Rejuvenating Anti-Aging Serum',
 'PERFECT Clarifying Pore Corrector Mask',
 'Plantscription™ Anti-Aging Power Serum',
 'Gel Primer',
 'CARAT FACE']

In [52]:
# Spot-check the recommender on a candle product.
recommendations('Mimosa & Cardamom Candle')

['Grapefruit Candle',
 'Orange Blossom Candle',
 'Red Roses Candle',
 'Honeysuckle & Davana Home Candle',
 'Peony & Blush Suede Candle',
 'English Pear & Freesia Candle',
 'Red Roses Travel Candle',
 'English Oak & Redcurrant Candle',
 'Blackberry & Bay Travel Candle',
 'Nectarine Blossom & Honey\xa0Candle']