In [1]:
#!pip install rake_nltk
#!pip install --upgrade pip

In [2]:
import nltk
import pandas as pd
import re
import unicodedata
import numpy as np

from rake_nltk import Rake
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer #tokenizes a collection of words extracted from a text doc
from ast import literal_eval #This evaluates whether an expresion is a Python datatype or not


In [3]:
cosmetics_path = "./data/cosmetics.csv"
raw_df = pd.read_csv(cosmetics_path)
raw_df.head()

Unnamed: 0,Label,Brand,Name,Price,Rank,Ingredients,Combination,Dry,Normal,Oily,Sensitive
0,Moisturizer,LA MER,Crème de la Mer,175,4.1,"Algae (Seaweed) Extract, Mineral Oil, Petrolat...",1,1,1,1,1
1,Moisturizer,SK-II,Facial Treatment Essence,179,4.1,"Galactomyces Ferment Filtrate (Pitera), Butyle...",1,1,1,1,1
2,Moisturizer,DRUNK ELEPHANT,Protini™ Polypeptide Cream,68,4.4,"Water, Dicaprylyl Carbonate, Glycerin, Ceteary...",1,1,1,1,0
3,Moisturizer,LA MER,The Moisturizing Soft Cream,175,3.8,"Algae (Seaweed) Extract, Cyclopentasiloxane, P...",1,1,1,1,1
4,Moisturizer,IT COSMETICS,Your Skin But Better™ CC+™ Cream with SPF 50+,38,4.1,"Water, Snail Secretion Filtrate, Phenyl Trimet...",1,1,1,1,1


In [4]:
raw_df.describe()

Unnamed: 0,Price,Rank,Combination,Dry,Normal,Oily,Sensitive
count,1472.0,1472.0,1472.0,1472.0,1472.0,1472.0,1472.0
mean,55.584239,4.153261,0.65625,0.61413,0.652174,0.607337,0.513587
std,45.014429,0.633918,0.47512,0.486965,0.476442,0.488509,0.499985
min,3.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,30.0,4.0,0.0,0.0,0.0,0.0,0.0
50%,42.5,4.3,1.0,1.0,1.0,1.0,1.0
75%,68.0,4.5,1.0,1.0,1.0,1.0,1.0
max,370.0,5.0,1.0,1.0,1.0,1.0,1.0


In [5]:
raw_df.isnull().sum()

Label          0
Brand          0
Name           0
Price          0
Rank           0
Ingredients    0
Combination    0
Dry            0
Normal         0
Oily           0
Sensitive      0
dtype: int64

In [6]:
raw_df.columns

Index(['Label', 'Brand', 'Name', 'Price', 'Rank', 'Ingredients', 'Combination',
       'Dry', 'Normal', 'Oily', 'Sensitive'],
      dtype='object')

In [7]:
with_types = raw_df.copy()
def add_skin_types_names(row):
    types = []
    if row["Combination"]:
        types.append("combination")
    if row["Dry"]:
        types.append("dry")
    if row["Normal"]:
        types.append("normal")
    if row["Oily"]:
        types.append("oily")
    if row["Sensitive"]:
        types.append("sensitive")

    if not row["Combination"] and not row["Dry"] and not row["Normal"] and not row["Oily"] and not row["Sensitive"]:
        types.append("combination")
        types.append("dry")
        types.append("normal")
        types.append("oily")
        types.append("sensitive")
        types.append("alltypes")
    if row["Combination"] and row["Dry"] and row["Normal"] and row["Oily"]and row["Sensitive"]:
        types.append("alltypes")

    return " ".join(types)

with_types["Skin Types"] = with_types.apply(add_skin_types_names, axis=1)
with_types.head()


Unnamed: 0,Label,Brand,Name,Price,Rank,Ingredients,Combination,Dry,Normal,Oily,Sensitive,Skin Types
0,Moisturizer,LA MER,Crème de la Mer,175,4.1,"Algae (Seaweed) Extract, Mineral Oil, Petrolat...",1,1,1,1,1,combination dry normal oily sensitive alltypes
1,Moisturizer,SK-II,Facial Treatment Essence,179,4.1,"Galactomyces Ferment Filtrate (Pitera), Butyle...",1,1,1,1,1,combination dry normal oily sensitive alltypes
2,Moisturizer,DRUNK ELEPHANT,Protini™ Polypeptide Cream,68,4.4,"Water, Dicaprylyl Carbonate, Glycerin, Ceteary...",1,1,1,1,0,combination dry normal oily
3,Moisturizer,LA MER,The Moisturizing Soft Cream,175,3.8,"Algae (Seaweed) Extract, Cyclopentasiloxane, P...",1,1,1,1,1,combination dry normal oily sensitive alltypes
4,Moisturizer,IT COSMETICS,Your Skin But Better™ CC+™ Cream with SPF 50+,38,4.1,"Water, Snail Secretion Filtrate, Phenyl Trimet...",1,1,1,1,1,combination dry normal oily sensitive alltypes


In [8]:
#pasamos todas las columnas te tipo string a minusculas, si no son strings las deja tal cual
with_types = with_types.map(lambda content: content.lower() if type(content) == str else content)

In [9]:
with_types["Ingredients"]

0       algae (seaweed) extract, mineral oil, petrolat...
1       galactomyces ferment filtrate (pitera), butyle...
2       water, dicaprylyl carbonate, glycerin, ceteary...
3       algae (seaweed) extract, cyclopentasiloxane, p...
4       water, snail secretion filtrate, phenyl trimet...
                              ...                        
1467    water, alcohol denat., potassium cetyl phospha...
1468    water, isododecane, dimethicone, butyloctyl sa...
1469    water, dihydroxyacetone, glycerin, sclerocarya...
1470    water, dihydroxyacetone, propylene glycol, ppg...
1471                        visit the dermaflash boutique
Name: Ingredients, Length: 1472, dtype: object

In [10]:
def clean_ingredients(text):
    #quita los enters y los cambia por espacios en blanco
    text =text.replace("\n", " ")
    #si contiene "ingredientes:" divide el string en antes y despues de ingredients: gogemos lo que viene despes unicamente y con el strip removemos todos los espacios en blanco al principio y al final del string
    if "ingredients:" in text:
        text = text.split("ingredients:")[1].strip()
    #elimina las partes del texto que esten entre dos puntos : y ppunto siempre que no contenga una coma . esto esta hecho despues de los ingredientes a posta porque hubiera borrdo todo lo anterior y nunca hubiera entrado en ese if
    text = re.sub(r':((?!.*?,).*?)\.', '', text)
    text =text.replace(" -", ", ")
    text = text.replace(" =;", ",")
    if text.startswith("-"):
        text =text.replace("-", "")
    text =text.replace("(", "")
    text =text.replace(")", "")
    text =text.replace("*", "")
    list_of_text = text.split(", ")

    text = ", ".join([item.replace(" ", "") for item in list_of_text])
    if text.startswith("visit") or text.startswith("#name?"):
        return "No info"
    return text

with_types["Clean Ingredients"] = with_types["Ingredients"].apply(clean_ingredients)
with_types["Clean Ingredients"].head(50)

0     algaeseaweedextract, mineraloil, petrolatum, g...
1     galactomycesfermentfiltratepitera, butylenegly...
2     water, dicaprylylcarbonate, glycerin, cetearyl...
3     algaeseaweedextract, cyclopentasiloxane, petro...
4     water, snailsecretionfiltrate, phenyltrimethic...
5     water, saccharomyces/camelliasinensisleaf/clad...
6     water, glycerin, caprylic/caprictriglyceride, ...
7        100%unrefinedsclerocrayabirreamarulakerneloil.
8     water, glycerin, cyclohexasiloxane, squalane, ...
9     algaeseaweedextract, mineraloil, petrolatum, g...
10    water, glycerin, propyleneglycoldicaprylate/di...
11    caprylic/caprictriglyceridedicaprylylcarbonate...
12    water, dipropyleneglycol, glycerin, methltrime...
13    perseagratissimaextravirgin, coldpressedavocad...
14    water, lacticacid, propanediol, jojobaesters, ...
15    beste™no.9jellycleanser:water, sodiumlauroylme...
16    water, stearicacid, glycerin, c12-15alkylbenzo...
17    water, dimethicone, butyleneglycol, glycer

In [11]:
with_types["Clean Ingredients"].head(50)

0     algaeseaweedextract, mineraloil, petrolatum, g...
1     galactomycesfermentfiltratepitera, butylenegly...
2     water, dicaprylylcarbonate, glycerin, cetearyl...
3     algaeseaweedextract, cyclopentasiloxane, petro...
4     water, snailsecretionfiltrate, phenyltrimethic...
5     water, saccharomyces/camelliasinensisleaf/clad...
6     water, glycerin, caprylic/caprictriglyceride, ...
7        100%unrefinedsclerocrayabirreamarulakerneloil.
8     water, glycerin, cyclohexasiloxane, squalane, ...
9     algaeseaweedextract, mineraloil, petrolatum, g...
10    water, glycerin, propyleneglycoldicaprylate/di...
11    caprylic/caprictriglyceridedicaprylylcarbonate...
12    water, dipropyleneglycol, glycerin, methltrime...
13    perseagratissimaextravirgin, coldpressedavocad...
14    water, lacticacid, propanediol, jojobaesters, ...
15    beste™no.9jellycleanser:water, sodiumlauroylme...
16    water, stearicacid, glycerin, c12-15alkylbenzo...
17    water, dimethicone, butyleneglycol, glycer

In [12]:
def clean_names(text):
    normalized = unicodedata.normalize("NFD", text)
    without_accents = "".join(character for character in normalized if unicodedata.category(character) != "Mn")
    cleaned = re.sub(r"[^a-zA-Z0-9\s]", "", without_accents)
    return cleaned.lower()

In [13]:
with_types["Clean Name"] = with_types["Name"].apply(clean_names)
with_types[["Clean Name", "Name"]]

Unnamed: 0,Clean Name,Name
0,creme de la mer,crème de la mer
1,facial treatment essence,facial treatment essence
2,protini polypeptide cream,protini™ polypeptide cream
3,the moisturizing soft cream,the moisturizing soft cream
4,your skin but better cc cream with spf 50,your skin but better™ cc+™ cream with spf 50+
...,...,...
1467,yoghurt nourishing fluid veil face sunscreen b...,yoghurt nourishing fluid veil face sunscreen b...
1468,daily deflector waterlight broad spectrum spf ...,daily deflector™ waterlight broad spectrum spf...
1469,self tan dry oil spf 50,self tan dry oil spf 50
1470,pro light self tan bronzing mist,pro light self tan bronzing mist


In [14]:
with_types.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1472 entries, 0 to 1471
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Label              1472 non-null   object 
 1   Brand              1472 non-null   object 
 2   Name               1472 non-null   object 
 3   Price              1472 non-null   int64  
 4   Rank               1472 non-null   float64
 5   Ingredients        1472 non-null   object 
 6   Combination        1472 non-null   int64  
 7   Dry                1472 non-null   int64  
 8   Normal             1472 non-null   int64  
 9   Oily               1472 non-null   int64  
 10  Sensitive          1472 non-null   int64  
 11  Skin Types         1472 non-null   object 
 12  Clean Ingredients  1472 non-null   object 
 13  Clean Name         1472 non-null   object 
dtypes: float64(1), int64(6), object(7)
memory usage: 161.1+ KB


In [15]:
dataframe_cosine = with_types[["Label", "Brand", "Clean Name", "Clean Ingredients", "Skin Types", "Rank"]].copy()
dataframe_cosine.head()

Unnamed: 0,Label,Brand,Clean Name,Clean Ingredients,Skin Types,Rank
0,moisturizer,la mer,creme de la mer,"algaeseaweedextract, mineraloil, petrolatum, g...",combination dry normal oily sensitive alltypes,4.1
1,moisturizer,sk-ii,facial treatment essence,"galactomycesfermentfiltratepitera, butylenegly...",combination dry normal oily sensitive alltypes,4.1
2,moisturizer,drunk elephant,protini polypeptide cream,"water, dicaprylylcarbonate, glycerin, cetearyl...",combination dry normal oily,4.4
3,moisturizer,la mer,the moisturizing soft cream,"algaeseaweedextract, cyclopentasiloxane, petro...",combination dry normal oily sensitive alltypes,3.8
4,moisturizer,it cosmetics,your skin but better cc cream with spf 50,"water, snailsecretionfiltrate, phenyltrimethic...",combination dry normal oily sensitive alltypes,4.1


In [16]:
query = "no info"
dataframe_cosine[ dataframe_cosine["Clean Ingredients"].str.contains(query)]

Unnamed: 0,Label,Brand,Clean Name,Clean Ingredients,Skin Types,Rank


# Empezamos con las keywords desde aqui

In [17]:
dataframe_cosine["keywords"] = ""

In [18]:
# nltk.download('stopwords')

In [19]:
for index, row in dataframe_cosine.iterrows():
    ingredients = row["Clean Ingredients"]

    r = Rake()

    r.extract_keywords_from_text(ingredients)
    keyword_dict_scores = r.get_word_degrees()

    dataframe_cosine.at[index, "keywords"] = list(keyword_dict_scores.keys())

In [20]:
dataframe_cosine[["Clean Ingredients", "keywords"]]

Unnamed: 0,Clean Ingredients,keywords
0,"algaeseaweedextract, mineraloil, petrolatum, g...","[algaeseaweedextract, mineraloil, petrolatum, ..."
1,"galactomycesfermentfiltratepitera, butylenegly...","[galactomycesfermentfiltratepitera, butylenegl..."
2,"water, dicaprylylcarbonate, glycerin, cetearyl...","[water, dicaprylylcarbonate, glycerin, ceteary..."
3,"algaeseaweedextract, cyclopentasiloxane, petro...","[algaeseaweedextract, cyclopentasiloxane, petr..."
4,"water, snailsecretionfiltrate, phenyltrimethic...","[water, snailsecretionfiltrate, phenyltrimethi..."
...,...,...
1467,"water, alcoholdenat., potassiumcetylphosphate,...","[water, alcoholdenat, .,, potassiumcetylphosph..."
1468,"water, isododecane, dimethicone, butyloctylsal...","[water, isododecane, dimethicone, butyloctylsa..."
1469,"water, dihydroxyacetone, glycerin, sclerocarya...","[water, dihydroxyacetone, glycerin, sclerocary..."
1470,"water, dihydroxyacetone, propyleneglycol, ppg-...","[water, dihydroxyacetone, propyleneglycol, ppg..."


In [21]:
dataframe_cosine[dataframe_cosine["Clean Name"].duplicated()]

Unnamed: 0,Label,Brand,Clean Name,Clean Ingredients,Skin Types,Rank,keywords


In [22]:
dataframe_cosine  = dataframe_cosine.set_index("Clean Name")

In [23]:
dataframe_cosine

Unnamed: 0_level_0,Label,Brand,Clean Ingredients,Skin Types,Rank,keywords
Clean Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
creme de la mer,moisturizer,la mer,"algaeseaweedextract, mineraloil, petrolatum, g...",combination dry normal oily sensitive alltypes,4.1,"[algaeseaweedextract, mineraloil, petrolatum, ..."
facial treatment essence,moisturizer,sk-ii,"galactomycesfermentfiltratepitera, butylenegly...",combination dry normal oily sensitive alltypes,4.1,"[galactomycesfermentfiltratepitera, butylenegl..."
protini polypeptide cream,moisturizer,drunk elephant,"water, dicaprylylcarbonate, glycerin, cetearyl...",combination dry normal oily,4.4,"[water, dicaprylylcarbonate, glycerin, ceteary..."
the moisturizing soft cream,moisturizer,la mer,"algaeseaweedextract, cyclopentasiloxane, petro...",combination dry normal oily sensitive alltypes,3.8,"[algaeseaweedextract, cyclopentasiloxane, petr..."
your skin but better cc cream with spf 50,moisturizer,it cosmetics,"water, snailsecretionfiltrate, phenyltrimethic...",combination dry normal oily sensitive alltypes,4.1,"[water, snailsecretionfiltrate, phenyltrimethi..."
...,...,...,...,...,...,...
yoghurt nourishing fluid veil face sunscreen broad spectrum spf 30,sun protect,korres,"water, alcoholdenat., potassiumcetylphosphate,...",combination dry normal oily sensitive alltypes,3.9,"[water, alcoholdenat, .,, potassiumcetylphosph..."
daily deflector waterlight broad spectrum spf 50 pa antiaging sunscreen,sun protect,kate somerville,"water, isododecane, dimethicone, butyloctylsal...",combination dry normal oily sensitive alltypes,3.6,"[water, isododecane, dimethicone, butyloctylsa..."
self tan dry oil spf 50,sun protect,vita liberata,"water, dihydroxyacetone, glycerin, sclerocarya...",combination dry normal oily sensitive alltypes,3.5,"[water, dihydroxyacetone, glycerin, sclerocary..."
pro light self tan bronzing mist,sun protect,st. tropez tanning essentials,"water, dihydroxyacetone, propyleneglycol, ppg-...",combination dry normal oily sensitive alltypes,1.0,"[water, dihydroxyacetone, propyleneglycol, ppg..."


In [24]:
dataframe_cosine.drop("Clean Ingredients", axis=1, inplace=True)

In [25]:
dataframe_cosine.columns

Index(['Label', 'Brand', 'Skin Types', 'Rank', 'keywords'], dtype='object')

In [26]:
without_rank = dataframe_cosine[['Label', 'Skin Types', 'keywords']]
columns  = without_rank.columns
dataframe_cosine["bow"] = ""

for index, row in dataframe_cosine.iterrows():
    words = ""
    for column in columns:
        if isinstance(row[column], str):
            words = words + row[column] + " "
        if isinstance(row[column], list):
            words = words + " ".join(row[column])+ " "
        dataframe_cosine.at[index, "bow"] = words

In [27]:
dataframe_cosine.head(20)

Unnamed: 0_level_0,Label,Brand,Skin Types,Rank,keywords,bow
Clean Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
creme de la mer,moisturizer,la mer,combination dry normal oily sensitive alltypes,4.1,"[algaeseaweedextract, mineraloil, petrolatum, ...",moisturizer combination dry normal oily sensit...
facial treatment essence,moisturizer,sk-ii,combination dry normal oily sensitive alltypes,4.1,"[galactomycesfermentfiltratepitera, butylenegl...",moisturizer combination dry normal oily sensit...
protini polypeptide cream,moisturizer,drunk elephant,combination dry normal oily,4.4,"[water, dicaprylylcarbonate, glycerin, ceteary...",moisturizer combination dry normal oily water ...
the moisturizing soft cream,moisturizer,la mer,combination dry normal oily sensitive alltypes,3.8,"[algaeseaweedextract, cyclopentasiloxane, petr...",moisturizer combination dry normal oily sensit...
your skin but better cc cream with spf 50,moisturizer,it cosmetics,combination dry normal oily sensitive alltypes,4.1,"[water, snailsecretionfiltrate, phenyltrimethi...",moisturizer combination dry normal oily sensit...
the water cream,moisturizer,tatcha,combination normal oily sensitive,4.2,"[water, saccharomyces, camelliasinensisleaf, c...",moisturizer combination normal oily sensitive ...
lala retro whipped cream,moisturizer,drunk elephant,combination dry normal oily,4.2,"[water, glycerin, caprylic, caprictriglyceride...",moisturizer combination dry normal oily water ...
virgin marula luxury facial oil,moisturizer,drunk elephant,combination dry normal oily,4.4,"[100, unrefinedsclerocrayabirreamarulakerneloil]",moisturizer combination dry normal oily 100 un...
ultra facial cream,moisturizer,kiehl's since 1851,combination dry normal oily sensitive alltypes,4.4,"[water, glycerin, cyclohexasiloxane, squalane,...",moisturizer combination dry normal oily sensit...
little miss miracle limitededition creme de la mer,moisturizer,la mer,combination dry normal oily sensitive alltypes,5.0,"[algaeseaweedextract, mineraloil, petrolatum, ...",moisturizer combination dry normal oily sensit...


In [28]:
dataframe_cosine.shape

(1472, 6)

In [29]:
len(dataframe_cosine["bow"].iloc(0)[1].split(" "))

15

In [30]:
count = CountVectorizer()
count_matrix = count.fit_transform(dataframe_cosine["bow"])

In [31]:
count_matrix.shape

(1472, 5557)

In [32]:
dataframe_cosine.index

Index(['creme de la mer', 'facial treatment essence',
       'protini polypeptide cream', 'the moisturizing soft cream',
       'your skin but better cc cream with spf 50', 'the water cream',
       'lala retro whipped cream', 'virgin marula luxury facial oil',
       'ultra facial cream',
       'little miss miracle limitededition creme de la mer',
       ...
       'summer duo', 'passionflower  argan dry oil broad spectrum spf 50',
       'antiaging armour super smart skinperfecting beauty fluid spf 50',
       'naked skin bronzing beauty balm broad spectrum spf 20',
       'daily deflector moisturizer broad spectrum spf 50',
       'yoghurt nourishing fluid veil face sunscreen broad spectrum spf 30',
       'daily deflector waterlight broad spectrum spf 50 pa antiaging sunscreen',
       'self tan dry oil spf 50', 'pro light self tan bronzing mist',
       'dermaprotect daily defense broad spectrum spf 50'],
      dtype='object', name='Clean Name', length=1472)

In [33]:
list(count.vocabulary_.items())

[('moisturizer', 3330),
 ('combination', 1599),
 ('dry', 1942),
 ('normal', 3455),
 ('oily', 3531),
 ('sensitive', 4595),
 ('alltypes', 469),
 ('algaeseaweedextract', 458),
 ('mineraloil', 3321),
 ('petrolatum', 3834),
 ('glycerin', 2288),
 ('isohexadecane', 2794),
 ('microcrystallinewax', 3313),
 ('lanolinalcohol', 2909),
 ('citrusaurantifolialimeextract', 1438),
 ('sesamumindicumsesameseedoil', 4607),
 ('eucalyptusglobuluseucalyptusleafoil', 2069),
 ('sesamumindicumsesameseedpowder', 4608),
 ('medicagosativaalfalfaseedpowder', 3205),
 ('helianthusannuussunflowerseedcake', 2448),
 ('prunusamygdalusdulcissweetalmondseedmeal', 4168),
 ('sodiumgluconate', 4749),
 ('coppergluconate', 1630),
 ('calciumgluconate', 1006),
 ('magnesiumgluconate', 3123),
 ('zincgluconate', 5524),
 ('magnesiumsulfate', 3130),
 ('paraffin', 3743),
 ('tocopherylsuccinate', 5203),
 ('niacin', 3438),
 ('water', 5439),
 ('beta', 839),
 ('carotene', 1150),
 ('decyloleate', 1772),
 ('aluminumdistearate', 516),
 ('octy

In [34]:
# cosine_similarity?

In [35]:
cosine_sim = cosine_similarity(count_matrix, count_matrix)
cosine_sim

array([[1.        , 0.34016803, 0.13522468, ..., 0.24494897, 0.232379  ,
        0.28284271],
       [0.34016803, 1.        , 0.25555063, ..., 0.4114756 , 0.34156503,
        0.53452248],
       [0.13522468, 0.25555063, 1.        , ..., 0.18401748, 0.17457431,
        0.15936381],
       ...,
       [0.24494897, 0.4114756 , 0.18401748, ..., 1.        , 0.42163702,
        0.51320024],
       [0.232379  , 0.34156503, 0.17457431, ..., 0.42163702, 1.        ,
        0.4868645 ],
       [0.28284271, 0.53452248, 0.15936381, ..., 0.51320024, 0.4868645 ,
        1.        ]])

In [36]:
indices = pd.Series(dataframe_cosine.index)

indices

0                                                               creme de la mer
1                                                      facial treatment essence
2                                                     protini polypeptide cream
3                                                   the moisturizing soft cream
4                                     your skin but better cc cream with spf 50
                                         ...                                   
1467         yoghurt nourishing fluid veil face sunscreen broad spectrum spf 30
1468    daily deflector waterlight broad spectrum spf 50 pa antiaging sunscreen
1469                                                    self tan dry oil spf 50
1470                                           pro light self tan bronzing mist
1471                           dermaprotect daily defense broad spectrum spf 50
Name: Clean Name, Length: 1472, dtype: object

In [37]:
indices[indices == "lala retro whipped cream"].index[0]

np.int64(6)

In [38]:
pd.Series(cosine_sim[1]).sort_values(ascending=False)[1:5+1]

1      1.000000
116    0.897085
93     0.661438
257    0.661438
288    0.661438
dtype: float64

In [39]:
dataframe_cosine.iloc[116]

Label                                                                                                                                                                            moisturizer
Brand                                                                                                                                                                                  sk-ii
Skin Types                                                                                                                                    combination dry normal oily sensitive alltypes
Rank                                                                                                                                                                                     0.0
keywords                                                        [galactomycesfermentfiltrate, butyleneglycol, pentyleneglycol, water, sodiumbenzoate, methylparaben, sorbicacid, pitera, ™.]
bow           moisturizer combination dry normal oily s

In [40]:
dataframe_cosine.iloc[1]

Label                                                                                                                                                                        moisturizer
Brand                                                                                                                                                                              sk-ii
Skin Types                                                                                                                                combination dry normal oily sensitive alltypes
Rank                                                                                                                                                                                 4.1
keywords                                                          [galactomycesfermentfiltratepitera, butyleneglycol, pentyleneglycol, water, sodiumbenzoate, methylparaben, sorbicacid]
bow           moisturizer combination dry normal oily sensitive alltypes ga

In [41]:
def recommend_cosmetic(name, n = 5):
    recommended_list = []
    idx = indices[indices == name].index[0]

    min_recommend_value = 0.2

    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending=False)
    score_series = score_series[score_series >= min_recommend_value]
    print(score_series.values)
    top_n_indexes = list(score_series.iloc[1:n+1].index)
    print(top_n_indexes)

    for i in top_n_indexes:

        recommended_list.append(list(dataframe_cosine.index)[i])

    return dataframe_cosine.loc[recommended_list]

In [42]:
name = "ultra repair cream intense hydration"
recommend_cosmetic(name)

[1.         0.59375    0.55       ... 0.20044593 0.20044593 0.20044593]
[157, 1357, 892, 141, 187]


Unnamed: 0_level_0,Label,Brand,Skin Types,Rank,keywords,bow
Clean Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ultra repair face moisturizer,moisturizer,first aid beauty,combination dry normal oily sensitive alltypes,4.2,"[water, glycerylstearatese, glycerin, caprylic, caprictriglyceride, cetylalcohol, collodialoatmeal, dimethicone, squalane, urea, allantoin, ceramidenp, butyrospermumparkiisheabutter, limnanthesalbameadowfoamseedoil, perseagratissimaavocadooil, chrysanthemumpartheniumfeverfewextract, camelliasinensisleafextract, glycyrrhizaglabralicoricerootextract, caprylylglycol, phenoxyethanol, carbomer, sodiumhydroxide, leuconostoc, radishrootfermentfiltrate, edta]",moisturizer combination dry normal oily sensitive alltypes water glycerylstearatese glycerin caprylic caprictriglyceride cetylalcohol collodialoatmeal dimethicone squalane urea allantoin ceramidenp butyrospermumparkiisheabutter limnanthesalbameadowfoamseedoil perseagratissimaavocadooil chrysanthemumpartheniumfeverfewextract camelliasinensisleafextract glycyrrhizaglabralicoricerootextract caprylylglycol phenoxyethanol carbomer sodiumhydroxide leuconostoc radishrootfermentfiltrate edta
ultra repair pure mineral sunscreen moisturizer broad spectrum spf 40,sun protect,first aid beauty,combination dry normal oily sensitive alltypes,3.6,"[water, butyloctylsalicylate, cetearylalcohol, caprylic, caprictriglyceride, steareth, 2, dimethicone, squalane, glycerin, butyleneglycol, alumina, ceteareth, 20, colloidaloatmeal, urea, allantoin, butyrospermumparkiisheabutter, copaiferaofficinalisbalsamcopaibaresin, limnanthesalbameadowfoamseedoil, perseagratissimaavocadooil, chrysanthemumpartheniumfeverfewextract, camelliasinensisleafextract, glycyrrhizaglabralicoricerootextract, ceramidenp, tocopherylacetate, triethoxycaprylylsilane, caprylylglycol, isopropyltitaniumtriisostearate, c13, 14isoparaffin, c12, 15alkylbenzoate, polyhydroxystearicacid, laureth, 7, methicone, leuconostoc, radishrootfermentfiltrate, xanthangum, polyacrylamide, phenoxyethanol, disodiumedta, edta]",sun protect combination dry normal oily sensitive alltypes water butyloctylsalicylate cetearylalcohol caprylic caprictriglyceride steareth 2 dimethicone squalane glycerin butyleneglycol alumina ceteareth 20 colloidaloatmeal urea allantoin butyrospermumparkiisheabutter copaiferaofficinalisbalsamcopaibaresin limnanthesalbameadowfoamseedoil perseagratissimaavocadooil chrysanthemumpartheniumfeverfewextract camelliasinensisleafextract glycyrrhizaglabralicoricerootextract ceramidenp tocopherylacetate triethoxycaprylylsilane caprylylglycol isopropyltitaniumtriisostearate c13 14isoparaffin c12 15alkylbenzoate polyhydroxystearicacid laureth 7 methicone leuconostoc radishrootfermentfiltrate xanthangum polyacrylamide phenoxyethanol disodiumedta edta
ultra repair instant oatmeal mask,face mask,first aid beauty,combination dry normal oily sensitive alltypes,4.4,"[water, colloidaloatmeal, glycerylstearatecitrate, cetylalcohol, glycerylstearatese, caprylic, capric, myristic, stearictriglyceride, butyleneglycoldicaprylate, dicaprate, butyrospermumparkiibutter, octyldodecanol, glycerin, lactobacillusferment, polysorbate20, cetearylalcohol, decylglucoside, theobromacacaoseedbutter, polysorbate60, prunusarmeniacakerneloil, xanthangum, tetrasodiumedta, dimethicone, bisabolol, camelliasinensisleafextract, chrysanthemumpartheniumextract, glycyrrhizaglabrarootextract, hippophaerhamnoidesextract, vanillaplanifoliafruitextract, vanillin, avenasativakernelextract, zingiber, officinalerootextract, phenoxyethanol, potassiumsorbate]",face mask combination dry normal oily sensitive alltypes water colloidaloatmeal glycerylstearatecitrate cetylalcohol glycerylstearatese caprylic capric myristic stearictriglyceride butyleneglycoldicaprylate dicaprate butyrospermumparkiibutter octyldodecanol glycerin lactobacillusferment polysorbate20 cetearylalcohol decylglucoside theobromacacaoseedbutter polysorbate60 prunusarmeniacakerneloil xanthangum tetrasodiumedta dimethicone bisabolol camelliasinensisleafextract chrysanthemumpartheniumextract glycyrrhizaglabrarootextract hippophaerhamnoidesextract vanillaplanifoliafruitextract vanillin avenasativakernelextract zingiber officinalerootextract phenoxyethanol potassiumsorbate
squalane probiotic gel moisturizer,moisturizer,biossance,combination dry normal oily sensitive alltypes,4.4,"[water, squalane, glycerin, dimethicone, pentyleneglycol, ammoniumacryloyldimethyltaurate, vpcopolymer, lactococcusfermentlysate, caprylic, caprictriglyceride, phenoxyethanol, caprylylglycol, dimethiconol, chrondruscrispusextract, allantoin, bisabolol, glycinesojasoybeansterols, linoleicacid, phospholipids, sodiumphytate, lechithin, sodiumhydroxide, sodiumhyaluronate, lacticacid, lepidiumsativumsproutextract, sodiumchloride, sodiumbenzoate, citricacid, zingiberofficinalegingerrootextract]",moisturizer combination dry normal oily sensitive alltypes water squalane glycerin dimethicone pentyleneglycol ammoniumacryloyldimethyltaurate vpcopolymer lactococcusfermentlysate caprylic caprictriglyceride phenoxyethanol caprylylglycol dimethiconol chrondruscrispusextract allantoin bisabolol glycinesojasoybeansterols linoleicacid phospholipids sodiumphytate lechithin sodiumhydroxide sodiumhyaluronate lacticacid lepidiumsativumsproutextract sodiumchloride sodiumbenzoate citricacid zingiberofficinalegingerrootextract
overnight restorative cream,moisturizer,algenist,combination dry normal oily sensitive alltypes,4.4,"[water, isopropylisostearate, butyrospermumparkiibutterbutyrospermumparkiisheabutter, glycerin, stearicacid, squalane, butyleneglycol, dimethicone, glycerylstearatese, cetearylalcohol, dimethiconecrosspolymer, ascorbylglucoside, pentyleneglycol, algaeexopolysaccharides, algaeextract, tocopherylacetate, citrullusvulgarisfruitextractcitrullusvulgariswatermelonfruitextract, lensesculentafruitextractlensesculentalentilfruitextract, oleanolicacid, palmitoyloligopeptide, palmitoyltetrapeptide, 7, pyrusmalusfruitextractpyrusmalusapplefruitextract, enantiachloranthabarkextract, crithmummaritimumextract, silybummarianumseedoil, caprylic, caprictriglyceride, ceteareth, 20, acrylates, c10, 30alkylacrylatecrosspolymer, sodiumhydroxide, caprylylglycol, hydrogenatedvegetableoil, hexyleneglycol, phenoxyethanol, disodiumedta, fragrance, sodiumpca, carbomer, polysorbate20, sodiumlactate, ethylhexylglycerin]",moisturizer combination dry normal oily sensitive alltypes water isopropylisostearate butyrospermumparkiibutterbutyrospermumparkiisheabutter glycerin stearicacid squalane butyleneglycol dimethicone glycerylstearatese cetearylalcohol dimethiconecrosspolymer ascorbylglucoside pentyleneglycol algaeexopolysaccharides algaeextract tocopherylacetate citrullusvulgarisfruitextractcitrullusvulgariswatermelonfruitextract lensesculentafruitextractlensesculentalentilfruitextract oleanolicacid palmitoyloligopeptide palmitoyltetrapeptide 7 pyrusmalusfruitextractpyrusmalusapplefruitextract enantiachloranthabarkextract crithmummaritimumextract silybummarianumseedoil caprylic caprictriglyceride ceteareth 20 acrylates c10 30alkylacrylatecrosspolymer sodiumhydroxide caprylylglycol hydrogenatedvegetableoil hexyleneglycol phenoxyethanol disodiumedta fragrance sodiumpca carbomer polysorbate20 sodiumlactate ethylhexylglycerin


In [43]:
dataframe_cosine.loc[name]

Label                                                                                                                                                                                                                                                                                                                                                                                                                                                                    moisturizer
Brand                                                                                                                                                                                                                                                                                                                                                                                                                                                               first aid beauty
Skin Types                                                    