# Skincare Recommendation Engine


This notebook walks you through the development of a content-based recommendation engine that should take a list of skin metrics/concerns (skin type, tone, acne, blemishes, redness, etc) as input and return several products that might suit the user's skin. 

In [274]:
import numpy as np 
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import heapq


In [275]:
# 'essentials' implies essential facial skincare products
df = pd.read_csv('result.csv')
makeup = pd.read_csv('result2.csv')

In [276]:
df.head()

Unnamed: 0,label,url,brand,name,price,skin type,spf,concern,concern 2,concern 3,key ingredient,formulation
0,face-moisturisers,https://www.myntra.com/face-moisturisers/lakme...,Lakme,Absolute Perfect Radiance Skin Lightening Day ...,₹ 79,All,,General Care,,,,Cream
1,face-moisturisers,https://www.myntra.com/face-moisturisers/bioti...,Biotique,Bio Morning Nectar Flawless Sustainable Skin M...,₹ 165,All,,Uneven Skin Tone,Hydration,Dark Spots,Honey,Lotion
2,face-moisturisers,https://www.myntra.com/face-moisturisers/nivea...,Nivea,Unisex Aloe Hydration Skin Cream 100 ml,₹ 92,All,,Dull Skin,Dryness,General Care,Aloe Vera,Cream
3,face-moisturisers,https://www.myntra.com/face-moisturisers/vi-jo...,VI-JOHN,Women Set of 5 Saffron Fairness Cream Advanced,₹ 187,All,15 to 30,Softening and Smoothening,Softening and Smoothening,Sun Protection,Vitamin E,Cream
4,face-moisturisers,https://www.myntra.com/face-moisturisers/lakme...,Lakme,Peach Milk Soft Creme 150g,₹ 192,All,,General Care,,,,Cream


In [277]:
makeup.head()

Unnamed: 0,label,url,brand,name,price,skin type,skin tone,spf,concern,concern 2,concern 3,key ingredient,formulation
0,primer,https://www.myntra.com/foundation-and-primer/l...,Lakme,Absolute Blur Perfect Makeup Primer - Mini 10g,₹ 254,All,Light to Medium,,Brightening,,,,Gel
1,primer,https://www.myntra.com/foundation-and-primer/c...,Colorbar,Perfect Match Primer 30 ml,₹ 595,,,,Dullness,,,Vitamin E,Cream
2,primer,https://www.myntra.com/foundation-and-primer/w...,Wet n Wild,Sustainable Photo Focus Natural Finish Setting...,₹ 454,Normal,Light to Medium,,,,,,Spray
3,primer,https://www.myntra.com/foundation-and-primer/m...,M.A.C,Mini Prep+Prime Fix+ 30 ml,₹ 1200,,,,Dryness,,,Vitamin E,Spray
4,primer,https://www.myntra.com/foundation-and-primer/w...,Wet n Wild,Sustainable Photo Focus Matte Face Primer - Pa...,₹ 454,Normal,Light to Medium,,,,,,Cream


## Data Preprocessing

## Imputation of values

In [278]:
df['label'].value_counts()

mask-and-peel        300
face-moisturisers    300
cleanser             300
concealer            300
foundation           300
sunscreen            272
eye-cream            144
Name: label, dtype: int64

In [279]:
df.isna().sum()

label                0
url                  0
brand               46
name                46
price               46
skin type          309
spf               1496
concern            381
concern 2         1142
concern 3         1334
key ingredient    1186
formulation        384
dtype: int64

In [280]:
df['concern 2'].fillna('', inplace = True)
df['concern 3'].fillna('', inplace = True)
df['concern'] = df['concern'] + ',' + df['concern 2'] + ',' + df['concern 3']


In [281]:
df['concern']

0                                          General Care,,
1                   Uneven Skin Tone,Hydration,Dark Spots
2                          Dull Skin,Dryness,General Care
3       Softening and Smoothening,Softening and Smooth...
4                                          General Care,,
                              ...                        
1911                   Dark Circles,Dark Circles,Eye Bags
1912                               Dark Circles,Eye Bags,
1913                                Dark Circles,Dryness,
1914                                    Dryness,Eye Bags,
1915                       Eye Bags,Dark Circles,Eye Bags
Name: concern, Length: 1916, dtype: object

In [282]:
df.drop(columns=['concern 2', 'concern 3', 'spf', 'key ingredient'], inplace = True)
df['concern'].value_counts()

Sun Protection,,                                           202
Dark Spots,,                                                81
General Care,,                                              68
Dark Circles,,                                              54
Fine Lines and Wrinkles,,                                   45
                                                          ... 
Deep Nourishment,Dark Spots,Acne or Blemishes                1
Oily Skin,Skin Inflammation,                                 1
Hydration,Dull Skin,Dryness                                  1
Softening and Smoothening,Skin Sagging,Uneven Skin Tone      1
Acne or Blemishes,Pore Care,Pore Care                        1
Name: concern, Length: 441, dtype: int64

In [283]:
df2 = df[((df['label'] == 'face-moisturisers') | (df['label'] == 'mask-and-peel') | (df['label'] == 'cleanser') | (df['label'] == 'eye-cream'))]
df2
LABELS = list(df2.label.unique())

In [284]:
df2 = df2[df2['skin type'].isna() == False]
df2.index = [i for i in range(0, len(df2))]
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1011 entries, 0 to 1010
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   label        1011 non-null   object
 1   url          1011 non-null   object
 2   brand        1011 non-null   object
 3   name         1011 non-null   object
 4   price        1011 non-null   object
 5   skin type    1011 non-null   object
 6   concern      982 non-null    object
 7   formulation  710 non-null    object
dtypes: object(8)
memory usage: 71.1+ KB


In [285]:
df2[df2['concern'].isna() == True]['label'].value_counts()

face-moisturisers    10
mask-and-peel         9
eye-cream             5
cleanser              5
Name: label, dtype: int64

In [286]:
df2[df2['label'] ==  'eye-cream']['concern'].value_counts()

Fine Lines and Wrinkles,Dark Circles,Eye Bags    20
Dark Circles,Eye Bags,                           18
Dark Circles,,                                   14
Dark Circles,Dark Circles,Eye Bags               14
Dark Circles,Eye Bags,Dark Circles               11
Fine Lines and Wrinkles,Dark Circles,             9
Dark Circles,Eye Bags,Dryness                     9
Fine Lines and Wrinkles,,                         5
Fine Lines and Wrinkles,Dark Circles,Dryness      5
Lightening,Dark Circles,Eye Bags                  5
Fine Lines and Wrinkles,Dryness,                  4
Dryness,Dark Circles,                             3
Eye Bags,Dark Circles,Eye Bags                    3
Dark Circles,Dryness,                             3
Fine Lines and Wrinkles,Dryness,Eye Bags          2
Dryness,Eye Bags,                                 2
Dryness,,                                         2
Dryness,Eye Bags,Dark Circles                     2
Dark Circles,Dark Circles,                        2
Eye Bags,Dar

In [287]:
df2['concern'] = df2['concern'].str.lower()
# df2[df2['label'] == 'face-moisturisers']['concern'].value_counts()
# df2[df2['label'] == 'mask-and-peel']['concern'].value_counts()
df2[df2['label'] == 'sunscreen']['concern'].value_counts()



Series([], Name: concern, dtype: int64)

In [288]:
top_concerns = {'face-moisturisers':'general care', 'mask-and-peel':'daily use', 'cleanser':'general care', 'eye-cream':'fine lines,wrinkles,dark circles,eye bags' }
entries = len(df2)
for i in range(entries):
    label = df2.iloc[i]['label']
    if pd.isnull(df2.iloc[i]['concern']):
        df2.iloc[i]['concern'] = top_concerns[label]
df2.info()

    

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1011 entries, 0 to 1010
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   label        1011 non-null   object
 1   url          1011 non-null   object
 2   brand        1011 non-null   object
 3   name         1011 non-null   object
 4   price        1011 non-null   object
 5   skin type    1011 non-null   object
 6   concern      1011 non-null   object
 7   formulation  710 non-null    object
dtypes: object(8)
memory usage: 71.1+ KB


In [289]:
df2

Unnamed: 0,label,url,brand,name,price,skin type,concern,formulation
0,face-moisturisers,https://www.myntra.com/face-moisturisers/lakme...,Lakme,Absolute Perfect Radiance Skin Lightening Day ...,₹ 79,All,"general care,,",Cream
1,face-moisturisers,https://www.myntra.com/face-moisturisers/bioti...,Biotique,Bio Morning Nectar Flawless Sustainable Skin M...,₹ 165,All,"uneven skin tone,hydration,dark spots",Lotion
2,face-moisturisers,https://www.myntra.com/face-moisturisers/nivea...,Nivea,Unisex Aloe Hydration Skin Cream 100 ml,₹ 92,All,"dull skin,dryness,general care",Cream
3,face-moisturisers,https://www.myntra.com/face-moisturisers/vi-jo...,VI-JOHN,Women Set of 5 Saffron Fairness Cream Advanced,₹ 187,All,"softening and smoothening,softening and smooth...",Cream
4,face-moisturisers,https://www.myntra.com/face-moisturisers/lakme...,Lakme,Peach Milk Soft Creme 150g,₹ 192,All,"general care,,",Cream
...,...,...,...,...,...,...,...,...
1006,eye-cream,https://www.myntra.com/eye-cream/stbotanica/st...,St.Botanica,StBotanica Pomegranate Radiant Glow Under Eye ...,₹ 749,Normal,"dark circles,dark circles,eye bags",Cream
1007,eye-cream,https://www.myntra.com/eye-cream/stbotanica/st...,St.Botanica,StBotanica Vitamin C Brightening Under Eye Ser...,₹ 999,Normal,"dark circles,eye bags,",Serum
1008,eye-cream,https://www.myntra.com/eye-cream/shiseido/shis...,SHISEIDO,Unisex White Lucent Anti-Dark Circles Eye Trea...,"₹ 3,150",All,"dark circles,dryness,",Cream
1009,eye-cream,https://www.myntra.com/eye-cream/yves-rocher/y...,YVES ROCHER,Yves Rocher Unisex Hydra Vegetal Anti-Fatigue ...,₹ 861,All,"dryness,eye bags,",Gel


In [290]:
df2['brand'] = df2['brand'].str.lower()
df2['name'] = df2['name'].str.lower()
df2['skin type'] = df2['skin type'].str.lower()
df2['concern'] = df2['concern'].str.replace(' and ', ',').str.replace(' or ', ',')
df2

Unnamed: 0,label,url,brand,name,price,skin type,concern,formulation
0,face-moisturisers,https://www.myntra.com/face-moisturisers/lakme...,lakme,absolute perfect radiance skin lightening day ...,₹ 79,all,"general care,,",Cream
1,face-moisturisers,https://www.myntra.com/face-moisturisers/bioti...,biotique,bio morning nectar flawless sustainable skin m...,₹ 165,all,"uneven skin tone,hydration,dark spots",Lotion
2,face-moisturisers,https://www.myntra.com/face-moisturisers/nivea...,nivea,unisex aloe hydration skin cream 100 ml,₹ 92,all,"dull skin,dryness,general care",Cream
3,face-moisturisers,https://www.myntra.com/face-moisturisers/vi-jo...,vi-john,women set of 5 saffron fairness cream advanced,₹ 187,all,"softening,smoothening,softening,smoothening,su...",Cream
4,face-moisturisers,https://www.myntra.com/face-moisturisers/lakme...,lakme,peach milk soft creme 150g,₹ 192,all,"general care,,",Cream
...,...,...,...,...,...,...,...,...
1006,eye-cream,https://www.myntra.com/eye-cream/stbotanica/st...,st.botanica,stbotanica pomegranate radiant glow under eye ...,₹ 749,normal,"dark circles,dark circles,eye bags",Cream
1007,eye-cream,https://www.myntra.com/eye-cream/stbotanica/st...,st.botanica,stbotanica vitamin c brightening under eye ser...,₹ 999,normal,"dark circles,eye bags,",Serum
1008,eye-cream,https://www.myntra.com/eye-cream/shiseido/shis...,shiseido,unisex white lucent anti-dark circles eye trea...,"₹ 3,150",all,"dark circles,dryness,",Cream
1009,eye-cream,https://www.myntra.com/eye-cream/yves-rocher/y...,yves rocher,yves rocher unisex hydra vegetal anti-fatigue ...,₹ 861,all,"dryness,eye bags,",Gel


In [291]:
def concern_elements(comma_sep_concerns):
    words = comma_sep_concerns.split(',')
    for w in words:
        if w != '':
            temp = w.rstrip()
            if temp in concerns:
                concerns[temp] += 1
            else:
                concerns[temp] = 1


            
# features
list(df2['skin type'].unique())
concerns = {}
for i in range(entries):
    concern_elements(df2.iloc[i]['concern'])
    

In [292]:
print(sorted(concerns.items(), key =
             lambda kv:(kv[1], kv[0])))   

[('anti acne scarring', 1), ('anti-ageing', 2), ('skin tightening', 2), ('blackheads removal', 4), ('irregular textures', 6), ('skin inflammation', 11), ('oil control', 15), ('redness', 15), ('oily skin', 16), ('skin sagging', 20), ('sun protection', 21), ('lightening', 30), ('tan removal', 48), ('anti-pollution', 60), ('blackheads', 61), ('whiteheads', 61), ('dark spots', 71), ('pigmentation', 71), ('uneven skin tone', 76), ('excess oil', 90), ('eye bags', 96), ('deep nourishment', 106), ('pore care', 110), ('smoothening', 112), ('softening', 112), ('dryness', 129), ('fine lines', 145), ('wrinkles', 145), ('acne', 155), ('blemishes', 155), ('dark circles', 155), ('hydration', 156), ('daily use', 158), ('general care', 158), ('dull skin', 166)]


In [293]:
concerns.pop('anti acne scarring')
concerns.pop('anti-ageing')
concerns.pop('skin tightening')
concerns.pop('blackheads removal')
concerns.pop('irregular textures')
concerns.pop('skin inflammation')
concerns.pop('oil control')
concerns.pop('redness')
concerns.pop('oily skin')
concerns.pop('skin sagging')
concerns.pop('sun protection')
concerns.pop('lightening')
concerns.pop('tan removal')
concerns.pop('excess oil')

# concerns.pop('anti acne scarring')
# concerns['scarring'] = 
# concerns.remo('blackheads removal')
# concerns.remove('dull skin')
# # concerns.remove('dullness')
# concerns.add('dull')
# concerns.remove('oily skin')
# concerns.remove('excess oil')
# concerns.remove('oil control')
# concerns.add('oil')

90

In [294]:
features = list(df2['skin type'].unique()) + list(concerns)
(features)

features = ['normal','dry','oily','combination', 'acne','sensitive','fine lines', 'wrinkles', 'redness', 'dull', 'pore', 'pigmentation', 'blackheads','whiteheads', 'blemishes', 'dark circles', 'eye bags', 'dark spots']

In [295]:
len(features)

18

In [296]:
def search_concern(target, i):
    if target in df2.iloc[i]['concern']:
        return True
    return False

one_hot_encodings = np.zeros([entries, len(features)])

#skin types first
for i in range(entries):
    for j in range(5):
        target = features[j]
        sk_type = df2.iloc[i]['skin type']
        if sk_type == 'all':
            one_hot_encodings[i][0:5] = 1
        elif target == sk_type:
            one_hot_encodings[i][j] = 1

#other features
for i in range(entries):
    for j in range(5, len(features)):
        feature = features[j]
        if feature in df2.iloc[i]['concern']:
            one_hot_encodings[i][j] = 1
            
    

In [297]:
x = one_hot_encodings[456]

In [298]:
from sklearn.neighbors import NearestNeighbors
nbrs = NearestNeighbors(n_neighbors=6, algorithm='ball_tree').fit(one_hot_encodings)
distances, indices = nbrs.kneighbors(one_hot_encodings)

get_index_from_name('peach milk soft creme 150g')


4

In [299]:
# finding the closes data points
def recs_nn(query=None,id=None):
    if id:
        for id in indices[id][1:]:
            print(anime.ix[id]["name"])
    if query:
        found_id = get_index_from_name(query)
        for id in indices[found_id][1:]:
            print(index2prod(id))

## Cosine Similarity

In [300]:
# utility functions
def name2index(name):
    return df2[df2["name"]==name].index.tolist()[0]

def index2prod(index):
    return df2.iloc[index]

In [301]:
# recommend top 10 similar items from a category
def recs_cs(vector = None, name = None, label = None, count = 5):
    if name:
        idx = name2index(name)
        fv = one_hot_encodings[idx]
    elif vector:
        fv = vector
    cs_values = cosine_similarity(np.array([fv, ]), one_hot_encodings)
    df2['cs'] = cs_values[0]
    dff = df2
    if label:
        dff = df2[df2['label'] == label]
    if name:
        dff = dff[dff['name'] != name]
    recommendations = dff.sort_values('cs', ascending=False).head(count)
    print(f"Top {count} matching {label} items")
    return recommendations[['label', 'brand', 'concern', 'cs']]

In [302]:
# overall recommendation
def recs_essentials(vector = None, name = None):
    print("ESSENTIALS:")
    for label in LABELS:
        print(f"{label}:")
        if name: 
            print(recs_cs(None, name, label))
        elif vector:
            print(recs_cs(vector, None, label))        

In [303]:
# features = ['normal','dry','oily','combination', 'acne','sensitive','fine lines', 'wrinkles', 'redness', 'dull', 'pore ', 'pigmentation', 'blackheads','whiteheads', 'blemishes', 'dark circles', 'eye bags', 'dark spots']
x = [1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0]

recs_essentials(x, None)

ESSENTIALS:
face-moisturisers:
Top 5 matching face-moisturisers items
                 label              brand  \
118  face-moisturisers  azani active care   
151  face-moisturisers          mamaearth   
141  face-moisturisers            garnier   
194  face-moisturisers           biotique   
227  face-moisturisers          innisfree   

                                       concern        cs  
118  deep nourishment,acne,blemishes,dull skin  0.801784  
151      acne,blemishes,pigmentation,dull skin  0.750000  
141                           acne,blemishes,,  0.721688  
194       uneven skin tone,dull skin,hydration  0.721688  
227         sun protection,dull skin,hydration  0.721688  
cleanser:
Top 5 matching cleanser items
        label             brand  \
462  cleanser        just herbs   
298  cleanser          biotique   
305  cleanser          biotique   
358  cleanser  wow skin science   
314  cleanser  wow skin science   

                                            concern   

## Makeup Items

In [304]:
# info
makeup.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 790 entries, 0 to 789
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   label           790 non-null    object 
 1   url             790 non-null    object 
 2   brand           790 non-null    object 
 3   name            790 non-null    object 
 4   price           790 non-null    object 
 5   skin type       443 non-null    object 
 6   skin tone       494 non-null    object 
 7   spf             108 non-null    object 
 8   concern         379 non-null    object 
 9   concern 2       0 non-null      float64
 10  concern 3       0 non-null      float64
 11  key ingredient  118 non-null    object 
 12  formulation     786 non-null    object 
dtypes: float64(2), object(11)
memory usage: 80.4+ KB


In [305]:
makeup.drop(columns = ['concern 2', 'concern 3', 'key ingredient', 'formulation', 'spf', 'concern'], inplace = True)

In [306]:
makeup.isna().sum()

label          0
url            0
brand          0
name           0
price          0
skin type    347
skin tone    296
dtype: int64

In [307]:
makeup.dropna(subset=['skin tone'], inplace=True)


In [314]:
makeup[makeup['label'] == 'foundation']['skin type'].value_counts()

All            73
Normal         31
Oily            7
Combination     3
Dry             1
Name: skin type, dtype: int64

In [317]:
makeup[makeup['label'] == 'foundation']['skin type'].isna().sum()

82

In [318]:
makeup[makeup['label'] == 'primer']['skin type'].isna().sum()

53

In [315]:
makeup[makeup['label'] == 'primer']['skin type'].value_counts()

All            66
Normal         53
Oily           10
Combination     1
Name: skin type, dtype: int64

In [319]:
makeup[makeup['label'] == 'concealer']['skin type'].isna().sum()

70

In [321]:
makeup[makeup['label'] == 'concealer']['skin type'].value_counts()

All            32
Normal         11
Combination     1
Name: skin type, dtype: int64

In [322]:
makeup['skin type'].fillna('All', inplace = True)

In [327]:
makeup['brand'] = makeup['brand'].str.lower()
makeup['name'] = makeup['name'].str.lower()
makeup['skin type'] = makeup['skin type'].str.lower()
makeup['skin tone'] = makeup['skin tone'].str.lower()


In [341]:
def makeup_recommendation(skin_tone, skin_type):
    
    dff = pd.DataFrame()
    dff = dff.append(makeup[(makeup['skin tone'] == skin_tone) & (makeup['skin type'] == skin_type) & (makeup['label'] == 'foundation')].head(2))
    dff = dff.append(makeup[(makeup['skin tone'] == skin_tone) & (makeup['skin type'] == skin_type) & (makeup['label'] == 'concealer')].head(2))
    dff = dff.append(makeup[(makeup['skin tone'] == skin_tone) & (makeup['skin type'] == skin_type) & (makeup['label'] == 'primer')].head(2))
    dff= dff.sample(frac = 1)
    return dff



In [342]:
makeup_recommendation('dark to deep', 'all')

Unnamed: 0,label,url,brand,name,price,skin type,skin tone
274,foundation,https://www.myntra.com/foundation-and-primer/m...,miss claire,professional stick foundation - fs27,₹ 675,all,dark to deep
568,concealer,https://www.myntra.com/concealer/lakme/lakme-a...,lakme,absolute mattreal mousse concealer - caramel 0...,₹ 510,all,dark to deep
46,primer,https://www.myntra.com/foundation-and-primer/m...,myglamm,tinted perfection brightening banana primer,₹ 876,all,dark to deep
557,concealer,https://www.myntra.com/concealer/maybelline/ma...,maybelline,new york fit me concealer - caramel 40 6.8 ml,₹ 332,all,dark to deep
228,foundation,https://www.myntra.com/foundation-and-primer/m...,miss claire,professional stick foundation - ng1,₹ 472,all,dark to deep
