In [16]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
import plotly.io as pio
import seaborn as sns
import missingno as mno
from sklearn.feature_extraction.text import TfidfVectorizer

In [5]:
# Colab-only cell: files.upload() asks for a local file to upload; the output
# below shows export_skincare.csv being saved.
from google.colab import files
uploaded = files.upload()

Saving export_skincare.csv to export_skincare.csv


In [18]:
# Load the uploaded product catalogue into a DataFrame
skincare = pd.read_csv('export_skincare.csv')

In [19]:
# Schema overview (dtypes and non-null counts), followed by the first five rows
skincare.info()
skincare.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1210 entries, 0 to 1209
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Unnamed: 0       1210 non-null   int64 
 1   product_href     1210 non-null   object
 2   product_name     1210 non-null   object
 3   product_type     1210 non-null   object
 4   brand            1210 non-null   object
 5   notable_effects  1210 non-null   object
 6   skintype         1210 non-null   object
 7   price            1210 non-null   object
 8   description      1210 non-null   object
 9   picture_src      1210 non-null   object
 10  labels           1210 non-null   int64 
 11  Sensitive        1210 non-null   int64 
 12  Combination      1210 non-null   int64 
 13  Oily             1210 non-null   int64 
 14  Dry              1210 non-null   int64 
 15  Normal           1210 non-null   int64 
dtypes: int64(7), object(9)
memory usage: 151.4+ KB


Unnamed: 0.1,Unnamed: 0,product_href,product_name,product_type,brand,notable_effects,skintype,price,description,picture_src,labels,Sensitive,Combination,Oily,Dry,Normal
0,0,https://www.beautyhaul.com/product/detail/bubb...,ACWELL Bubble Free PH Balancing Cleanser,Face Wash,ACWELL,"Acne-Free, Pore-Care, Brightening, Anti-Aging",Oily,Rp 209.000,Mengangkat kotoran dan menghapus makeup dalam ...,https://www.beautyhaul.com/assets/uploads/prod...,18,0,0,1,0,0
1,1,https://www.sociolla.com/face-wash/62452-ph-ba...,ACWELL pH Balancing Soothing Cleansing Foam,Face Wash,ACWELL,"Soothing, Balancing","Normal, Dry, Combination",Rp 181.800,Membersihkan dan menenangkan kulit sensitif de...,https://images.soco.id/8f08ced0-344d-41f4-a15e...,127,0,1,0,1,1
2,2,https://www.sociolla.com/toner/15871-licorice-...,Acwell Licorice pH Balancing Cleansing Toner,Toner,ACWELL,"Soothing, Balancing","Normal, Dry, Oily, Combination, Sensitive",Rp 149.000,"Mengangkat sisa kotoran, debu, dan make up sek...","https://www.soco.id/cdn-cgi/image/w=73,format=...",127,1,1,1,1,1
3,3,https://www.beautyhaul.com/product/detail/aqua...,ACWELL Aquaseal Soothing Tonic,Toner,ACWELL,"Acne-Free, Pore-Care, Brightening, Anti-Aging",Oily,Rp 290.000,Pre-essence yang diformulasikan dengan ekstrak...,https://www.beautyhaul.com/assets/uploads/prod...,18,0,0,1,0,0
4,4,https://www.sociolla.com/essence/38023-licoric...,Licorice pH Balancing Essence Mist,Toner,ACWELL,"Brightening, Soothing","Normal, Dry",Rp 194.650,Essens mist dengan kelembapan tinggi yang memb...,"https://www.sociolla.com/cdn-cgi/image/w=425,f...",40,0,0,0,1,1


In [20]:
# Count rows that are exact copies of an earlier row (all columns compared)
skincare.duplicated(keep='first').sum()

0

In [38]:
# Summary statistics (count / unique / top / freq) for the descriptive text columns
cat = [
    'product_type',
    'brand',
    'notable_effects',
    'skintype',
    'product_name',
]

skincare.loc[:, cat].describe()

Unnamed: 0,product_type,brand,notable_effects,skintype,product_name
count,1210,1210,1210,1210,1210
unique,5,211,151,15,1192
top,Serum,SOMETHINC,"Pore-Care, Brightening, Anti-Aging",Oily,Dear Me Beauty Watermelon Multipurpose Gel
freq,296,70,149,307,3


In [22]:
# Distribution of the skin types each product is suitable for:
# absolute counts plus their share of all products in percent
counts = skincare['skintype'].value_counts()
count_percentage = skincare['skintype'].value_counts(normalize=True) * 100
counts_df = pd.DataFrame(
    {
        'Skin_type': counts.index,
        'Counts': counts.values,
        'Percent%': np.round(count_percentage.values, 2),
    }
)
counts_df

Unnamed: 0,Skin_type,Counts,Percent%
0,Oily,307,25.37
1,"Normal, Dry, Oily, Combination, Sensitive",258,21.32
2,Dry,167,13.8
3,"Normal, Dry",92,7.6
4,"Normal, Dry, Oily, Combination",68,5.62
5,"Normal, Dry, Oily, Sensitive",52,4.3
6,Sensitive,51,4.21
7,"Oily, Combination, Sensitive",42,3.47
8,"Dry, Oily, Sensitive",36,2.98
9,"Dry, Sensitive",27,2.23


In [23]:
# Bar chart of the seven most frequent skin-type groupings
top_7 = counts_df.head(7)
sns.set(style='white')
fig = px.bar(
    data_frame=top_7,
    x='Skin_type',
    y='Counts',
    color='Counts',
    color_continuous_scale='Inferno_r',
    width=800,
    height=600,
    text_auto=True,
    title='Count of Skin Type That Suitable For The Product',
)
fig.update_layout(plot_bgcolor='#FFFCF2', xaxis_tickangle=15)
fig.show()


In [24]:
# Encode every distinct notable_effects combination as an integer label:
# make the column categorical, then store its category codes in `labels`
skincare = skincare.astype({'notable_effects': 'category'})
skincare['labels'] = skincare['notable_effects'].cat.codes
skincare.head()

Unnamed: 0.1,Unnamed: 0,product_href,product_name,product_type,brand,notable_effects,skintype,price,description,picture_src,labels,Sensitive,Combination,Oily,Dry,Normal
0,0,https://www.beautyhaul.com/product/detail/bubb...,ACWELL Bubble Free PH Balancing Cleanser,Face Wash,ACWELL,"Acne-Free, Pore-Care, Brightening, Anti-Aging",Oily,Rp 209.000,Mengangkat kotoran dan menghapus makeup dalam ...,https://www.beautyhaul.com/assets/uploads/prod...,18,0,0,1,0,0
1,1,https://www.sociolla.com/face-wash/62452-ph-ba...,ACWELL pH Balancing Soothing Cleansing Foam,Face Wash,ACWELL,"Soothing, Balancing","Normal, Dry, Combination",Rp 181.800,Membersihkan dan menenangkan kulit sensitif de...,https://images.soco.id/8f08ced0-344d-41f4-a15e...,127,0,1,0,1,1
2,2,https://www.sociolla.com/toner/15871-licorice-...,Acwell Licorice pH Balancing Cleansing Toner,Toner,ACWELL,"Soothing, Balancing","Normal, Dry, Oily, Combination, Sensitive",Rp 149.000,"Mengangkat sisa kotoran, debu, dan make up sek...","https://www.soco.id/cdn-cgi/image/w=73,format=...",127,1,1,1,1,1
3,3,https://www.beautyhaul.com/product/detail/aqua...,ACWELL Aquaseal Soothing Tonic,Toner,ACWELL,"Acne-Free, Pore-Care, Brightening, Anti-Aging",Oily,Rp 290.000,Pre-essence yang diformulasikan dengan ekstrak...,https://www.beautyhaul.com/assets/uploads/prod...,18,0,0,1,0,0
4,4,https://www.sociolla.com/essence/38023-licoric...,Licorice pH Balancing Essence Mist,Toner,ACWELL,"Brightening, Soothing","Normal, Dry",Rp 194.650,Essens mist dengan kelembapan tinggi yang memb...,"https://www.sociolla.com/cdn-cgi/image/w=425,f...",40,0,0,0,1,1


In [25]:
# Frequency of each notable_effects combination:
# absolute counts plus their share of all products in percent
counts_effect = skincare['notable_effects'].value_counts()
count_percentage = skincare['notable_effects'].value_counts(normalize=True) * 100
counts_effect_df = pd.DataFrame(
    {
        'Notable_Effects': counts_effect.index,
        'Counts': counts_effect.values,
        'Percent%': np.round(count_percentage.values, 2),
    }
)
counts_effect_df.head(10)

Unnamed: 0,Notable_Effects,Counts,Percent%
0,"Pore-Care, Brightening, Anti-Aging",149,12.31
1,Anti-Aging,104,8.6
2,"Acne-Free, Oil-Control, Pore-Care",83,6.86
3,"Hydrating, Moisturizing",70,5.79
4,"Moisturizing, Brightening, Black-Spot",69,5.7
5,"Acne-Free, Pore-Care, Brightening, Anti-Aging",61,5.04
6,"Brightening, Anti-Aging",57,4.71
7,"Hydrating, Soothing",53,4.38
8,UV-Protection,43,3.55
9,"Soothing, Balancing",41,3.39


In [26]:
# Pie chart of the five most common notable_effects combinations
top_5 = counts_effect_df.head(5)
fig = px.pie(
    top_5,
    values='Counts',
    color='Notable_Effects',
    names='Notable_Effects',
    color_discrete_sequence=['#99582A', '#F4ACB7', '#FFCCD5', '#FDFCDC', '#C6AC8F'],
)
fig.update_traces(
    textposition='inside',
    textfont=dict(color='black', size=15),
    textinfo='percent',
)
fig.update_layout(
    title=dict(
        text='Skin Care Product Notable Effects',
        y=0.95,
        x=0.5,
        xanchor='center',
        yanchor='top',
    )
)
fig.show()

In [27]:
# Drop rows that are exact copies of an earlier row. The duplicated() check
# earlier in the notebook printed 0, so this is a safeguard rather than a
# step that removes data.
skincare = skincare.drop_duplicates()

# Confirm that no fully duplicated rows remain
skincare.duplicated().sum()

0

In [28]:
# First five rows after the duplicate check
skincare.head(n=5)


Unnamed: 0.1,Unnamed: 0,product_href,product_name,product_type,brand,notable_effects,skintype,price,description,picture_src,labels,Sensitive,Combination,Oily,Dry,Normal
0,0,https://www.beautyhaul.com/product/detail/bubb...,ACWELL Bubble Free PH Balancing Cleanser,Face Wash,ACWELL,"Acne-Free, Pore-Care, Brightening, Anti-Aging",Oily,Rp 209.000,Mengangkat kotoran dan menghapus makeup dalam ...,https://www.beautyhaul.com/assets/uploads/prod...,18,0,0,1,0,0
1,1,https://www.sociolla.com/face-wash/62452-ph-ba...,ACWELL pH Balancing Soothing Cleansing Foam,Face Wash,ACWELL,"Soothing, Balancing","Normal, Dry, Combination",Rp 181.800,Membersihkan dan menenangkan kulit sensitif de...,https://images.soco.id/8f08ced0-344d-41f4-a15e...,127,0,1,0,1,1
2,2,https://www.sociolla.com/toner/15871-licorice-...,Acwell Licorice pH Balancing Cleansing Toner,Toner,ACWELL,"Soothing, Balancing","Normal, Dry, Oily, Combination, Sensitive",Rp 149.000,"Mengangkat sisa kotoran, debu, dan make up sek...","https://www.soco.id/cdn-cgi/image/w=73,format=...",127,1,1,1,1,1
3,3,https://www.beautyhaul.com/product/detail/aqua...,ACWELL Aquaseal Soothing Tonic,Toner,ACWELL,"Acne-Free, Pore-Care, Brightening, Anti-Aging",Oily,Rp 290.000,Pre-essence yang diformulasikan dengan ekstrak...,https://www.beautyhaul.com/assets/uploads/prod...,18,0,0,1,0,0
4,4,https://www.sociolla.com/essence/38023-licoric...,Licorice pH Balancing Essence Mist,Toner,ACWELL,"Brightening, Soothing","Normal, Dry",Rp 194.650,Essens mist dengan kelembapan tinggi yang memb...,"https://www.sociolla.com/cdn-cgi/image/w=425,f...",40,0,0,0,1,1


## TF-IDF Vectorizer
The TF-IDF vectorizer is used in the recommendation system to build a numerical representation of the important terms in each product's `notable_effects` text. We use the `TfidfVectorizer` class from the scikit-learn library.

In [29]:
# Content-based filtering: learn the TF-IDF vocabulary and IDF weights
# from the 'notable_effects' text.
# NOTE: this rebinds `tf`, which the imports cell bound to tensorflow.
tf = TfidfVectorizer().fit(skincare['notable_effects'])

# Vocabulary terms, positioned by their integer feature index
tf.get_feature_names_out()

array(['acne', 'aging', 'anti', 'balancing', 'barrier', 'black',
       'brightening', 'care', 'control', 'free', 'hydrating',
       'moisturizing', 'no', 'oil', 'pore', 'protection', 'refreshing',
       'skin', 'soothing', 'spot', 'uv', 'whitecast'], dtype=object)

In [30]:
# Fit the vectorizer and transform the notable_effects text into a TF-IDF matrix
tfidf_matrix = tf.fit_transform(skincare.notable_effects)

# (rows, columns) of the TF-IDF matrix
shape = tfidf_matrix.shape
shape

(1210, 22)

In [31]:
# Convert the TF-IDF matrix to dense form with todense() so its values can be inspected
tfidf_matrix.todense()

matrix([[0.44938623, 0.32725364, 0.32725364, ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.41603571, 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.42409154, 0.42409154, ..., 0.        , 0.        ,
         0.        ]])

In [32]:
# Preview the TF-IDF weights as a table: one row per product, one column per term.
# All term columns are shown in shuffled order together with 10 sampled products.
# random_state pins both samples so the preview is identical on every re-run;
# the column count is read from the matrix itself instead of a separate global.
pd.DataFrame(
    tfidf_matrix.todense(),
    columns=tf.get_feature_names_out(),
    index=skincare.product_name
).sample(tfidf_matrix.shape[1], axis=1, random_state=42).sample(10, axis=0, random_state=42)

Unnamed: 0_level_0,moisturizing,hydrating,oil,black,barrier,care,brightening,no,balancing,spot,...,whitecast,control,pore,refreshing,uv,soothing,aging,skin,free,anti
product_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SCARLETT WHITENING Brightly Ever After Night Cream,0.40144,0.0,0.0,0.608239,0.0,0.0,0.318128,0.0,0.0,0.606371,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Everwhite Hydrating Essence Toner,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.822681,0.0,...,0.0,0.0,0.0,0.0,0.0,0.568503,0.0,0.0,0.0,0.0
BREYLEE UV Sunscreen SPF 50+ - Krim Pelindung Wajah,0.0,0.0,0.0,0.0,0.0,0.0,0.364179,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.544741,0.0,0.370059,0.0,0.0,0.370059
The Aubree Centella + Greentea Power Cream,0.631427,0.775435,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Rayou x Rahasia Gadis Dailyscreen SPF 30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.608815,0.0,0.0,...,0.608815,0.0,0.0,0.0,0.359644,0.0,0.0,0.0,0.0,0.0
SENKA Perfect Whip Sakura,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,0.707107
Raiku Bright Radiance Morning Cream,0.0,0.0,0.0,0.0,0.0,0.482757,0.417353,0.0,0.0,0.0,...,0.0,0.0,0.482757,0.0,0.0,0.0,0.424092,0.0,0.0,0.424092
SENKA Facial Foam Perfect Whip,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,0.707107
WARDAH Nature Daily Aloe Hydramild Facial Wash 100 ml,0.648175,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.761492,0.0,0.0,0.0,0.0
SKEYNDOR Pure Cleansing Foam,0.0,0.0,0.489826,0.0,0.0,0.325307,0.0,0.0,0.0,0.0,...,0.0,0.489826,0.325307,0.0,0.0,0.0,0.0,0.0,0.393056,0.0


In [33]:
# Pairwise cosine similarity between all rows of the TF-IDF matrix,
# returned as a dense array
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(tfidf_matrix, dense_output=True)
cosine_sim

array([[1.        , 0.        , 0.        , ..., 0.37452125, 0.        ,
        0.77165804],
       [0.        , 1.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.37452125, 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.77165804, 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [34]:
# Wrap the similarity array in a DataFrame whose rows and columns are both
# labelled with the product names
cosine_sim_df = pd.DataFrame(cosine_sim, index=skincare['product_name'], columns=skincare['product_name'])

# Preview a 10 x 5 slice of the similarity matrix.
# random_state pins both samples so the preview is identical on every re-run.
cosine_sim_df.sample(5, axis=1, random_state=42).sample(10, axis=0, random_state=42)

product_name,Erha Perfect Shield Helios 30g - Daily Sunscreen,SKIN GAME Skin Barricade,BE THE SKIN Botanical Pore Serum,Somethinc Paket Atasi Kulit Berjerawat Anti Ribet - Bakuchiol Skinpair Serum,PURIVERA BOTANICAL Everlasting Tamanu Serum Oil
product_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MACARIA Sunscreen Spray,0.827184,0.0,0.0,0.0,0.0
WARDAH Nature Daily Aloe Hydramild Facial Wash 100 ml,0.0,0.0,0.0,0.0,0.0
Avoskin The Great Shield Sunscreen,0.677397,0.0,0.0,0.0,0.0
Dear Klairs Freshly Juiced Vitamin E Mask Miniature 15ml,0.0,0.0,0.0,0.0,0.0
Garnier Bright Complete Brightening Face Scrub,0.0,0.0,0.0,0.392521,0.0
HAPLE La Luna Anti-Aging Serum,0.561931,1.0,1.0,0.0,1.0
The Body Shop Himalayan Charcoal Purifying Face Wash,0.0,0.0,0.0,0.736544,0.0
HANASUI Serum Whitening Gold,0.0,0.0,0.0,0.0,0.0
Innisfree Brightening Pore Serum 30 mL,0.0,0.0,0.0,0.392521,0.0
Erha Truwhite Brightening Neck Cream,0.0,0.0,0.0,0.0,0.0


In [35]:
def skincare_recommendations(nama, similarity_data=cosine_sim_df, items=skincare[['product_name', 'notable_effects']], k=5):
    """Recommend up to ``k`` products that are most similar to ``nama``.

    Parameters
    ----------
    nama : str
        Product name to look up; must be a column label of ``similarity_data``.
    similarity_data : pandas.DataFrame
        Similarity matrix whose rows and columns are labelled with product
        names (rows and columns are expected to describe the same products).
    items : pandas.DataFrame
        Product attributes attached to the result. Must contain the column
        the similarity labels are named after (``product_name`` when the
        column axis has no name).
    k : int
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        At most ``k`` rows ordered from most to least similar, one row per
        distinct product name, never containing ``nama`` itself.

    Raises
    ------
    KeyError
        If ``nama`` is not a column of ``similarity_data``.
    """
    scores = similarity_data.loc[:, nama]

    # A product name that occurs more than once selects several columns;
    # keep the first one so the scores stay one-dimensional.
    if isinstance(scores, pd.DataFrame):
        scores = scores.iloc[:, 0]

    # Full, stable descending sort. The former argpartition call only put the
    # top k-1 positions in order while k+1 positions were read afterwards, so
    # the last recommendations could be arbitrary products.
    order = np.argsort(-scores.to_numpy(), kind='stable')
    ranked = similarity_data.index[order]

    # Remove the queried product, then collapse repeated names; the first
    # occurrence kept is the highest-scoring one because `ranked` is sorted.
    ranked = ranked[ranked != nama].drop_duplicates()

    # Name of the join column, taken from the similarity matrix labels
    key = similarity_data.columns.name or 'product_name'

    # One attribute row per product name, so the merge cannot multiply rows
    unique_items = items.drop_duplicates(subset=key)

    # An inner merge keeps the order of the left (ranked) keys
    candidates = pd.DataFrame({key: ranked})
    return candidates.merge(unique_items, on=key).head(max(k, 0))

In [36]:
# Show the catalogue row(s) of the product used as the query below
skincare.loc[skincare['product_name'] == 'ELSHE SKIN Radiant Supple Serum'].head()

Unnamed: 0.1,Unnamed: 0,product_href,product_name,product_type,brand,notable_effects,skintype,price,description,picture_src,labels,Sensitive,Combination,Oily,Dry,Normal
292,292,https://www.beautyhaul.com/product/detail/radi...,ELSHE SKIN Radiant Supple Serum,Serum,ELSHE SKIN,"Acne-Free, Brightening, Black-Spot","Normal, Dry, Oily, Combination, Sensitive",Rp 182.750,ElsheSkin Radiant Supple Serum atau Radiant Sk...,https://www.beautyhaul.com/assets/uploads/prod...,3,1,1,1,1,1


In [37]:
 # Get skin care product recommendations similar to ELSHE SKIN Radiant Supple Serum
skincare_recommendations("ELSHE SKIN Radiant Supple Serum")

Unnamed: 0,product_name,notable_effects
0,SOMETHINC Dark Spot Reducer Ampoule,"Acne-Free, Brightening, Black-Spot"
1,AVOSKIN YOUR SKIN BAE SERIES Niacinamide 12% +...,"Hydrating, Acne-Free, Brightening, Black-Spot"
2,Mineral Botanica Perfect Purifying Acne Night ...,"Moisturizing, Acne-Free, Black-Spot"
3,PURIVERA BOTANICALS Sugar Willow Serum - Fruit...,"Acne-Free, Pore-Care, Black-Spot"
4,BREYLEE Step 2 Pore Minimizer Serum - Pengecil...,"Acne-Free, Pore-Care, Black-Spot"
