In [None]:
# Core data-analysis and plotting libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

# Split the data into a train set and a test set
from sklearn.model_selection import train_test_split

# Standardization
from sklearn.preprocessing import StandardScaler

# Search for the model with the best hyperparameter settings
from sklearn.model_selection import GridSearchCV

# Random forest classifier (an ensemble of decision trees)
from sklearn.ensemble import RandomForestClassifier

# Evaluate model performance
from sklearn.metrics import classification_report

# Visualization helpers for decision-tree models
from sklearn import tree

# Save the model for deployment
import joblib
import pickle

# Suppress warning messages.
# NOTE(review): this silences every warning for the whole notebook, so
# library warnings raised by later cells will not be shown.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the raw skincare survey data and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="Data_Skincare.csv")
df.head(n=5)

Unnamed: 0,Jenis Kelamin,Usia/Umur,Jenis Kulit,Masalah Kulit,Harga,Produk/Merk,Jenis Skincare
0,Perempuan,20,Berminyak,Komedo,Sedang,MsGlow,Day Cream
1,Perempuan,22,Kombinasi,Dark Spot (Bekas Jerawat),Murah,Skintific,Night Cream
2,Laki-Laki,21,Normal,Komedo,Sedang,Skintific,Moisturizer
3,Perempuan,21,Berminyak,Kusam,Murah,Skintific,Sun Protection
4,Perempuan,21,Normal,Dark Spot (Bekas Jerawat),Sedang,Acnes,Serum & Essence


In [None]:
# Print the distinct labels of each text column, one array per column.
for column_name in ('Jenis Kulit', 'Masalah Kulit', 'Produk/Merk', 'Jenis Skincare'):
    print(df[column_name].unique())

['Berminyak' 'Kombinasi' 'Normal' 'Kering']
['Komedo' 'Dark Spot (Bekas Jerawat)' 'Kusam' 'Jerawat' 'Beruntusan'
 'Kerutan' 'Dark Circle (Mata Panda)' 'Flek Hitam' 'Milia']
['MsGlow' 'Skintific' 'Acnes' 'Wardah' 'Nivea' 'Avoskin' 'Garnier'
 'Hanasui' 'Benings' 'BioAqua' 'Biore' 'Ponds' 'Loreal' 'Scarlett' 'Emina'
 'Himalaya' 'Mustika Ratu' 'Cetaphil' 'Clear & Clean' 'Safi' 'WhiteLab'
 'Citra' 'Azarine' 'St.Ives']
['Day Cream' 'Night Cream' 'Moisturizer' 'Sun Protection'
 'Serum & Essence' 'Acne Treatment' 'Facial Wash' 'Face Oil'
 'Eye Treatment' 'Toner' 'Scrub & Exfoliator' 'Sleeping Mask' 'Mask Sheet'
 'Peeling']


In [None]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

Jenis Kelamin     0
Usia/Umur         0
Jenis Kulit       0
Masalah Kulit     0
Harga             0
Produk/Merk       0
Jenis Skincare    0
dtype: int64

In [None]:
# Integer codes for every categorical feature column.
# The target column 'Jenis Skincare' is intentionally left as text labels so
# the classifier predicts readable skincare-type names.
category_codes = {
    'Jenis Kelamin': {'Perempuan': 0, 'Laki-Laki': 1},
    'Jenis Kulit': {'Normal': 0, 'Berminyak': 1, 'Kering': 2, 'Kombinasi': 3},
    # This notebook addresses the price column as 'Harga ' (trailing space).
    # Labels are stripped before mapping, so 'Mahal' and 'Mahal ' both match.
    'Harga ': {'Murah': 0, 'Sedang': 1, 'Mahal': 2},
    'Masalah Kulit': {
        'Jerawat': 0,
        'Komedo': 1,
        'Kusam': 2,
        'Beruntusan': 3,
        'Flek Hitam': 4,
        'Milia': 5,
        'Dark Circle (Mata Panda)': 6,
        'Kerutan': 7,
        'Dark Spot (Bekas Jerawat)': 8,
    },
    'Produk/Merk': {
        'Emina': 0,
        'Wardah': 1,
        'Sari Ayu': 2,
        'Biore': 3,
        'Nivea': 4,
        'Mustika Ratu': 5,
        'Safi': 6,
        'Skintific': 7,
        'Cetaphil': 8,
        'Garnier': 9,
        'Ponds': 10,
        'Loreal': 11,
        'Scarlett': 12,
        'Clear & Clean': 13,
        'Acnes': 14,
        'Himalaya': 15,
        'Citra': 16,
        'BioAqua': 17,
        'MsGlow': 18,
        'Benings': 19,
        'St.Ives': 20,
        'Azarine': 21,
        'Avoskin': 22,
        'WhiteLab': 23,
        'Hanasui': 24,
    },
}

for column, codes in category_codes.items():
    # Normalise stray whitespace so labels such as 'Mahal ' still match.
    labels = df[column].astype(str).str.strip()

    # Series.map turns unmapped labels into NaN without complaint; fail loudly
    # instead. This also catches an accidental second run of this cell, where
    # the column already holds integer codes rather than text labels.
    unknown = sorted(set(labels) - set(codes))
    if unknown:
        raise ValueError(
            f"Column {column!r} contains values without a code: {unknown}. "
            "If this cell was already executed, reload df before re-running it."
        )

    df[column] = labels.map(codes)

df

Unnamed: 0,Jenis Kelamin,Usia/Umur,Jenis Kulit,Masalah Kulit,Harga,Produk/Merk,Jenis Skincare
0,0,20,1,1,1,18,Day Cream
1,0,22,3,8,0,7,Night Cream
2,1,21,0,1,1,7,Moisturizer
3,0,21,1,2,0,7,Sun Protection
4,0,21,0,8,1,14,Serum & Essence
...,...,...,...,...,...,...,...
129,0,35,2,6,1,23,Facial Wash
130,1,21,1,0,1,9,Moisturizer
131,0,21,0,1,1,23,Sun Protection
132,0,21,3,0,1,7,Acne Treatment


In [None]:
# Target: the skincare-type labels. Features: every remaining column.
y = df.loc[:, 'Jenis Skincare'].values
X = df.drop(columns=['Jenis Skincare']).values

In [None]:
from imblearn.over_sampling import RandomOverSampler

# Resample the full data set with a fixed seed so the result is repeatable.
ovr = RandomOverSampler(random_state=42)
X_res, y_res = ovr.fit_resample(X=X, y=y)

In [None]:
# Split the ORIGINAL rows first, so the test set holds only real samples the
# model has never seen. Splitting after oversampling places copies of the same
# row in both train and test, which leaks information and inflates the scores.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Balance the classes in the training portion only; the test set keeps its
# original class distribution.
X_train, y_train = RandomOverSampler(random_state=42).fit_resample(X_train, y_train)

In [None]:
# Random forest with only the non-default hyperparameters spelled out; every
# argument omitted here was previously passed with its default value.
model = RandomForestClassifier(
    n_estimators=150,
    criterion='entropy',
    max_depth=5,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features=1.0,
    min_impurity_decrease=0.0002,
    bootstrap=False,
    class_weight={},
    n_jobs=-1,
    random_state=123,
)
model.fit(X_train, y_train)

RandomForestClassifier(bootstrap=False, class_weight={}, criterion='entropy',
                       max_depth=5, max_features=1.0,
                       min_impurity_decrease=0.0002, min_samples_leaf=5,
                       min_samples_split=10, n_estimators=150, n_jobs=-1,
                       random_state=123)

In [None]:
# Predict the held-out rows and print per-class precision / recall / F1.
y_pred = model.predict(X_test)
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

                    precision    recall  f1-score   support

    Acne Treatment       0.25      0.50      0.33         4
         Day Cream       0.13      0.50      0.21         4
     Eye Treatment       0.67      1.00      0.80         6
          Face Oil       1.00      0.50      0.67         2
       Facial Wash       1.00      0.33      0.50         9
        Mask Sheet       0.71      1.00      0.83         5
       Moisturizer       0.00      0.00      0.00         7
       Night Cream       1.00      0.50      0.67        10
           Peeling       1.00      1.00      1.00         5
Scrub & Exfoliator       0.20      1.00      0.33         6
   Serum & Essence       0.00      0.00      0.00        11
     Sleeping Mask       0.75      0.25      0.38        12
    Sun Protection       0.00      0.00      0.00         7
             Toner       0.71      0.50      0.59        10

          accuracy                           0.44        98
         macro avg       0.53      0.5

# Recommender System

In [None]:
# Reload the raw data and keep only the columns the recommender needs,
# renamed so they can be accessed as attributes (df_rec.Merk, df_rec.JenisSkincare).
df_rec = (
    pd.read_csv("Data_Skincare.csv")
    .drop(columns=['Jenis Kelamin', 'Usia/Umur', 'Jenis Kulit', 'Harga '])
    .rename(columns={'Jenis Skincare': 'JenisSkincare', 'Produk/Merk': 'Merk'})
)
df_rec

Unnamed: 0,Masalah Kulit,Merk,JenisSkincare
0,Komedo,MsGlow,Day Cream
1,Dark Spot (Bekas Jerawat),Skintific,Night Cream
2,Komedo,Skintific,Moisturizer
3,Kusam,Skintific,Sun Protection
4,Dark Spot (Bekas Jerawat),Acnes,Serum & Essence
...,...,...,...
129,Dark Circle (Mata Panda),WhiteLab,Facial Wash
130,Jerawat,Garnier,Moisturizer
131,Komedo,WhiteLab,Sun Protection
132,Jerawat,Skintific,Acne Treatment


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Inisialisasi objek tfidf
tfidf = TfidfVectorizer(max_features=5000)

# Transform data
vectorized_data = tfidf.fit_transform(df_rec['Masalah Kulit'].values)

In [None]:
# Display the sparse TF-IDF matrix summary (shape and stored-element count).
vectorized_data

<134x14 sparse matrix of type '<class 'numpy.float64'>'
	with 258 stored elements in Compressed Sparse Row format>

In [None]:
# Dense copy of the TF-IDF matrix, one row per df_rec row, sharing df_rec's index.
vectorized_dataframe = pd.DataFrame(
    data=vectorized_data.toarray(),
    index=df_rec.index.tolist(),
)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Pairwise cosine similarity between the TF-IDF rows: similarity[i][j] compares
# the skin-problem text of row i with that of row j.
similarity = cosine_similarity(vectorized_dataframe)

In [None]:
def recommendation(position1, position2, top_n=9):
    """Print and return skincare types and brands related to the given pair.

    For each argument, the first row of ``df_rec`` that matches it is taken as
    an anchor. The ``top_n`` rows most similar to the anchor, according to the
    precomputed ``similarity`` matrix, are collected and their values are
    reported without duplicates, most similar first.

    Args:
        position1: Skincare type to look up in ``df_rec['JenisSkincare']``.
        position2: Brand to look up in ``df_rec['Merk']``.
        top_n: Number of nearest rows inspected per anchor (default 9, the
            number the notebook originally used).

    Returns:
        A tuple ``(skincare_types, brands)`` of lists of strings. A list is
        empty when the corresponding argument does not occur in ``df_rec``.
    """
    def _related(lookup_column, lookup_value, result_column):
        """Return distinct result_column values of the rows nearest the first match."""
        # Work with row positions: `similarity` is indexed by position, which
        # only coincides with index labels for a default RangeIndex.
        hits = np.flatnonzero((df_rec[lookup_column] == lookup_value).to_numpy())
        if hits.size == 0:
            return []
        anchor = int(hits[0])

        ranked = sorted(enumerate(similarity[anchor]), key=lambda pair: pair[1], reverse=True)
        # Exclude the anchor explicitly: with tied scores it is not guaranteed
        # to be the first entry of the ranking.
        nearest = [row for row, _ in ranked if row != anchor][:top_n]

        # dict.fromkeys removes duplicates while keeping the ranking order.
        return list(dict.fromkeys(df_rec.iloc[nearest][result_column]))

    skincare_types = _related('JenisSkincare', position1, 'JenisSkincare')
    brands = _related('Merk', position2, 'Merk')

    skincare_text = ', '.join(map(str, skincare_types)) or '-'
    brand_text = ', '.join(map(str, brands)) or '-'
    print(f'recomendation Skin Care lain: {skincare_text} | Merk: {brand_text}', end='')

    return skincare_types, brands

# Interface Preparation

In [None]:
def dept_con(ProdukMerk):
  """Convert a brand name into the integer code used to train the model.

  The codes mirror the 'Produk/Merk' label encoding applied to the training
  data, so a brand entered in the interface is represented exactly as the
  model saw it during fitting.

  Args:
    ProdukMerk: Brand name, e.g. 'MsGlow'.

  Returns:
    The integer code of the brand.

  Raises:
    ValueError: If the brand has no code.
  """
  brand_codes = {
    'Emina': 0,
    'Wardah': 1,
    'Sari Ayu': 2,
    'Biore': 3,
    'Nivea': 4,
    'Mustika Ratu': 5,
    'Safi': 6,
    'Skintific': 7,
    'Cetaphil': 8,
    'Garnier': 9,
    'Ponds': 10,
    'Loreal': 11,
    'Scarlett': 12,
    'Clear & Clean': 13,
    'Acnes': 14,
    'Himalaya': 15,
    'Citra': 16,
    'BioAqua': 17,
    'MsGlow': 18,
    'Benings': 19,
    'St.Ives': 20,
    'Azarine': 21,
    'Avoskin': 22,
    'WhiteLab': 23,
    'Hanasui': 24,
  }
  if ProdukMerk not in brand_codes:
    # Fail with a clear message instead of an UnboundLocalError on `result`.
    raise ValueError(f'Unknown brand: {ProdukMerk!r}')
  return brand_codes[ProdukMerk]

In [None]:
def Jenis_Kelamin(Sex):
  """Convert a gender label into the integer code used to train the model.

  The codes follow the 'Jenis Kelamin' label encoding of the training data:
  'Perempuan' is 0 and 'Laki-Laki' is 1.

  Args:
    Sex: Either 'Perempuan' or 'Laki-Laki'.

  Returns:
    The integer code of the gender.

  Raises:
    ValueError: If the label is not one of the two known values.
  """
  gender_codes = {'Perempuan': 0, 'Laki-Laki': 1}
  if Sex not in gender_codes:
    raise ValueError(f'Unknown gender: {Sex!r}')
  return gender_codes[Sex]

# User Interface

In [None]:
# New input data. The numeric fields use the label encoding of the training data:
#   JenisKelamin : 0 = Perempuan, 1 = Laki-Laki
#   JenisKulit   : 0 = Normal, 1 = Berminyak, 2 = Kering, 3 = Kombinasi
#   MasalahKulit : 0 = Jerawat, 1 = Komedo, 2 = Kusam, 3 = Beruntusan, 4 = Flek Hitam,
#                  5 = Milia, 6 = Dark Circle (Mata Panda), 7 = Kerutan,
#                  8 = Dark Spot (Bekas Jerawat)
#   Harga        : 0 = Murah, 1 = Sedang, 2 = Mahal
JenisKelamin = 0#@param [0, 1]{type:"raw"}
Usia = 22#@param {type:"number"}
JenisKulit = 1#@param [0, 1, 2, 3]{type:"raw"}
MasalahKulit = 4#@param [0, 1, 2, 3, 4, 5, 6, 7, 8]{type:"raw"}
Harga = 0#@param [0, 1, 2]{type:"raw"}
Merk = 'MsGlow' #@param ['MsGlow','Skintific','Acnes','Wardah','Nivea','Avoskin','Garnier','Hanasui','Benings','BioAqua','Biore','Ponds','Loreal','Scarlett','Emina','Himalaya','Mustika Ratu','Cetaphil','Clear & Clean','Safi','WhiteLab','Citra','Azarine','St.Ives']{type:"string"}
ProdukMerk = dept_con(Merk)

# Single-row input, in the same column order as the training features
data = [[JenisKelamin,Usia,JenisKulit,MasalahKulit,Harga,ProdukMerk]]

# Create the pandas DataFrame
New_Data = pd.DataFrame(data, columns=['Jenis Kelamin','Usia/Umur','Jenis Kulit','Masalah Kulit','Harga ','Produk/Merk'])

# The model was fitted on a plain NumPy array, so predict on an array as well
# to keep the input type consistent with training.
Prediction = model.predict(New_Data.values)
pred = Prediction
rec_str = str(pred.item())


print("\n")
print(f'Jenis Skincare yang cocok adalah: {rec_str} {Merk}', end='')
print("\n")
rec = recommendation(rec_str,Merk)




Jenis Skincare yang cocok adalah: Scrub & Exfoliator Emina

recomendation Skin Care lain: Night Cream Emina

# Save Model

In [None]:
# Persist the trained model. The context manager guarantees the file is
# flushed and closed even if pickling fails part-way.
with open('model_ds.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)