In [None]:
# Cosmetic Ingredient Recommendation System

**Problem:** Choosing the right cosmetic product for dry/sensitive skin is hard due to complex ingredient lists.

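**Goal:** Build a content-based recommendation system from cosmetic ingredient data: one-hot encode each product's ingredient list, project the products onto a 2-D t-SNE map, and compare products that land close together.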

**Data Source:** 1,472 Sephora products with ingredient details, loaded from `datasets/cosmetics.csv`.


In [None]:
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
# Configure Bokeh so that show() renders plots inline in the notebook.
output_notebook()


In [None]:
# Load the product table.
df = pd.read_csv("datasets/cosmetics.csv")

# Only the last expression of a cell is rendered automatically, so the
# preview must be displayed explicitly; the fixed seed keeps the sampled
# rows identical across re-runs.
display(df.sample(5, random_state=42))

# Number of products per 'Label' value.
df['Label'].value_counts()


In [None]:
# Keep only the rows labelled 'Moisturizer'.
is_moisturizer = df['Label'] == 'Moisturizer'
moisturizers = df[is_moisturizer]

# Of those, keep the rows whose 'Dry' flag is 1 and renumber them from 0
# so row positions can be used as matrix row indices later on.
dry_flagged = moisturizers['Dry'] == 1
moisturizers_dry = moisturizers[dry_flagged].reset_index(drop=True)

moisturizers_dry.head()



In [None]:
# Tokenize every ingredient list: lower-case it and split on ', '.
corpus = [text.lower().split(', ') for text in moisturizers_dry['Ingredients']]

# Give each distinct ingredient a column index in order of first appearance.
ingredient_idx = {}
for product_tokens in corpus:
    for name in product_tokens:
        # The current vocabulary size is the next free index.
        ingredient_idx.setdefault(name, len(ingredient_idx))

# Next unused index, i.e. the number of distinct ingredients seen.
idx = len(ingredient_idx)




In [None]:
# Matrix dimensions: M products (rows) by N distinct ingredients (columns).
M, N = len(moisturizers_dry), len(ingredient_idx)

# Start from all zeros; the rows are filled in later by the one-hot encoder.
A = np.zeros(shape=(M, N))




In [None]:
def oh_encoder(tokens, index=None):
    """One-hot encode a list of ingredient tokens.

    Parameters
    ----------
    tokens : iterable of str
        Ingredient names of a single product.
    index : dict, optional
        Mapping from ingredient name to column position. Positions are
        expected to be the contiguous integers ``0 .. len(index) - 1``.
        Defaults to the notebook-level ``ingredient_idx``.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``len(index)`` holding 1 at the position
        of every token found in ``index`` and 0 elsewhere. Tokens missing
        from ``index`` are ignored; repeated tokens have no extra effect.
    """
    if index is None:
        index = ingredient_idx
    # Size the vector from the mapping itself so it can never disagree with
    # a stale, separately computed column count.
    x = np.zeros(len(index))
    for token in tokens:
        if token in index:
            x[index[token]] = 1
    return x


In [None]:
# Fill the matrix one product at a time: row i is the one-hot vector of
# the i-th tokenized ingredient list.
for i in range(len(corpus)):
    tokens = corpus[i]
    A[i, :] = oh_encoder(tokens)


In [None]:
# Reduce the one-hot ingredient matrix to two dimensions; the fixed
# random_state makes the embedding repeatable.
model = TSNE(random_state=42, learning_rate=200, n_components=2)
tsne_features = model.fit_transform(A)

# Transposing yields one array per embedding axis; store them as the
# 'X' and 'Y' columns of the product table.
moisturizers_dry['X'], moisturizers_dry['Y'] = tsne_features.T


In [None]:
# Wrap the product table (including the 'X'/'Y' coordinates) for Bokeh.
source = ColumnDataSource(data=moisturizers_dry)

# Tooltip shown on hover; each '@Column' is replaced by that column's value
# for the hovered point.
hover = HoverTool(tooltips=[
    ('Item', '@Name'),
    ('Brand', '@Brand'),
    ('Price', '$@Price'),
    ('Rank', '@Rank')
])

# Label both axes so the figure can be read on its own.
plot = figure(
    title="t-SNE Map of Moisturizers",
    x_axis_label="t-SNE 1",
    y_axis_label="t-SNE 2",
    tools=[hover],
    width=800,
    height=500,
)
plot.scatter('X', 'Y', source=source, size=8)

show(plot)



In [None]:

def find_by_name(products, fragment):
    """Return the rows of `products` whose 'Name' contains `fragment`.

    The comparison is a case-insensitive, literal substring match
    (``regex=False``), so characters such as '(' or '+' in `fragment` are
    not interpreted as regular-expression syntax. Rows with a missing name
    count as non-matches (``na=False``), which keeps the boolean mask free
    of NaN values.
    """
    mask = products['Name'].str.contains(fragment, case=False, regex=False, na=False)
    return products[mask]


product1 = find_by_name(moisturizers_dry, "Color Control Cushion")
product2 = find_by_name(moisturizers_dry, "BB Cushion Hydra")

# Show both products' ingredient lists for comparison.
display(product1[['Name', 'Ingredients']])
display(product2[['Name', 'Ingredients']])
