In [41]:
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE

# Load the cosmetics catalogue; each row is one product with its label
# (category), brand, price, rank, ingredient string and skin-type flags.
df = pd.read_csv("datasets/cosmetics.csv")

# Preview a few rows. The seed is fixed (same value the t-SNE cell uses) so the
# preview is identical on every Restart & Run All instead of changing per run.
display(df.sample(5, random_state=42))

# Number of products per category.
print(df['Label'].value_counts())


Unnamed: 0,Label,Brand,Name,Price,Rank,Ingredients,Combination,Dry,Normal,Oily,Sensitive
1140,Eye cream,ESTÉE LAUDER,Advanced Night Repair Eye Serum Synchronized C...,66,3.5,Advanced Night Rp Eye Sr Syn Cmpii Division: E...,1,1,1,1,1
218,Moisturizer,JOSIE MARAN,Argan Cleansing Oil,32,4.1,"Helianthus Annuus (Sunflower) Seed Oil**, Poly...",1,1,1,1,1
367,Cleanser,LANCER,The Method: Polish,75,4.6,"Butylene Glycol, Sodium Bicarbonate, PEG-8, Ma...",1,0,1,0,0
1362,Sun protect,SUPERGOOP!,Supergoop! x Milly Defense Refresh Setting Mis...,28,3.7,Visit the Supergoop! boutique,1,1,1,1,1
391,Cleanser,IT COSMETICS,Confidence in a Cleanser™ Skin-Transforming Hy...,28,4.7,"Water, Disodium Cocoamphodiacetate, Glycerin, ...",1,1,1,1,1


Label
Moisturizer    298
Cleanser       281
Face Mask      266
Treatment      248
Eye cream      209
Sun protect    170
Name: count, dtype: int64


In [42]:
# Keep only products labelled as moisturizers.
moisturizers = df.loc[df['Label'].eq('Moisturizer')]

# Of those, keep the ones flagged for dry skin and renumber the rows 0..n-1 so
# that row positions line up with the matrices built from this frame later on.
moisturizers_dry = (
    moisturizers
    .loc[moisturizers['Dry'].eq(1)]
    .reset_index(drop=True)
)


In [43]:
# Tokenise every product's ingredient string: lower-case it and split on ", ".
# corpus[i] is the token list for row i of moisturizers_dry.
corpus = [
    ingredients.lower().split(', ')
    for ingredients in moisturizers_dry['Ingredients']
]

# Map each distinct ingredient token to a column index, numbered in order of
# first appearance. The dict's current size is always the next free index.
ingredient_idx = {}
for tokens in corpus:
    for ingredient in tokens:
        ingredient_idx.setdefault(ingredient, len(ingredient_idx))

# Number of distinct ingredients seen (the next unused column index).
idx = len(ingredient_idx)


In [44]:
# Matrix dimensions: one row per product, one column per distinct ingredient.
M, N = len(corpus), len(ingredient_idx)

# Product-by-ingredient matrix, all zeros until the encoding loop fills it in.
A = np.zeros(shape=(M, N))


In [45]:
def oh_encoder(tokens, index=None, size=None):
    """Encode a list of ingredient tokens as a 0/1 indicator vector.

    Parameters
    ----------
    tokens : iterable of str
        Ingredient tokens of one product. Repeated tokens are harmless.
    index : dict, optional
        Mapping from token to column position. Defaults to the notebook-level
        ``ingredient_idx`` so existing ``oh_encoder(tokens)`` calls are unchanged.
    size : int, optional
        Length of the returned vector. Defaults to the notebook-level ``N`` when
        ``index`` is omitted, otherwise to ``len(index)``.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``size`` with 1 at the position of every
        token in ``tokens`` and 0 elsewhere.

    Raises
    ------
    KeyError
        If a token is not present in ``index``.
    """
    if index is None:
        # Original behaviour: rely on the vocabulary built earlier in the notebook.
        index = ingredient_idx
        default_size = N
    else:
        default_size = len(index)
    if size is None:
        size = default_size

    x = np.zeros(size)
    for ing in tokens:
        x[index[ing]] = 1
    return x


In [46]:
# Fill the product-by-ingredient matrix: row i becomes the indicator vector of
# the i-th product's ingredient tokens.
for i, tokens in enumerate(corpus):
    A[i] = oh_encoder(tokens)


In [47]:
import os
from sklearn.manifold import TSNE

# Workaround for an OpenBLAS threading issue: restrict OpenMP to one thread.
# NOTE(review): numpy and sklearn are already imported by an earlier cell, so
# their thread pools may be initialised before this runs. Confirm it takes
# effect, or move this assignment above the first numpy import.
os.environ["OMP_NUM_THREADS"] = "1"

# Embed the product-by-ingredient one-hot matrix `A` built in the cells above.
# This cell previously overwrote `A` with the five skin-type flag columns; after
# the Dry == 1 filter one of them is constant, leaving at most 16 distinct rows,
# so nearest neighbours in the embedding said nothing about ingredients.
# Row i of `A` corresponds to row i of `moisturizers_dry` (its index was reset
# before the corpus was built); fail loudly if the two ever get out of step.
if A.shape[0] != len(moisturizers_dry):
    raise ValueError(
        "Feature matrix A has %d rows but moisturizers_dry has %d; "
        "re-run the cells that build the corpus and A."
        % (A.shape[0], len(moisturizers_dry))
    )

# Reduce the ingredient space to 2-D; random_state fixes the embedding.
model = TSNE(n_components=2, learning_rate=200, random_state=42)
tsne_features = model.fit_transform(A)

# Store the 2-D coordinates next to the product they belong to.
moisturizers_dry['X'] = tsne_features[:, 0]
moisturizers_dry['Y'] = tsne_features[:, 1]




In [48]:
# Recommend the five products closest to a chosen one in the t-SNE plane.

from sklearn.metrics.pairwise import euclidean_distances

# Product to look up; must match a 'Name' value exactly.
# Only dry-skin moisturizers are searched, so names of products from other
# categories are reported as not found.
product_name = "Clear Genius Clarifying Gel Cleanser"

# Boolean array marking the rows whose name equals the query.
name_matches = (moisturizers_dry['Name'] == product_name).values

if name_matches.any():
    # Row POSITION (not index label) of the first match. Everything below is
    # positional (.iloc / array indexing), so mixing in a label from `.index`
    # would pick the wrong row whenever the index is not 0..n-1.
    product_index = int(name_matches.argmax())

    # Euclidean distance from the selected product to every product.
    coords = moisturizers_dry[['X', 'Y']].values
    distances = euclidean_distances(coords, coords[[product_index]]).reshape(-1)

    # Rank by distance, drop the selected product explicitly, keep the top 5.
    # Slicing [1:6] assumed the query sorts first, which does not hold when
    # another product has identical coordinates (distance 0).
    ranked = distances.argsort(kind="stable")
    similar_indices = ranked[ranked != product_index][:5]

    # Display recommended products.
    recommendations = moisturizers_dry.iloc[similar_indices][['Name', 'Brand', 'Price', 'Ingredients']]
    print(f"🔁 Recommended Products similar to: {product_name}")
    display(recommendations)

else:
    print("❌ Product not found! Check the spelling.")



❌ Product not found! Check the spelling.


In [49]:
# Look up a product by (partial) name among the dry-skin moisturizers and
# print its details.
product_name = "Polypeptide Cream"  # change to any product name from the dataset

# Case-insensitive substring match.
# regex=False: treat the name literally, so characters such as "+", "(" or "!"
#              in a product name are not interpreted as a regular expression.
# na=False:    rows with a missing name count as "no match" rather than
#              producing NaN in the mask, which boolean indexing rejects.
name_mask = moisturizers_dry['Name'].str.contains(
    product_name, case=False, regex=False, na=False
)
recommended_product = moisturizers_dry[name_mask]

if not recommended_product.empty:
    print("⭐️ Recommended Product Based on Dry Skin Analysis:\n")
    print(recommended_product[['Brand', 'Name', 'Price', 'Rank', 'Ingredients']])
else:
    print("⚠️ Product not found. Please check the name spelling.")



⭐️ Recommended Product Based on Dry Skin Analysis:

            Brand                        Name  Price  Rank  \
2  DRUNK ELEPHANT  Protini™ Polypeptide Cream     68   4.4   

                                         Ingredients  
2  Water, Dicaprylyl Carbonate, Glycerin, Ceteary...  


In [50]:
# Closing recap of the notebook's steps, printed one line at a time.
# NOTE(review): no Bokeh plot is visible in this part of the notebook; confirm
# that Step 3 is still accurate.
summary_lines = (
    "✅ Project Summary:\n",
    "Step 1: Filtered products suitable for Dry skin type.",
    "Step 2: Applied t-SNE to reduce high-dimensional features to 2D.",
    "Step 3: Visualized the results using Bokeh interactive plot.",
    "Step 4: Displayed product details for a given input name.",
)
for line in summary_lines:
    print(line)



✅ Project Summary:

Step 1: Filtered products suitable for Dry skin type.
Step 2: Applied t-SNE to reduce high-dimensional features to 2D.
Step 3: Visualized the results using Bokeh interactive plot.
Step 4: Displayed product details for a given input name.
