In [1]:
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE

# Load the cosmetics catalogue, preview five randomly sampled rows, and end
# the cell on the per-category product counts so they render as the output.
df = pd.read_csv("cosmetics.csv")
display(df.sample(5))
df["Label"].value_counts()

Unnamed: 0,Label,Brand,Name,Price,Rank,Ingredients,Combination,Dry,Normal,Oily,Sensitive
56,Moisturizer,ORIGINS,GinZing™ Energy-Boosting Gel Moisturizer,29,4.4,"Water, Methyl Trimethicone, Butylene Glycol, G...",1,1,1,1,1
759,Treatment,MURAD,Pure Skin® Clarifying Dietary Supplement,50,4.1,"Vitamin A, Vitamin C, Vitamin E, Vitamin B1, V...",1,0,0,1,0
1313,Sun protect,NEOGEN DERMALOGY,Day-Light Protection Sun Screen SPF 50 PA+++,30,4.6,"Water, Ethylhexyl Methoxycinnamate, Glycerin, ...",1,1,1,1,1
863,Face Mask,KIEHL'S SINCE 1851,Turmeric & Cranberry Seed Energizing Radiance ...,39,4.5,"Water, Kaolin, Sorbitol, Bentonite, Butylene G...",1,1,1,1,1
773,Treatment,MURAD,Rapid Collagen Infusion,84,4.2,"Water, Isodecyl Neopentanoate, Polymethylsilse...",0,0,0,0,0


Label
Moisturizer    298
Cleanser       281
Face Mask      266
Treatment      248
Eye cream      209
Sun protect    170
Name: count, dtype: int64

In [2]:
# Narrow the catalogue to moisturizers, then to those flagged for dry skin.
# The index is rebuilt from 0 so later cells can address rows by position.
moisturizers = df.loc[df["Label"] == "Moisturizer"]
moisturizers_dry = (
    moisturizers
    .loc[moisturizers["Dry"] == 1]
    .reset_index(drop=True)
)

In [3]:
# ingredient_idx maps each distinct (lower-cased) ingredient to the order in
# which it was first seen; c holds the tokenised ingredient list per product.
ingredient_idx = {}
c = []

for raw_ingredients in moisturizers_dry["Ingredients"]:
    tokens = raw_ingredients.lower().split(', ')
    c.append(tokens)
    for token in tokens:
        # A new ingredient receives the next free id, i.e. the current size
        # of the mapping; known ingredients keep the id they already have.
        ingredient_idx.setdefault(token, len(ingredient_idx))

# Next unused id == number of distinct ingredients collected above.
idx = len(ingredient_idx)

print("the index of Decyl oleate", ingredient_idx['decyl oleate'])

the index of Decyl oleate 25


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [5]:
# Reload the full catalogue so this section starts from the unfiltered data.
df = pd.read_csv('cosmetics.csv')

# Build one text document per product from its name and ingredient list.
# Missing values are replaced with "" *before* concatenating: string + NaN
# yields NaN, so filling afterwards would blank the whole document and throw
# away whichever of the two fields was actually present.
df['text'] = df['Name'].fillna("") + " " + df['Ingredients'].fillna("")
documents = df['text'].tolist()

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Create a TF-IDF vectorizer with scikit-learn's default settings
# (no custom tokenizer, stop-word list or n-gram range is configured here).
tfidf_vectorizer = TfidfVectorizer()

# Learn the vocabulary from `documents` and encode them in one step; the
# resulting matrix has one row per entry of `documents`, in the same order.
tfidf_matrix = tfidf_vectorizer.fit_transform(documents)


In [7]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between every pair of rows of tfidf_matrix:
# cosine_similarities[i][j] is the similarity of document i to document j.
# NOTE(review): this is a square n_documents x n_documents result — fine for a
# catalogue of this size, but confirm memory use before scaling up.
cosine_similarities = cosine_similarity(tfidf_matrix)


In [13]:
def get_recommendations(product_name, skin_type, df, cosine_similarities, top_n=5):
    """Recommend the products most similar to a named product for one skin type.

    Parameters
    ----------
    product_name : str
        Text to look for in ``df['Name']``. It is matched literally (not as a
        regular expression) and case-insensitively. The first matching row
        that is flagged for ``skin_type`` becomes the query product.
    skin_type : str
        One of "Combination", "Dry", "Normal", "Oily" or "Sensitive".
    df : pandas.DataFrame
        Product table providing 'Brand', 'Name', 'Price' and a column per
        skin type in which the value 1 marks a suitable product.
    cosine_similarities : 2-D array-like
        Pairwise similarity matrix; row/column ``k`` must describe the
        ``k``-th row of ``df`` *by position*.
    top_n : int, default 5
        Maximum number of recommendations; values <= 0 yield an empty list.

    Returns
    -------
    list of dict
        Up to ``top_n`` records with keys 'Brand', 'Name' and 'Price', ordered
        from most to least similar. Only products flagged for ``skin_type``
        are returned and the query product itself is never included. An empty
        list is returned (after printing a message) when the skin type is
        invalid, its column is missing, or no suitable product matches.
    """
    # Map skin types to column names
    skin_type_column = {
        "Combination": "Combination",
        "Dry": "Dry",
        "Normal": "Normal",
        "Oily": "Oily",
        "Sensitive": "Sensitive"
    }

    # Ensure the skin type exists in the mapping
    if skin_type not in skin_type_column:
        print(f"Invalid skin type: {skin_type}")
        return []

    # Positional True/False flags: is each product suitable for this skin type?
    try:
        suits_skin = (df[skin_type_column[skin_type]] == 1).tolist()
    except KeyError as e:
        print(f"Error: {e}. Check if the column names in your DataFrame match the ones in skin_type_column.")
        return []

    # regex=False: product names routinely contain '(', '+', '.' and similar
    # characters that must be matched literally. na=False: rows without a
    # name simply do not match instead of breaking the boolean mask.
    name_matches = df['Name'].str.contains(
        product_name, case=False, regex=False, na=False
    ).tolist()

    # Work with row positions rather than index labels, because both the
    # similarity matrix and ``df.iloc`` are addressed by position.
    product_pos = next(
        (pos for pos, (suits, matches) in enumerate(zip(suits_skin, name_matches))
         if suits and matches),
        None,
    )
    if product_pos is None:
        print("Product not found in filtered DataFrame.")
        return []

    # Rank every *other* product that suits the skin type by similarity to
    # the query. The query is excluded explicitly instead of assuming it sorts
    # first; sorted() is stable, so ties keep their catalogue order.
    ranked = sorted(
        (
            (pos, score)
            for pos, score in enumerate(cosine_similarities[product_pos])
            if suits_skin[pos] and pos != product_pos
        ),
        key=lambda item: item[1],
        reverse=True,
    )
    top_positions = [pos for pos, _ in ranked[:max(top_n, 0)]]

    # Retrieve recommended products
    recommended_products = df.iloc[top_positions]
    return recommended_products[['Brand', 'Name', 'Price']].to_dict(orient='records')


In [None]:
import json

In [17]:
# Ask for the query interactively, then look up the most similar products.
product_name = input("Enter the product name: ")
skin_type = input("Enter your skin type (Combination, Dry, Normal, Oily, Sensitive): ")
recommendations = get_recommendations(
    product_name,
    skin_type,
    df,
    cosine_similarities,
)

print("Recommended products:", recommendations)

# Save the same list as pretty-printed JSON in the working directory.
with open('recommendations_output.json', 'w') as output_file:
    output_file.write(json.dumps(recommendations, indent=4))

Recommended products: [{'Brand': 'SATURDAY SKIN', 'Name': 'Waterfall Glacier Water Cream', 'Price': 39}, {'Brand': 'SATURDAY SKIN', 'Name': 'Quench Intense Hydration Mask', 'Price': 6}, {'Brand': 'SATURDAY SKIN', 'Name': 'Spotlight Brightening Mask', 'Price': 6}, {'Brand': 'SATURDAY SKIN', 'Name': 'Featherweight Daily Moisturizing Cream', 'Price': 49}, {'Brand': 'SATURDAY SKIN', 'Name': 'Wide Awake Brightening Eye Cream', 'Price': 46}]
