In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Load the cleaned skincare catalogue into the module-level frame `df`, which
# the cells below read from.
# NOTE(review): absolute '/content/...' path — presumably a Colab runtime
# upload; confirm the file is present before running.
df = pd.read_csv(filepath_or_buffer='/content/skincare_products_clean.csv')

In [None]:
# Structural overview of the loaded frame, one item per line:
# dimensions, column labels, per-column dtypes, then the first rows.
print(df.shape, df.columns, df.dtypes, df.head(), sep='\n')

(1138, 5)
Index(['product_name', 'product_url', 'product_type', 'clean_ingreds',
       'price'],
      dtype='object')
product_name     object
product_url      object
product_type     object
clean_ingreds    object
price            object
dtype: object
                                        product_name  \
0  The Ordinary Natural Moisturising Factors + HA...   
1      CeraVe Facial Moisturising Lotion SPF 25 52ml   
2  The Ordinary Hyaluronic Acid 2% + B5 Hydration...   
3          AMELIORATE Transforming Body Lotion 200ml   
4                     CeraVe Moisturising Cream 454g   

                                         product_url product_type  \
0  https://www.lookfantastic.com/the-ordinary-nat...  Moisturiser   
1  https://www.lookfantastic.com/cerave-facial-mo...  Moisturiser   
2  https://www.lookfantastic.com/the-ordinary-hya...  Moisturiser   
3  https://www.lookfantastic.com/ameliorate-trans...  Moisturiser   
4  https://www.lookfantastic.com/cerave-moisturis...  Moisturise

In [None]:
def recommend_products(clean_ingreds, product_type=None, price=None, top_n=5):
    """Return the products whose ingredient text is most similar to a query.

    Ingredient strings from the module-level ``df`` are vectorised with
    TF-IDF and ranked by cosine similarity against ``clean_ingreds``.
    Optional filters are applied to the *whole* ranked catalogue before the
    result is truncated, so a filtered call still returns up to ``top_n``
    rows instead of whatever happens to survive from the unfiltered top 5.

    Parameters
    ----------
    clean_ingreds : str
        Query ingredient text, e.g. ``'sodium hyaluronate, panthenol'``.
    product_type : str, optional
        Keep only rows whose ``product_type`` equals this value. The
        comparison ignores case and surrounding whitespace.
    price : sequence of two numbers, optional
        Inclusive ``(low, high)`` bounds. The ``price`` column is stored as
        text (e.g. ``'£16.00'``), so it is parsed to a number first; rows
        whose price cannot be parsed are excluded when this filter is used.
    top_n : int, default 5
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``df``, most similar first.
    """
    vectorizer = TfidfVectorizer()
    # Missing ingredient cells would make the vectoriser raise; treat them
    # as empty documents (they then score zero similarity).
    ingredient_text = df['clean_ingreds'].fillna('').astype(str)
    ingredient_vectors = vectorizer.fit_transform(ingredient_text)
    input_vector = vectorizer.transform([clean_ingreds])
    similarities = cosine_similarity(input_vector, ingredient_vectors)[0]

    # Row positions ordered from most to least similar. The first five are
    # exactly what ``argsort()[-5:][::-1]`` would yield, so unfiltered calls
    # keep their previous results.
    ranked = similarities.argsort()[::-1]

    if product_type:
        wanted_type = str(product_type).strip().lower()
        matches_type = (
            df['product_type'].astype(str).str.strip().str.lower() == wanted_type
        ).to_numpy()
        ranked = ranked[matches_type[ranked]]

    if price:
        # Strip currency symbols / thousands separators before converting;
        # comparing the raw strings with numbers would raise TypeError.
        numeric_price = pd.to_numeric(
            df['price'].astype(str).str.replace(r'[^0-9.]', '', regex=True),
            errors='coerce',
        )
        in_range = numeric_price.between(price[0], price[1]).to_numpy()
        ranked = ranked[in_range[ranked]]

    return df.iloc[ranked[:max(top_n, 0)]]

In [None]:
# Show the distinct ingredient entries. Each value is a single string that
# looks like a bracketed, quoted list — not an actual Python list.
print(df.clean_ingreds.unique())

["['capric triglyceride', 'cetyl alcohol', 'propanediol', 'stearyl alcohol', 'glycerin', 'sodium hyaluronate', 'arganine', 'aspartic acid', 'glycine', 'alanine', 'serine', 'valine', 'isoleucine', 'proline', 'threonine', 'histidine', 'phenylalanine', 'glucose', 'maltose', 'fructose', 'trehalose', 'sodium pca', 'pca', 'sodium lactate', 'urea', 'allantoin', 'linoleic acid', 'oleic acid', 'phytosteryl canola glycerides', 'palmitic acid', 'stearic acid', 'lecithin', 'triolein', 'tocopherol', 'carbomer', 'isoceteth-20', 'polysorbate 60', 'sodium chloride', 'citric acid', 'trisodium ethylenediamine disuccinate', 'pentylene glycol', 'triethanolamine', 'sodium hydroxide', 'phenoxyethanol', 'chlorphenesin']"
 "['homosalate', 'glycerin', 'octocrylene', 'ethylhexyl', 'salicylate', 'niacinamide', 'silica', 'butyl methoxydibenzoylmethane', 'dimethicon', 'cetearyl alcohol', 'peg-100 stearate', 'glyceryl stearate', 'phenoxyethanol', 'stearic acid', 'behentrimonium methosulfate', 'caprylyl glycol', 'pa

In [None]:
# Example query inputs for recommend_products.
# NOTE(review): 'sodium hyaluronate' is listed twice, which doubles its term
# count in the query — confirm this weighting is intended.
clean_ingreds = 'sodium hyaluronate, sodium hyaluronate, panthenol'
# Spelled as it appears in the dataset's product_type column ('Moisturiser');
# the previous 'moisturizer' could never match a row.
product_type = 'Moisturiser'
# Inclusive (low, high) price bounds.
price = (20, 50)

In [None]:
# Rank the catalogue against the query ingredients alone; no product-type or
# price filter is passed in this call.
recommended_products = recommend_products(clean_ingreds=clean_ingreds)
print(recommended_products.loc[:, ['product_name', 'product_url', 'price']])

                                          product_name  \
619        La Roche-Posay Hydraphase Intense Eyes 15ml   
128                         Medik8 Hydr8 B5 Serum 30ml   
541   FOREO UFO Activated Masks - Make My Day (7 Pack)   
213  FOREO 'Serum Serum Serum' Micro-Capsule Youth ...   
121  The Ordinary Hyaluronic Acid 2% + B5 Supersize...   

                                           product_url   price  
619  https://www.lookfantastic.com/la-roche-posay-h...  £16.00  
128  https://www.lookfantastic.com/medik8-hydr8-b5-...  £40.00  
541  https://www.lookfantastic.com/foreo-ufo-activa...   £9.99  
213  https://www.lookfantastic.com/foreo-serum-seru...  £49.00  
121  https://www.lookfantastic.com/the-ordinary-hya...  £11.00  
