In [5]:
import pandas as pd

df = pd.read_csv("C:/Users/Beth Angela/Desktop/skincare.csv")

print(df.head())

                                        product_name  \
0  The Ordinary Natural Moisturising Factors + HA...   
1      CeraVe Facial Moisturising Lotion SPF 25 52ml   
2  The Ordinary Hyaluronic Acid 2% + B5 Hydration...   
3          AMELIORATE Transforming Body Lotion 200ml   
4                     CeraVe Moisturising Cream 454g   

                                         product_url product_type  \
0  https://www.lookfantastic.com/the-ordinary-nat...  Moisturizer   
1  https://www.lookfantastic.com/cerave-facial-mo...  Moisturizer   
2  https://www.lookfantastic.com/the-ordinary-hya...  Moisturizer   
3  https://www.lookfantastic.com/ameliorate-trans...  Moisturizer   
4  https://www.lookfantastic.com/cerave-moisturis...  Moisturizer   

                                         ingredients  price_ksh  \
0  Aqua (Water), Caprylic/Capric Triglyceride, Ce...       1040   
1  Aqua/Water, Homosalate, Glycerin, Octocrylene,...       2700   
2  Aqua (Water), Sodium Hyaluronate, Sodium Hya

In [6]:
print("Column names:\n", df.columns)

Column names:
 Index(['product_name', 'product_url', 'product_type', 'ingredients',
       'price_ksh', 'skin_type', 'concerns'],
      dtype='object')


In [7]:
print("\nMissing values per column:\n", df.isnull().sum())


Missing values per column:
 product_name    0
product_url     0
product_type    0
ingredients     0
price_ksh       0
skin_type       0
concerns        0
dtype: int64


In [8]:
print(df['skin_type'].unique())
print(df['product_type'].unique())
print(df['concerns'].head(10))


['normal, dry, sensitive' 'normal, dry' 'dry' 'dry, sensitive'
 'dry, combination' 'oily, sensitive' 'oily, combination, sensitive'
 'normal, combination, sensitive' 'normal, dry, combination'
 'normal, oily, sensitive' 'normal, dry, combination, sensitive'
 'normal, dry, oily' 'normal, combination, dry'
 'normal, oily, combination,  sensitive'
 'normal, oily, dry, combination, sensitive' 'normal, sensitive'
 'oily, combination' 'sensitive, dry' 'normal, oily, dry, combination'
 'oily' 'normal, combination' 'sensitive' 'normal, oily, combination'
 'normal, oily, combination, dry'
 'normal, oily, combination, dry, sensitive' 'combination'
 'dry, combination, oily' 'combination, oily' 'normal,dry' 'normal, oily'
 'sensitive, combination' 'normal, oily, combination, dry , sensitive'
 'normal ,oily, combination, dry, sensitive' 'dry,sensitive' 'dry, oily'
 'normal' 'normal, dry, oily, combination , sensitive'
 'normal, dry, oily, sensitive']
['Moisturizer' 'Serum' 'Eye Care' 'Face Mask' 'C

In [9]:
df['skin_type_list'] = df['skin_type'].apply(lambda x: [i.strip().lower() for i in x.split(',')])


In [10]:
df['concerns_list'] = df['concerns'].apply(lambda x: [i.strip().lower() for i in x.split(',')])


In [11]:
df[['skin_type', 'skin_type_list', 'concerns_list']].head()


Unnamed: 0,skin_type,skin_type_list,concerns_list
0,"normal, dry, sensitive","[normal, dry, sensitive]","[dryness, damaged skin barrier, uneven skin te..."
1,"normal, dry","[normal, dry]","[dryness, damaged skin barrier, sun damage]"
2,"normal, dry, sensitive","[normal, dry, sensitive]","[dryness, damaged skin barrier, fine lines and..."
3,"normal, dry","[normal, dry]","[irritation, uneven skin texture, dryness]"
4,dry,[dry],"[dryness, damaged skin barrier]"


In [12]:
user_skin_type = "dry"
user_concern = "dryness"

recommendations = df[
    df['skin_type_list'].apply(lambda x: user_skin_type in x) &
    df['concerns_list'].apply(lambda x: user_concern in x)
]


In [13]:
from collections import Counter

# Flatten the list of skin types and count occurrences
skin_type_counter = Counter([stype.strip() for sublist in df['skin_type_list'] for stype in sublist])
print("Skin type distribution:\n", skin_type_counter)


Skin type distribution:
 Counter({'dry': 455, 'oily': 433, 'normal': 432, 'combination': 423, 'sensitive': 342})


In [14]:
concerns_counter = Counter([c.strip() for sublist in df['concerns_list'] for c in sublist])
print("\nSkin concern distribution:\n", concerns_counter)



Skin concern distribution:
 Counter({'dryness': 407, 'dullness': 267, 'fine lines and wrinkles': 179, 'uneven skin texture': 178, 'irritation': 142, 'excess oil': 141, 'clogged pores': 122, 'hyperpigmentation': 74, 'damaged skin barrier': 70, 'puffiness': 67, 'sagging': 64, 'dark circles': 60, 'acne': 54, 'redness': 46, 'sun damage': 30, 'enlarged pores': 13, 'stretch marks': 6, 'anti-bacterial': 1, '': 1})


In [17]:
def recommend_products(skin_type, concern, top_n=5):
    filtered_df = df[
        df['skin_type_list'].apply(lambda x: skin_type.lower() in [s.lower() for s in x]) &
        df['concerns_list'].apply(lambda x: concern.lower() in [c.lower() for c in x])
    ]
    
    if filtered_df.empty:
        return "No products found for your selection."
    
    return filtered_df[['product_name', 'product_type', 'price_ksh']].head(top_n)


In [18]:
recommend_products("dry", "dryness")



Unnamed: 0,product_name,product_type,price_ksh
0,The Ordinary Natural Moisturising Factors + HA...,Moisturizer,1040
1,CeraVe Facial Moisturising Lotion SPF 25 52ml,Moisturizer,2700
2,The Ordinary Hyaluronic Acid 2% + B5 Hydration...,Moisturizer,1240
3,AMELIORATE Transforming Body Lotion 200ml,Moisturizer,4500
4,CeraVe Moisturising Cream 454g,Moisturizer,3200


In [19]:
recommend_products("oily", "clogged pores")
recommend_products("sensitive", "irritation")
recommend_products("normal", "fine lines and wrinkles")


Unnamed: 0,product_name,product_type,price_ksh
2,The Ordinary Hyaluronic Acid 2% + B5 Hydration...,Moisturizer,1240
9,Clinique Moisture Surge 72 Hour Moisturiser 75ml,Moisturizer,7400
28,Estée Lauder DayWear Advanced Multi-Protection...,Moisturizer,8100
30,The Chemistry Brand Intense Youth Complex Hand...,Moisturizer,3798
34,Elemis Pro-Collagen Marine Cream SPF30 50ml,Moisturizer,17400


In [20]:
user_skin_type = "dry"
user_concern = "dryness"


In [21]:
recommendations = df[
    df['skin_type_list'].apply(lambda x: user_skin_type in x) &
    df['concerns_list'].apply(lambda x: user_concern in x)
]


In [22]:
print("Top Recommendations for You:\n")
print(recommendations[['product_name', 'product_type', 'price_ksh']].head(10))


Top Recommendations for You:

                                        product_name product_type  price_ksh
0  The Ordinary Natural Moisturising Factors + HA...  Moisturizer       1040
1      CeraVe Facial Moisturising Lotion SPF 25 52ml  Moisturizer       2700
2  The Ordinary Hyaluronic Acid 2% + B5 Hydration...  Moisturizer       1240
3          AMELIORATE Transforming Body Lotion 200ml  Moisturizer       4500
4                     CeraVe Moisturising Cream 454g  Moisturizer       3200
5                   CeraVe Moisturising Lotion 473ml  Moisturizer       3000
6      CeraVe Facial Moisturising Lotion No SPF 52ml  Moisturizer       2600
7  The Ordinary Natural Moisturizing Factors + HA...  Moisturizer       1360
8                       CeraVe Smoothing Cream 177ml  Moisturizer       2400
9   Clinique Moisture Surge 72 Hour Moisturiser 75ml  Moisturizer       7400


In [27]:

def categorize_price(price):
    if price <= 2500:
        return "0–2500 Ksh"
    elif 2500 < price <= 5000:
        return "2500–5000 Ksh"
    elif 5000 < price <= 10000:
        return "5000–10000 Ksh"
    else:
        return "Above 10000 Ksh"

df['price_range'] = df['price_ksh'].apply(categorize_price)

In [28]:
user_skin_type = "dry"
user_concern = "dryness"
user_price_range = "0–2500 Ksh"  # Set to "All" if you want no filtering by price

In [29]:
filtered_df = df[
    (df['skin_type_list'].apply(lambda x: user_skin_type in x)) &
    (df['concerns_list'].apply(lambda x: user_concern in x))
]


In [33]:
if user_price_range != "All":
    filtered_df = filtered_df[filtered_df['price_range'] == user_price_range]

In [34]:
recommendations = filtered_df[['product_name', 'product_type', 'price_ksh']]
print(recommendations.head())

                                         product_name product_type  price_ksh
0   The Ordinary Natural Moisturising Factors + HA...  Moisturizer       1040
2   The Ordinary Hyaluronic Acid 2% + B5 Hydration...  Moisturizer       1240
7   The Ordinary Natural Moisturizing Factors + HA...  Moisturizer       1360
8                        CeraVe Smoothing Cream 177ml  Moisturizer       2400
10                     CeraVe Moisturising Cream 50ml  Moisturizer        800


In [51]:
concern_to_outcome = {
    'dryness': ['hydration', 'moisturizing'],
    'dullness': ['radiance'],
    'fine lines and wrinkles': ['anti-aging'],
    'uneven skin texture': ['smoother skin', 'exfoliating'],
    'irritation': ['soothing'],
    'excess oil': ['oil control'],
    'clogged pores': ['deep cleansing'],
    'hyperpigmentation': ['brightening', 'even skin tone'],
    'damaged skin barrier': ['skin barrier repair'],
    'puffiness': ['de-puffing'],
    'sagging': ['firm skin', 'anti-aging'],
    'dark circles': ['brightening'],
    'acne': ['acne treatment'],
    'redness': ['soothing'],
    'sun damage': ['UV protection'],
    'enlarged pores': ['pore minimizing'],
    'stretch marks': ['moisturizing', 'exfoliating'],
    'anti-bacterial': ['acne treatment']
}


In [52]:
from collections import defaultdict

outcome_to_concerns = defaultdict(list)

for concern, outcomes in concern_to_outcome.items():
    for outcome in outcomes:
        outcome_to_concerns[outcome].append(concern)


In [53]:
user_desired_outcome = "moisturizing"
print(outcome_to_concerns[user_desired_outcome])


['dryness', 'stretch marks']


In [55]:
df['concerns_list'] = df['concerns_list'].apply(lambda clist: [c.lower().strip() for c in clist])
df['skin_type_list'] = df['skin_type_list'].apply(lambda slist: [s.lower().strip() for s in slist])


In [57]:
user_skin_type = "dry"
user_desired_outcomes = ["moisturizing", "hydration"]  # Can be one or multiple outcomes
user_price_range = "0–2500 Ksh"  # Or "All" for no price filter

In [58]:
# Convert desired outcomes to relevant concerns
matched_concerns = []
for outcome in user_desired_outcomes:
    matched_concerns.extend(outcome_to_concerns.get(outcome, []))

In [59]:
# Filter by skin type and concerns
recommendations = df[
    df['skin_type_list'].apply(lambda x: user_skin_type in x) &
    df['concerns_list'].apply(lambda clist: any(c in clist for c in matched_concerns))
]

In [60]:
# Apply price range filter if selected
if user_price_range != "All":
    min_price, max_price = map(int, user_price_range.replace("Ksh", "").split("–"))
    recommendations = recommendations[
        (recommendations['price_ksh'] >= min_price) &
        (recommendations['price_ksh'] <= max_price)
    ]

In [62]:

print(recommendations[['product_name', 'product_type', 'price_ksh']])

                                          product_name product_type  price_ksh
0    The Ordinary Natural Moisturising Factors + HA...  Moisturizer       1040
2    The Ordinary Hyaluronic Acid 2% + B5 Hydration...  Moisturizer       1240
7    The Ordinary Natural Moisturizing Factors + HA...  Moisturizer       1360
8                         CeraVe Smoothing Cream 177ml  Moisturizer       2400
10                      CeraVe Moisturising Cream 50ml  Moisturizer        800
..                                                 ...          ...        ...
580  Sukin Energising Body Scrub with Coffee and Co...   Face scrub       2390
587  Sanctuary Spa Complexion Perfecting Radiance E...   Face scrub       2000
597  L'Oréal Paris Smooth Sugars Nourishing Sugar S...   Face scrub       1598
612  Beauty of Joseon Relief Sun Sunscreen SPF 50+ ...     suncreen       2300
614  Dr. Rashel Anti-Aging & Moisture Sun Cream SPF...     suncreen        700

[109 rows x 3 columns]
