# **Table of Contents:**
## This notebook implements the filtering process together with a test case

### 1) Passing the user image to the VGG 16 model.
### 2) Filtering the data that was processed by the BERT model.
### 3) User-Based Collaborative Filtering.
### 4) Content-Based Filtering.
### 5) Final recommendation (Intersection).

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler


# Loading VGG 16 Model

In [10]:
# Defining the custom Cast layer
class Cast(tf.keras.layers.Layer):
    """Keras layer whose only job is to cast its input to ``tf.float32``."""

    def __init__(self, **kwargs):
        # Hand every keyword argument straight to the base Layer constructor.
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return ``inputs`` converted to 32-bit floats."""
        return tf.cast(inputs, dtype=tf.float32)

# Custom objects handed to load_model: the Cast layer and the LeakyReLU class
custom_objects = dict(
    Cast=Cast,
    LeakyReLU=tf.keras.layers.LeakyReLU,
)

# Loading the model with the custom objects
model = load_model(
    '/kaggle/input/skin_model/tensorflow2/default/1/skin_classification_model_Leaky_Relu_3.h5',
    custom_objects=custom_objects,
)


# The class names, looked up by the index of the model's highest output
class_names = ["dry", "normal", "oily"]

# Function to preprocess the images
def preprocess_image(img_path, target_size=(224, 224)):
    """Load an image file and turn it into a one-image input batch.

    Args:
        img_path: Path of the image file to load.
        target_size: Size handed to the image loader; ``(224, 224)`` by default.

    Returns:
        The image as an array with values divided by 255 and a leading
        batch axis added.
    """
    loaded = image.load_img(img_path, target_size=target_size)
    pixels = image.img_to_array(loaded) / 255.0
    # Prepend the batch axis
    return np.expand_dims(pixels, axis=0)


# Test image that is run through the skin-type classifier
image_path = "/kaggle/input/test-dry-img/test_dry.jpg"
img_preprocessed = preprocess_image(image_path)

# Prediction: take the index of the highest model output and map it to a name
pred_probs = model.predict(img_preprocessed)
pred_class_index = np.argmax(pred_probs, axis=1)[0]
pred_class_name = class_names[pred_class_index]

# Print the result (the previous name `skin_type_result` was never defined
# in this notebook and raised NameError on a fresh kernel)
print(f"Predicted skin type: {pred_class_name}")

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 469ms/step
Predicted skin type: dry


#### The VGG 16 model classified the input image correctly

# Filtering the Reviews data

In [None]:
# Load the dataset that the filtering steps below operate on
balanced_df=pd.read_csv("/kaggle/input/final_recommendation_df.csv")

In [None]:
# Work on a copy so balanced_df keeps the data exactly as loaded
df= balanced_df.copy()

In [121]:
# Distinct values present in the predicted_score column
df['predicted_score'].unique()

array([5, 1, 3, 2, 4])

In [122]:
# Keep only the reviews whose predicted score is 4 or higher
df = df.loc[df['predicted_score'].ge(4)].copy()

In [123]:
# List the available columns
df.columns

Index(['author_id', 'rating', 'review_text', 'total_feedback_count',
       'total_neg_feedback_count', 'total_pos_feedback_count', 'product_id',
       'price_usd', 'product_name', 'ingredients', 'primary_category',
       'secondary_category', 'processed_review', 'review_length',
       'processed_ingredients', 'ingredient_count', 'has__glycerin_',
       'has__phenoxyethanol_', 'has__butylene_glycol_', 'has__propanediol_',
       'has__tocopherol_', 'has__citric_acid_', 'has__ethylhexylglycerin_',
       'has__sodium_hyaluronate_', 'has__xanthan_gum_',
       'has__sodium_benzoate_', 'has__caprylyl_glycol_',
       'has__potassium_sorbate_', 'has__polysorbate_',
       'has__sodium_hydroxide_', 'has__capryliccapric_triglyceride_',
       'has__tocopheryl_acetate_', 'has__squalane_', 'has__dimethicone_',
       'has__pentylene_glycol_', 'has_water_', 'skin_type_dry',
       'skin_type_normal', 'skin_type_oily', 'sentiment_label',
       'predicted_sentiment_label', 'predicted_score']

In [124]:
# Product categories a user can choose from
df["secondary_category"].unique()

array(['Treatments', 'Moisturizers', 'Eye Care', 'Cleansers', 'Sunscreen',
       'Self Tanners', 'Masks', 'Lip Balms & Treatments', 'Wellness'],
      dtype=object)

## Creating the user input

In [125]:
# The User Input (hard-coded values for this test case)
user_skin_type = "dry"  # from the VGG 16 model
user_secondary_category = "Cleansers"  # The product type
user_allergy_ingredient = "has__pentylene_glycol_"  # ingredient flag column to exclude
user_budget = 50.0  # upper limit compared against price_usd

In [126]:
# 1: SKIN-TYPE FILTERING
# Keep the rows whose flag column for the user's skin type equals 1
skin_column = f"skin_type_{user_skin_type}"
skin_filtered_df = df.loc[df[skin_column].eq(1)].copy()

In [127]:
# 2: CATEGORY & HIGH RATING FILTERING
# Both conditions must hold: requested category and a rating of at least 4
category_mask = skin_filtered_df['secondary_category'].eq(user_secondary_category)
rating_mask = skin_filtered_df['rating'].ge(4)
product_filtered_df = skin_filtered_df.loc[category_mask & rating_mask].copy()

In [128]:
# 3: ALLERGY FILTERING
if user_allergy_ingredient in product_filtered_df.columns:
    # Keep only the rows whose flag for the ingredient is 0
    allergy_filtered_df = product_filtered_df[
        product_filtered_df[user_allergy_ingredient] == 0
    ].copy()
else:
    if user_allergy_ingredient:
        # An allergy was given but there is no matching column, so nothing can
        # be excluded. Say so instead of silently skipping the filter.
        print(
            f"Warning: no column '{user_allergy_ingredient}' in the data; "
            "allergy filtering was skipped."
        )
    allergy_filtered_df = product_filtered_df.copy()

In [129]:
# 4: PRICE FILTERING
# Keep the rows priced at or below the user's budget
within_budget = allergy_filtered_df['price_usd'].le(user_budget)
price_filtered_df = allergy_filtered_df.loc[within_budget].copy()

In [130]:
# Save this filtered list for the user-based and content-based filtering steps.
# The index is renumbered from 0 and the old index is discarded.
filtered_df = price_filtered_df.reset_index(drop=True).copy()

In [131]:
# (rows, columns) left after all four filters
filtered_df.shape

(11771, 42)

# User-Based Collaborative Filtering

In [132]:
def user_based_collaborative_filtering(df, top_k=10, n_similar_users=20, min_rating=4):
    """Recommend products liked by the users most similar to a reference user.

    The reference ("dummy") user is the author with the most rows in ``df``.
    Users are compared by the cosine similarity of their rating vectors, and
    the products that the nearest users rated ``min_rating`` or higher are
    returned.

    Args:
        df: Review-level frame with at least the ``author_id``,
            ``product_id`` and ``rating`` columns.
        top_k: Currently unused; kept so existing callers keep working.
        n_similar_users: How many nearest users to collect products from.
        min_rating: Lowest rating that counts as "highly rated".

    Returns:
        One row of ``df`` per recommended product (its first occurrence in
        ``df``). An empty frame with the same columns when ``df`` has no rows
        or no usable author.
    """
    # Nothing to compare: idxmax() and cosine_similarity would both raise.
    if df.empty:
        return df.iloc[0:0].copy()

    # Building a user-item matrix for rating (unrated products count as 0)
    user_item_matrix = df.pivot_table(
        index='author_id', columns='product_id', values='rating'
    ).fillna(0)

    # Let's assume a dummy user: the author with the most reviews
    author_counts = df['author_id'].value_counts()
    if author_counts.empty:
        return df.iloc[0:0].copy()
    dummy_user_id = author_counts.idxmax()

    similar_users = []
    if dummy_user_id in user_item_matrix.index:
        # Only the reference user's similarities are needed, so compute that
        # single row instead of the full user x user matrix.
        scores = cosine_similarity(
            user_item_matrix.loc[[dummy_user_id]], user_item_matrix
        )[0]
        sim_scores = pd.Series(scores, index=user_item_matrix.index)
        # Remove the user explicitly. Relying on "self sorts first" breaks as
        # soon as another user has an identical rating vector (tie at 1.0).
        similar_users = (
            sim_scores.drop(dummy_user_id).nlargest(n_similar_users).index.tolist()
        )

    # Collect the highly rated products from the similar users
    similar_reviews = df[df['author_id'].isin(similar_users)]
    liked = similar_reviews.loc[similar_reviews['rating'] >= min_rating, 'product_id']
    recommended_products = liked.dropna().unique()

    user_cf_df = df[df['product_id'].isin(recommended_products)].drop_duplicates('product_id')

    return user_cf_df

In [133]:
# Run user-based collaborative filtering on the filtered reviews
user_cf_result = user_based_collaborative_filtering(filtered_df)

In [134]:
# (rows, columns) of the collaborative-filtering result
user_cf_result.shape

(23, 42)

# Content-Based Filtering

In [135]:
def content_based_filtering(df, user_secondary_category, user_budget, allergy_column, top_k=10):
    """Rank products by how close their content is to the average product.

    Each row is described by its ``has__*`` ingredient flags plus the min-max
    scaled price. The "user profile" is the column-wise mean of those features
    over ``df``, and rows are ranked by cosine similarity to that profile.

    Args:
        df: Frame with ``product_id``, ``price_usd`` and ``has__*`` columns.
            It is not modified.
        user_secondary_category: Currently unused; kept for interface stability.
        user_budget: Currently unused; kept for interface stability.
        allergy_column: Currently unused; kept for interface stability.
        top_k: Maximum number of distinct products to return.

    Returns:
        Up to ``top_k`` rows, one per product, sorted by descending similarity.
        The result carries two extra columns, ``price_scaled`` and
        ``similarity``. It is empty when ``df`` has no rows.
    """
    # Work on a copy so the helper columns do not leak into the caller's frame
    scored = df.copy()

    # MinMaxScaler and cosine_similarity both raise on zero rows
    if scored.empty:
        scored['price_scaled'] = pd.Series(dtype=float)
        scored['similarity'] = pd.Series(dtype=float)
        return scored

    # Selecting the relevant features (Ingredients & Price)
    # NOTE(review): a column named 'has_water_' (single underscore) would not
    # match this prefix and is left out of the features - confirm intended.
    ingredient_cols = [col for col in scored.columns if col.startswith('has__')]

    # Normalizing the price
    scaler = MinMaxScaler()
    scored['price_scaled'] = scaler.fit_transform(scored[['price_usd']])[:, 0]

    # Building the content matrix
    content_matrix = scored[ingredient_cols + ['price_scaled']]

    # Building a user profile vector (this vector acts as the user profile for
    # testing the system): the mean of every feature, shaped as a single row
    user_profile = content_matrix.mean().to_frame().T.values

    # Calculating the similarity between the user profile and all the products
    scored['similarity'] = cosine_similarity(user_profile, content_matrix)[0]

    top_recommendations = (
        scored.sort_values(by='similarity', ascending=False).drop_duplicates('product_id')
    )

    return top_recommendations.head(top_k)

In [136]:
# Run content-based filtering on the filtered reviews, keeping 15 products
content_based_result = content_based_filtering(
    filtered_df, user_secondary_category, user_budget, user_allergy_ingredient, top_k=15
)

In [137]:
# (rows, columns) of the content-based result
content_based_result.shape

(15, 44)

# Intersection of Both


In [138]:
def hybrid_recommendation(user_cf_df, content_df):
    """Return the rows of ``content_df`` whose product is also in ``user_cf_df``.

    Args:
        user_cf_df: Frame with a ``product_id`` column.
        content_df: Frame with ``product_id`` and ``rating`` columns.

    Returns:
        The matching ``content_df`` rows sorted by descending ``rating``,
        keeping one row per ``product_id``.
    """
    # Products that appear in both recommendation lists
    shared_ids = set(content_df['product_id']) & set(user_cf_df['product_id'])
    in_both = content_df['product_id'].isin(shared_ids)
    ranked = content_df.loc[in_both].sort_values(by='rating', ascending=False)
    return ranked.drop_duplicates('product_id')

In [139]:
# Keep only the products recommended by both approaches
final_recommendations = hybrid_recommendation(user_cf_result, content_based_result)

In [140]:
# (rows, columns) of the final recommendation list
final_recommendations.shape

(2, 44)

In [141]:
# Plain-text listing of the key attributes of the recommended products
print("Final Recommended Products:\n")
final_display = final_recommendations.loc[
    :, ['product_name', 'price_usd', 'rating', 'predicted_score', 'ingredients']
]
print(final_display.to_string(index=False))

Final Recommended Products:

                                            product_name  price_usd  rating  predicted_score                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                ingredients
Acne+ 2% BHA + Azelaic Acid + Niacinamide + AHA Cleanser       35.0       5                5                                                                                       

In [142]:
# Rich display of the same table
final_display

Unnamed: 0,product_name,price_usd,rating,predicted_score,ingredients
9227,Acne+ 2% BHA + Azelaic Acid + Niacinamide + AH...,35.0,5,5,"['Salicylic Acid 2%, Aqua (Water, Eau), Cocami..."
7919,Pineapple Enzyme Pore Clearing Cleanser,29.0,4,4,"['Water/Aqua/Eau, Glycerin**, Decyl Glucoside*..."
