In [1]:
import pandas as pd
import pickle
import psycopg2
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import seaborn as sns
matplotlib.use('TkAgg')

In [2]:
# Load the model
with open('random_forest_model.pkl', 'rb') as file:
    model = pickle.load(file)
print('DONE')

DONE


In [3]:

user_profile = {
    'professional_review':0, 
    'vibe_review': 1, 
    'redness_review': 1, 
    'dry_review': 1, 
    'light_coverage_review': 1, 
    'young_review': 0, 
    'mother_review': 0, 
    'skin_concerns_review': 1, 
    'white_review': 1, 
    'tan_review': 0, 
    'acne_review': 1,
    'poc_review': 0, 
    'comfortable_wear_review': 1, 
    'coverage_review': 0, 
    'medium_coverage_review': 0, 
    'full_coverage_review': 0, 
    'easy_use_review': 1, 
    'wrinkles_review': 1
}
product_data = pd.read_csv('product_embeddings.csv')
product_copy = product_data.copy()
product_copy = product_copy.drop(columns = ['product_link_id', 'overall_product_rating'])
le = LabelEncoder()
product_copy['category'] = le.fit_transform(product_copy['category'])

print(product_copy.columns)






Index(['young', 'mother', 'professional', 'vibe', 'acne', 'dry', 'wrinkles',
       'poc', 'white', 'tan', 'redness', 'coverage', 'light_coverage',
       'medium_coverage', 'full_coverage', 'expensive', 'inexpensive',
       'skin_concerns', 'comfortable_wear', 'easy_use', 'pilling',
       'shade_range', 'category', 'num_shades', 'num_reviews'],
      dtype='object')


In [4]:
for col, val in user_profile.items(): 
    product_copy[col] = user_profile[col]



In [5]:

feature_names = model.feature_names_in_  # This assumes scikit-learn's RandomForest model
print(model.classes_)
# Reorder the DataFrame to match the model's expected feature order
product_copy = product_copy[feature_names]
predictions = model.predict_proba(product_copy)
product_copy['predicted_score'] = predictions[:, 1] + 2* predictions[:, 1] + 3*predictions[:, 2] + 4*predictions[:, 3] + 5*predictions[:, 4]
print(predictions[0, :])
product_copy['product_link_id'] = product_data['product_link_id']
top_products = product_copy.sort_values(by='predicted_score', ascending=False)
# Output the top products
top_n = 10  # Number of top products to display
print(top_products[['product_link_id', 'predicted_score']].head(top_n))

[1 2 3 4 5]
[0.21433333 0.22       0.21666667 0.13133333 0.21766667]
      product_link_id  predicted_score
227             241.0         3.583571
233             247.0         3.583571
1208           1290.0         3.583571
1122           1199.0         3.583571
1264           1348.0         3.583571
296             314.0         3.583571
1191           1271.0         3.573571
357             376.0         3.563571
974            1037.0         3.563571
1200           1281.0         3.563571


[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 100 out of 100 | elapsed:    0.0s finished


In [11]:
from scipy.optimize import linprog
import numpy as np
import pandas as pd

df = pd.read_csv('cleaned_makeup_products.csv')
budget = 50
product_info = df[['product_link_id', 'product_name', 'brand', 'price', 'category']].copy()
product_info.rename(columns={'category': 'category_name'}, inplace=True)

# Merge data to include product prices and predicted scores
product_budget = product_copy.copy()
product_budget = pd.merge(product_budget, product_info, how='inner', on='product_link_id')


product_budget['category_name'].fillna(value='Uncategorized', inplace=True)

if user_profile['light_coverage_review'] == 1:
    all_categories = ['Foundation', 'Tinted Moisturizer', 'Blush', 'Concealer', 'Setting Spray & Powder']
elif user_profile['medium_coverage_review'] == 1:
    all_categories = ['Foundation', 'Bronzer', 'Concealer', 'Blush', 'Setting Spray & Powder']
else:
    all_categories = product_budget['category_name'].unique()
top_products_per_category = []

for category in all_categories:
    category_data = product_budget[product_budget['category_name'] == category]
    if category_data.empty:
        print(f"No data for category: {category}")
        continue

    prices = category_data['price'].values
    scores = category_data['predicted_score'].values
    product_link_ids = category_data['product_link_id'].values
    names = category_data['product_name'].values

    prices = np.array(prices)
    scores = np.array(scores)
    product_link_ids = np.array(product_link_ids)

    if len(prices) == 0:
        print(f"No products found in category: {category}")
        continue

    A_ub = np.eye(len(prices))  # Identity matrix to enforce per-product constraints
    b_ub = np.ones(len(prices)) * budget   # Budget limits


    # Objective function: maximize score and minimize price
    c =  -scores

    result = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=[(0, 1)] * len(prices), method='highs')

    if result.success:
        optimal_indices = np.where(result.x > 0.5)[0]
        selected_products = [(product_link_ids[i], names[i], scores[i], prices[i]) for i in optimal_indices]
        
        # Sort products based on predicted scores in descending order
        selected_products.sort(key=lambda x: x[2], reverse=True)
    if selected_products:
        idx = 0
        top_product = selected_products[0]
        if top_product[3] > budget: 
            idx+=1
            top_product = selected_products[idx]
    
        top_products_per_category.append({
            'category': category,
            'product_link_id': top_product[0],
            'product_name': top_product[1], 
            'score': top_product[2],
            'price': top_product[3]
        })
        
# Print results
print("Top products per category:")
for category_info in top_products_per_category:
    print(f"\nCategory: {category_info['category']}")
    print(f"Product Link ID: {category_info['product_link_id']}")
    print(f"Product Name: {category_info['product_name']}")
    print(f"Predicted Score: {category_info['score']:.2f}")
    print(f"Price: ${category_info['price']:.2f}")

Top products per category:

Category: Foundation
Product Link ID: 2.0
Product Name: Face Bond Self-Setting Waterproof Foundation
Predicted Score: 3.50
Price: $40.00

Category: Tinted Moisturizer
Product Link ID: 241.0
Product Name: Dew Skin Tinted Moisturizer
Predicted Score: 3.58
Price: $50.00

Category: Blush
Product Link ID: 679.0
Product Name: Cloud Crush Blurring Blush
Predicted Score: 3.54
Price: $29.00

Category: Concealer
Product Link ID: 314.0
Product Name: Care and Glow Hydrating Serum Concealer
Predicted Score: 3.58
Price: $29.00

Category: Setting Spray & Powder
Product Link ID: 1290.0
Product Name: Light Reflecting Pressed Setting Powder
Predicted Score: 3.58
Price: $40.00
