In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the CSV file
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
df = pd.read_csv(r'C:/Users/NAINITA/OneDrive/Desktop/DMDW/cosmetics.csv')

# Data Cleaning: drop rows with missing values (reassign instead of mutating in place)
df = df.dropna()

# Feature Encoding: convert skin types into binary dummy columns.
# A column that contained NaN is read as float, and get_dummies would then name
# its dummy e.g. 'Dry_1.0'. Casting to int64 first keeps the '<name>_1' column
# names that the code below (and recommend_products) index by.
skin_type_columns = ['Dry', 'Normal', 'Oily', 'Sensitive']
df_encoded = pd.get_dummies(
    df.astype({column: 'int64' for column in skin_type_columns}),
    columns=skin_type_columns,
    drop_first=True,
)

# With drop_first=True a column holding a single distinct value produces no
# dummy at all; fail here with a readable message instead of a later KeyError.
target_columns = [f'{column}_1' for column in skin_type_columns]
missing_columns = [column for column in target_columns if column not in df_encoded.columns]
assert not missing_columns, f"Expected dummy columns are missing after encoding: {missing_columns}"

# Define Features and Target Variable
# NOTE(review): the target below is derived from the same dummy columns that
# remain in X, so the classifier can read the label straight from its inputs
# (target leakage). Left unchanged because the input prompt and
# recommend_products depend on this feature layout; confirm this is intended.
X = df_encoded.drop(['Label', 'Brand', 'Name', 'Price', 'Rank', 'Ingredients'], axis=1)  # Adjust as needed
y = df_encoded[target_columns].idxmax(axis=1)  # First skin-type column holding the row maximum

# Split the Data (fixed seed for a reproducible 80/20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Function to recommend products based on user input
def recommend_products(user_input):
    """Recommend products for the skin type predicted from ``user_input``.

    Parameters
    ----------
    user_input : str
        Comma-separated 0/1 values, one per column of the module-level feature
        frame ``X`` and in the same order as ``X.columns``.

    Returns
    -------
    pandas.DataFrame
        The 'Brand', 'Name', 'Price' and 'Ingredients' columns of the rows of
        ``df`` selected by the skin-type mask (possibly empty).

    Raises
    ------
    ValueError
        If a token is not an integer, a value is not 0 or 1, or the number of
        values differs from ``len(X.columns)``.
    """
    # Parse first so malformed text is reported with a readable message.
    try:
        input_values = [int(token) for token in user_input.split(',')]
    except ValueError as exc:
        raise ValueError(
            f"Input must be comma-separated integers, got {user_input!r}."
        ) from exc

    # The features are 0/1 indicators; anything else would be passed to the
    # model silently, so reject it here.
    if any(value not in (0, 1) for value in input_values):
        raise ValueError(f"Input values must be binary (0 or 1), got {input_values}.")

    if len(input_values) != len(X.columns):
        raise ValueError(
            f"Expected {len(X.columns)} values (one per feature), got {len(input_values)}."
        )

    # Create a single-row DataFrame with the same columns the model was fitted on
    user_df = pd.DataFrame([input_values], columns=X.columns)

    # Predict skin type using the trained model
    predicted_skin_type = model.predict(user_df)[0]

    # Keep rows whose predicted skin-type column is set and whose other three
    # skin-type columns are unset.
    # NOTE(review): this excludes products that suit the predicted type AND
    # another type; confirm the exact-match filter is intended.
    mask = pd.Series(True, index=df_encoded.index)
    for column in ('Dry_1', 'Normal_1', 'Oily_1', 'Sensitive_1'):
        mask &= df_encoded[column] == (predicted_skin_type == column)

    # df and df_encoded share an index, so the mask selects the same rows in df.
    return df.loc[mask, ['Brand', 'Name', 'Price', 'Ingredients']]

# User Input for Skin Types as binary values (one value per column of X, in X.columns order)
user_input = input("Enter your skin types as binary values (e.g., 0,1,0,1,0 for Dry and Oily): ")

# Validate user input length
expected_length = len(X.columns)
if len(user_input.split(',')) == expected_length:
    try:
        recommendations = recommend_products(user_input)
    except ValueError as err:
        # recommend_products raises ValueError when a value cannot be parsed as
        # an integer; report it instead of letting the cell crash with a traceback.
        print(f"Invalid input ({err}). Please enter {expected_length} binary values corresponding to all features.")
    else:
        if not recommendations.empty:
            print("\nRecommended Products:")
            print(recommendations)
        else:
            print("No products match your criteria.")
else:
    print(f"Invalid input. Please enter {expected_length} binary values corresponding to all features.")


Recommended Products:
                   Brand                                               Name  \
153   KIEHL'S SINCE 1851                    Ultra Facial Oil-Free Gel-Cream   
367               LANCER                                 The Method: Polish   
370              CLARINS            Gentle Foaming Cleanser with Cottonseed   
535               LANCER                                The Method: Cleanse   
671                 DIOR  Capture Youth Glow Booster Age-Delay Illuminat...   
767      KATE SOMERVILLE           EradiKate® Salicylic Acid Acne Treatment   
943                 DIOR            Hydra Life Glow Better Fresh Jelly Mask   
1168                DIOR                            One Essential Eye Serum   

      Price                                        Ingredients  
153      29  Water, Glycerin, Alcohol Denat., Dimethicone, ...  
367      75  Butylene Glycol, Sodium Bicarbonate, PEG-8, Ma...  
370      26  Water, Glycerin, Stearic Acid, Myristic Acid, ...  
535  

In [18]:
import joblib

# Serialize the trained model to a pickle file (relative path, so it lands in the working directory)
joblib.dump(value=model, filename='random_forest_recommendation_model.pkl')

['random_forest_recommendation_model.pkl']