Rain Forest

In [5]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Read the preprocessed skincare dataset into a DataFrame
file_path = "Dataset/processed_file.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Categorize products based on notable effects and product type.
# Ordered (keywords, label prefix) rules: the first rule with a matching
# keyword wins, so the order here defines the category precedence.
_CATEGORY_RULES = (
    (('hydrating', 'moisturizing'), 'Hydrating/Moisturizing '),
    (('acne-free', 'acne-spot'), 'Acne Treatment '),
    (('anti-aging',), 'Anti-Aging '),
    (('brightening', 'black-spot'), 'Brightening '),
    (('oil-control',), 'Oil Control '),
    (('pore-care',), 'Pore Care '),
    (('skin-barrier', 'soothing'), 'Sensitive Skin Care '),
    (('uv-protection',), 'Sunscreen/UV Protection '),
)


def categorize_product(row):
    """Return the category label for one product row.

    The label is a category prefix chosen from the row's notable effects,
    followed by the row's product type. Rules are evaluated in the order
    listed in ``_CATEGORY_RULES``; rows matching no rule are labelled
    ``'General '``.

    Args:
        row: Mapping-like object (for example a DataFrame row passed by
            ``df.apply(..., axis=1)``) with the keys ``'notable_effects'``
            and ``'product_type'``.

    Returns:
        The category prefix concatenated with ``row['product_type']``.

    Raises:
        TypeError: If ``row['product_type']`` is not a string. A missing
            product type is left to fail loudly because there is no
            sensible label to fall back to.
    """
    notable_effects = row['notable_effects']
    product_type = row['product_type']

    prefix = 'General '
    try:
        for keywords, label in _CATEGORY_RULES:
            if any(keyword in notable_effects for keyword in keywords):
                prefix = label
                break
    except TypeError:
        # A missing cell (NaN / None) does not support ``in``. Treat it as
        # "no notable effects" instead of aborting the whole apply().
        prefix = 'General '

    return prefix + product_type

# Label every row with its product category
df['product_category'] = df.apply(categorize_product, axis='columns')

# Persist the labelled dataset
df.to_csv("Dataset/categorized_skincare_data.csv", index=False)

# Model inputs, grouped by the kind of column they describe
skin_type_columns = ['Sensitive', 'Combination', 'Oily', 'Dry', 'Normal']
product_type_columns = [
    'product_type_face wash',
    'product_type_moisturizer',
    'product_type_serum',
    'product_type_sunscreen',
    'product_type_toner',
]
notable_effect_columns = [
    'notable_effects_acne-free',
    'notable_effects_acne-spot',
    'notable_effects_anti-aging',
    'notable_effects_balancing',
    'notable_effects_black-spot',
    'notable_effects_brightening',
    'notable_effects_hydrating',
    'notable_effects_moisturizing',
    'notable_effects_no-whitecast',
    'notable_effects_oil-control',
    'notable_effects_pore-care',
    'notable_effects_refreshing',
    'notable_effects_skin-barrier',
    'notable_effects_soothing',
    'notable_effects_uv-protection',
]
features = skin_type_columns + product_type_columns + notable_effect_columns

target = 'product_category'

# Replace the category names with integer class ids
label_encoder = LabelEncoder()
df[target] = label_encoder.fit_transform(df[target])

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility)
X, y = df[features], df[target]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features for better SVM performance.
# The scaler is fitted on the training split only and then reused on the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train SVM model
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale')  # RBF Kernel
svm_model.fit(X_train, y_train)

# Predictions
y_pred = svm_model.predict(X_test)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)

# Class ids that occur in the test split or in the predictions; classes
# absent from both are left out of the report.
unique_labels = sorted(set(y_test) | set(y_pred))

# Generate classification report only for present labels.
# zero_division=0 reports 0.0 for a class that has no predicted (or no true)
# samples, the same value as the default, without emitting a warning per
# affected metric.
report = classification_report(
    y_test,
    y_pred,
    labels=unique_labels,
    target_names=[label_encoder.classes_[i] for i in unique_labels],
    zero_division=0,
)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)



Accuracy: 0.97
Classification Report:
                                        precision    recall  f1-score   support

          Acne Treatment ['face wash']       1.00      0.90      0.95        10
        Acne Treatment ['moisturizer']       1.00      1.00      1.00         5
              Acne Treatment ['serum']       0.92      1.00      0.96        12
          Acne Treatment ['sunscreen']       1.00      0.67      0.80         3
              Acne Treatment ['toner']       0.90      1.00      0.95         9
              Anti-Aging ['face wash']       1.00      1.00      1.00         7
            Anti-Aging ['moisturizer']       1.00      1.00      1.00        12
                  Anti-Aging ['serum']       1.00      1.00      1.00        49
              Anti-Aging ['sunscreen']       1.00      1.00      1.00        12
                  Anti-Aging ['toner']       1.00      1.00      1.00         8
             Brightening ['face wash']       1.00      1.00      1.00         2
 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
