In [None]:
# Vectorize them into numerical vectors.
# Positional embeddings, because of the presence of an ingredient in a product.
# BERT
# Six transformer encoder layers,
# each of which consists of two multi-head attention blocks and feed-forward blocks with normalization.
# We used a weighted binary cross-entropy loss to diminish the effects of class imbalance, and optimized it with the Adam optimizer.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertModel
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from collections import defaultdict

class SkincareTransformer(nn.Module):
    """Transformer-encoder classifier over sequences of ingredient-token indices.

    Indices are embedded, layer-normalised, passed through a stack of
    ``nn.TransformerEncoderLayer`` blocks, and the encoder output at the first
    sequence position is projected to ``num_classes`` logits.

    NOTE(review): no positional encoding is added and no padding mask is passed
    to the encoder, so padded positions take part in attention like any other
    token.

    Args:
        num_classes: Number of output logits per input sequence.
        d_model: Embedding and encoder width (PyTorch requires it to be
            divisible by ``nhead``).
        nhead: Number of attention heads in each encoder layer.
        num_layers: Number of stacked encoder layers.
        vocab_size: Number of rows in the embedding table. Every index fed to
            ``forward`` must lie in ``[0, vocab_size)``. Defaults to 10000,
            the size that was previously hard-coded.
    """

    def __init__(self, num_classes, d_model=256, nhead=8, num_layers=6, vocab_size=10000):
        super().__init__()
        # Submodules are created in a fixed order so that seeded weight
        # initialisation is repeatable.
        self.ingredient_embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.classifier = nn.Linear(d_model, num_classes)
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, x):
        """Compute logits for a batch of index sequences.

        Args:
            x: Integer tensor of shape ``(batch, seq_len)``.

        Returns:
            Float tensor of shape ``(batch, num_classes)`` holding raw logits
            (no sigmoid applied).
        """
        x = self.ingredient_embedding(x)      # (batch, seq_len, d_model)
        x = self.layer_norm(x)
        # The encoder layers use the default batch_first=False layout,
        # i.e. (seq_len, batch, d_model).
        x = x.permute(1, 0, 2)
        x = self.transformer_encoder(x)
        # Use the first sequence position as the pooled representation.
        x = x[0, :, :]                        # (batch, d_model)
        logits = self.classifier(x)
        return logits

In [None]:
def preprocess_ingredients(products, tokenizer=None):
    """Build a token -> index vocabulary from the products' ingredient lists.

    Each ingredient string is lower-cased, stripped, and split into tokens by
    ``tokenizer``. Every distinct token receives an index starting at 1; index
    0 is left unassigned so callers can use it for padding / unknown tokens.

    Args:
        products: Iterable of dicts, each with an ``'ingredients'`` list of
            strings.
        tokenizer: Object exposing ``tokenize(str) -> list[str]``. When
            omitted, the ``'bert-base-uncased'`` ``BertTokenizer`` is loaded.

    Returns:
        dict mapping token -> int index, with indices ``1..N`` assigned in
        sorted token order.
    """
    if tokenizer is None:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    vocabulary = set()
    for product in products:
        for ingredient in product['ingredients']:
            vocabulary.update(tokenizer.tokenize(ingredient.lower().strip()))

    # Sort before numbering: bare set iteration order depends on per-process
    # string hashing, which would give a different mapping in every session.
    return {token: idx for idx, token in enumerate(sorted(vocabulary), start=1)}

In [None]:
def vectorize_products(products, ingredient_to_idx, max_len=100, tokenizer=None):
    """Encode products as fixed-length rows of ingredient-token indices.

    Each ingredient string is lower-cased, stripped and tokenised; tokens are
    looked up in ``ingredient_to_idx`` (tokens missing from it become 0). Every
    row is truncated or right-padded with 0 to exactly ``max_len`` entries.

    Args:
        products: Sequence of dicts, each with an ``'ingredients'`` list of
            strings.
        ingredient_to_idx: dict mapping token -> int index.
        max_len: Length of every output row.
        tokenizer: Object exposing ``tokenize(str) -> list[str]``. When
            omitted, the ``'bert-base-uncased'`` ``BertTokenizer`` is loaded.
            (Previously the function read an undefined global ``tokenizer``.)

    Returns:
        ``torch.long`` tensor of shape ``(len(products), max_len)``.
    """
    if tokenizer is None:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    vectors = []
    for product in products:
        indices = []
        for ingredient in product['ingredients']:
            tokens = tokenizer.tokenize(ingredient.lower().strip())
            indices.extend(ingredient_to_idx.get(token, 0) for token in tokens)

        # Truncate, then right-pad with 0 (list * non-positive int is empty).
        indices = indices[:max_len]
        indices += [0] * (max_len - len(indices))
        vectors.append(indices)

    # The explicit dtype and reshape keep the result a 2-D integer tensor even
    # when there are no products.
    return torch.tensor(vectors, dtype=torch.long).reshape(len(vectors), max_len)

In [None]:
class SkincareRecommender:
    """Per-category product scorer built from a dermatologist-approved routine.

    One ``SkincareTransformer`` is trained per product category. ``recommend``
    scores candidate products with each trained model and returns the
    highest-scoring candidate for every category listed in the routine's steps.

    Args:
        dermatologist_routine: Mapping providing ``'approved_products'``
            (category -> list of product dicts, each with an ``'ingredients'``
            list of strings) and ``'steps'`` (the categories for which
            ``recommend`` returns a product).
    """

    def __init__(self, dermatologist_routine):
        self.routine = dermatologist_routine
        # category -> trained SkincareTransformer
        self.models = {}
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        # token -> index (1-based; 0 is reserved for padding / unknown tokens)
        self.ingredient_to_idx = {}
        # fixed length of every encoded product
        self.max_len = 100
        self._initialize_ingredient_mapping(dermatologist_routine['approved_products'])

    def _tokenize(self, ingredient):
        """Lower-case, strip and tokenise one ingredient string."""
        return self.tokenizer.tokenize(ingredient.lower().strip())

    def _encode(self, product):
        """Return ``product`` as a list of exactly ``self.max_len`` token indices.

        Tokens missing from the vocabulary map to 0; the list is truncated or
        right-padded with 0.
        """
        indices = [
            self.ingredient_to_idx.get(token, 0)
            for ingredient in product['ingredients']
            for token in self._tokenize(ingredient)
        ]
        indices = indices[:self.max_len]
        return indices + [0] * (self.max_len - len(indices))

    def _initialize_ingredient_mapping(self, approved_products):
        """Build ``self.ingredient_to_idx`` from every approved product.

        Args:
            approved_products: dict of category -> list of product dicts.
        """
        vocabulary = set()
        for products in approved_products.values():
            for product in products:
                for ingredient in product['ingredients']:
                    vocabulary.update(self._tokenize(ingredient))

        # Sort before numbering so the mapping does not depend on set
        # iteration order, which varies between interpreter sessions.
        self.ingredient_to_idx = {
            token: idx for idx, token in enumerate(sorted(vocabulary), start=1)
        }

    def _prepare_data(self, products, negatives=None):
        """Encode training examples and their binary labels.

        Args:
            products: Product dicts labelled 1.0 (positives).
            negatives: Optional product dicts labelled 0.0. A negative whose
                encoding is identical to a positive's is dropped, so the same
                input never carries contradictory labels.

        Returns:
            Tuple ``(X, y)``: ``X`` is a ``torch.long`` tensor of shape
            ``(n_examples, max_len)`` and ``y`` a ``torch.float32`` tensor of
            shape ``(n_examples, 1)``; positives come first.
        """
        positive_rows = [self._encode(product) for product in products]
        positive_keys = {tuple(row) for row in positive_rows}
        negative_rows = [
            row
            for row in (self._encode(product) for product in (negatives or ()))
            if tuple(row) not in positive_keys
        ]

        rows = positive_rows + negative_rows
        labels = [1.0] * len(positive_rows) + [0.0] * len(negative_rows)

        # The reshape keeps X two-dimensional even when there are no rows.
        X = torch.tensor(rows, dtype=torch.long).reshape(len(rows), self.max_len)
        y = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)
        return X, y

    def _vectorize_product(self, product):
        """Encode one product as a ``(1, max_len)`` ``torch.long`` tensor."""
        return torch.tensor([self._encode(product)], dtype=torch.long)

    def train(self, approved_products, epochs=5, lr=1e-4, pos_weight=2.0):
        """Train one binary classifier per category and store it in ``self.models``.

        For each category its own products are positives and the products
        approved for every other category are negatives (one-vs-rest). With a
        single category there are no negatives and only positives are used.
        Categories with no products are skipped with a warning.

        Args:
            approved_products: dict of category -> list of product dicts.
            epochs: Number of full-batch gradient steps per category.
            lr: Adam learning rate.
            pos_weight: Positive-class weight for ``BCEWithLogitsLoss``.
        """
        import warnings

        for category, products in approved_products.items():
            if not products:
                # An empty batch cannot be pushed through the encoder.
                warnings.warn(f"No approved products for category {category!r}; skipping.")
                continue

            negatives = [
                product
                for other, other_products in approved_products.items()
                if other != category
                for product in other_products
            ]
            X, y = self._prepare_data(products, negatives)

            model = SkincareTransformer(num_classes=1, d_model=128)
            criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([float(pos_weight)]))
            optimizer = optim.Adam(model.parameters(), lr=lr)

            model.train()
            for epoch in range(epochs):
                optimizer.zero_grad()
                loss = criterion(model(X), y)
                loss.backward()
                optimizer.step()
                print(f"Category {category}, Epoch {epoch}, Loss: {loss.item()}")

            # Leave the stored model in inference mode (dropout disabled).
            model.eval()
            self.models[category] = model

    def recommend(self, input_products):
        """Pick the best-scoring candidate for each routine step.

        Args:
            input_products: Iterable of product dicts with an
                ``'ingredients'`` list.

        Returns:
            dict of category -> the candidate product dict with the highest
            sigmoid score, for every trained category listed in
            ``self.routine['steps']``. Ties go to the earliest candidate.
            Empty when there are no candidates.
        """
        candidates = list(input_products)
        if not candidates:
            return {}

        encoded = [self._vectorize_product(product) for product in candidates]

        final_recommendations = {}
        for category, model in self.models.items():
            if category not in self.routine['steps']:
                continue

            # torch.no_grad() does not switch dropout off; eval() does, which
            # makes the scores deterministic.
            model.eval()
            with torch.no_grad():
                scores = [torch.sigmoid(model(X)).item() for X in encoded]

            # max() returns the first maximal element, i.e. the earliest
            # candidate among ties.
            best = max(range(len(candidates)), key=scores.__getitem__)
            final_recommendations[category] = candidates[best]

        return final_recommendations

In [None]:
# Smoke test: a small hand-written routine scored against synthetic products.

# Seed PyTorch's RNG so weight initialisation and dropout draw the same random
# numbers on every run.
torch.manual_seed(42)

dermatologist_routine = {
    'skin_type': 'dry',
    'steps': ['cleanser', 'moisturizer'],
    'approved_products': {
        'cleanser': [
            {'name': 'CeraVe', 'ingredients': ['ceramides', 'hyaluronic acid']},
            {'name': 'Cetaphil', 'ingredients': ['glycerin', 'niacinamide']}
        ],
        'moisturizer': [
            {'name': 'Vanicream', 'ingredients': ['glycerin', 'squalane']},
            {'name': 'Eucerin', 'ingredients': ['urea', 'ceramides']}
        ]
    }
}

# Synthetic candidate products to rank.
input_products = [
    {'name': 'Product A', 'ingredients': ['ceramides', 'hyaluronic acid', 'niacinamide']},
    {'name': 'Product B', 'ingredients': ['glycerin', 'shea butter', 'squalane']},
    {'name': 'Product C', 'ingredients': ['urea', 'ceramides', 'lactic acid']}
]


# Build the vocabulary, train one model per category, then score the candidates.
recommender = SkincareRecommender(dermatologist_routine)
recommender.train(dermatologist_routine['approved_products'])
recommendations = recommender.recommend(input_products)

print("\nProductos recomendados:")
for category, product in recommendations.items():
    print(f"{category}: {product['name']}")

ingredient_embeding:  Embedding(10000, 128)




Category cleanser, Epoch 0, Loss: 1.261245608329773
Category cleanser, Epoch 1, Loss: 0.3210048973560333
Category cleanser, Epoch 2, Loss: 0.12506555020809174
Category cleanser, Epoch 3, Loss: 0.06397704780101776
Category cleanser, Epoch 4, Loss: 0.04701791703701019
ingredient_embeding:  Embedding(10000, 128)
Category moisturizer, Epoch 0, Loss: 1.0994694232940674
Category moisturizer, Epoch 1, Loss: 0.33272960782051086
Category moisturizer, Epoch 2, Loss: 0.1203751266002655
Category moisturizer, Epoch 3, Loss: 0.05480682849884033
Category moisturizer, Epoch 4, Loss: 0.03442738205194473

Productos recomendados:
cleanser: Product A
moisturizer: Product C
