In [1]:
from transformers import AutoTokenizer, AutoModel
import torch
import pandas as pd
import numpy as np

class Agent():
    def __init__(self) -> None:
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
        self.model = AutoModel.from_pretrained('bert-base-uncased')

    def words2vector(self, sentence: str):
        sentence = "This is an example sentence."
        tokens = self.tokenizer.tokenize(sentence)

        input_ids = self.tokenizer.convert_tokens_to_ids(tokens)
        input_ids = [self.tokenizer.cls_token_id] + input_ids + [self.tokenizer.sep_token_id]

        max_length = 64
        input_ids = input_ids + [self.tokenizer.pad_token_id] * (max_length - len(input_ids))
        input_tensor = torch.tensor([input_ids])

        with torch.no_grad():
            outputs = self.model(input_tensor)
            embedding = outputs[0][:, 0, :].numpy()

        return embedding[0]
    
model = Agent()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [2]:
df_products = pd.read_csv('productlist.csv')
df_products.drop(df_products.columns[[0, 1, 4]], axis=1, inplace=True)
df_products

Unnamed: 0,product_ID,product_name,product_brand,price,product_description,product_type,skin_type,skin_concern,Img_link
0,6866178703429,Dew-Glow Moisturizer SPF 50,NATURIUM,$22,The Dew-Glow Moisturizer SPF 50 from Naturium ...,['Sun protection'],"['Combination', 'Dry', 'Normal', 'Oily']","['Anti-Aging/Wrinkles', 'Dryness/Hydration']",https://cdn.shopify.com/s/files/1/0249/1218/pr...
1,6857050914885,Retinol Expert 0.1%,IOPE,$80,"If you’re looking for a trusted retinol, look ...",['Serum/Ampoule'],"['Combination', 'Dry', 'Normal', 'Oily']","['Oil Control/Pores', 'Acne', 'Anti-Aging/Wrin...",https://cdn.shopify.com/s/files/1/0249/1218/pr...
2,6857051275333,Bio Conditioning Essence,IOPE,$60,An iconic Korean essence known for its best se...,['Essence'],[],"['Anti-Aging/Wrinkles', 'Dryness/Hydration']",https://cdn.shopify.com/s/files/1/0249/1218/pr...
3,6815255330885,Best of K-Beauty 5-Step Set,Soko Glam,$110,Introducing our 2022 Best of K-Beauty® Award ...,['Others'],"['Dry', 'Oily']","['Acne', 'Anti-Aging/Wrinkles', 'Pigmentation'...",https://cdn.shopify.com/s/files/1/0249/1218/pr...
4,10245844553,Licorice pH Balancing Cleansing Toner,ACWELL,$18,Deep clean and brighten your skin with this sp...,['Toner'],"['Combination', 'Dry', 'Normal', 'Oily']","['Acne', 'Anti-Aging/Wrinkles', 'Pigmentation'...",https://cdn.shopify.com/s/files/1/0249/1218/pr...
...,...,...,...,...,...,...,...,...,...
548,4477084074053,Fermentation Mask Pack,BENTON,$3,This premium Fermentation Mask Pack from Bento...,['Face Mask'],"['Combination', 'Normal']","['Anti-Aging/Wrinkles', 'Pigmentation', 'Dryne...",https://cdn.shopify.com/s/files/1/0249/1218/pr...
549,1313135558764,Aloe Soothing Mask Pack (10 Pack),BENTON,$24,Whether you got a little too much sun or are b...,['Face Mask'],"['Combination', 'Dry', 'Normal']","['Sensitive', 'Redness', 'Dryness/Hydration']",https://cdn.shopify.com/s/files/1/0249/1218/pr...
550,3935397314629,Cicaherb Restore Sheet Mask,ENATURE,$1.80,E Nature Cica Herb Restore Sheet Mask is now a...,['Face Mask'],"['Combination', 'Dry', 'Normal', 'Oily']","['Redness', 'Dryness/Hydration']",https://cdn.shopify.com/s/files/1/0249/1218/pr...
551,1978243940421,Squalane Silk Mask (5 pack),DR ALTHEA,$17,The Dr. Althea Squalane Silk Mask offers 360-d...,['Face Mask'],['Dry'],['Dryness/Hydration'],https://cdn.shopify.com/s/files/1/0249/1218/pr...


In [3]:
vectorlist = []
price_num = []
for idx, row in df_products.iterrows():
    item_info = row['product_name'] + row['product_brand'] + row['price'] + row['product_description'] + row['product_type'] + row['skin_type'] + row['skin_concern']
    price_num.append(float(row['price'][1:]))
    vectorlist.append(model.words2vector(item_info))

In [4]:
df_products['feature_vector'] = vectorlist
df_products['price_num'] = price_num

In [7]:
df_products.to_json("./productlist_with_vector.json", orient="records")

In [6]:
df_products

Unnamed: 0,product_ID,product_name,product_brand,price,product_description,product_type,skin_type,skin_concern,Img_link,feature_vector,price_num
0,6866178703429,Dew-Glow Moisturizer SPF 50,NATURIUM,$22,The Dew-Glow Moisturizer SPF 50 from Naturium ...,['Sun protection'],"['Combination', 'Dry', 'Normal', 'Oily']","['Anti-Aging/Wrinkles', 'Dryness/Hydration']",https://cdn.shopify.com/s/files/1/0249/1218/pr...,"[-0.6399795, 0.21811089, 0.30775434, 0.6942612...",22.0
1,6857050914885,Retinol Expert 0.1%,IOPE,$80,"If you’re looking for a trusted retinol, look ...",['Serum/Ampoule'],"['Combination', 'Dry', 'Normal', 'Oily']","['Oil Control/Pores', 'Acne', 'Anti-Aging/Wrin...",https://cdn.shopify.com/s/files/1/0249/1218/pr...,"[-0.6399795, 0.21811089, 0.30775434, 0.6942612...",80.0
2,6857051275333,Bio Conditioning Essence,IOPE,$60,An iconic Korean essence known for its best se...,['Essence'],[],"['Anti-Aging/Wrinkles', 'Dryness/Hydration']",https://cdn.shopify.com/s/files/1/0249/1218/pr...,"[-0.6399795, 0.21811089, 0.30775434, 0.6942612...",60.0
3,6815255330885,Best of K-Beauty 5-Step Set,Soko Glam,$110,Introducing our 2022 Best of K-Beauty® Award ...,['Others'],"['Dry', 'Oily']","['Acne', 'Anti-Aging/Wrinkles', 'Pigmentation'...",https://cdn.shopify.com/s/files/1/0249/1218/pr...,"[-0.6399795, 0.21811089, 0.30775434, 0.6942612...",110.0
4,10245844553,Licorice pH Balancing Cleansing Toner,ACWELL,$18,Deep clean and brighten your skin with this sp...,['Toner'],"['Combination', 'Dry', 'Normal', 'Oily']","['Acne', 'Anti-Aging/Wrinkles', 'Pigmentation'...",https://cdn.shopify.com/s/files/1/0249/1218/pr...,"[-0.6399795, 0.21811089, 0.30775434, 0.6942612...",18.0
...,...,...,...,...,...,...,...,...,...,...,...
548,4477084074053,Fermentation Mask Pack,BENTON,$3,This premium Fermentation Mask Pack from Bento...,['Face Mask'],"['Combination', 'Normal']","['Anti-Aging/Wrinkles', 'Pigmentation', 'Dryne...",https://cdn.shopify.com/s/files/1/0249/1218/pr...,"[-0.6399795, 0.21811089, 0.30775434, 0.6942612...",3.0
549,1313135558764,Aloe Soothing Mask Pack (10 Pack),BENTON,$24,Whether you got a little too much sun or are b...,['Face Mask'],"['Combination', 'Dry', 'Normal']","['Sensitive', 'Redness', 'Dryness/Hydration']",https://cdn.shopify.com/s/files/1/0249/1218/pr...,"[-0.6399795, 0.21811089, 0.30775434, 0.6942612...",24.0
550,3935397314629,Cicaherb Restore Sheet Mask,ENATURE,$1.80,E Nature Cica Herb Restore Sheet Mask is now a...,['Face Mask'],"['Combination', 'Dry', 'Normal', 'Oily']","['Redness', 'Dryness/Hydration']",https://cdn.shopify.com/s/files/1/0249/1218/pr...,"[-0.6399795, 0.21811089, 0.30775434, 0.6942612...",1.8
551,1978243940421,Squalane Silk Mask (5 pack),DR ALTHEA,$17,The Dr. Althea Squalane Silk Mask offers 360-d...,['Face Mask'],['Dry'],['Dryness/Hydration'],https://cdn.shopify.com/s/files/1/0249/1218/pr...,"[-0.6399795, 0.21811089, 0.30775434, 0.6942612...",17.0
