In [1]:
import ast
import html
import re

import pandas as pd
from dotenv import load_dotenv
from IPython.display import Image, display, HTML, JSON

## Load Examples & Create a Few-Shot Prompt

In [2]:
def convert_string_to_list(string_list):
    """Parse the string form of a Python literal (e.g. ``"['a', 'b']"``) back into an object.

    Parameters
    ----------
    string_list : str
        Text holding a Python literal, such as a list stored in a CSV cell.

    Returns
    -------
    object or None
        The value produced by ``ast.literal_eval``, or ``None`` when the input
        cannot be parsed (an error message is printed in that case).
    """
    try:
        return ast.literal_eval(string_list)
    # literal_eval reports bad input with several exception types, not just
    # ValueError: a truncated list such as "['a'" raises SyntaxError, for example.
    # Catch all documented ones so a single bad cell cannot abort a whole .apply().
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        print(f"Error converting string to list: {e}")
        return None

In [5]:
# Few-shot examples: one product per row, including its reference description.
examples_file_path = '../examples.csv'
# The image list is stored in the CSV as text; parse it back into a list while loading.
examples_df = pd.read_csv(examples_file_path).assign(
    product_images=lambda frame: frame['product_images'].apply(convert_string_to_list)
)
examples_df.head()

Unnamed: 0,address,product_title,description,product_images
0,https://www.pazzion.com/products/191-5a-fiorel...,Fiorella Boots,"A perfect blend of style, comfort, and conveni...",[https://www.pazzion.com/cdn/shop/files/Fiorel...
1,https://www.pazzion.com/products/2203a-1-mered...,Meredith Ankle Lace-up Boots,The perfect blend of style and comfort for the...,[https://www.pazzion.com/cdn/shop/files/Meredi...
2,https://www.pazzion.com/products/2597-1-ingrid...,Ingrid Gold Studded Cage Patent Leather Slides,Gladiator styled sandals made in glossy patent...,[https://www.pazzion.com/cdn/shop/files/Ingrid...
3,https://www.pazzion.com/products/522-1-kate-un...,Kate Unlace Platform Sneakers,Athletic trainers don’t need to be dedicated t...,[https://www.pazzion.com/cdn/shop/files/KateUn...
4,https://www.pazzion.com/products/80-29-reagan-...,Reagan Contrast Sneakers,A pristine clean white sneaker is essential to...,[https://www.pazzion.com/cdn/shop/files/Reagan...


In [6]:
# Print every field of the first example untruncated, one "name: value" line each.
for field_name, field_value in examples_df.iloc[0].items():
    print(f"{field_name}: {field_value}")


address: https://www.pazzion.com/products/191-5a-fiorella-boots
product_title: Fiorella Boots
description: A perfect blend of style, comfort, and convenience, crafted to cater to the modern woman's needs, Fiorella is designed with a focus on ease of wear and contemporary style. These boots feature a convenient zipper closure, making them a breeze to put on and take off. No more struggling with laces or buttons; whether you're dashing through your daily routine or stepping out for a night on the town, just zip up and you're ready to conquer your day.
product_images: ['https://www.pazzion.com/cdn/shop/files/FiorellaBoots-Camel-1.webp?v=1705915480', 'https://www.pazzion.com/cdn/shop/files/FiorellaBoots-Camel-10.webp?v=1715759531', 'https://www.pazzion.com/cdn/shop/files/FiorellaBoots-Camel-2.webp?v=1715759531', 'https://www.pazzion.com/cdn/shop/files/FiorellaBoots-Camel-3.webp?v=1715759531', 'https://www.pazzion.com/cdn/shop/files/FiorellaBoots-Camel-4.webp?v=1715759531']


In [7]:
# Read the prompt templates (paths are relative to the working directory).
# The encoding is given explicitly so decoding does not depend on the
# platform's default encoding.
with open('system_prompt.txt', 'r', encoding='utf-8') as file:
    system_prompt = file.read()

with open('user_prompt_a.txt', 'r', encoding='utf-8') as file:
    user_prompt_a = file.read()

with open('user_prompt_b.txt', 'r', encoding='utf-8') as file:
    user_prompt_b = file.read()

# Uncomment one of these to inspect a loaded template:
# system_prompt
# user_prompt_a
# user_prompt_b

In [8]:
def create_example_prompt(df, prompt_a=None, prompt_b=None):
    """Build the few-shot part of the chat: one user/assistant pair per example row.

    Each user message holds the title prompt, one ``image_url`` part per product
    image, and the closing instruction. The matching assistant message holds the
    reference description wrapped in ``<product_description>`` tags.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows with ``product_title``, ``description`` and ``product_images`` columns.
    prompt_a : str, optional
        Template containing the ``{{product_title}}`` placeholder. Defaults to the
        notebook-level ``user_prompt_a``.
    prompt_b : str, optional
        Closing instruction text. Defaults to the notebook-level ``user_prompt_b``.

    Returns
    -------
    list of dict
        Alternating ``user`` / ``assistant`` messages, in row order.
    """
    # Only fall back to the notebook globals when no template was passed in.
    title_template = user_prompt_a if prompt_a is None else prompt_a
    closing_text = user_prompt_b if prompt_b is None else prompt_b

    example_prompt = []

    for _, row in df.iterrows():
        image_urls = row['product_images']
        # convert_string_to_list returns None for a cell it cannot parse; treat
        # anything that is not a list/tuple as "no images" instead of crashing.
        if not isinstance(image_urls, (list, tuple)):
            image_urls = []

        user_content = [
            {"type": "text", "text": title_template.replace("{{product_title}}", row['product_title'])}
        ]
        user_content.extend(
            {"type": "image_url", "image_url": {"url": image_url}}
            for image_url in image_urls
        )
        user_content.append({"type": "text", "text": closing_text})

        example_prompt.append({"role": "user", "content": user_content})
        example_prompt.append({
            "role": "assistant",
            "content": [
                {"type": "text", "text": "<product_description>" + row['description'] + "</product_description>"}
            ]
        })

    return example_prompt

## Load Test Products & Create a User Prompt

In [9]:
# Products that still need a description (this file has no `description` column).
products = '../test_products.csv'
# Image lists are stored as text in the CSV; turn them back into Python lists on load.
products_df = pd.read_csv(products).assign(
    product_images=lambda frame: frame['product_images'].apply(convert_string_to_list)
)
products_df.head()

Unnamed: 0,address,product_title,product_images
0,https://www.pazzion.com/products/c2257-3-joyce...,Joyce Pearl Patent Slingback Heels,[https://www.pazzion.com/cdn/shop/files/JoyceP...
1,https://www.pazzion.com/products/122-5-maia-pe...,Maia Pearl Décor Bow Slingbacks Kitten Heels,[https://www.pazzion.com/cdn/shop/files/MaiaPe...
2,https://www.pazzion.com/products/6313-1-aureli...,Aurelia Strappy Espadrilles,[https://www.pazzion.com/cdn/shop/files/Aureli...
3,https://www.pazzion.com/products/1919-1-margot...,Margot Tweed Lace-up Espadrilles,[https://www.pazzion.com/cdn/shop/files/Margot...
4,https://www.pazzion.com/products/3899-9-fallon...,Fallon Leather Ankle Boots,[https://www.pazzion.com/cdn/shop/files/Fallon...


In [10]:
def create_user_prompt(row, prompt_a=None, prompt_b=None):
    """Build the final user message for one product that needs a description.

    The message holds the title prompt, one ``image_url`` part per product
    image, and the closing instruction.

    Parameters
    ----------
    row : mapping
        Anything indexable by ``'product_title'`` and ``'product_images'``
        (for example a DataFrame row).
    prompt_a : str, optional
        Template containing the ``{{product_title}}`` placeholder. Defaults to the
        notebook-level ``user_prompt_a``.
    prompt_b : str, optional
        Closing instruction text. Defaults to the notebook-level ``user_prompt_b``.

    Returns
    -------
    list of dict
        A one-element list holding the ``user`` message, ready to be
        concatenated onto the rest of the chat.
    """
    # Only fall back to the notebook globals when no template was passed in.
    title_template = user_prompt_a if prompt_a is None else prompt_a
    closing_text = user_prompt_b if prompt_b is None else prompt_b

    image_urls = row['product_images']
    # convert_string_to_list returns None for a cell it cannot parse; treat
    # anything that is not a list/tuple as "no images" instead of crashing.
    if not isinstance(image_urls, (list, tuple)):
        image_urls = []

    user_content = [
        {"type": "text", "text": title_template.replace("{{product_title}}", row['product_title'])}
    ]
    user_content.extend(
        {"type": "image_url", "image_url": {"url": image_url}}
        for image_url in image_urls
    )
    user_content.append({"type": "text", "text": closing_text})

    return [{"role": "user", "content": user_content}]

## Final Prompt Message List

In [11]:
# Assemble the complete chat for the first test product:
#   system message -> few-shot example pairs -> the actual request.
# To browse the displayed structure, paste it into https://jsoneditoronline.org/

first_product = products_df.iloc[0]

example_prompt = create_example_prompt(examples_df)
user_prompt = create_user_prompt(first_product)

message_list = [
    {"role": "system", "content": system_prompt},
    *example_prompt,
    *user_prompt,
]
message_list

[{'role': 'system',
  'content': "You are an AI assistant tasked with generating product descriptions for women's shoes on an e-commerce website."},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'Your goal is to create detailed, concise, and compelling descriptions based on the provided product title and images.\n\nYou will be given two inputs, <product_title> and a list of product images in <product_images>\n\n<product_title>\nFiorella Boots\n</product_title>\n\n<product_images>\n'},
   {'type': 'image_url',
    'image_url': {'url': 'https://www.pazzion.com/cdn/shop/files/FiorellaBoots-Camel-1.webp?v=1705915480'}},
   {'type': 'image_url',
    'image_url': {'url': 'https://www.pazzion.com/cdn/shop/files/FiorellaBoots-Camel-10.webp?v=1715759531'}},
   {'type': 'image_url',
    'image_url': {'url': 'https://www.pazzion.com/cdn/shop/files/FiorellaBoots-Camel-2.webp?v=1715759531'}},
   {'type': 'image_url',
    'image_url': {'url': 'https://www.pazzion.com/cdn/shop/files/Fi

## LLM

### GPT-4o

In [12]:
from openai import OpenAI
from dotenv import load_dotenv
# NOTE(review): presumably loads API credentials from a local .env file into the environment — confirm.
load_dotenv()
# Notebook-wide OpenAI client; describe_image() below looks up this `client` name when it is called.
client = OpenAI()

In [13]:
def describe_image(message_list, model="gpt-4o", max_tokens=4000, temperature=1, api_client=None):
    """Send a chat-completion request and return the text of the first choice.

    Parameters
    ----------
    message_list : list of dict
        The chat messages to send.
    model : str, optional
        Model name passed to the API (default ``"gpt-4o"``).
    max_tokens : int, optional
        Upper bound on generated tokens (default ``4000``).
    temperature : float, optional
        Sampling temperature (default ``1``).
    api_client : object, optional
        Client exposing ``chat.completions.create``. Defaults to the
        notebook-level ``client`` as bound at call time.

    Returns
    -------
    str
        The model's reply. If anything raises an ``Exception``, the exception
        message is returned instead, so callers must be prepared for error text.
    """
    try:
        # Resolve the client inside the try block so a missing or wrong client
        # is reported the same way as a failed request.
        active_client = client if api_client is None else api_client
        response = active_client.chat.completions.create(
            model=model,
            messages=message_list,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        # `response.usage` is also available here if token accounting is wanted.
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort: keep batch loops running and surface the error as text.
        return str(e)

In [14]:
# Preview the first test product, then generate and render its description.
first_product = products_df.iloc[0]

print("Product Title: " + first_product['product_title'])
print("Address: " + first_product['address'])
print("Product Images:")
for image_url in first_product['product_images']:
    print(image_url)

display(Image(url=first_product['product_images'][0], width=400, height=400))

llm_description = describe_image(message_list)
# Escape the model text before embedding it in HTML: the raw reply contains
# <product_description> tags and may contain '<' or '&', which would otherwise
# be interpreted as markup. The width now carries a unit ("width: 100;" is not valid CSS).
display(HTML(f"<div style='word-wrap: break-word; width: 100%;'>{html.escape(str(llm_description))}</div>"))


Product Title: Joyce Pearl Patent Slingback Heels
Address: https://www.pazzion.com/products/c2257-3-joyce-pearl-patent-slingback-heels
Product Images:
https://www.pazzion.com/cdn/shop/files/JoycePearlPatentSlingbackHeels-Beige-1.jpg?v=1705898929
https://www.pazzion.com/cdn/shop/files/JoycePearlPatentSlingbackHeels-Beige-2.jpg?v=1705898934
https://www.pazzion.com/cdn/shop/files/JoycePearlPatentSlingbackHeels-Beige-3.jpg?v=1705898939
https://www.pazzion.com/cdn/shop/files/JoycePearlPatentSlingbackHeels-Beige-4.jpg?v=1705898944
https://www.pazzion.com/cdn/shop/files/JoycePearlPatentSlingbackHeels-Beige-5.jpg?v=1705898949


In [None]:
## cost (12 examples >> 60 images)
  
## time: 30~60s
## input_tokens = 55567 >> $0.2778
## output_tokens = 104 >> $0.00156

## smaller images, fewer images

### Claude

In [None]:
# Claude variants of the example / test-product tables. The helpers below read the
# `product_images_base64` and `media_type` columns from them.
df_example = pd.read_csv('claude/examples_base64.csv')
df = pd.read_csv('claude/test_products_base64.csv')

def create_example_prompt(df):
    """Build Claude-format few-shot messages: one user/assistant pair per example row.

    NOTE: this reuses the name of the OpenAI-format ``create_example_prompt``
    defined earlier in the notebook and replaces it once this cell has run.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows with ``product_title``, ``description``, ``product_images_base64``
        and ``media_type`` columns; the last two hold the string form of Python lists.

    Returns
    -------
    list of dict
        Alternating ``user`` / ``assistant`` messages, in row order.

    Raises
    ------
    ValueError, SyntaxError
        If a list column does not contain a plain Python literal.
    """
    instruction = "Based on the product images, product title, and guidelines provided, write a compelling product description. Present your final product description within <tagline> and <why_youll_love_it> tags."
    example_prompt = []

    for _, row in df.iterrows():
        # The list columns come from a CSV file. Parse them with ast.literal_eval
        # instead of eval(): eval would execute any code placed in those cells.
        images_base64 = ast.literal_eval(row['product_images_base64'])
        media_types = ast.literal_eval(row['media_type'])

        user_content = [
            {"type": "text", "text": "Product Title:"},
            {"type": "text", "text": row['product_title']},
            {"type": "text", "text": "Product Images:"}
        ]
        # Images and media types are paired by position.
        user_content.extend(
            {"type": "image", "source": {"type": "base64", "media_type": m_type, "data": img_base64}}
            for img_base64, m_type in zip(images_base64, media_types)
        )
        user_content.append({"type": "text", "text": instruction})

        example_prompt.append({"role": "user", "content": user_content})
        example_prompt.append({
            "role": "assistant",
            "content": [
                {"type": "text", "text": row['description']}
            ]
        })

    return example_prompt

def create_user_prompt(row):
    """Build the Claude-format user message for one product that needs a description.

    NOTE: this reuses the name of the OpenAI-format ``create_user_prompt``
    defined earlier in the notebook and replaces it once this cell has run;
    later cells that call ``create_user_prompt`` then get this version.

    Parameters
    ----------
    row : mapping
        Anything indexable by ``'product_title'``, ``'product_images_base64'``
        and ``'media_type'``; the last two hold the string form of Python lists.

    Returns
    -------
    list of dict
        A one-element list holding the ``user`` message.

    Raises
    ------
    ValueError, SyntaxError
        If a list field does not contain a plain Python literal.
    """
    # The list fields come from a CSV file. Parse them with ast.literal_eval
    # instead of eval(): eval would execute any code placed in those cells.
    images_base64 = ast.literal_eval(row['product_images_base64'])
    media_types = ast.literal_eval(row['media_type'])

    user_content = [
        {"type": "text", "text": "Product Title:"},
        {"type": "text", "text": row['product_title']},
        {"type": "text", "text": "Product Images:"}
    ]

    # Images and media types are paired by position.
    for img_base64, m_type in zip(images_base64, media_types):
        user_content.append({
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": m_type,
                "data": img_base64
            }
        })

    user_content.append({"type": "text", "text": "Based on the product images, product title, and guidelines provided, write a compelling product description. Present your final product description within <tagline> and <why_youll_love_it> tags."})

    actual_prompt = [{"role": "user", "content": user_content}]
    return actual_prompt

In [None]:
# Build the Claude request: few-shot example pairs followed by the first test product.
prompt_example = create_example_prompt(df_example)
first_product = df.iloc[0]
actual_prompt = create_user_prompt(first_product)

message_list = prompt_example + actual_prompt

# Show only a compact outline (role, number of content parts) per message.
# The messages embed base64 image data, so displaying them whole would flood
# the notebook output.
[(message["role"], len(message["content"])) for message in message_list]

In [None]:
from anthropic import Anthropic
load_dotenv()
# NOTE: this rebinds `client`, the same name the GPT-4o section assigned its OpenAI client to.
# describe_image() looks up `client` when it is called, so after this cell has run it will
# use the Anthropic client until the OpenAI client cell is run again.
client = Anthropic()
# Model identifier used by the messages.create call in the next cell.
MODEL_NAME = "claude-3-5-sonnet-20240620"

In [None]:
# Send the few-shot conversation to Claude. The system prompt is passed as its
# own argument, separate from the message list.
response = client.messages.create(
    model=MODEL_NAME,
    system=system_prompt,
    messages=message_list,
    max_tokens=4096,
    temperature=1,
)

# Print the text of the first content block of the reply.
print(response.content[0].text)


In [None]:
# Visualize the prompt (message_list)
# https://jsoneditoronline.org/ 

import random
import string

def generate_random_string(length=10):
    """Return ``length`` characters drawn at random (with replacement) from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

import copy

# Redact on a deep copy. A plain `updated_message_list = message_list` is only an
# alias, so overwriting the image data would also destroy the real payloads in
# `message_list` and break any later request that reuses it.
updated_message_list = copy.deepcopy(message_list)

for message in updated_message_list:
    if message['role'] == 'user':
        for content in message['content']:
            if content['type'] == 'image':
                # Short placeholder so the structure stays readable when displayed.
                content['source']['data'] = generate_random_string()

updated_message_list


## Automated & Structured Output

In [15]:
# Reload the test products so the batch run below starts from a fresh frame.
products = '../test_products.csv'
products_df = pd.read_csv(products).assign(
    # Stored as text in the CSV; convert back into Python lists.
    product_images=lambda frame: frame['product_images'].apply(convert_string_to_list)
)
products_df.head()

Unnamed: 0,address,product_title,product_images
0,https://www.pazzion.com/products/c2257-3-joyce...,Joyce Pearl Patent Slingback Heels,[https://www.pazzion.com/cdn/shop/files/JoyceP...
1,https://www.pazzion.com/products/122-5-maia-pe...,Maia Pearl Décor Bow Slingbacks Kitten Heels,[https://www.pazzion.com/cdn/shop/files/MaiaPe...
2,https://www.pazzion.com/products/6313-1-aureli...,Aurelia Strappy Espadrilles,[https://www.pazzion.com/cdn/shop/files/Aureli...
3,https://www.pazzion.com/products/1919-1-margot...,Margot Tweed Lace-up Espadrilles,[https://www.pazzion.com/cdn/shop/files/Margot...
4,https://www.pazzion.com/products/3899-9-fallon...,Fallon Leather Ankle Boots,[https://www.pazzion.com/cdn/shop/files/Fallon...


In [None]:
def extract_product_description(xml_string):
    """Return the text inside the first ``<product_description>`` element.

    Parameters
    ----------
    xml_string : str or None
        Raw model output.

    Returns
    -------
    str or None
        The enclosed text with surrounding whitespace removed, or ``None`` when
        the input is not a string or contains no complete tag pair.
    """
    # A reply can be missing (None); re.search would raise TypeError on that.
    if not isinstance(xml_string, str):
        return None
    # DOTALL lets the description span several lines; the lazy group stops at
    # the first closing tag.
    match = re.search(r'<product_description>\s*(.*?)\s*</product_description>', xml_string, re.DOTALL)
    if match is None:
        return None
    return match.group(1).strip()

In [None]:
# Generate a description for every test product and store it in `ai_description`.
# NOTE: this loop needs the OpenAI-format create_user_prompt / describe_image, the OpenAI
# `client`, and the `example_prompt` built in the "Final Prompt Message List" section.
# The Claude section redefines create_user_prompt and rebinds `client`; if those cells
# have been run, re-run the OpenAI definitions before this loop.
for index, row in products_df.iterrows():

    print(f"Product Title: {row['product_title']}")
    print(f"Address: {row['address']}")
    print("Product Images:")
    # A cell that failed to parse is None; treat it as "no images".
    image_urls = row['product_images'] if isinstance(row['product_images'], list) else []
    for image_url in image_urls:
        print(image_url)

    # Preview only when an image exists; indexing [0] on an empty list would raise.
    if image_urls:
        display(Image(url=image_urls[0], width=400, height=400))

    user_prompt = create_user_prompt(row)
    message_list = [{"role": "system", "content": system_prompt}] + example_prompt + user_prompt

    llm_output = describe_image(message_list)
    ai_description = extract_product_description(llm_output)

    print("AI Description:")
    if ai_description is not None:
        shown_text = ai_description
    else:
        # describe_image returns error text on failure, so show the raw output
        # to make the problem visible instead of rendering "None".
        shown_text = f"[no <product_description> found] {llm_output}"
    # Escape before embedding in HTML, and give the width a unit ("width: 600;" is not valid CSS).
    display(HTML(f"<div style='word-wrap: break-word; width: 600px;'>{html.escape(shown_text)}</div>"))
    print("-" * 40)
    # Stored value is unchanged: None when nothing could be extracted.
    products_df.at[index, 'ai_description'] = ai_description


In [None]:
# Save the products table, including the `ai_description` column filled in by the loop above; the index is not written.
products_df.to_csv('products_llm_output.csv', index=False, encoding='utf-8')

In [None]:
# Quick look at the first rows of the final table.
products_df.head()