# Build a fashion app for image description

An optional lab showing how a simple fashion app can be built with the GPT-4o model and the Gradio framework.

https://www.gradio.app/

In [None]:
#%pip install openai --upgrade

Import libraries

In [None]:
import base64
import gradio as gr
import json
import openai
import os
import requests

from dotenv import load_dotenv
from io import BytesIO
from PIL import Image

Load environment variables

In [None]:
# Read settings from a .env file; override=True lets them replace variables
# that are already present in the process environment.
load_dotenv(override=True)

# Azure OpenAI connection settings, stored as attributes on the openai module
# so the rest of the notebook can read them from one place.
openai.api_type = "azure"
openai.api_version = os.getenv("AZURE_OPENAI_API_VERSION")
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_key = os.getenv("AZURE_OPENAI_API_KEY")

# Deployment name of your GPT-4o model, as shown in Azure OpenAI Studio.
model = os.getenv("AZURE_OPENAI_MODEL")

## Gradio WebApp

**Function to generate image description with focus on fashion features**

The function is designed to interact with the **GPT-4 Vision** model to analyze and describe fashion items from an uploaded image. The function takes an image file as input and processes it to generate a detailed description of the fashion item in the image.

In [None]:
def _encode_image_as_jpeg_base64(pil_image):
    """Return *pil_image* serialized as JPEG and base64-encoded as ASCII text."""
    # JPEG cannot store an alpha channel or a palette, so saving e.g. an RGBA
    # or P mode image (common for PNG files) raises; normalize to RGB first.
    if pil_image.mode != "RGB":
        pil_image = pil_image.convert("RGB")
    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("ascii")


def _split_fashion_response(text):
    """Split the model answer into (product insights, marketing content)."""
    marker = "13. STORIES:"
    # partition() never raises: if the model omitted or reformatted the
    # heading, the whole answer stays in the first part and the second is "".
    insights, found, marketing = text.partition(marker)
    insights = insights.replace("\n\n", "  ")
    return insights, (found + marketing if found else "")


def gpt4V_fashion_webapp(pil_image):
    """
    Describe the fashion item shown in an image with the deployed GPT-4o model.

    The image is sent, together with a fixed system context and a fixed list
    of 16 requested fields, to the Azure OpenAI chat-completions REST endpoint
    built from ``openai.api_base``, ``model`` and ``openai.api_version``.

    Args:
        pil_image: A PIL image (what ``gr.Image(type="pil")`` provides), or
            ``None`` when no image was submitted.

    Returns:
        A tuple of two strings:
        - on success, the answer up to the "13. STORIES:" heading (with blank
          lines collapsed) and the answer from that heading onwards; if the
          heading is absent the full answer is returned first and the second
          string is empty;
        - on failure, the same short error message twice (the HTTP status
          code for non-200/429 responses).
    """
    if pil_image is None:
        msg = "No image provided. Please upload an image and try again."
        return msg, msg

    # Endpoint (strip a trailing "/" so the URL does not contain "//")
    base_url = f"{(openai.api_base or '').rstrip('/')}/openai/deployments/{model}"
    gpt4vision_endpoint = f"{base_url}/chat/completions?api-version={openai.api_version}"

    # Header
    headers = {"Content-Type": "application/json", "api-key": openai.api_key}

    # Encoded PIL image
    encoded_image = _encode_image_as_jpeg_base64(pil_image)

    context = """ 
    You are a fashion expert, familiar with identifying features of fashion articles from images.
    A user will upload an image and asks you to describe one particular piece in the shot: jacket, shoes, pants, \
    watches, etc.
    """

    prompt = """
    You respond with your analysis of the following fields:

    1. ITEM'S TYPE: Identify if it's a top, bottom, dress, outerwear, footwear, bag, jewelry...
    2. BRAND: identity the brand of the item.
    3. COLOR: Note the main color(s) and any secondary colors.
    4. PATTERN: Identify any visible patterns such as stripes, florals, animal print, or geometric designs.\
    Feel free to use any other patterns here.
    5. MATERIAL: Best guess at the material that the item is made from.
    6. FEATURES: Note any unique details or embellishments, like embroidery, sequins, studs, fringes, buttons,
    zippers...
    7. ITEM TYPE SPECIFIC: For each type of item, feel free to add any additional descriptions that are relevant \
    to help describe the item. For example, for a jacket you can include the neck and sleeve design, plus the length.
    8. MISC.: Anything else important that you notice.
    9. SIZE: Print the size of the item if you get it from the image.
    10. ITEM SUMMARY: Write a one line summary for this item.
    11. ITEM CLASSIFICATION: Classify this item into CLOTHES, BAG, SHOES, WATCH or OTHERS.
    12. ITEM TAGS: Generate 10 tags to describe this item. Each tags should be separated with a comma.
    13. STORIES: Write multiple stories about this product in 5 lines.
    14. TWEETER PUBLICATION: Write a tweeter ad for this item with some hashtags and emojis.
    15. ECOMMERCE AD: Generate an item description for a publication on a ecommerce website with a selling message.
    16. FRENCH ECOMMERCE AD: Generate an item description in French for a publication on a ecommerce website with \
    a selling message.

    The output should be a numbered bulleted list. Just print an empty line between each items starting at item 12.
    """

    # Prompt: system context, then the user turn carrying the instructions
    # and the image as a base64 data URL.
    json_data = {
        "messages": [
            {
                "role": "system",
                "content": [{"type": "text", "text": context}],
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{encoded_image}"
                        },
                    },
                ],
            },
        ],
        "max_tokens": 4000,
        "temperature": 0.7,
    }

    # Results. A timeout keeps the UI from hanging forever on a stalled
    # connection; transport errors are reported like the HTTP errors below.
    try:
        response = requests.post(
            gpt4vision_endpoint,
            headers=headers,
            data=json.dumps(json_data),
            timeout=120,
        )
    except requests.RequestException as exc:
        msg = f"[Request error] {exc}"
        return msg, msg

    if response.status_code == 200:
        try:
            resp = json.loads(response.text)["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError):
            msg = "[Error] Unexpected response format from the model endpoint."
            return msg, msg
        # content may be null in the JSON payload; treat that as an empty answer
        return _split_fashion_response(resp or "")

    if response.status_code == 429:
        msg = "[429 Error] Too many requests. Please wait a couple of seconds and try again."
        return msg, msg

    msg = str(response.status_code)
    return msg, msg

**Define an app**

Here we define the application's inputs, outputs, and execution.

In [None]:
# --- Presentation: page title, logo and theme -------------------------------
title = "GPT-4 Vision demo with Azure Open AI - Fashion usecase"

image_url = (
    "https://raw.githubusercontent.com/retkowsky/images/master/fashion-logo-design.jpg"
)
# Small HTML snippet shown as the interface description (centered logo).
logo = f"<center> <img src= {image_url} width=200px></center>"

# Theme name; see https://huggingface.co/spaces/gradio/theme-gallery
theme = "rottenlittlecreature/Moon_Goblin"

# --- Sample images offered as clickable examples ----------------------------
examples = [
    "../data/fashion/image1.jpg",
    "../data/fashion/image2.jpg",
    "../data/fashion/image3.png",
    "../data/fashion/image4.jpg",
    "../data/fashion/image5.jpg",
    "../data/fashion/image6.png",
    "../data/fashion/image7.jpg",
    "../data/fashion/image8.jpg",
]

# --- Input / output components ----------------------------------------------
# One image in (handed to the function as a PIL image), two text boxes out,
# matching the two strings returned by gpt4V_fashion_webapp.
inputs = gr.Image(type="pil", label="Your image")
outputs = [
    gr.Text(label="Product image insights"),
    gr.Text(label="Marketing content"),
]

# --- Assemble the interface ---------------------------------------------------
gpt4V_fashion_gradiowebapp = gr.Interface(
    fn=gpt4V_fashion_webapp,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=logo,
    examples=examples,
    theme=theme,
)

**Run the app**

Note that you can upload your own images or use inputs from camera or clipboard

In [None]:
# Start the web app. NOTE(review): per the Gradio docs, share=True also requests
# a temporary public share link, so anyone with that link can call the model
# through this app -- use share=False to keep it local.
gpt4V_fashion_gradiowebapp.launch(share=True)