# Zero-shot Prompt

### 1. Llama

In [None]:
from openai import OpenAI
import pandas as pd
import time

# OpenAI-compatible client; base_url and api_key are blank in this file
client = OpenAI(base_url="", api_key="")

# One row per screenshot; the cells below read its "screenshot_url" and "category" columns
screenshot_data = pd.read_excel("screenshot_data.xlsx")

# Attribute names requested from the model, keyed by product category
category_attributes = dict(
    fashion=["Object", "Brand", "Color", "Size", "Material", "Department", "Style", "Price"],
    electronics=["Object", "Brand", "Color", "Size", "Material", "Model", "Power Mode", "Price"],
    beauty=["Object", "Brand", "Volume", "Material", "Skin/Hair Type", "Benefits", "Price"],
)

# Helper function to process each category
def process_category(category, screenshot_url):
    """Request the attribute values for one screenshot from the Llama vision model.

    Args:
        category: Key looked up in the module-level ``category_attributes`` mapping.
        screenshot_url: Value sent as the ``image_url`` part of the user message.

    Returns:
        The text content of the first choice when it is non-empty. Otherwise
        ``"API Error: ..."`` if the response object has an ``error`` attribute,
        or a fixed "no content" message. A category that is unknown (or maps to
        an empty list) returns a fixed message without calling the API.
    """
    attributes = category_attributes.get(category, [])
    if not attributes:
        return "No valid attributes for this category."

    system_turn = {
        "role": "system",
        "content": "You are a world-class algorithm for extracting provided product attributes from screenshot in structured formats, strictly exclude any unrelated information.",
    }
    user_turn = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(attributes)}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’",
            },
            {"type": "image_url", "image_url": {"url": screenshot_url}},
        ],
    }

    response = client.chat.completions.create(
        model="meta-llama/llama-3.2-11b-vision-instruct:free",
        messages=[system_turn, user_turn],
    )

    # Walk choices -> message -> content, stopping at the first falsy link
    choices = response.choices
    first_message = choices[0].message if choices else None
    content = first_message.content if first_message else None
    if content:
        return content
    if hasattr(response, "error"):
        return f"API Error: {response.error}"
    return "No content returned from the API."

# Run the Llama zero-shot prompt over every row and store the answer per row
for i, row in screenshot_data.iterrows():
    screenshot_url = row["screenshot_url"]

    if row["category"] not in category_attributes:
        # Unknown category: record a marker instead of calling the API
        screenshot_data.at[i, 'llama_zeroshot'] = "No valid category."
    else:
        result = process_category(row["category"], screenshot_url)
        screenshot_data.at[i, 'llama_zeroshot'] = result
        print(result)

    # Pause 20 s after every row, whichever branch ran
    time.sleep(20)

# Show the collected column once the loop has finished
print(screenshot_data["llama_zeroshot"])

### 2. Mistral

In [None]:
from openai import OpenAI
import pandas as pd
import time

client = OpenAI(base_url="", api_key="")

# Reuse the frame that an earlier cell already loaded, if there is one.
# Re-reading the workbook here would throw away result columns added in
# memory by previous cells (e.g. 'llama_zeroshot') before they are exported.
# In a fresh session the name does not exist yet, so the file is read as before.
if "screenshot_data" not in globals():
    screenshot_data = pd.read_excel("screenshot_data.xlsx")

# Define category-specific attribute sets
category_attributes = {
    "fashion": ["Object", "Brand", "Color", "Size", "Material", "Department", "Style", "Price"],
    "electronics": ["Object", "Brand", "Color", "Size", "Material", "Model", "Power Mode", "Price"],
    "beauty": ["Object", "Brand", "Volume", "Material", "Skin/Hair Type", "Benefits", "Price"]
}

# Helper function to process each category
def process_category(category, screenshot_url):
    """Request the attribute values for one screenshot from the Mistral model.

    Args:
        category: Key looked up in the module-level ``category_attributes`` mapping.
        screenshot_url: Value sent as the ``image_url`` part of the user message.

    Returns:
        The text content of the first choice when it is non-empty. Otherwise
        ``"API Error: ..."`` if the response object has an ``error`` attribute,
        or a fixed "no content" message. A category that is unknown (or maps to
        an empty list) returns a fixed message without calling the API.
    """
    attributes = category_attributes.get(category, [])
    if not attributes:
        return "No valid attributes for this category."

    system_turn = {
        "role": "system",
        "content": "You are a world-class algorithm for extracting provided product attributes from screenshot in structured formats, strictly exclude any unrelated information.",
    }
    user_turn = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(attributes)}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’",
            },
            {"type": "image_url", "image_url": {"url": screenshot_url}},
        ],
    }

    response = client.chat.completions.create(
        model="mistralai/mistral-small-3.1-24b-instruct:free",
        messages=[system_turn, user_turn],
    )

    # Walk choices -> message -> content, stopping at the first falsy link
    choices = response.choices
    first_message = choices[0].message if choices else None
    content = first_message.content if first_message else None
    if content:
        return content
    if hasattr(response, "error"):
        return f"API Error: {response.error}"
    return "No content returned from the API."

# Run the Mistral zero-shot prompt over every row and store the answer per row
for i, row in screenshot_data.iterrows():
    screenshot_url = row["screenshot_url"]

    if row["category"] not in category_attributes:
        # Unknown category: record a marker instead of calling the API
        screenshot_data.at[i, 'mistral_zeroshot'] = "No valid category."
    else:
        result = process_category(row["category"], screenshot_url)
        screenshot_data.at[i, 'mistral_zeroshot'] = result
        print(result)

    # Pause 20 s after every row, whichever branch ran
    time.sleep(20)

# Show the collected column once the loop has finished
print(screenshot_data["mistral_zeroshot"])

### 3. Qwen

In [None]:
from openai import OpenAI
import pandas as pd
import time

client = OpenAI(base_url="", api_key="")

# Reuse the frame that an earlier cell already loaded, if there is one.
# Re-reading the workbook here would throw away result columns added in
# memory by previous cells (e.g. 'llama_zeroshot', 'mistral_zeroshot')
# before they are exported. In a fresh session the name does not exist yet,
# so the file is read as before.
if "screenshot_data" not in globals():
    screenshot_data = pd.read_excel("screenshot_data.xlsx")

# Define category-specific attribute sets
category_attributes = {
    "fashion": ["Object", "Brand", "Color", "Size", "Material", "Department", "Style", "Price"],
    "electronics": ["Object", "Brand", "Color", "Size", "Material", "Model", "Power Mode", "Price"],
    "beauty": ["Object", "Brand", "Volume", "Material", "Skin/Hair Type", "Benefits", "Price"]
}

# Helper function to process each category
def process_category(category, screenshot_url):
    """Request the attribute values for one screenshot from the Qwen vision-language model.

    Args:
        category: Key looked up in the module-level ``category_attributes`` mapping.
        screenshot_url: Value sent as the ``image_url`` part of the user message.

    Returns:
        The text content of the first choice when it is non-empty. Otherwise
        ``"API Error: ..."`` if the response object has an ``error`` attribute,
        or a fixed "no content" message. A category that is unknown (or maps to
        an empty list) returns a fixed message without calling the API.
    """
    attributes = category_attributes.get(category, [])
    if not attributes:
        return "No valid attributes for this category."

    system_turn = {
        "role": "system",
        "content": "You are a world-class algorithm for extracting provided product attributes from screenshot in structured formats, strictly exclude any unrelated information.",
    }
    user_turn = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(attributes)}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’",
            },
            {"type": "image_url", "image_url": {"url": screenshot_url}},
        ],
    }

    response = client.chat.completions.create(
        model="qwen/qwen2.5-vl-72b-instruct:free",
        messages=[system_turn, user_turn],
    )

    # Walk choices -> message -> content, stopping at the first falsy link
    choices = response.choices
    first_message = choices[0].message if choices else None
    content = first_message.content if first_message else None
    if content:
        return content
    if hasattr(response, "error"):
        return f"API Error: {response.error}"
    return "No content returned from the API."

# Run the Qwen zero-shot prompt over every row and store the answer per row
for i, row in screenshot_data.iterrows():
    screenshot_url = row["screenshot_url"]

    if row["category"] not in category_attributes:
        # Unknown category: record a marker instead of calling the API
        screenshot_data.at[i, 'qwen_zeroshot'] = "No valid category."
    else:
        result = process_category(row["category"], screenshot_url)
        screenshot_data.at[i, 'qwen_zeroshot'] = result
        print(result)

    # Pause 20 s after every row, whichever branch ran
    time.sleep(20)

# Show the collected column once the loop has finished
print(screenshot_data["qwen_zeroshot"])

In [None]:
# Write the current in-memory frame back to the same workbook it was read from
# (overwrites the file); the row index is not written as a column.
screenshot_data.to_excel("screenshot_data.xlsx", index = False)

# Few-shot Prompt

## Few-shot by platform & category

### 1. Llama

In [None]:
import pandas as pd
from openai import OpenAI
import time

# Initialize OpenAI client
client = OpenAI(base_url="", api_key="")

# Load the screenshot data from the Excel file
screenshot_data = pd.read_excel("screenshot_data.xlsx")

# Filter the test data from the dataset.
# An explicit copy is taken because the main loop writes a new column into
# test_data row by row; writing into a boolean-mask slice is the pattern that
# triggers pandas' SettingWithCopyWarning.
test_data = screenshot_data[screenshot_data['set'] == 'test'].copy()

# Define category-specific attribute sets
category_attributes = {
    "fashion": ["Object", "Brand", "Color", "Size", "Material", "Department", "Style", "Price"],
    "electronics": ["Object", "Brand", "Color", "Size", "Material", "Model", "Power Mode", "Price"],
    "beauty": ["Object", "Brand", "Volume", "Material", "Skin/Hair Type", "Benefits", "Price"]
}

# Platforms for which few-shot examples are looked up
platforms = ["amazon", "ebay", "temu"]

# Helper function to generate few-shot examples based on the "train" set data
def generate_few_shot_examples(category, platform, data):
    """Turn the matching training rows into alternating user/assistant chat turns.

    Args:
        category: Value compared against the ``category`` column.
        platform: Value compared against the ``platform`` column.
        data: DataFrame with ``category``, ``platform``, ``set``,
            ``screenshot_url`` and ``reference_output`` columns.

    Returns:
        A list with two entries per row whose ``set`` is ``'train'`` and whose
        category and platform match: the user prompt (text plus screenshot URL)
        followed by an assistant turn holding that row's ``reference_output``.
        Empty when no row matches.
    """
    is_train_match = (
        (data['category'] == category)
        & (data['platform'] == platform)
        & (data['set'] == 'train')
    )

    turns = []
    for _, shot in data[is_train_match].iterrows():
        question = {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(category_attributes.get(category, []))}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’",
                },
                {"type": "image_url", "image_url": {"url": shot['screenshot_url']}},
            ],
        }
        # The reference output plays the role of the model's correct answer
        answer = {"role": "assistant", "content": shot['reference_output']}
        turns.extend([question, answer])

    return turns

# Helper function to process each category/platform with few-shot examples
def process_category_platform(category, platform, screenshot_url):
    """Query the Llama vision model for one screenshot, primed with few-shot examples.

    The examples are built from the module-level ``screenshot_data`` for the
    given category and platform on every call.

    Args:
        category: Product category; also indexes ``category_attributes``.
        platform: Platform used to select the few-shot examples.
        screenshot_url: Value sent as the ``image_url`` part of the final user message.

    Returns:
        The text content of the first choice when it is non-empty. Otherwise
        ``"API Error: ..."`` if the response object has an ``error`` attribute,
        or a fixed "no content" message. When no examples exist for the
        category/platform pair, a fixed message is returned without calling the API.
    """
    examples = generate_few_shot_examples(category, platform, screenshot_data)
    if not examples:
        return "No valid few-shot examples for this category/platform."

    # Conversation layout: system prompt, example turns, then the real question
    conversation = [
        {"role": "system", "content": "You are a world-class algorithm for extracting provided product attributes from screenshot in structured formats, strictly exclude any unrelated information."}
    ]
    conversation.extend(examples)
    conversation.append({
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(category_attributes[category])}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’",
            },
            {"type": "image_url", "image_url": {"url": screenshot_url}},
        ],
    })

    response = client.chat.completions.create(
        model="meta-llama/llama-3.2-11b-vision-instruct:free",
        messages=conversation,
    )

    choices = response.choices
    first_message = choices[0].message if choices else None
    content = first_message.content if first_message else None
    if content:
        return content
    if hasattr(response, "error"):
        return f"API Error: {response.error}"
    return "No content returned from the API."

# Main loop for processing the test set screenshot inputs.
# Rebind test_data to an independent copy first so the per-row writes below
# never land on a slice of screenshot_data.
test_data = test_data.copy()

for i, row in test_data.iterrows():
    screenshot_url = row["screenshot_url"]  # Get the screenshot URL

    # Process based on category and platform
    if row["category"] in category_attributes and row["platform"] in platforms:
        result = process_category_platform(row["category"], row["platform"], screenshot_url)
        test_data.at[i, 'llama_fewshot'] = result  # Write result to 'llama_fewshot' column
        print(result)
    else:
        test_data.at[i, 'llama_fewshot'] = "No valid category or platform."

    time.sleep(20)  # Avoid hitting API rate limits

# Write the results back to the original dataframe.
# DataFrame.update only overwrites columns that already exist in the caller,
# so a brand-new 'llama_fewshot' column would silently never be added. A
# label-aligned .loc assignment creates the column when it is missing and
# fills only the test rows. The guard covers an empty test set, where the
# loop never created the column.
if 'llama_fewshot' in test_data.columns:
    screenshot_data.loc[test_data.index, 'llama_fewshot'] = test_data['llama_fewshot']

### 2. Mistral

In [None]:
import pandas as pd
from openai import OpenAI
import time

# Initialize OpenAI client
client = OpenAI(base_url="", api_key="")

# Load the screenshot data from the Excel file, unless an earlier cell already
# holds it in memory: re-reading the workbook here would throw away result
# columns that previous cells added and that have not been exported yet.
if "screenshot_data" not in globals():
    screenshot_data = pd.read_excel("screenshot_data.xlsx")

# Filter the test data from the dataset.
# An explicit copy is taken because the main loop writes a new column into
# test_data row by row; writing into a boolean-mask slice is the pattern that
# triggers pandas' SettingWithCopyWarning.
test_data = screenshot_data[screenshot_data['set'] == 'test'].copy()

# Define category-specific attribute sets
category_attributes = {
    "fashion": ["Object", "Brand", "Color", "Size", "Material", "Department", "Style", "Price"],
    "electronics": ["Object", "Brand", "Color", "Size", "Material", "Model", "Power Mode", "Price"],
    "beauty": ["Object", "Brand", "Volume", "Material", "Skin/Hair Type", "Benefits", "Price"]
}

# Platforms for which few-shot examples are looked up
platforms = ["amazon", "ebay", "temu"]

# Helper function to generate few-shot examples based on the "train" set data
def generate_few_shot_examples(category, platform, data):
    """Turn the matching training rows into alternating user/assistant chat turns.

    Args:
        category: Value compared against the ``category`` column.
        platform: Value compared against the ``platform`` column.
        data: DataFrame with ``category``, ``platform``, ``set``,
            ``screenshot_url`` and ``reference_output`` columns.

    Returns:
        A list with two entries per row whose ``set`` is ``'train'`` and whose
        category and platform match: the user prompt (text plus screenshot URL)
        followed by an assistant turn holding that row's ``reference_output``.
        Empty when no row matches.
    """
    is_train_match = (
        (data['category'] == category)
        & (data['platform'] == platform)
        & (data['set'] == 'train')
    )

    turns = []
    for _, shot in data[is_train_match].iterrows():
        question = {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(category_attributes.get(category, []))}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’",
                },
                {"type": "image_url", "image_url": {"url": shot['screenshot_url']}},
            ],
        }
        # The reference output plays the role of the model's correct answer
        answer = {"role": "assistant", "content": shot['reference_output']}
        turns.extend([question, answer])

    return turns

# Helper function to process each category/platform with few-shot examples
def process_category_platform(category, platform, screenshot_url):
    """Query the Mistral model for one screenshot, primed with few-shot examples.

    The examples are built from the module-level ``screenshot_data`` for the
    given category and platform on every call.

    Args:
        category: Product category; also indexes ``category_attributes``.
        platform: Platform used to select the few-shot examples.
        screenshot_url: Value sent as the ``image_url`` part of the final user message.

    Returns:
        The text content of the first choice when it is non-empty. Otherwise
        ``"API Error: ..."`` if the response object has an ``error`` attribute,
        or a fixed "no content" message. When no examples exist for the
        category/platform pair, a fixed message is returned without calling the API.
    """
    examples = generate_few_shot_examples(category, platform, screenshot_data)
    if not examples:
        return "No valid few-shot examples for this category/platform."

    # Conversation layout: system prompt, example turns, then the real question
    conversation = [
        {"role": "system", "content": "You are a world-class algorithm for extracting provided product attributes from screenshot in structured formats, strictly exclude any unrelated information."}
    ]
    conversation.extend(examples)
    conversation.append({
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(category_attributes[category])}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’",
            },
            {"type": "image_url", "image_url": {"url": screenshot_url}},
        ],
    })

    response = client.chat.completions.create(
        model="mistralai/mistral-small-3.1-24b-instruct:free",
        messages=conversation,
    )

    choices = response.choices
    first_message = choices[0].message if choices else None
    content = first_message.content if first_message else None
    if content:
        return content
    if hasattr(response, "error"):
        return f"API Error: {response.error}"
    return "No content returned from the API."

# Main loop for processing the test set screenshot inputs.
# Rebind test_data to an independent copy first so the per-row writes below
# never land on a slice of screenshot_data.
test_data = test_data.copy()

for i, row in test_data.iterrows():
    screenshot_url = row["screenshot_url"]  # Get the screenshot URL

    # Process based on category and platform
    if row["category"] in category_attributes and row["platform"] in platforms:
        result = process_category_platform(row["category"], row["platform"], screenshot_url)
        test_data.at[i, 'mistral_fewshot'] = result  # Write result to 'mistral_fewshot' column
        print(result)
    else:
        test_data.at[i, 'mistral_fewshot'] = "No valid category or platform."

    time.sleep(20)  # Avoid hitting API rate limits

# Write the results back to the original dataframe.
# DataFrame.update only overwrites columns that already exist in the caller,
# so a brand-new 'mistral_fewshot' column would silently never be added. A
# label-aligned .loc assignment creates the column when it is missing and
# fills only the test rows. The guard covers an empty test set, where the
# loop never created the column.
if 'mistral_fewshot' in test_data.columns:
    screenshot_data.loc[test_data.index, 'mistral_fewshot'] = test_data['mistral_fewshot']

### 3. Qwen

In [None]:
import pandas as pd
from openai import OpenAI
import time

# Initialize OpenAI client
client = OpenAI(base_url="", api_key="")

# Load the screenshot data from the Excel file, unless an earlier cell already
# holds it in memory: re-reading the workbook here would throw away result
# columns that previous cells added and that have not been exported yet.
if "screenshot_data" not in globals():
    screenshot_data = pd.read_excel("screenshot_data.xlsx")

# Filter the test data from the dataset.
# An explicit copy is taken because the main loop writes a new column into
# test_data row by row; writing into a boolean-mask slice is the pattern that
# triggers pandas' SettingWithCopyWarning.
test_data = screenshot_data[screenshot_data['set'] == 'test'].copy()

# Define category-specific attribute sets
category_attributes = {
    "fashion": ["Object", "Brand", "Color", "Size", "Material", "Department", "Style", "Price"],
    "electronics": ["Object", "Brand", "Color", "Size", "Material", "Model", "Power Mode", "Price"],
    "beauty": ["Object", "Brand", "Volume", "Material", "Skin/Hair Type", "Benefits", "Price"]
}

# Platforms for which few-shot examples are looked up
platforms = ["amazon", "ebay", "temu"]

# Helper function to generate few-shot examples based on the "train" set data
def generate_few_shot_examples(category, platform, data):
    """Turn the matching training rows into alternating user/assistant chat turns.

    Args:
        category: Value compared against the ``category`` column.
        platform: Value compared against the ``platform`` column.
        data: DataFrame with ``category``, ``platform``, ``set``,
            ``screenshot_url`` and ``reference_output`` columns.

    Returns:
        A list with two entries per row whose ``set`` is ``'train'`` and whose
        category and platform match: the user prompt (text plus screenshot URL)
        followed by an assistant turn holding that row's ``reference_output``.
        Empty when no row matches.
    """
    is_train_match = (
        (data['category'] == category)
        & (data['platform'] == platform)
        & (data['set'] == 'train')
    )

    turns = []
    for _, shot in data[is_train_match].iterrows():
        question = {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(category_attributes.get(category, []))}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’",
                },
                {"type": "image_url", "image_url": {"url": shot['screenshot_url']}},
            ],
        }
        # The reference output plays the role of the model's correct answer
        answer = {"role": "assistant", "content": shot['reference_output']}
        turns.extend([question, answer])

    return turns

# Helper function to process each category/platform with few-shot examples
def process_category_platform(category, platform, screenshot_url):
    """Query the Qwen vision-language model for one screenshot, primed with few-shot examples.

    The examples are built from the module-level ``screenshot_data`` for the
    given category and platform on every call.

    Args:
        category: Product category; also indexes ``category_attributes``.
        platform: Platform used to select the few-shot examples.
        screenshot_url: Value sent as the ``image_url`` part of the final user message.

    Returns:
        The text content of the first choice when it is non-empty. Otherwise
        ``"API Error: ..."`` if the response object has an ``error`` attribute,
        or a fixed "no content" message. When no examples exist for the
        category/platform pair, a fixed message is returned without calling the API.
    """
    examples = generate_few_shot_examples(category, platform, screenshot_data)
    if not examples:
        return "No valid few-shot examples for this category/platform."

    # Conversation layout: system prompt, example turns, then the real question
    conversation = [
        {"role": "system", "content": "You are a world-class algorithm for extracting provided product attributes from screenshot in structured formats, strictly exclude any unrelated information."}
    ]
    conversation.extend(examples)
    conversation.append({
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(category_attributes[category])}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’",
            },
            {"type": "image_url", "image_url": {"url": screenshot_url}},
        ],
    })

    response = client.chat.completions.create(
        model="qwen/qwen2.5-vl-72b-instruct:free",
        messages=conversation,
    )

    choices = response.choices
    first_message = choices[0].message if choices else None
    content = first_message.content if first_message else None
    if content:
        return content
    if hasattr(response, "error"):
        return f"API Error: {response.error}"
    return "No content returned from the API."

# Main loop for processing the test set screenshot inputs.
# Rebind test_data to an independent copy first so the per-row writes below
# never land on a slice of screenshot_data.
test_data = test_data.copy()

for i, row in test_data.iterrows():
    screenshot_url = row["screenshot_url"]  # Get the screenshot URL

    # Process based on category and platform
    if row["category"] in category_attributes and row["platform"] in platforms:
        result = process_category_platform(row["category"], row["platform"], screenshot_url)
        test_data.at[i, 'qwen_fewshot'] = result  # Write result to 'qwen_fewshot' column
        print(result)
    else:
        test_data.at[i, 'qwen_fewshot'] = "No valid category or platform."

    time.sleep(20)  # Avoid hitting API rate limits

# Write the results back to the original dataframe.
# DataFrame.update only overwrites columns that already exist in the caller,
# so a brand-new 'qwen_fewshot' column would silently never be added. A
# label-aligned .loc assignment creates the column when it is missing and
# fills only the test rows. The guard covers an empty test set, where the
# loop never created the column.
if 'qwen_fewshot' in test_data.columns:
    screenshot_data.loc[test_data.index, 'qwen_fewshot'] = test_data['qwen_fewshot']

In [None]:
# Dump the current in-memory frame to a JSON file next to the workbook
# (the Excel file itself is not rewritten here).
screenshot_data.to_json("screenshot_data.json")

## Few-shot all training data

In [None]:
import pandas as pd
from openai import OpenAI
import time

# Initialize OpenAI client
client = OpenAI(base_url="", api_key="")

# Load the screenshot data from the Excel file
screenshot_data = pd.read_excel("screenshot_data.xlsx")

# Filter the test data from the dataset.
# An explicit copy is taken because the main loop writes a new column into
# test_data row by row; writing into a boolean-mask slice is the pattern that
# triggers pandas' SettingWithCopyWarning.
test_data = screenshot_data[screenshot_data['set'] == 'test'].copy()

# Define category-specific attribute sets
category_attributes = {
    "fashion": ["Object", "Brand", "Color", "Size", "Material", "Department", "Style", "Price"],
    "electronics": ["Object", "Brand", "Color", "Size", "Material", "Model", "Power Mode", "Price"],
    "beauty": ["Object", "Brand", "Volume", "Material", "Skin/Hair Type", "Benefits", "Price"]
}

# Platforms whose training rows are pooled into each category's examples
platforms = ["amazon", "ebay", "temu"]

# Helper function to generate few-shot examples based on the "train" set data
def generate_few_shot_examples(data):
    """Collect few-shot chat turns per category, pooled over all listed platforms.

    Args:
        data: DataFrame with ``category``, ``platform``, ``set``,
            ``screenshot_url`` and ``reference_output`` columns.

    Returns:
        A dict with one key per entry of the module-level ``category_attributes``.
        Each value is a list holding, for every ``'train'`` row of that category
        whose platform is in the module-level ``platforms`` list, a user prompt
        followed by an assistant turn with the row's ``reference_output``.
        Rows are visited platform by platform, in ``platforms`` order.
    """
    examples_by_category = {}

    for category, attribute_names in category_attributes.items():
        turns = []
        examples_by_category[category] = turns

        for platform in platforms:
            is_train_match = (
                (data['category'] == category)
                & (data['platform'] == platform)
                & (data['set'] == 'train')
            )

            for _, shot in data[is_train_match].iterrows():
                # User turn: the extraction prompt plus the training screenshot
                turns.append({
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(attribute_names)}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’",
                        },
                        {"type": "image_url", "image_url": {"url": shot['screenshot_url']}},
                    ],
                })
                # Assistant turn: the reference answer for that screenshot
                turns.append({"role": "assistant", "content": shot['reference_output']})

    return examples_by_category

# Generate few-shot examples for each category.
# Built once here, at module level; process_with_category_examples below looks
# up the list for a row's category instead of rebuilding it per request.
few_shot_examples_by_category = generate_few_shot_examples(screenshot_data)

# Helper function to process each test row with the appropriate few-shot examples for its category
def process_with_category_examples(category, screenshot_url):
    """Query the model for one screenshot using all few-shot examples of its category.

    Args:
        category: Key into the module-level ``few_shot_examples_by_category``
            and ``category_attributes`` mappings.
        screenshot_url: Value sent as the ``image_url`` part of the final user message.

    Returns:
        The text content of the first choice when it is non-empty. Otherwise
        ``"API Error: ..."`` if the response object has an ``error`` attribute,
        or a fixed "no content" message. When the category is unknown or has no
        examples, a fixed message is returned without calling the API.
    """
    # The examples dict is pre-seeded with an empty list for every known
    # category, so a membership test alone would let a category with zero
    # training rows through and silently send a request with no examples.
    # Checking for emptiness matches what process_category_platform does.
    examples = few_shot_examples_by_category.get(category)
    if not examples:
        return "No valid few-shot examples for this category."

    # NOTE(review): unlike the per-platform cells, no system message is sent
    # here — confirm whether that is intentional before comparing results.
    response = client.chat.completions.create(
        model="mistralai/mistral-small-3.1-24b-instruct:free",  # Specify the model you want to use
        messages=examples + [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"Extract the product attribute values from the screenshot in a JSON format. Valid attributes are {', '.join(category_attributes[category])}. If an attribute is not present in the screenshot, the attribute value is supposed to be ‘n/a’"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": screenshot_url
                        }
                    }
                ]
            }
        ]
    )

    if response.choices and response.choices[0].message and response.choices[0].message.content:
        return response.choices[0].message.content
    elif hasattr(response, "error"):
        return f"API Error: {response.error}"
    else:
        return "No content returned from the API."

# Main loop for processing the test set screenshot inputs.
# Rebind test_data to an independent copy first so the per-row writes below
# never land on a slice of screenshot_data.
test_data = test_data.copy()

for i, row in test_data.iterrows():
    screenshot_url = row["screenshot_url"]  # Get the screenshot URL

    # Process using the few-shot examples for the correct category
    category = row["category"]
    result = process_with_category_examples(category, screenshot_url)
    # NOTE(review): process_with_category_examples currently requests a mistral
    # model while the column is named 'llama_fewshot' — confirm the model and
    # column name are meant to match before using these results.
    test_data.at[i, 'llama_fewshot'] = result  # Write result to 'llama_fewshot' column
    print(result)

    time.sleep(20)  # Avoid hitting API rate limits, as every other loop in this file does

# Write the results back to the original dataframe.
# DataFrame.update only overwrites columns that already exist in the caller,
# so a brand-new 'llama_fewshot' column would silently never be added. A
# label-aligned .loc assignment creates the column when it is missing and
# fills only the test rows. The guard covers an empty test set, where the
# loop never created the column.
if 'llama_fewshot' in test_data.columns:
    screenshot_data.loc[test_data.index, 'llama_fewshot'] = test_data['llama_fewshot']