In [2]:
import pandas as pd

# Workbook holding the product catalogue, given relative to this notebook.
file_path = '../data/All-Purpose Cleaners.xlsx'

# Load the workbook into a DataFrame; later cells use its column names.
df = pd.read_excel(io=file_path)

In [104]:
# The sheet's column headers double as the candidate labels that later cells
# hand to the classifier and to the assistant prompt.
columns = df.columns
candidate_labels = [*columns]

# Show the labels followed by how many there are, one item per line.
print(candidate_labels, len(candidate_labels), sep="\n")

['L0 Domain', 'L1 Category', 'L2 Category', 'L3 Sub-Category', 'L4 Product Enum-Code', 'Product Name', 'Product Quantity', 'UOM', 'MRP', 'Price', 'Product Description', 'SKU Code', 'Manufacturer', 'Country Of Origin', 'Customer Care Contact', 'Time to Ship', 'Returnable (Y/N)', 'Cancellable (Y/N)', 'COD available (Y/N)', 'Images', 'Net Quantity', 'Brand', 'Pack Quantity', 'Pack Size', 'Images / Video', 'UPC/EAN', 'FSSAI no', 'Preservatives (Y/N)', 'Preservatives (details)', 'Flavours & Spices', 'Ready to cook (Y/N)', 'Ready to eat (Y/N)', 'Rice grain type', 'Recommended Age', 'Scented/ Flavour', 'Herbal/ Ayurvedic', 'Theme/ Occasion Type', 'Hair Type', 'Mineral Source', 'Caffeine Content', 'Capacity', 'Composition', 'Benefits', 'Usage', 'Other details', 'Marketed By', 'Images 3', 'Images 4', 'Images 5', 'Images 6', 'Images 7', 'Images 8', 'Images 9', 'Baby Weight', 'Absorption Duration (in Hrs)', 'Features', 'Images 10', 'Care Instruction', 'Ingredients', 'Specification', 'Package Cont

In [5]:
# Listing requirements written one per line, split straight into a list so
# that `requirements` only ever holds the list form.
requirements = """Product name must include the brand name, actual name and size of the product as part of the overall name e.g. “LaxmiBhog atta 1KG bag”
Every product must have high quality images (preferably 500x500 px)
Each product must have meaningful and related short and full description associated with the product
Slab options will be shown in a separate slab table for those products who have more than one pricing slab
All variants of a product will be shown in a variants section sorted by variant name in a horizontal scrollable section""".split('\n')

# Bare expression so the notebook displays the resulting list.
requirements

['Product name must include the brand name, actual name and size of the product as part of the overall name e.g. “LaxmiBhog atta 1KG bag”',
 'Every product must have high quality images (preferably 500x500 px)',
 'Each product must have meaningful and related short and full description associated with the product',
 'Slab options will be shown in a separate slab table for those products who have more than one pricing slab',
 'All variants of a product will be shown in a variants section sorted by variant name in a horizontal scrollable section']

In [6]:
from transformers import pipeline

# Zero-shot classifier used to score every column name against each requirement.
# The checkpoint is named explicitly instead of relying on the library default:
# it is the model the pipeline previously fell back to, so the choice is now
# visible to readers and the "No model was supplied" warning no longer appears.
# NOTE(review): the revision is still not pinned; add `revision=` with a full
# commit hash if byte-for-byte reproducibility of the scores matters.
ZERO_SHOT_MODEL = "facebook/bart-large-mnli"

nlp = pipeline("zero-shot-classification", model=ZERO_SHOT_MODEL)

for requirement in requirements:
    # `candidate_labels` is the list of column names built in an earlier cell.
    new_result = nlp(requirement, candidate_labels)
    print(f"Requirement: {requirement}")
    print(f"Labels: {new_result['labels']}")
    print(f"Scores: {new_result['scores']}")
    print("\n")

No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.


Requirement: Product name must include the brand name, actual name and size of the product as part of the overall name e.g. “LaxmiBhog atta 1KG bag”
Labels: ['Product Name', 'Other details', 'Pack Size', 'Product Quantity', 'Brand', 'Pack Quantity', 'Capacity', 'Composition', 'FSSAI no', 'Net Quantity', 'Specification', 'Features', 'Package Contains', 'L0 Domain', 'Images 6', 'Images 4', 'Images', 'Images 5', 'Images 10', 'Product Description', 'Images 9', 'Images 7', 'Images 3', 'Images 8', 'MRP', 'Usage', 'Herbal/ Ayurvedic', 'Marketed By', 'Baby Weight', 'About', 'Scented/ Flavour', 'Benefits', 'L4 Product Enum-Code', 'Price', 'Returnable (Y/N)', 'Manufacturer', 'Hair Type', 'Mineral Source', 'COD available (Y/N)', 'SKU Code', 'Ingredients', 'UOM', 'L1 Category', 'UPC/EAN', 'Recommended Age', 'Caffeine Content', 'Absorption Duration (in Hrs)', 'Care Instruction', 'Preservatives (details)', 'Rice grain type', 'Country Of Origin', 'Cancellable (Y/N)', 'Flavours & Spices', 'Images / Vi

In [79]:
from openai import OpenAI

from dotenv import load_dotenv
import os

# Load variables from the project's dotenv file so the API key is read from the
# environment rather than written into the notebook.
load_dotenv(dotenv_path='../ops/.env')

api_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=api_key)

# Assistant that the later cells attach to every run they create.
assistant = client.beta.assistants.create(
    model="gpt-4",
    instructions="Given a list of attributes and a requirement in text format, generate a list with the attributes related to the requirement.",
)

In [99]:
# Requirement used for the attribute-selection experiment below.
sample_requirement = "Every product must have four product images"

# Open a thread containing a single user message that asks which of the
# candidate column names relate to the requirement.
thread = client.beta.threads.create(
    messages=[
        dict(
            role="user",
            content=f"""This is a requirement - {sample_requirement} and this is the attribute list - {candidate_labels}. Return attributes comma separated. No additional text required.""",
        )
    ]
)

# Start a run of the assistant on that thread and show its initial status.
run = client.beta.threads.runs.create(assistant_id=assistant.id, thread_id=thread.id)

print(run.status)

queued


In [100]:
import time

# A run is processed asynchronously: the cell that created it printed "queued".
# Retrieving the status only once means this cell silently does nothing when it
# executes before the run has finished (e.g. under "Restart & Run All"), leaving
# `attributes` undefined or stale. Poll until the run leaves its in-flight
# states, bounded by a timeout.
PENDING_STATUSES = {'queued', 'in_progress', 'cancelling'}
POLL_INTERVAL_SECONDS = 1
POLL_TIMEOUT_SECONDS = 120

deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
while True:
    run = client.beta.threads.runs.retrieve(
        thread_id=thread.id,
        run_id=run.id,
    )
    if run.status not in PENDING_STATUSES or time.monotonic() >= deadline:
        break
    time.sleep(POLL_INTERVAL_SECONDS)

print(run.status)

if run.status != 'completed':
    # Fail loudly so that later cells never run against missing or outdated
    # `attributes`.
    raise RuntimeError(
        f"Run {run.id} did not complete (status: {run.status!r}); "
        "attributes were not extracted."
    )

messages = client.beta.threads.messages.list(thread_id=thread.id)

# The first content part of the first listed message is read as the reply text.
# NOTE(review): this relies on the API listing the newest message first - confirm
# if an explicit `order` is ever passed to `list`.
message = messages.data[0].content[0].text.value

# The reply is expected to be comma separated; strip quote characters and
# surrounding whitespace, and drop empty tokens (e.g. from a trailing comma).
attributes = [att.strip() for att in message.replace("'", "").split(',')]
attributes = [att for att in attributes if att]
print(attributes)

completed
['images', 'images 3', 'images 4', 'images 5', 'images 6']


In [101]:
# Second thread: ask whether the selected attributes satisfy the requirement.
# It interpolates `sample_requirement` and `attributes` from the earlier cells.
thread = client.beta.threads.create(
    messages=[
        dict(
            role="user",
            content=f"""This is a requirement - {sample_requirement} and this is the attribute list - {attributes}. The requirment is satisfied based on the given column names. True or False and why ?.""",
        )
    ]
)

# Start a run of the same assistant on the new thread and show its initial status.
run = client.beta.threads.runs.create(assistant_id=assistant.id, thread_id=thread.id)

print(run.status)

queued


In [103]:
import time

# A run is processed asynchronously: the cell that created it printed "queued".
# Retrieving the status only once means this cell prints nothing useful when it
# executes before the run has finished (e.g. under "Restart & Run All"). Poll
# until the run leaves its in-flight states, bounded by a timeout.
PENDING_STATUSES = {'queued', 'in_progress', 'cancelling'}
POLL_INTERVAL_SECONDS = 1
POLL_TIMEOUT_SECONDS = 120

deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
while True:
    run = client.beta.threads.runs.retrieve(
        thread_id=thread.id,
        run_id=run.id,
    )
    if run.status not in PENDING_STATUSES or time.monotonic() >= deadline:
        break
    time.sleep(POLL_INTERVAL_SECONDS)

print(run.status)

if run.status != 'completed':
    # Fail loudly instead of silently skipping the verdict.
    raise RuntimeError(
        f"Run {run.id} did not complete (status: {run.status!r}); "
        "no verdict is available."
    )

messages = client.beta.threads.messages.list(thread_id=thread.id)

# The first content part of the first listed message is read as the reply text.
# NOTE(review): this relies on the API listing the newest message first - confirm
# if an explicit `order` is ever passed to `list`.
message = messages.data[0].content[0].text.value

print(message)

completed
True, because there are four attributes related to images (images, images 3, images 4, images 5) which meets the requirement of every product having four product images.
