In [1]:
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, SystemMessage 
from dotenv import load_dotenv
from datetime import datetime
from os.path import exists
import os
import instructor
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Optional, Literal
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.schema import StrOutputParser
import json
from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


### Introduction
This notebook evaluates the performance of three LLMs (gpt-4o, gpt-4o-mini, and gpt-3.5-turbo) on a product attribute-value extraction task in a zero-shot setting.

The aim is to use the test results as a benchmark for the actual information extraction chain used for knowledge graph construction.

In [3]:
# Define utility function
def read_jsonl_file(file_path):
    """Load a JSON Lines file into a list of parsed objects.

    Args:
        file_path: Path of the UTF-8 encoded ``.jsonl`` file to read.

    Returns:
        A list holding one parsed JSON value per non-blank line, in file order.
    """
    records = []
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            # Lines that are empty or whitespace-only carry no record.
            if not raw_line.strip():
                continue
            records.append(json.loads(raw_line))
    return records
    

def write_jsonl_file(file_path, data):
    """Write an iterable of JSON-serialisable items as a JSON Lines file.

    Any existing file at ``file_path`` is overwritten. The file is opened
    as UTF-8 so it matches what ``read_jsonl_file`` expects, instead of
    depending on the platform's default encoding.

    Args:
        file_path: Destination path of the ``.jsonl`` file.
        data: Iterable of objects accepted by ``json.dumps``.
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        for item in data:
            # One JSON document per line, newline-terminated.
            file.write(json.dumps(item) + '\n')


In [4]:
# Load every benchmark record once, then bucket the records by product category.
data = read_jsonl_file('final_target_scores.jsonl')

jewelry, office, food, home_garden = (
    [record for record in data if record['category'] == category_name]
    for category_name in (
        'Jewelry',
        'Office Products',
        'Grocery And Gourmet Food',
        'Home And Garden',
    )
)

### Product attribute-value extraction

In [5]:
from pydantic import BaseModel
from typing import Optional

class ProductInfoGarden(BaseModel):
   # Extraction schema for products in the 'Home And Garden' category.
   # Passed as `response_model` to the instructor client in `initial_extraction`.
   #
   # Every attribute is an optional string that defaults to None (not found).
   # The alias is the attribute's display name; the Python field name is that
   # alias with spaces replaced by underscores, which is the same key
   # normalisation `evaluate_extraction` applies before looking up a value.
   Base: Optional[str] = Field(default=None, alias='Base')
   Capacity: Optional[str] = Field(default=None, alias='Capacity')
   Color: Optional[str] = Field(default=None, alias='Color')
   Cooling: Optional[str] = Field(default=None, alias='Cooling')
   Depth: Optional[str] = Field(default=None, alias='Depth')
   Gauge: Optional[str] = Field(default=None, alias='Gauge')
   Heat: Optional[str] = Field(default=None, alias='Heat')
   Height: Optional[str] = Field(default=None, alias='Height')
   Length: Optional[str] = Field(default=None, alias='Length')
   Manufacturer: Optional[str] = Field(default=None, alias='Manufacturer')
   Manufacturer_Stock_Number: Optional[str] = Field(default=None, alias='Manufacturer Stock Number')
   Material: Optional[str] = Field(default=None, alias='Material')
   Product_Type: Optional[str] = Field(default=None, alias='Product Type')
   Retail_UPC: Optional[str] = Field(default=None, alias='Retail UPC')
   Shape: Optional[str] = Field(default=None, alias='Shape')
   Shelves: Optional[str] = Field(default=None, alias='Shelves')
   Splash: Optional[str] = Field(default=None, alias='Splash')
   Stainless_Steel_Series: Optional[str] = Field(default=None, alias='Stainless Steel Series')
   Voltage: Optional[str] = Field(default=None, alias='Voltage')
   Width: Optional[str] = Field(default=None, alias='Width')


class ProductInfoFood(BaseModel):
   # Extraction schema for grocery / food products.
   # NOTE(review): not referenced elsewhere in the visible cells; presumably
   # swapped in for ProductInfoGarden when running the food benchmark - confirm.
   #
   # Every attribute is an optional string that defaults to None (not found).
   # The alias is the attribute's display name; the field name replaces spaces
   # and '/' with underscores (e.g. 'Size/Weight' -> Size_Weight), matching the
   # key normalisation in `evaluate_extraction`.

   Brand: Optional[str] = Field(default=None, alias='Brand')
   Flavor: Optional[str] = Field(default=None, alias='Flavor')
   Manufacturer_Stock_Number: Optional[str] = Field(default=None, alias='Manufacturer Stock Number')
   Pack_Quantity: Optional[str] = Field(default=None, alias='Pack Quantity')
   Packing_Type: Optional[str] = Field(default=None, alias='Packing Type')
   Product_Type: Optional[str] = Field(default=None, alias='Product Type')
   Retail_UPC: Optional[str] = Field(default=None, alias='Retail UPC')
   Size_Weight: Optional[str] = Field(default=None, alias='Size/Weight')
   
   
class ProductInfoOffice(BaseModel):
   # Extraction schema for office products.
   # NOTE(review): not referenced elsewhere in the visible cells; presumably
   # swapped in for ProductInfoGarden when running the office benchmark - confirm.
   #
   # Every attribute is an optional string that defaults to None (not found).
   # The alias is the attribute's display name; the field name replaces spaces
   # with underscores and drops parentheses (e.g. 'Color(s)' -> Colors),
   # matching the key normalisation in `evaluate_extraction`.
   Brand: Optional[str] = Field(default=None, alias='Brand')
   Binding: Optional[str] = Field(default=None, alias='Binding')
   Capacity: Optional[str] = Field(default=None, alias='Capacity')
   Closure: Optional[str] = Field(default=None, alias='Closure')
   Colors: Optional[str] = Field(default=None, alias='Color(s)')
   Depth: Optional[str] = Field(default=None, alias='Depth')
   Height: Optional[str] = Field(default=None, alias='Height')
   Length: Optional[str] = Field(default=None, alias='Length')
   Width: Optional[str] = Field(default=None, alias='Width')
   Mounting: Optional[str] = Field(default=None, alias='Mounting')
   Manufacturer_Stock_Number: Optional[str] = Field(default=None, alias='Manufacturer Stock Number')
   Material: Optional[str] = Field(default=None, alias='Material')
   Product_Type: Optional[str] = Field(default=None, alias='Product Type')
   Retail_UPC: Optional[str] = Field(default=None, alias='Retail UPC')
   Page_Yield: Optional[str] = Field(default=None, alias='Page Yield')
   Paper_Weight: Optional[str] = Field(default=None, alias='Paper Weight')
   Sheet_Perforation: Optional[str] = Field(default=None, alias='Sheet Perforation')
   # Alias uses the space-separated display name like every other multi-word
   # field here (and like 'Pack Quantity' in ProductInfoFood); it was
   # previously the underscore form 'Pack_Quantity'.
   Pack_Quantity: Optional[str] = Field(default=None, alias='Pack Quantity')
   
class ProductInfoJewelry(BaseModel):
   # Extraction schema for jewelry products.
   # NOTE(review): not referenced elsewhere in the visible cells; presumably
   # swapped in for ProductInfoGarden when running the jewelry benchmark - confirm.
   #
   # Every attribute is an optional string that defaults to None (not found).
   # The alias is the attribute's display name; the field name is the alias
   # with spaces replaced by underscores, matching the key normalisation in
   # `evaluate_extraction`.
   Brand: Optional[str] = Field(default=None, alias='Brand')
   Gender: Optional[str] = Field(default=None, alias='Gender')
   Metal_Type: Optional[str] = Field(default=None, alias='Metal Type')
   Model_Number: Optional[str] = Field(default=None, alias='Model Number')
   Product_Type: Optional[str] = Field(default=None, alias='Product Type')
   Stone_Shape: Optional[str] = Field(default=None, alias='Stone Shape')
   Stones_Setting: Optional[str] = Field(default=None, alias='Stones Setting')
   Stones_Total_Weight: Optional[str] = Field(default=None, alias='Stones Total Weight')
   Stones_Type: Optional[str] = Field(default=None, alias='Stones Type')

In [6]:
def initial_extraction(text: str, custom_extraction_prompt: str, model_name: str = 'gpt-4o',
                       response_model: type = ProductInfoGarden) -> BaseModel:
    """Run one structured extraction over a product text.

    Sends a system message (generic extraction instructions followed by
    ``custom_extraction_prompt``), the product text, and a set of rules to
    the chat model through an instructor-patched OpenAI client, and returns
    the response parsed into ``response_model``.

    Args:
        text: Product text to extract attributes from.
        custom_extraction_prompt: Task-specific instructions appended to the
            generic system message.
        model_name: Chat model identifier passed to the API.
        response_model: Pydantic model class describing the fields to
            extract. Defaults to ``ProductInfoGarden`` (the previously
            hard-coded schema), so existing three-argument calls behave as
            before; pass another ``ProductInfo*`` class for other categories.

    Returns:
        An instance of ``response_model`` populated by the model.
    """
    # Patch the OpenAI client with Instructor; the key is read from the
    # OPENAI_KEY environment variable.
    client = instructor.from_openai(OpenAI(api_key=os.getenv('OPENAI_KEY')))
    
    system_message = """
    You are an intelligent text extraction and conversion assistant. Your task is to extract structured information 
    from the given text and convert it into a structured format. 
    The output response should contain only the data extracted from the text, with no additional commentary, explanations, or extraneous information.
    If the required information could not be found from the given source, return nothing for that field. Do not hallucinate.
    """
    
    rule_prompt = """
                Here are the rules that you need to adhere:
                    ## Rules:
                    - The aim is to achieve simplicity and clarity in the extracted text.
                    - Make sure to answer in the structured format.
                    - If no information is provided for any of the fields, return nothing of that field.
                    - DO NOT HALLUCINATE.
                """
    
    extraction_prompt = f"""
    {system_message}
    {custom_extraction_prompt}
    """

    response = client.chat.completions.create(
        model=model_name, 
        response_model=response_model,
        messages=[
            {"role": "system", "content": extraction_prompt},
            {"role": "user", "content": f"Use the given text to extract information: {text}"},
            # The rules go last, as a separate user turn after the product text.
            {"role": "user", "content": rule_prompt}
        ]
    )
    return response

def evaluate_extraction(my_output, expected_output):
    """Score one extraction against its expected values, field by field.

    For each attribute in ``expected_output['target_scores']`` the extracted
    value is compared, case-insensitively, with the accepted value(s). A
    field missing from ``my_output`` or set to ``None`` is treated as the
    string ``'n/a'``. Names of mismatching fields are printed.

    Args:
        my_output: Mapping of model field name to the extracted string (or
            ``None``), e.g. the dict form of a ``ProductInfo*`` instance.
        expected_output: Record holding a ``'target_scores'`` mapping of
            attribute name to either a dict keyed by the accepted values
            (any per-value details are ignored), a list of accepted values,
            or a single accepted string.

    Returns:
        Percentage (0-100) of attributes whose extracted value matches an
        accepted value. Returns 0.0 when there are no attributes to score,
        instead of raising ``ZeroDivisionError``.
    """
    target_scores = expected_output['target_scores']
    total_fields = len(target_scores)  # Total fields to be extracted
    if total_fields == 0:
        # Nothing to score; avoid dividing by zero below.
        return 0.0

    # Attribute name -> model field name: ' ' and '/' become '_', and
    # parentheses are dropped (e.g. 'Color(s)' -> 'Colors').
    key_table = str.maketrans({' ': '_', '/': '_', '(': None, ')': None})

    correct_matches = 0  # Count of correctly extracted fields
    incorrect_fields = []  # Normalised names of fields that did not match

    for attribute, expected_values in target_scores.items():
        field = attribute.translate(key_table)

        # Build the lower-cased accepted values. Iterating a dict yields its
        # keys, which are the accepted values.
        if isinstance(expected_values, (dict, list)):
            accepted = [value.lower() for value in expected_values]
        else:
            accepted = [expected_values.lower()]

        extracted = my_output.get(field)
        my_value = 'n/a' if extracted is None else extracted.lower()

        if my_value in accepted:
            correct_matches += 1
        else:
            incorrect_fields.append(field)

    accuracy_percentage = (correct_matches / total_fields) * 100

    # Print fields that were incorrect
    if incorrect_fields:
        print("Incorrect fields:")
        for field in incorrect_fields:
            print(f"- {field}")

    return accuracy_percentage


In [None]:
result_list = []
accuracy_list = []
model_name = 'gpt-4o-mini' #'gpt-3.5-turbo'
dataset = 'garden'

# Zero-shot task instruction; identical for every product, so built once.
custom_extraction_prompt = """
        Extract information from the text extracted from a webpage of a product, including title and description:
        Output in a structured format.
        """

# Evaluate the first 100 products (fewer if the category has fewer). The loop
# variable is named `product` so the dataset loaded into `data` in an earlier
# cell is not overwritten.
for product in tqdm(home_garden[:100]):
    result = {}
    result['id'] = product['id']
    result['cluster_id'] = product['cluster_id']
    result['category'] = product['category']
    text_compile = f'title: {product["input_title"]}\ndescription: {product["input_description"]}'

    response = initial_extraction(text_compile, custom_extraction_prompt, model_name)
    # Convert the response to a dict once and reuse it for storage and scoring.
    output = response.dict()
    result['output'] = output
    accuracy = evaluate_extraction(output, product)
    result['accuracy'] = accuracy
    result_list.append(result)
    accuracy_list.append(accuracy)

if accuracy_list:
    print(f'Average accuracy: {sum(accuracy_list)/len(accuracy_list)}')
else:
    print('No products were evaluated.')

# Specify the file path for your JSON Lines file
file_path = f'{model_name.replace(".", "-")}-{dataset}-zero.jsonl'

write_jsonl_file(file_path, result_list)


### Test results (average per-product accuracy, %, zero-shot)
- garden - gpt3.5 - zero - 75.65
- garden - gpt4o - zero - 81.2
- garden - gpt4o-mini - zero - 62.30

- food - gpt3.5 - zero - 70.22
- food - gpt4o - zero - 77.62
- food - gpt4o-mini - zero - 74.07

- office - gpt3.5 - zero - 76.94
- office - gpt4o - zero - 78.11
- office - gpt4o-mini - zero - 76.06

- jewelry - gpt3.5 - zero - 76.78
- jewelry - gpt4o - zero - 84.56
- jewelry - gpt4o-mini - zero - 82.33


