# Text Classification with Structured Outputs - Suitable for GPT 4.1 and beyond



This notebook is a template for text classification using OpenAI structured outputs.

The structure is aimed at support requests and customer-feedback classification. However, you can adapt it in many different ways.


The examples are based on this dataset: https://www.kaggle.com/datasets/chaudharyanshul/airline-reviews 

NOTE: If you are running this please refer to the requirements.txt file to ensure you have the relevant packages installed.


In [None]:
import json
import logging
import os
import re
from typing import Dict, List, Literal

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel, Field, field_validator, model_validator

import prompt_data

In [126]:

# Configure the root logger so INFO-level (and more severe) records are emitted.
logging.basicConfig(level=logging.INFO)


In [None]:
# Load variables from the .env file at the given path (replace the placeholder).
# NOTE(review): override=True presumably lets values from the file replace
# variables already present in the environment - confirm against python-dotenv.
load_dotenv(dotenv_path="INSERTPATHHERE",override=True)
# Sampling temperature passed to the model call in classify_single_ticket.
TEMPERATURE = 0.2

In [None]:
# Prefer a key supplied through the environment (e.g. via the .env file loaded
# earlier in this notebook); the placeholder is only a fallback for filling in
# the template by hand.
api_key = os.getenv("OPENAI_API_KEY", "INSERT HERE")

# The OpenAI client takes keyword-only arguments, so the key must be passed by
# name; a bare positional string raises TypeError.
client = OpenAI(
    api_key=api_key,
)

# Load Sample data

In [130]:
def create_sample_data():
    """Build the demo DataFrame of ten support tickets.

    Returns:
        A pandas DataFrame with one row per ticket and the columns
        Row_ID, ProblemDescription, Resolution, CustomerFeedback,
        DaysToResolve, Channel and NumTransfers (in that order).
    """
    columns = (
        "Row_ID",
        "ProblemDescription",
        "Resolution",
        "CustomerFeedback",
        "DaysToResolve",
        "Channel",
        "NumTransfers",
    )
    # Each tuple is (problem, resolution, feedback, days, channel, transfers);
    # Row_ID is the 1-based position of the tuple in this list.
    tickets = [
        (
            "I want to know how much my flight bonus will be for next year.",
            "Here’s a link to the bonus calculation guidelines.",
            "I wasn’t contacted and got a generic response with no specifics.",
            0.2,
            "Email",
            1,
        ),
        (
            "Why is my ticket cost higher than usual?",
            "The price reflects new seasonal rates.",
            "I don’t understand why the cost increased. No detailed explanation.",
            5,
            "Chat",
            2,
        ),
        (
            "When will my refund for the canceled flight be processed?",
            "Refunds typically take 4-6 weeks to process.",
            "I received a generic timeline. No specific details for my case.",
            10,
            "Phone",
            3,
        ),
        (
            "I can’t access my flight booking details.",
            "You can access them directly on the website by logging in.",
            "The website is not working, and the advice didn’t help.",
            2,
            "Email",
            1,
        ),
        (
            "How do I reset my password for my booking account?",
            "Follow the steps in the attached guide to reset your password.",
            "The guide wasn’t clear, I couldn’t reset my password.",
            0.8,
            "Chat",
            1,
        ),
        (
            "Why hasn’t my refund been processed yet?",
            "Refunds are processed at the end of each month.",
            "I wasn’t informed why my refund was delayed.",
            30,
            "Phone",
            2,
        ),
        (
            "Can you explain the process for claiming a flight delay compensation?",
            "Please refer to the compensation policy document linked here.",
            "The document is too complicated, and I didn’t find any clear answers.",
            0.2,
            "Email",
            1,
        ),
        (
            "I need help with making a group booking for my department.",
            "Here’s a guide for making group bookings on our website.",
            "The guide missed some important steps for large group bookings.",
            0.5,
            "Chat",
            2,
        ),
        (
            "Why hasn’t my booking for an extra seat been processed?",
            "It takes up to 48 hours to confirm extra seat requests.",
            "Agent didn’t check my specific request, just gave a standard response.",
            0.15,
            "Phone",
            1,
        ),
        (
            "I need help with changing my flight date.",
            "Here’s the link to change your booking online.",
            "The website process was unclear and I couldn’t change my flight.",
            0.8,
            "Email",
            1,
        ),
    ]
    records = [
        dict(zip(columns, (row_id, *fields)))
        for row_id, fields in enumerate(tickets, start=1)
    ]
    return pd.DataFrame(records)


# Module-level sample set consumed by the classification cells below.
sample_data = create_sample_data()


# Pydantic models and definitions

## Category and labels for multiple option selection

In [None]:
# Closed set of main-category labels the model may return. Every label listed
# here must also be a key of CATEGORY_ID_MAP, because the TicketClassification
# validator looks each returned label up there. The process category is named
# 'PROCESS' to match both CATEGORY_DEFINITIONS and CATEGORY_ID_MAP.
LABELS = Literal[
    'TIME_WAITING', 'POLICY', 'PROCESS',
    'QUALITY_OF_RESOLUTION', 'SELF_HELP_RESOURCES', 'STAFF_MANNERS',
    'STAFF_KNOWLEDGE', 'TECHNOLOGY', 'REPEATED_FOLLOW_UP', 'NOT_SURE',
]

# Natural-language definition of each main category (NOT_SURE has no entry).
# NOTE(review): not referenced by the code visible in this notebook; presumably
# interpolated into the prompt elsewhere - confirm.
CATEGORY_DEFINITIONS = """
    'TIME_WAITING': 'Feedback that EXPLICITY mentions long call waiting times, call queue lengths, or delays in receiving responses. This includes complaints about waiting for responses or resolution timeframes.'
    'POLICY': 'Feedback related to company policies, rules, or standard procedures that affect service delivery. This includes cases where policies are unclear, seem unfair, or limit service options.'
    'PROCESS': 'Feedback related to how processes in services are delivered or tasks are completed. This includes difficult processes or complicated workflows.'
    'QUALITY_OF_RESOLUTION': 'Feedback related to the customer\'s problem not being resolved, or answered. This also includes where the customer indicates the resolution was generic or incomplete.'
    'SELF_HELP_RESOURCES': 'Feedback related to QRG, website links, documentation, user guides, manuals, or other self-service materials. This includes unclear instructions, missing information, or difficult-to-use resources.'
    'STAFF_MANNERS': 'Feedback that EXPLICITLY mentions the agent\'s (ALSO KNOWN AS 'staff') poor behavior towards customers. This includes specific mentions of rudeness, lack of empathy, being abrupt, dismissive, or any other unprofessional conduct. Do NOT apply this category for general complaints about resolution quality or service process.'
    'STAFF_KNOWLEDGE': 'Feedback related to the agent\'s (ALSO KNOWN AS 'staff') expertise or understanding. This includes incorrect information, inability to explain clearly, or lack of technical knowledge.'
    'TECHNOLOGY': 'Feedback related to systems, software, or technical infrastructure. This includes system errors, software bugs, or lack of ease of use with digital tools.'
    'REPEATED_FOLLOW_UP': 'Feedback that EXPLICITLY mentions the customer having to follow-up multiple times on a request.'
"""


## Pydantic Models

In [None]:
# Integer ID for each main-category label. Sub-categories refer to these IDs
# through linked_main_category_id; NOT_SURE is mapped to -1.
CATEGORY_ID_MAP = {
    "TIME_WAITING": 1,
    "POLICY": 2,
    "PROCESS": 3,
    "QUALITY_OF_RESOLUTION": 4,
    "SELF_HELP_RESOURCES": 5,
    "STAFF_MANNERS": 6,
    "STAFF_KNOWLEDGE": 7,
    "TECHNOLOGY": 8,
    "REPEATED_FOLLOW_UP": 9,
    "NOT_SURE": -1,
}

# One main category selected for a piece of feedback, paired with the reason
# it was selected. Used as the element type of TicketClassification.Main_Categories.
class MainCategoryDetail(BaseModel):
    # Restricted to the values of the LABELS literal.
    label: LABELS = Field(..., description="The main category that applies.")
    # Free-text explanation for choosing this label.
    justification: str = Field(..., description="Specific justification for why this main category was chosen, referencing the feedback.")

# One free-form sub-category attached to a piece of feedback. Used as the
# element type of TicketClassification.Sub_Categories.
class SubCategoryDetail(BaseModel):
    # Free-form label; validate_label_format enforces UPPER_SNAKE_CASE.
    label: str = Field(..., description="The sub-category label in UPPER_SNAKE_CASE.")
    # Short description of the specific issue.
    justification: str = Field(..., description="A short, natural language description of the specific issue.")
    # NOTE(review): TicketClassification's validator only accepts -1 here when
    # NOT_SURE is the selected main category and otherwise expects 9999 for an
    # unlinked sub-category; confirm this description agrees with the prompt.
    linked_main_category_id: int = Field(..., description="The integer ID of the Main_Category this sub-category is linked to. Use -1 if no link is possible.")

    @field_validator('label')
    @classmethod
    def validate_label_format(cls, value):
        """Reject labels that are not UPPER_SNAKE_CASE.

        Accepts one or more groups of ``A-Z``/``0-9`` joined by single
        underscores. ``re.fullmatch`` is used so the whole string must
        conform; ``re.match`` with a trailing ``$`` would also accept a
        label followed by a newline.

        Returns:
            The unchanged label when it is valid.

        Raises:
            ValueError: If the label does not match the expected format.
        """
        if not re.fullmatch(r'[A-Z0-9]+(?:_[A-Z0-9]+)*', value):
            raise ValueError(f"Invalid Sub_Category label format: {value}. Must be UPPER_SNAKE_CASE.")
        return value




class TicketClassification(BaseModel):
    '''Classification model for feedback'''
    # Identifier of the classified feedback item.
    # NOTE(review): presumably echoes the feedback_id given in the prompt - confirm.
    Survey_ID: str
    # Confidence reported by the model; no range is enforced here.
    Confidence: float

    Main_Categories: List[MainCategoryDetail] = Field(
        ...,
        min_length=1,
        description="A list of main categories, each with its own specific justification."
    )

    Sub_Categories: List[SubCategoryDetail] = Field(
        ...,
        min_length=1,
        max_length=8,
        description="A list of 1 to 8 detailed sub-categories."
    )

    @model_validator(mode='after')
    def check_model_logic(self):
        """Validate the relationships between main and sub-categories.

        Rules enforced, in order:
          1. No sub-category label may equal a selected main-category label.
          2. NOT_SURE cannot be combined with any other main category.
          3. Every sub-category must link to the ID of a selected main
             category, or use 9999 (new/unlinked).
          4. When NOT_SURE is selected, every sub-category must link to the
             NOT_SURE ID (-1).
          5. Otherwise, if at least one sub-category is linked, every selected
             main category needs at least one linked sub-category.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If any of the rules above is violated.
        """
        unlinked_id = 9999  # marker for new/unlinked sub-categories
        not_sure_id = CATEGORY_ID_MAP["NOT_SURE"]

        main_cat_labels = {mc.label for mc in self.Main_Categories}
        main_cat_ids = {CATEGORY_ID_MAP[mc.label] for mc in self.Main_Categories}
        sub_cat_labels = {sub.label for sub in self.Sub_Categories}

        overlap = main_cat_labels & sub_cat_labels
        if overlap:
            raise ValueError(f"Sub-Category labels must not repeat Main Category names: {overlap}")

        # If not sure is selected, we cannot have other main categories - reinforced in prompt!
        if not_sure_id in main_cat_ids and len(main_cat_ids) > 1:
            raise ValueError("If NOT_SURE is selected as a Main Category, no other Main Categories can be selected!")

        # Reject sub-categories that point at a main category ID the model did not select.
        for sub in self.Sub_Categories:
            if sub.linked_main_category_id not in main_cat_ids and sub.linked_main_category_id != unlinked_id:
                raise ValueError(
                    f"Sub-category '{sub.label}' has linked_main_category_id={sub.linked_main_category_id}, "
                    "which is not valid. Must be linked to a selected Main Category or 9999 for new/unlinked subcategories."
                )

        if not_sure_id in main_cat_ids:
            # With NOT_SURE selected every sub-category must link to the
            # NOT_SURE ID. The message names that same ID so a retry is
            # steered towards the value this check accepts.
            for sub in self.Sub_Categories:
                if sub.linked_main_category_id != not_sure_id:
                    raise ValueError(
                        f"Sub-category '{sub.label}' has linked_main_category_id={sub.linked_main_category_id}. "
                        f"When NOT_SURE is selected, sub-categories must have linked_main_category_id={not_sure_id}."
                    )
        else:
            # Every main category has to have at LEAST one sub-category.
            linked_main_ids = {
                sub.linked_main_category_id
                for sub in self.Sub_Categories
                if sub.linked_main_category_id != unlinked_id
            }
            # NOTE(review): when every sub-category is unlinked (9999) this
            # guard skips the coverage check entirely; confirm that leniency
            # is intended.
            if linked_main_ids:
                unlinked_main_cats = main_cat_ids - linked_main_ids
                if unlinked_main_cats:
                    id_to_label = {v: k for k, v in CATEGORY_ID_MAP.items()}
                    unlinked_labels = [id_to_label.get(cat_id, f"ID_{cat_id}") for cat_id in unlinked_main_cats]
                    raise ValueError(
                        f"Main Categories selected but no sub-categories linked to them: {unlinked_labels}. "
                        "Each selected Main Category must have at least one linked Sub-Category."
                    )

        return self

# Classification loop

In [None]:

# Module-level alias kept from the original cell; the function below uses its
# own `sample_item` parameter, not this name.
sample_item = sample_data


def classify_single_ticket(sample_item: Dict) -> TicketClassification:
    """Classify one support ticket, retrying up to three times.

    Args:
        sample_item: Mapping-like row (a dict or a pandas Series) providing
            the keys 'CustomerFeedback', 'ProblemDescription', 'Row_ID',
            'DaysToResolve', 'Channel' and 'NumTransfers'.

    Returns:
        The TicketClassification parsed from the model response. If every
        attempt fails, a valid placeholder classification is returned with
        NOT_SURE as the main category, a single 'UNKNOWN' sub-category,
        Confidence 0.0 and the last error recorded in the justifications.

    Raises:
        KeyError: If 'Row_ID' is missing from ``sample_item``.
    """
    max_attempts = 3
    row_id = sample_item['Row_ID']
    last_error = None

    for attempt in range(1, max_attempts + 1):
        try:
            prompt = prompt_data.prompt.format(
                feedback=sample_item['CustomerFeedback'],
                support_request_description=sample_item['ProblemDescription'],
                feedback_id=row_id,
                days_to_resolve=sample_item['DaysToResolve'],
                channel=sample_item['Channel'],
                num_transfers=sample_item['NumTransfers']
            )

            response = client.responses.parse(
                model="whatever-GPT-Model-4.1-or-above",
                input=prompt,
                temperature=TEMPERATURE,
                text_format=TicketClassification
            )
            print(response)

            parsed = response.output_parsed
            if parsed is None:
                # Treat a response with no parsed payload as a failed attempt
                # instead of handing None back to the caller.
                raise ValueError("Model response contained no parsed output.")
            return parsed

        except Exception as e:
            # Best-effort boundary: any failure (API error, schema validation,
            # missing parsed output) triggers a retry, but is logged so it is
            # no longer swallowed silently.
            last_error = e
            logging.warning(
                "Classification attempt %d/%d failed for Row_ID=%s: %s",
                attempt, max_attempts, row_id, e,
            )

    # All attempts failed. Build a fallback that satisfies the model's own
    # validators: NOT_SURE alone, with a sub-category linked to the NOT_SURE ID.
    failure_note = f"Classification failed after {max_attempts} attempts: {str(last_error)}"
    return TicketClassification(
        Survey_ID=str(row_id),
        Confidence=0.0,
        Main_Categories=[
            MainCategoryDetail(label='NOT_SURE', justification=failure_note)
        ],
        Sub_Categories=[
            SubCategoryDetail(
                label='UNKNOWN',
                justification=failure_note,
                linked_main_category_id=CATEGORY_ID_MAP['NOT_SURE'],
            )
        ],
    )
    
    
           
       


# Main Program Execution

In [None]:
# Entry point: classify every row of the sample data, in order, and collect
# the returned classifications in `results`.
if __name__ == "__main__":
    results = []
    # iterrows() yields (index, row) pairs; only the row is needed here.
    for _,row in sample_data.iterrows():
        classification = classify_single_ticket(row)
        results.append(classification)

