# Task 1: Structured Classification and Extraction - SOLUTION

In [None]:
from openai import OpenAI
from pydantic import BaseModel, Field, field_validator
from typing import Literal, List, Optional
from enum import Enum
import json

# SET YOUR API KEY
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# typed into (and saved with) the notebook. The placeholder below is only a
# fallback for readers who would rather paste a key in directly.
import os

api_key = os.environ.get("OPENAI_API_KEY", "your_api_key_here")
client = OpenAI(api_key=api_key)

In [None]:
# Load data
# The encoding is pinned to UTF-8 so the fixtures decode the same way on every
# platform instead of depending on the locale's default encoding.
with open('../fixtures/input/support_tickets.json', 'r', encoding='utf-8') as f:
    tickets = json.load(f)

with open('../fixtures/input/extraction_texts.json', 'r', encoding='utf-8') as f:
    extraction_data = json.load(f)

print(f"Loaded {len(tickets)} tickets")

## Task 1: Define Schema

In [None]:
# SOLUTION

# Closed set of priority labels; used as the type of
# `TicketClassification.priority`.
# The `str` mix-in makes every member a string instance whose `.value` is the
# lowercase label on the right-hand side.
# NOTE: documented with `#` comments rather than a docstring so the class's
# runtime `__doc__` stays exactly as it was.
class Priority(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    URGENT = "urgent"

# Structured result of classifying one support ticket. Passed as
# `response_format` to the `parse(...)` calls in the later cells.
# NOTE: documented with `#` comments rather than a docstring so the class's
# runtime `__doc__` stays exactly as it was.
class TicketClassification(BaseModel):
    # Exactly one of the six fixed category labels listed below.
    category: Literal[
        "technical",
        "billing",
        "account",
        "feature_request",
        "bug_report",
        "general"
    ]
    # One of the `Priority` enum members.
    priority: Priority
    # Free-form label, at most 50 characters.
    subcategory: str = Field(max_length=50)
    # Whole number of hours, 0 to 720 inclusive.
    estimated_hours: int = Field(ge=0, le=720)
    # Whether the ticket needs to be escalated.
    requires_escalation: bool
    # Score between 0 and 1 inclusive.
    confidence: float = Field(ge=0, le=1)
    # Explanation text, 10 to 200 characters long.
    reasoning: str = Field(min_length=10, max_length=200)

print("✓ Schema defined")

## Task 2: Classify Single Ticket

In [None]:
# SOLUTION

# Classify the first ticket: subject and body are joined into a single prompt.
ticket = tickets[0]
ticket_text = f"{ticket['subject']}\n{ticket['body']}"

response = client.beta.chat.completions.parse(
    model="gpt-4o-mini",
    messages=[
        {
            "role": "system",
            "content": """You are a support ticket classifier.
Classify tickets into categories and assign priority.

Priority guidelines:
- urgent: System down, security issues, data loss
- high: Major features broken, paying customers affected
- medium: Features not working, workarounds exist
- low: Questions, minor issues, feature requests"""
        },
        {"role": "user", "content": f"Classify: {ticket_text}"}
    ],
    response_format=TicketClassification
)

message = response.choices[0].message
classification = message.parsed

# `parsed` is None when the model refuses or returns nothing parsable. Fail
# with an explicit message rather than an AttributeError on the prints below.
if classification is None:
    raise RuntimeError(
        f"No classification returned (refusal: {message.refusal!r})"
    )

print(f"Category: {classification.category}")
print(f"Priority: {classification.priority.value}")
print(f"Reasoning: {classification.reasoning}")
print("✓ Task 2 passed")

## Task 3: Batch Classification

In [None]:
# SOLUTION

# USD price per one million tokens (gpt-4o-mini pricing), named so the cost
# formula below is self-explanatory and easy to update.
INPUT_PRICE_PER_MILLION = 0.15
OUTPUT_PRICE_PER_MILLION = 0.60

results = []
total_input_tokens = 0
total_output_tokens = 0

for ticket in tickets:
    ticket_text = f"{ticket['subject']}\n{ticket['body']}"

    response = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Classify support tickets."},
            {"role": "user", "content": ticket_text}
        ],
        response_format=TicketClassification
    )

    # One entry is appended per ticket (even if `parsed` is None) so that
    # `results` stays index-aligned with `tickets`.
    classification = response.choices[0].message.parsed
    results.append({
        "ticket_id": ticket['ticket_id'],
        "classification": classification
    })

    # Track usage. The SDK types `usage` as optional, so skip the totals for
    # a response that carries none instead of failing on attribute access.
    usage = response.usage
    if usage is not None:
        total_input_tokens += usage.prompt_tokens
        total_output_tokens += usage.completion_tokens

# Calculate cost (gpt-4o-mini pricing)
total_cost = (
    (total_input_tokens / 1_000_000) * INPUT_PRICE_PER_MILLION +
    (total_output_tokens / 1_000_000) * OUTPUT_PRICE_PER_MILLION
)

print(f"Classified {len(results)} tickets")
print(f"Total tokens: {total_input_tokens + total_output_tokens}")
print(f"Total cost: ${total_cost:.4f}")
print("✓ Task 3 passed")

## Task 4: Measure Accuracy

In [None]:
# SOLUTION

# Compare each prediction with the expected labels stored on the ticket.
category_correct = 0
priority_correct = 0

for ticket, result in zip(tickets, results):
    predicted = result['classification']

    # A missing classification (e.g. a model refusal) scores as incorrect for
    # both fields instead of crashing on attribute access.
    if predicted is None:
        continue

    if predicted.category == ticket['expected_category']:
        category_correct += 1

    if predicted.priority.value == ticket['expected_priority']:
        priority_correct += 1

# Report 0% for an empty ticket list rather than dividing by zero.
total_tickets = len(tickets)
category_accuracy = category_correct / total_tickets if total_tickets else 0.0
priority_accuracy = priority_correct / total_tickets if total_tickets else 0.0

print(f"Category accuracy: {category_accuracy:.1%}")
print(f"Priority accuracy: {priority_accuracy:.1%}")
print("✓ Task 4 passed")

## Task 5: Contact Extraction Schema

In [None]:
# SOLUTION

import re

# Pragmatic shape check for an e-mail address: local part, "@", dotted domain.
# "+" and "%" are accepted in the local part so that common addresses such as
# "user+tag@example.com" are not rejected. Compiled once and applied with
# `fullmatch`, so a trailing newline cannot slip through.
_EMAIL_PATTERN = re.compile(r'[\w.+%-]+@[\w.-]+\.\w+')


# Contact details extracted from free text. Only `name` is required; every
# other field defaults to None when absent.
class ContactInfo(BaseModel):
    # Required, 1 to 100 characters.
    name: str = Field(min_length=1, max_length=100)
    # Optional, at most 100 characters.
    company: Optional[str] = Field(None, max_length=100)
    # Optional; checked and lower-cased by `validate_email` below.
    email: Optional[str] = None
    phone: Optional[str] = None
    interest: Optional[str] = None

    @field_validator('email')
    @classmethod
    def validate_email(cls, v):
        """Validate and normalise the ``email`` field.

        Args:
            v: The candidate e-mail string, or None / empty when not provided.

        Returns:
            ``v`` unchanged when it is None or empty; otherwise the address
            with surrounding whitespace removed and lower-cased.

        Raises:
            ValueError: If a non-empty value does not look like an e-mail
                address.
        """
        if not v:
            return v

        # Surrounding whitespace is not part of the address.
        candidate = v.strip()
        if not _EMAIL_PATTERN.fullmatch(candidate):
            raise ValueError('Invalid email format')
        return candidate.lower()

print("✓ Task 5 passed")

## Task 6: Extract Contacts

In [None]:
# SOLUTION

extraction_results = []

# Only the first three texts are processed.
for item in extraction_data[:3]:
    response = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Extract contact information."},
            {"role": "user", "content": item['text']}
        ],
        response_format=ContactInfo
    )

    message = response.choices[0].message
    contact = message.parsed

    # `parsed` is None when the model refuses or returns nothing parsable.
    # Skip such entries so the summary loop below never dereferences None.
    if contact is None:
        print(f"Skipped one text: no contact returned (refusal: {message.refusal!r})")
        continue

    extraction_results.append(contact)

print("✓ Task 6 passed")
for i, contact in enumerate(extraction_results):
    print(f"\n{i+1}. {contact.name} - {contact.email}")

## Task 7: Caching

In [None]:
# SOLUTION

from functools import lru_cache

@lru_cache(maxsize=1000)
def classify_ticket_cached(ticket_text: str) -> TicketClassification:
    """Classify a ticket, memoising the result per distinct ``ticket_text``.

    Args:
        ticket_text: The ticket text sent to the model as the user message.

    Returns:
        The parsed classification taken from the first choice of the response.
    """
    conversation = [
        {"role": "system", "content": "Classify tickets."},
        {"role": "user", "content": ticket_text},
    ]
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=conversation,
        response_format=TicketClassification,
    )
    first_choice = completion.choices[0]
    return first_choice.message.parsed

# Call the cached classifier twice with the identical text so the second
# lookup can be answered from the cache, then show the cache counters.
test_ticket = "Urgent: System down!"
result1 = classify_ticket_cached(test_ticket)
result2 = classify_ticket_cached(test_ticket)

cache_stats = classify_ticket_cached.cache_info()
print(f"Cache info: {cache_stats}")
print("✓ Task 7 passed")

## Task 8: Error Handling

In [None]:
# SOLUTION

from openai import APIError, RateLimitError
from pydantic import ValidationError

def safe_classify(
    ticket_text: str,
) -> "tuple[Optional[TicketClassification], Optional[str]]":
    """Classify a ticket without raising, reporting failures as text.

    Args:
        ticket_text: The ticket text sent to the model as the user message.

    Returns:
        A ``(classification, error)`` pair. On success ``classification`` is
        the parsed result and ``error`` is None. On any failure
        ``classification`` is None and ``error`` is a human-readable message,
        so the two are never both None.
    """
    try:
        response = client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "Classify tickets."},
                {"role": "user", "content": ticket_text}
            ],
            response_format=TicketClassification
        )
        message = response.choices[0].message

        # `parsed` is None when the model refuses or returns nothing parsable.
        # Report that as an error so callers never receive (None, None).
        if message.parsed is None:
            return None, f"No classification returned (refusal: {message.refusal!r})"

        return message.parsed, None

    # RateLimitError is handled before the more general APIError so it gets
    # its own message.
    except RateLimitError:
        return None, "Rate limit exceeded"

    except APIError as e:
        return None, f"API error: {str(e)}"

    except ValidationError as e:
        return None, f"Validation error: {str(e)}"

    # Final boundary: this helper promises not to raise, so anything else is
    # converted to an error string as well.
    except Exception as e:
        return None, f"Unexpected error: {str(e)}"

result, error = safe_classify("Test ticket")
print("✓ Task 8 passed")
if result:
    print(f"  Classification: {result.category}")
elif error:
    # Show the failure reason instead of silently discarding it.
    print(f"  Error: {error}")

## Summary

**Achievements:**
- ✓ Pydantic schemas with validation
- ✓ Structured outputs for classification
- ✓ Information extraction
- ✓ Cost tracking
- ✓ Accuracy measurement
- ✓ Caching for efficiency
- ✓ Production error handling

**Key learnings:**
- Structured outputs eliminate parsing logic
- Pydantic provides automatic validation
- Token tracking enables cost monitoring
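- Caching avoids repeat API calls for identical inputs; the savings (often quoted as 50-90%) depend on how frequently the same ticket text recurs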
- Error handling is critical for production