In [1]:
from pydantic import BaseModel, Field
from typing import TypedDict, Annotated, List
from enum import Enum
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
import json

In [2]:
from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    
    cohere_api_key: str
    google_api_key: str

    class Config:
        env_file = "../.env"
        extra = "ignore"
        
settings = Settings()

In [3]:
llm = ChatGoogleGenerativeAI(
    model = "gemini-2.5-flash-lite",
    google_api_key = settings.google_api_key,
    temperature = 0
)

### Question 1: Basic User Profile Extraction
**Concept**: Simple TypedDict structure for extracting basic user information from text.

**Description**: Create a structured output system that extracts a user's basic profile information from a natural language description. The output should contain name, age, and occupation fields.

**Sample Input**: 
```
"Hi, I'm Sarah Johnson, I'm 28 years old and I work as a software engineer at Google."
```

**Expected Output Structure**:
```python
{
    "name": "Sarah Johnson",
    "age": 28,
    "occupation": "software engineer"
}
```

In [32]:
class UserInfo(TypedDict):
    "Represents the name, age and occupation."
    name: Annotated[str, ..., "Name of the candidate"]
    age: Annotated[str, ..., "Age of the candidate"]
    occupation: Annotated[str, ..., "Occupation of the candidate"]

In [28]:
userinfo = UserInfo(
    name = "priya",
    age = 26,
    occupation = "jse"
)
userinfo

{'name': 'priya', 'age': 26, 'occupation': 'jse'}

In [29]:
structured_llm = llm.with_structured_output(
    schema = UserInfo,
    method = "tool_calling"
)
structured_llm

RunnableBinding(bound=ChatGoogleGenerativeAI(model='models/gemini-2.5-flash-lite', google_api_key=SecretStr('**********'), temperature=0.0, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x00000160CDB89160>, default_metadata=(), model_kwargs={}), kwargs={'tools': [{'type': 'function', 'function': {'name': 'UserInfo', 'description': 'Represents the name, age and occupation in json format.', 'parameters': {'type': 'object', 'properties': {'name': {'description': 'Name of the candidate', 'type': 'string'}, 'age': {'description': 'Age of the candidate', 'type': 'string'}, 'occupation': {'description': 'Occupation of the candidate', 'type': 'string'}}, 'required': ['name', 'age', 'occupation']}}}], 'ls_structured_output_format': {'kwargs': {'method': 'function_calling'}, 'schema': {'type': 'function', 'function': {'name': 'UserInfo', 'description': 'Represents the name, age and occupation in json format.', 'parameters': {'typ

In [30]:
response: dict = structured_llm.invoke("Hi, I'm Sarah Johnson, I'm 28 years old and I work as a software engineer at Google.")

In [84]:
print(json.dumps(response, indent=4))

{
    "description": "Annual Tech Conference focusing on AI and machine learning innovations.",
    "date": "2024-03-15",
    "location": "San Francisco Convention Center",
    "event_name": "Annual Tech Conference"
}


### Question 2: Product Review Sentiment Analysis
**Concept**: Using Pydantic BaseModel for structured sentiment analysis with enumeration.

**Description**: Build a system that analyzes product reviews and extracts the sentiment (positive/negative/neutral), confidence score, and main topics mentioned. Use Pydantic with proper validation and enums.

**Sample Input**: 
```
"This laptop is amazing! The battery life is incredible and the display is crystal clear. However, it's a bit expensive for what you get. Overall, I'm quite satisfied with my purchase."
```

**Expected Output Structure**:
```python
{
    "sentiment": "positive",
    "confidence": 0.75,
    "topics": ["battery life", "display", "price"],
    "summary": "Satisfied with laptop despite high price"
}
```

In [None]:
class Sentiment(str, Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"
    
class SentimentAnalysis(BaseModel):
    
    sentiment: str = Field(description = "Sentiment of the text, can be positive, negative or neutral")
    confidence: float = Field(description = "Confidence score between 0 and 1")
    topics: list[str] = Field(description = "List of topics mentioned in the text") 
    summary: str = Field(description = "A brief summary of the text")   

In [44]:
structured_llm = llm.with_structured_output(
    schema = SentimentAnalysis,
    method = "tool_calling"
)

In [45]:
response: dict = structured_llm.invoke("This laptop is amazing! The battery life is incredible and the display is crystal clear. However, it's a bit expensive for what you get. Overall, I'm quite satisfied with my purchase.")

In [83]:
print(json.dumps(response, indent=4))

{
    "description": "Annual Tech Conference focusing on AI and machine learning innovations.",
    "date": "2024-03-15",
    "location": "San Francisco Convention Center",
    "event_name": "Annual Tech Conference"
}


### Question 3: Event Information Extraction
**Concept**: TypedDict with optional fields and date handling.

**Description**: Extract event information from natural language text, including event name, date, location, and optional description. Handle cases where some information might be missing.

**Sample Input**: 
```
"Join us for the Annual Tech Conference on March 15th, 2024 at the San Francisco Convention Center. This year's theme focuses on AI and machine learning innovations."
```

**Expected Output Structure**:
```python
{
    "event_name": "Annual Tech Conference",
    "date": "2024-03-15",
    "location": "San Francisco Convention Center",
    "description": "This year's theme focuses on AI and machine learning innovations"
}
```

In [75]:
class EventInfo(TypedDict):
    "Provide event information"
    event_name: str =  Field(description = "Name of the event")
    date: str = Field(description = "Date of the event in YYYY-MM-DD format")
    location: str = Field(description = "Location of the event")    
    description: str = Field(description = "Brief description of the event")

In [76]:
structured_llm = llm.with_structured_output(
    schema = EventInfo,
    method = "tool_calling"
)

In [82]:
response = structured_llm.invoke("Join us for the Annual Tech Conference on March 15th, 2024 at the San Francisco Convention Center. This year's theme focuses on AI and machine learning innovations.")
print(json.dumps(response, indent=4))

{
    "description": "Annual Tech Conference focusing on AI and machine learning innovations.",
    "date": "2024-03-15",
    "location": "San Francisco Convention Center",
    "event_name": "Annual Tech Conference"
}


### Question 4: Simple Task Classification
**Concept**: Basic Pydantic model for categorizing user requests.

**Description**: Create a system that classifies user input into predefined categories (question, request, complaint, compliment) and extracts the main subject matter.

**Sample Input**: 
```
"I'm having trouble with my internet connection. It keeps dropping every few minutes and it's really frustrating."
```

**Expected Output Structure**:
```python
{
    "category": "complaint",
    "subject": "internet connection",
    "urgency_level": "medium",
    "keywords": ["trouble", "dropping", "frustrating"]
}
```

In [88]:
class TaskClassification(BaseModel):
    "Classify uset input into predefined categories"
    category: str = Field(description = "Category of the complaint, question, request and compliments")
    subject: str = Field(description = "Subject of the task")
    urgency_level: str = Field(description = "Urgency level of the task, can be Low, Medium or High")
    keywords: list[str] = Field(description = "List of keywords related to the task")   

In [89]:
structured_llm = llm.with_structured_output(
    schema = TaskClassification,
    method = "tool_calling"
)

In [90]:
response: dict = structured_llm.invoke("I'm having trouble with my internet connection. It keeps dropping every few minutes and it's really frustrating.")
print(response.model_dump_json(indent = 4))

{
    "category": "complaint",
    "subject": "Internet connection issues",
    "urgency_level": "High",
    "keywords": [
        "internet",
        "connection",
        "dropping",
        "frustrating"
    ]
}


### Question 5: Multi-Entity Relationship Extraction
**Concept**: Nested Pydantic models with relationships between entities.

**Description**: Extract information about companies, their employees, and relationships from business news articles. Model the complex relationships between different entities using nested Pydantic structures.

**Sample Input**: 
```
"Apple CEO Tim Cook announced that the company will be hiring 500 new engineers across its Cupertino and Austin offices. The hiring spree comes as Apple pushes deeper into AI development. Meanwhile, former Apple executive John Doe has joined Microsoft as VP of Product Development."
```

**Expected Output Structure**:
```python
{
    "companies": [
        {
            "name": "Apple",
            "employees": [
                {
                    "name": "Tim Cook",
                    "position": "CEO",
                    "status": "current"
                }
            ],
            "locations": ["Cupertino", "Austin"],
            "hiring_info": {
                "positions": "engineers",
                "count": 500
            }
        },
        {
            "name": "Microsoft",
            "employees": [
                {
                    "name": "John Doe",
                    "position": "VP of Product Development",
                    "status": "new_hire",
                    "previous_company": "Apple"
                }
            ]
        }
    ],
    "key_topics": ["hiring", "AI development", "executive movement"]
}
```

In [101]:
class Company(BaseModel):
    name: str = Field(description = "Name of the company")
    employees: list['Employee'] = Field(description = "List of employees in the company")   
    location: str = Field(description = "Location of the company")
    hiring_info: str = Field(description = "Hiring information of the company")
    
class Employee(BaseModel):
    name: str = Field(description = "Name of the employee")
    position: str = Field(description = "Position of the employee in the company")
    status: str = Field(description = "Employment status, can be Active, On Leave or Resigned")
    previous_company: str = Field(description = "Previous company of the employee")
    
class HiringInfo(BaseModel):
    position: str = Field(description = "Position being hired for")
    count: int = Field(description = "Number of positions available")

class NewsArticle(BaseModel):
    companies: list[Company] = Field(description = "List of companies mentioned in the article")
    key_topics: list[str] = Field(description = "Key topics discussed in the article")


In [102]:
structured_llm = llm.with_structured_output(
    schema = NewsArticle,
    method = "tool_calling"
)

In [103]:
response: dict = structured_llm.invoke("Apple CEO Tim Cook announced that the company will be hiring 500 new engineers across its Cupertino and Austin offices. The hiring spree comes as Apple pushes deeper into AI development. Meanwhile, former Apple executive John Doe has joined Microsoft as VP of Product Development.")
print(response.model_dump_json(indent = 4))

{
    "companies": [
        {
            "name": "Apple",
            "employees": [
                {
                    "name": "Tim Cook",
                    "position": "CEO",
                    "status": "Active",
                    "previous_company": "IBM"
                },
                {
                    "name": "John Doe",
                    "position": "Former Executive",
                    "status": "Resigned",
                    "previous_company": "Apple"
                }
            ],
            "location": "Cupertino, Austin",
            "hiring_info": "Hiring 500 new engineers"
        },
        {
            "name": "Microsoft",
            "employees": [
                {
                    "name": "John Doe",
                    "position": "VP of Product Development",
                    "status": "Active",
                    "previous_company": "Apple"
                }
            ],
            "location": "Redmond, Washington",
           

### Question 6: Financial Transaction Analysis
**Concept**: Complex validation with Pydantic validators and custom field types.

**Description**: Parse financial transaction descriptions and extract structured information including transaction type, amount, merchant, category, and potential fraud indicators. Implement custom validators for amount formatting and merchant validation.

**Sample Input**: 
```
"DEBIT PURCHASE - AMAZON.COM*RETAIL WA $127.89 USD on 2024-01-15 14:23:45 - Card ending 4532 - Available balance $2,847.33"
```

**Expected Output Structure**:
```python
{
    "transaction_type": "debit_purchase",
    "merchant": {
        "name": "AMAZON.COM",
        "category": "retail",
        "location": "WA"
    },
    "amount": {
        "value": 127.89,
        "currency": "USD"
    },
    "timestamp": "2024-01-15T14:23:45",
    "card_info": {
        "last_four_digits": "4532"
    },
    "account_balance": 2847.33,
    "fraud_indicators": []
}
```

In [110]:
class FinancialTransaction(BaseModel):
    transaction_type: str = Field(description = "Type of transaction, can be Deposit, Withdrawal, Transfer")
    merchant: Merchant = Field(description = "Name of the merchant involved in the transaction")
    amount: Amount = Field(description = "Amount involved in the transaction")
    timestamp: str = Field(description = "Timestamp of the transaction in ISO 8601 format")
    card_type: CardInfo = Field(description = "Type of card used, can be Credit, Debit or Prepaid")
    account_balance: float = Field(description = "Account balance after the transaction")
    fraud_indicators: list[str] = Field(description = "List of indicators that suggest potential fraud")
    
class Amount(BaseModel):
    value: float = Field(description = "Monetary value")
    currency: str = Field(description = "Currency code, e.g., USD, EUR")

class Merchant(BaseModel):
    name: str = Field(description = "Name of the merchant")
    category: str = Field(description = "Category of the merchant, e.g., Grocery, Electronics")
    location: str = Field(description = "Location of the merchant")
    
class CardInfo(BaseModel):
    last_four_digits: str = Field(description = "Last four digits of the card")
    

In [111]:
structured_llm = llm.with_structured_output(
    schema = FinancialTransaction,
    method = "tool_calling"
)

In [112]:
response: dict = structured_llm.invoke("DEBIT PURCHASE - AMAZON.COM*RETAIL WA $127.89 USD on 2024-01-15 14:23:45 - Card ending 4532 - Available balance $2,847.33")
print(response.model_dump_json(indent = 4))

{
    "transaction_type": "Withdrawal",
    "merchant": {
        "name": "Amazon.com*Retail WA",
        "category": "E-commerce",
        "location": "WA"
    },
    "amount": {
        "value": 127.89,
        "currency": "USD"
    },
    "timestamp": "2024-01-15T14:23:45Z",
    "card_type": {
        "last_four_digits": "4532"
    },
    "account_balance": 2847.33,
    "fraud_indicators": []
}


### Question 7: Research Paper Metadata Extraction
**Concept**: TypedDict with complex nested structures and list handling.

**Description**: Extract comprehensive metadata from academic paper abstracts including authors, institutions, research areas, methodology, key findings, and citations. Handle varying formats and missing information gracefully.

**Sample Input**: 
```
"Deep Learning Approaches to Natural Language Processing: A Comprehensive Survey. Authors: Dr. Jane Smith (Stanford University), Prof. Robert Chen (MIT), Dr. Lisa Wong (Google Research). This paper presents a systematic review of deep learning techniques applied to NLP tasks from 2018-2023. We analyzed 245 papers and identified three major trends: transformer architectures, few-shot learning, and multimodal integration. Our findings suggest that transformer-based models achieve 15-20% better performance than traditional approaches. Keywords: NLP, deep learning, transformers, survey."
```

**Expected Output Structure**:
```python
{
    "title": "Deep Learning Approaches to Natural Language Processing: A Comprehensive Survey",
    "authors": [
        {
            "name": "Dr. Jane Smith",
            "affiliation": "Stanford University",
            "title": "Dr."
        },
        {
            "name": "Prof. Robert Chen", 
            "affiliation": "MIT",
            "title": "Prof."
        },
        {
            "name": "Dr. Lisa Wong",
            "affiliation": "Google Research",
            "title": "Dr."
        }
    ],
    "research_period": {
        "start_year": 2018,
        "end_year": 2023
    },
    "methodology": "systematic review",
    "sample_size": 245,
    "key_findings": [
        "transformer architectures trending",
        "few-shot learning prominent", 
        "multimodal integration growing",
        "15-20% performance improvement over traditional approaches"
    ],
    "keywords": ["NLP", "deep learning", "transformers", "survey"],
    "paper_type": "survey"
}
```

In [18]:
class Authors(TypedDict):
    name: Annotated[str, ..., "Name of the author"]
    affiliation: Annotated[str, ..., "Affiliation of the author"]
    title: Annotated[str, ..., "Title of the author, e.g., Professor, Dr., Researcher"]

class ResearchPeriod(TypedDict):
    start_year: Annotated[int, ..., "Start year of the research period"]
    end_year: Annotated[int, ..., "End year of the research period"]
    
class ResearchPaper(TypedDict):
    title: Annotated[str, ..., "Title of the research paper"]
    authors: list[Authors] = Field(description = "List of authors of the research paper")
    research_period: ResearchPeriod = Field(description = "Research period start_year and end_year")
    methodology: Annotated[str, ..., "Methodology used in the research"]
    sample_size: Annotated[int, ..., "Sample size used in the research"]
    key_findings: list[Annotated[str, ..., "Key findings of the research"]]
    keywords: list[Annotated[str, ..., "List of keywords related to the research"]]
    paper_type: Annotated[str, ..., "Type of the research paper, can be Empirical, Theoretical or Review"]



In [19]:
structured_llm = llm.with_structured_output(
    schema = ResearchPaper,
    method = "tool_calling"
)

In [20]:
response = structured_llm.invoke("Deep Learning Approaches to Natural Language Processing: A Comprehensive Survey. Authors: Dr. Jane Smith (Stanford University), Prof. Robert Chen (MIT), Dr. Lisa Wong (Google Research). This paper presents a systematic review of deep learning techniques applied to NLP tasks from 2018-2023. We analyzed 245 papers and identified three major trends: transformer architectures, few-shot learning, and multimodal integration. Our findings suggest that transformer-based models achieve 15-20% better performance than traditional approaches. Keywords: NLP, deep learning, transformers, survey.")
print(json.dumps(response, indent = 4))

{
    "key_findings": [
        "Transformer-based models achieve 15-20% better performance than traditional approaches.",
        "Identified three major trends: transformer architectures, few-shot learning, and multimodal integration."
    ],
    "sample_size": 245.0,
    "authors": [
        {
            "title": "Dr.",
            "affiliation": "Stanford University",
            "name": "Jane Smith"
        },
        {
            "title": "Prof.",
            "affiliation": "MIT",
            "name": "Robert Chen"
        },
        {
            "title": "Dr.",
            "affiliation": "Google Research",
            "name": "Lisa Wong"
        }
    ],
    "keywords": [
        "NLP",
        "deep learning",
        "transformers",
        "survey"
    ],
    "title": "Deep Learning Approaches to Natural Language Processing: A Comprehensive Survey",
    "paper_type": "Review",
    "research_period": {
        "start_year": 2018.0,
        "end_year": 2023.0
    },
    "meth

### Question 8: Dynamic Schema Generation with Conditional Fields
**Concept**: Advanced Pydantic with conditional validation, dynamic field generation, and custom serialization.

**Description**: Create a system that processes insurance claims and generates different structured outputs based on claim type (auto, health, property). Each claim type should have specific required fields, validation rules, and nested sub-structures. Implement conditional field requirements and cross-field validation.

**Sample Input**: 
```
"Auto insurance claim filed by John Davis, policy number AC-2023-891456. Incident occurred on January 10, 2024 at 3:30 PM at the intersection of Main St and Oak Ave. Vehicle: 2019 Honda Civic, VIN: 1HGBH41JXMN109186. Other party: Sarah Martinez driving 2021 Toyota Camry, policy with State Farm. Damages: front bumper dent ($800), headlight replacement ($150), rental car for 3 days ($210). Police report #2024-001234 filed. Fault determined: 70% other party, 30% claimant. Medical attention: minor neck strain, visited urgent care same day."
```

**Expected Output Structure**:
```python
{
    "claim_type": "auto",
    "claim_id": "generated_unique_id",
    "policyholder": {
        "name": "John Davis",
        "policy_number": "AC-2023-891456"
    },
    "incident": {
        "date": "2024-01-10",
        "time": "15:30:00",
        "location": "intersection of Main St and Oak Ave",
        "police_report": "2024-001234"
    },
    "vehicles": [
        {
            "owner_type": "claimant",
            "year": 2019,
            "make": "Honda",
            "model": "Civic",
            "vin": "1HGBH41JXMN109186"
        },
        {
            "owner_type": "third_party",
            "year": 2021,
            "make": "Toyota", 
            "model": "Camry",
            "driver_name": "Sarah Martinez",
            "insurance_company": "State Farm"
        }
    ],
    "damages": [
        {
            "type": "vehicle",
            "description": "front bumper dent",
            "estimated_cost": 800.00
        },
        {
            "type": "vehicle", 
            "description": "headlight replacement",
            "estimated_cost": 150.00
        },
        {
            "type": "additional",
            "description": "rental car for 3 days",
            "estimated_cost": 210.00
        }
    ],
    "fault_determination": {
        "claimant_percentage": 30,
        "other_party_percentage": 70
    },
    "medical_info": {
        "injuries_reported": true,
        "treatment_received": "urgent care visit",
        "injury_description": "minor neck strain"
    },
    "total_claim_amount": 1160.00,
    "validation_flags": []
}
```

In [21]:
class Vehicle(BaseModel):
    owner_type: str = Field(description = "Type of vehicle owner, can be Policyholder, Third Party or Other")
    year: int = Field(description = "Year of manufacture of the vehicle")
    make: str = Field(description = "Make of the vehicle")
    model: str = Field(description = "Model of the vehicle")
    vin: str = Field(description = "Vehicle Identification Number")
    
class Damage(BaseModel):
    type: str = Field(description = "Damage in vehicle, additional property or injury")
    description: str = Field(description = "Description of the damage")
    estimated_cost: float = Field(description = "Estimated cost to repair the damage")
    
class MedicalInfo(BaseModel):
    injuries_reported: bool = Field(description = "Flag indicating if injuries were reported")
    treatment_received: str = Field(description = "Flag indicating if treatment was received")
    injury_description: str = Field(description = "Description of the injuries, if any")
    
class Policyholder(BaseModel):
    name: str = Field(description = "Name of the policyholder")
    policy_number: str = Field(description = "Policy number of the insurance")
    
class Incident(BaseModel):
    date: str = Field(description = "Date of the incident in YYYY-MM-DD format")
    time: str = Field(description = "Time of the incident in HH:MM format")
    location: str = Field(description = "Location of the incident")
    police_report_filed: bool = Field(description = "Flag indicating if a police report was filed")
    
class InsuranceClaim(BaseModel):
    claim_type: str = Field(description = "Type of claim, can be Health, Auto or Property")
    claim_id: str = Field(description = "Unique identifier for the claim")
    policyholder: list[Policyholder] = Field(description = "Name of the policyholder")
    incident: Incident = Field(description = "Description of the incident leading to the claim")
    vehicles: list[Vehicle] = Field(description = "List of vehicles involved in the claim")
    damages: list[Damage] = Field(description = "List of damages reported in the claim")
    fault_determination: str = Field(description = "Fault determination, can be At Fault, Not At Fault or Undetermined")
    medical_info: 'MedicalInfo' = Field(description = "Medical information related to the claim, if applicable")
    total_claim_amount: float = Field(description = "Total amount claimed")
    validation_flag: bool = Field(description = "Flag indicating if the claim has been validated")
    


In [22]:
structured_llm = llm.with_structured_output(
    schema = InsuranceClaim,
    method = "tool_calling"
)

In [23]:
response = structured_llm.invoke("""Auto insurance claim filed by John Davis, policy number AC-2023-891456. 
                                 Incident occurred on January 10, 2024 at 3:30 PM at the intersection of Main St and Oak Ave. 
                                 Vehicle: 2019 Honda Civic, VIN: 1HGBH41JXMN109186. Other party: Sarah Martinez driving 2021 Toyota Camry, policy with State Farm.
                                 Damages: front bumper dent ($800), headlight replacement ($150), rental car for 3 days ($210). Police report #2024-001234 filed. 
                                 Fault determined: 70% other party, 30% claimant. Medical attention: minor neck strain, visited urgent care same day.""")
print(response.model_dump_json(indent = 4))

{
    "claim_type": "Auto",
    "claim_id": "2024-001234",
    "policyholder": [
        {
            "name": "John Davis",
            "policy_number": "AC-2023-891456"
        }
    ],
    "incident": {
        "date": "2024-01-10",
        "time": "15:30",
        "location": "Main St and Oak Ave",
        "police_report_filed": true
    },
    "vehicles": [
        {
            "owner_type": "Policyholder",
            "year": 2019,
            "make": "Honda",
            "model": "Civic",
            "vin": "1HGBH41JXMN109186"
        },
        {
            "owner_type": "Third Party",
            "year": 2021,
            "make": "Toyota",
            "model": "Camry",
            "vin": ""
        }
    ],
    "damages": [
        {
            "type": "vehicle",
            "description": "front bumper dent",
            "estimated_cost": 800.0
        },
        {
            "type": "vehicle",
            "description": "headlight replacement",
            "estimated_cos

### Question 9: Multi-Language Content Analysis with Hierarchical Classification
**Concept**: Complex TypedDict structures with multi-level categorization, confidence scoring, and language detection.

**Description**: Analyze social media posts in multiple languages and extract hierarchical topic classification, sentiment analysis, entity recognition, trend identification, and content moderation flags. Handle mixed-language content, cultural context, and ambiguous classifications with confidence intervals.

**Sample Input**: 
```
"Â¡IncreÃ­ble concierto de Taylor Swift anoche en Madrid! ðŸŽµâœ¨ The production was absolutely amazing and the crowd energy was unmatched. Definitely worth the 200â‚¬ ticket price. #TaylorSwift #Madrid #Concert2024 #WorthIt. Next stop: Paris! Who's going? DM me!"
```

**Expected Output Structure**:
```python
{
    "content_analysis": {
        "languages": [
            {
                "code": "es",
                "confidence": 0.65,
                "text_portion": "Â¡IncreÃ­ble concierto de Taylor Swift anoche en Madrid!"
            },
            {
                "code": "en", 
                "confidence": 0.85,
                "text_portion": "The production was absolutely amazing..."
            }
        ],
        "dominant_language": "en"
    },
    "topic_classification": {
        "primary_category": {
            "name": "entertainment",
            "subcategory": "music_concert",
            "confidence": 0.95
        },
        "secondary_categories": [
            {
                "name": "travel",
                "subcategory": "event_tourism", 
                "confidence": 0.72
            },
            {
                "name": "consumer_review",
                "subcategory": "entertainment_review",
                "confidence": 0.88
            }
        ]
    },
    "entities": [
        {
            "type": "person",
            "value": "Taylor Swift",
            "confidence": 0.99,
            "context": "performer"
        },
        {
            "type": "location",
            "value": "Madrid", 
            "confidence": 0.95,
            "context": "event_location"
        },
        {
            "type": "location",
            "value": "Paris",
            "confidence": 0.90,
            "context": "future_event"
        },
        {
            "type": "currency",
            "value": "200â‚¬",
            "normalized_value": 200.00,
            "currency_code": "EUR"
        }
    ],
    "sentiment_analysis": {
        "overall_sentiment": "positive",
        "confidence": 0.92,
        "emotion_breakdown": {
            "excitement": 0.85,
            "satisfaction": 0.78,
            "anticipation": 0.65
        }
    },
    "hashtags": ["#TaylorSwift", "#Madrid", "#Concert2024", "#WorthIt"],
    "user_intent": {
        "primary": "sharing_experience",
        "secondary": ["seeking_connection", "recommending"],
        "call_to_action": "DM me!"
    },
    "moderation_flags": [],
    "trending_indicators": {
        "viral_potential": "medium",
        "engagement_predictors": ["celebrity_mention", "location_tag", "call_to_action"]
    }
}
```

In [29]:
class Language(TypedDict):
    code: Annotated[str, ..., "Language code, e.g., en for English"]
    confidence: Annotated[float, ..., "Confidence score between 0 and 1"]
    text_portions: list[Annotated[str, ..., "Portions of text in this language"]]

class ContentAnalysis(TypedDict):
    language: list[Language] = Field(description = "List of languages detected in the text with confidence scores")
    dominant_language: Annotated[str, ..., "Dominant language if multiple languages are present"]

class PrimaryCategory(TypedDict):
    name: Annotated[str, ..., "Name of the primary category"]
    subcategory: Annotated[str, ..., "Subcategory within the primary category"]
    confidence: Annotated[float, ..., "Confidence score between 0 and 1"]
    
class SecondaryCategory(TypedDict):
    name: Annotated[str, ..., "Name of the secondary category"]
    subcategory: Annotated[str, ..., "Subcategory within the secondary category"]
    confidence: Annotated[float, ..., "Confidence score between 0 and 1"]

class TopicClassification(TypedDict):
    primary_category: Annotated[str, ..., "Primary category of the text"]
    secondary_categories: list[SecondaryCategory] = Field(description = "List of secondary categories with confidence scores")

class Entities(TypedDict):
    type: Annotated[str, ..., "Type of entity, e.g., Person, Organization, Location"]
    value: Annotated[str, ..., "Value of the entity"]
    confidence: Annotated[float, ..., "Confidence score between 0 and 1"]
    context: Annotated[str, ..., "Context in which the entity appears"]
    
class SentimentAnalysis(TypedDict):
    overall_sentiment: Annotated[str, ..., "Overall sentiment of the text, can be positive, negative or neutral"]
    confidence: Annotated[float, ..., "Confidence score between 0 and 1"]
    emotion_breakdown: dict[Annotated[str, ..., "Emotion type, e.g., Joy, Anger, Sadness"], Annotated[float, ..., "Confidence score between 0 and 1"]]

class EmotionBreakdown(TypedDict):
    excitement: Annotated[float, ..., "Confidence score for excitement"]
    satisfaction: Annotated[float, ..., "Confidence score for satisfaction"]
    anticipation: Annotated[float, ..., "Confidence score for anticipation"]
    
class UserIntent(TypedDict):
    primary: Annotated[str, ..., "Primary intent of the user, can be Informational, Navigational or Transactional"]
    secondary: list[Annotated[str, ..., "List of secondary intents"]]
    call_to_action: Annotated[str, ..., "Call to action if any, e.g., Buy Now, Learn More"]
    
class TrendingIndicators(TypedDict):
    viral_potential: Annotated[float, ..., "Confidence score for viral potential"]
    engagement_predictors: list[Annotated[str, ..., "List of engagement predictors, e.g., Shares, Comments"]]

class MultiLanguage(TypedDict):
    content_analysis: ContentAnalysis = Field(description = "Content analysis of the text")
    topic_classification: TopicClassification = Field(description = "Classification of the main topic")
    entities: list[Entities] = Field(description = "List of named entities mentioned in the text")
    sentiment_analysis: Annotated[str, ..., "Sentiment analysis of the text, can be positive, negative or neutral"]
    hashtags: list[Annotated[str, ..., "List of relevant hashtags for the text"]]
    user_intent: Annotated[str, ..., "Intent of the user, can be Informational, Navigational or Transactional"]
    moderation_flags: list[Annotated[str, ..., "List of moderation flags, can be Hate Speech, Violence, Adult Content"]]
    trending_indicators: list[Annotated[str, ..., "List of trending indicators, can be Viral, Emerging or Declining"]]
    
  


In [30]:
structured_llm = llm.with_structured_output(
    schema = MultiLanguage,
    method = "function_calling"
)

In [31]:
response = structured_llm.invoke("Â¡IncreÃ­ble concierto de Taylor Swift anoche en Madrid! ðŸŽµâœ¨ The production was absolutely amazing and the crowd energy was unmatched. Definitely worth the 200â‚¬ ticket price. #TaylorSwift #Madrid #Concert2024 #WorthIt. Next stop: Paris! Who's going? DM me!")

print(json.dumps(response, indent = 4))

{
    "entities": [
        {
            "value": "Taylor Swift",
            "context": "Taylor Swift anoche en Madrid",
            "type": "Person",
            "confidence": 0.99
        },
        {
            "value": "Madrid",
            "type": "Location",
            "context": "anoche en Madrid",
            "confidence": 0.99
        },
        {
            "value": "Concierto",
            "context": "Incre\u00edble concierto",
            "type": "Event",
            "confidence": 0.95
        },
        {
            "value": "200\u20ac",
            "type": "Money",
            "context": "200\u20ac ticket price",
            "confidence": 0.9
        },
        {
            "value": "Paris",
            "type": "Location",
            "context": "Next stop: Paris",
            "confidence": 0.97
        }
    ],
    "user_intent": "Informational",
    "content_analysis": {
        "language": [
            {
                "text_portions": [
                    "\

### Question 10: Complex Legal Document Analysis with Cross-Reference Validation
**Concept**: Advanced nested Pydantic models with custom validators, cross-field dependencies, and legal domain expertise.

**Description**: Parse complex legal contracts and extract structured information including parties, obligations, terms, dates, conditional clauses, penalties, and legal references. Implement sophisticated validation for legal requirements, date dependencies, monetary calculations, and cross-reference consistency. Handle legal jargon, conditional logic, and hierarchical clause structures.

**Sample Input**: 
```
"SOFTWARE LICENSE AGREEMENT between TechCorp Inc. (Licensor), a Delaware corporation with principal office at 123 Innovation Drive, San Jose, CA 95110, and DataSystems LLC (Licensee), a New York limited liability company. TERM: This agreement commences January 1, 2024 and continues for 36 months, with automatic renewal for successive 12-month periods unless terminated with 90-day written notice. LICENSE FEES: Initial fee $50,000 due upon execution, followed by monthly payments of $8,500 due by the 15th of each month. Late payments incur 1.5% monthly penalty. TERMINATION: Either party may terminate for material breach with 30-day cure period. Upon termination, Licensee must cease use within 7 days and return all materials. GOVERNING LAW: This agreement governed by California law, disputes resolved via binding arbitration in San Francisco County. FORCE MAJEURE: Performance excused for events beyond reasonable control including acts of God, government action, or pandemic declarations lasting more than 60 days."
```

**Expected Output Structure**:
```python
{
    "document_type": "software_license_agreement",
    "execution_date": "extracted_or_null",
    "parties": [
        {
            "role": "licensor",
            "entity_name": "TechCorp Inc.",
            "entity_type": "corporation",
            "jurisdiction": "Delaware",
            "address": {
                "street": "123 Innovation Drive",
                "city": "San Jose", 
                "state": "CA",
                "zip": "95110"
            }
        },
        {
            "role": "licensee",
            "entity_name": "DataSystems LLC",
            "entity_type": "limited_liability_company", 
            "jurisdiction": "New York",
            "address": null
        }
    ],
    "term_structure": {
        "commencement_date": "2024-01-01",
        "initial_term_months": 36,
        "renewal_term_months": 12,
        "renewal_type": "automatic",
        "termination_notice_days": 90,
        "notice_type": "written"
    },
    "financial_terms": {
        "initial_fee": {
            "amount": 50000.00,
            "currency": "USD",
            "due_date": "upon_execution"
        },
        "recurring_payments": {
            "amount": 8500.00,
            "frequency": "monthly",
            "due_day": 15,
            "currency": "USD"
        },
        "penalties": {
            "late_payment_rate": 0.015,
            "calculation_period": "monthly"
        }
    },
    "termination_clauses": [
        {
            "termination_type": "for_cause",
            "trigger": "material_breach", 
            "cure_period_days": 30,
            "notice_required": true
        }
    ],
    "post_termination_obligations": [
        {
            "party": "licensee",
            "action": "cease_use",
            "timeframe_days": 7
        },
        {
            "party": "licensee", 
            "action": "return_materials",
            "timeframe_days": 7
        }
    ],
    "governing_law": {
        "jurisdiction": "California",
        "dispute_resolution": "binding_arbitration",
        "venue": "San Francisco County"
    },
    "force_majeure": {
        "included": true,
        "covered_events": ["acts_of_god", "government_action", "pandemic"],
        "minimum_duration_days": 60,
        "effect": "performance_excused"
    },
    "validation_results": {
        "date_consistency": "valid",
        "financial_calculations": "valid", 
        "legal_requirements": "review_required",
        "cross_references": "valid"
    },
    "risk_flags": [
        "automatic_renewal_clause",
        "broad_force_majeure"
    ]
}
```