In [2]:
from typing import List, Optional
from pydantic import BaseModel, Field
from langchain.chains import create_extraction_chain_pydantic

In [16]:
import os
import google.generativeai as genai

In [18]:
from google import genai
from google.genai import types

In [24]:
tracphone_message_content = """## Tracfone Broadband Plan Summary - $40 Unlimited Talk & Text

**Plan Details:**

*   **Plan Name:** Broadband Unlimited Talk and Text with 20GB of Data
*   **Price:** $40.00 per month
*   **Hotspot Capable:** Yes
*   **ID Protection:** Included
*   **Contract Required:** No

**Additional Charges & Terms:**

*   **Activation Fee:** $0.00
*   **Provider Monthly Fees:** $0.00
*   **One-Time Fees at Purchase:** Varies
*   **Federal Universal Service Fund (FUSF):** $0.23
*   **Regulatory Cost Recovery:** $0.08
*   **Early Termination Fee:** $0.00
*   **Government Taxes:** Varies by location

**Speeds:**

*   **Typical Download Speed:** 35-143 Mbps (5G)
*   **Typical Upload Speed:** 5-31 Mbps
*   **Typical Latency:** 42-64 ms

**Data Included:**

*   20 GB of Data

**General Features:**

*   Speeds provided are typical and may vary.
*   Speeds are based on 5G network.
*   Access to FCC Consumer Resource Center: fcc.gov/consumer"""

In [26]:
impoved_system_message = f"""
You are an intelligent assistant that extracts structured data about broadband internet plans.

Your task is to convert any descriptive text into a JSON object that matches the following Pydantic schema exactly.

Make sure:
- All required fields are included.
- All values are realistic and contextually accurate.
- Use correct data types and field formats (e.g., URLs, phone numbers, dollar amounts).
- Format monetary values as strings (e.g., "$price", "$price/line").
- Include nested objects and lists where appropriate.

Here is the schema:

```python
class Fee(BaseModel):
    name: str  # Name or description of the fee
    amount: str  # Monetary value of the fee (e.g., "$price", "Varies")

class SpeedInfo(BaseModel):
    network_type: str  # E.g., "5G", "Fiber"
    typical_download_speed: str  # E.g., "number Mbps"
    typical_upload_speed: str  # E.g., "number Mbps"
    typical_latency: str  # E.g., "number ms"

class NetworkPolicy(BaseModel):
    title: str  # Policy name
    policy_link: HttpUrl  # Link to full policy

class CustomerSupport(BaseModel):
    website: HttpUrl
    phone_number: str

class BroadbandFacts(BaseModel):
    provider: str
    plan_name: str
    disclosure_title: str
    monthly_price: str
    monthly_price_notes: List[str]
    additional_fees: List[Fee]
    one_time_fees: List[Fee]
    early_termination: List[Fee]
    federal_universal_fund_fee: List[Fee]
    regulatory_cost_recovery: List[Fee]
    government_taxes: List[Fee]
    discounts_and_bundles_link: Optional[HttpUrl]
    speeds: List[SpeedInfo]
    data_included: str
    additional_data_charges: str
    network_policies: List[NetworkPolicy]
    customer_support: CustomerSupport
    fcc_consumer_info_link: HttpUrl
    reference_code: Optional[str]"""

In [20]:
# create client
client = genai.Client(api_key="<API KEY>")

In [None]:
# response = client.models.generate_content(
#     model='gemini-2.0-flash-lite',
#     contents='List a few popular cookie recipes.',
#     config={
#         'response_mime_type': 'application/json',
#         'response_schema': list[Recipe],
#     },
# )

In [37]:
from pydantic import BaseModel, HttpUrl, Field
from typing import Optional, List

class Fee(BaseModel):
    name: str = Field(..., description="Name or description of the fee (e.g., 'Activation Fee').")
    amount: str = Field(..., description="Monetary value of the fee (e.g., '$35', 'Varies').")

class SpeedInfo(BaseModel):
    network_type: str = Field(..., description="Type of network (e.g., '5G', '4G LTE', 'Fiber').")
    typical_download_speed: str = Field(..., description="Typical download speed users can expect (e.g., '300 Mbps').")
    typical_upload_speed: str = Field(..., description="Typical upload speed users can expect (e.g., '20 Mbps').")
    typical_latency: str = Field(..., description="Typical network latency (e.g., '20 ms').")

class NetworkPolicy(BaseModel):
    title: str = Field(..., description="Title or brief description of the network policy (e.g., 'Data Throttling Policy').")
    policy_link: HttpUrl = Field(..., description="URL to the full network management policy.")

class CustomerSupport(BaseModel):
    website: HttpUrl = Field(..., description="Official customer support website URL.")
    phone_number: str = Field(..., description="Customer support phone number.")

class BroadbandFacts(BaseModel):
    provider: str = Field(..., description="Name of the broadband service provider.")
    plan_name: str = Field(..., description="Name of the broadband plan being described.")
    disclosure_title: str = Field(..., description="Title of the disclosure document (e.g., 'Broadband Consumer Label').")
    monthly_price: str = Field(..., description="Advertised monthly price per line or plan (e.g., '$65').")
    monthly_price_notes: List[str] = Field(..., description="Additional notes about monthly pricing (e.g., discount conditions, taxes, or fees).")
    
    additional_fees: List[Fee] = Field(..., description="List of recurring or optional additional fees.")
    one_time_fees: List[Fee] = Field(..., description="List of one-time fees such as activation or setup charges.")
    early_termination: List[Fee] = Field(..., description="Fees applicable if service is cancelled early.")
    federal_universal_fund_fee: List[Fee] = Field(..., description="Fees related to federal Universal Service Fund.")
    regulatory_cost_recovery: List[Fee] = Field(..., description="Regulatory recovery costs or related charges.")
    government_taxes: List[Fee] = Field(..., description="Applicable government taxes.")
    
    discounts_and_bundles_link: Optional[HttpUrl] = Field(None, description="Link to available discounts or bundles, if any.")
    
    speeds: List[SpeedInfo] = Field(..., description="Details about typical speeds and latency for the service.")
    data_included: str = Field(..., description="Amount of data included in the plan (e.g., 'Unlimited', '200 GB').")
    additional_data_charges: str = Field(..., description="Fees or throttling policy once the included data limit is exceeded.")


In [33]:
import os
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = "<API KEY>"

In [44]:
from google.genai.types import GenerateContentConfig, Part, SafetySetting

In [69]:
from neo4j import GraphDatabase
import json

In [52]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Initialize the model
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.1)
structured_llm = llm.with_structured_output(BroadbandFacts)

messages = [
    ("system", impoved_system_message),
    ("human", tracphone_message_content),
]
# Invoke the model with a query asking for structured information
result = structured_llm.invoke(messages)
print(result) 

provider='Tracfone' plan_name='Broadband Unlimited Talk and Text with 20GB of Data' disclosure_title='Broadband Consumer Label' monthly_price='$40.00' monthly_price_notes=['per month'] additional_fees=[] one_time_fees=[Fee(name='One-Time Fees at Purchase', amount='Varies'), Fee(name='Activation Fee', amount='$0.00'), Fee(name='Provider Monthly Fees', amount='$0.00')] early_termination=[Fee(name='Early Termination Fee', amount='$0.00')] federal_universal_fund_fee=[Fee(name='Federal Universal Service Fund (FUSF)', amount='$0.23')] regulatory_cost_recovery=[Fee(name='Regulatory Cost Recovery', amount='$0.08')] government_taxes=[Fee(name='Government Taxes', amount='Varies by location')] discounts_and_bundles_link=None speeds=[SpeedInfo(network_type='5G', typical_download_speed='35-143 Mbps', typical_upload_speed='5-31 Mbps', typical_latency='42-64 ms')] data_included='20 GB' additional_data_charges='Check terms and conditions'


In [64]:
# ---- Data ----

provider = 'Tracfone'
plan_name = 'Broadband Unlimited Talk and Text with 20GB of Data'
disclosure_title = 'Broadband Consumer Label'
monthly_price = '$40.00'
monthly_price_notes = ['per month']
additional_fees: List[Fee] = []
one_time_fees = [
    Fee(name='One-Time Fees at Purchase', amount='Varies'),
    Fee(name='Activation Fee', amount='$0.00'),
    Fee(name='Provider Monthly Fees', amount='$0.00')
]
early_termination = [Fee(name='Early Termination Fee', amount='$0.00')]
federal_universal_fund_fee = [Fee(name='Federal Universal Service Fund (FUSF)', amount='$0.23')]
regulatory_cost_recovery = [Fee(name='Regulatory Cost Recovery', amount='$0.08')]
government_taxes = [Fee(name='Government Taxes', amount='Varies by location')]
discounts_and_bundles_link = None
speeds = [SpeedInfo(
    network_type='5G',
    typical_download_speed='35-143 Mbps',
    typical_upload_speed='5-31 Mbps',
    typical_latency='42-64 ms'
)]
data_included = '20 GB'
additional_data_charges = 'Check terms and conditions'

# ---- Neo4j connection ----


In [71]:

# Replace with your Neo4j credentials
NEO4J_URI = "neo4j+s://3ea3293d.databases.neo4j.io"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "<API KEY>"

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

def create_broadband_plan(tx):
    query = """
    MERGE (p:Provider {name: $provider})
    CREATE (plan:Plan {
        name: $plan_name,
        disclosure_title: $disclosure_title,
        monthly_price: $monthly_price,
        data_included: $data_included,
        additional_data_charges: $additional_data_charges
    })
    CREATE (p)-[:OFFERS]->(plan)
    
    FOREACH (note IN $monthly_price_notes | 
        CREATE (:PriceNote {note: note})-[:APPLIES_TO]->(plan)
    )
    
    FOREACH (fee IN $one_time_fees |
        CREATE (f:Fee {name: fee.name, amount: fee.amount})
        CREATE (plan)-[:HAS_ONE_TIME_FEE]->(f)
    )
    FOREACH (fee IN $early_termination |
        CREATE (f:Fee {name: fee.name, amount: fee.amount})
        CREATE (plan)-[:HAS_EARLY_TERMINATION_FEE]->(f)
    )
    FOREACH (fee IN $federal_universal_fund_fee |
        CREATE (f:Fee {name: fee.name, amount: fee.amount})
        CREATE (plan)-[:HAS_FEDERAL_FEE]->(f)
    )
    FOREACH (fee IN $regulatory_cost_recovery |
        CREATE (f:Fee {name: fee.name, amount: fee.amount})
        CREATE (plan)-[:HAS_REGULATORY_FEE]->(f)
    )
    FOREACH (fee IN $government_taxes |
        CREATE (f:Fee {name: fee.name, amount: fee.amount})
        CREATE (plan)-[:HAS_TAX]->(f)
    )
    
    FOREACH (s IN $speeds |
        CREATE (sp:SpeedInfo {
            network_type: s.network_type,
            download: s.typical_download_speed,
            upload: s.typical_upload_speed,
            latency: s.typical_latency
        })
        CREATE (plan)-[:HAS_SPEED_INFO]->(sp)
    )
    """
    tx.run(query,
           provider=provider,
           plan_name=plan_name,
           disclosure_title=disclosure_title,
           monthly_price=monthly_price,
           data_included=data_included,
           additional_data_charges=additional_data_charges,
           monthly_price_notes=monthly_price_notes,
           one_time_fees=[fee.dict() for fee in one_time_fees],
           early_termination=[fee.dict() for fee in early_termination],
           federal_universal_fund_fee=[fee.dict() for fee in federal_universal_fund_fee],
           regulatory_cost_recovery=[fee.dict() for fee in regulatory_cost_recovery],
           government_taxes=[fee.dict() for fee in government_taxes],
           speeds=[s.dict() for s in speeds]
    )

# ---- Run the transaction ----

with driver.session() as session:
    session.write_transaction(create_broadband_plan)

print("Broadband plan uploaded to Neo4j.")

  session.write_transaction(create_broadband_plan)
C:\Users\krish\AppData\Local\Temp\ipykernel_12780\1955327189.py:63: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  one_time_fees=[fee.dict() for fee in one_time_fees],
C:\Users\krish\AppData\Local\Temp\ipykernel_12780\1955327189.py:64: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  early_termination=[fee.dict() for fee in early_termination],
C:\Users\krish\AppData\Local\Temp\ipykernel_12780\1955327189.py:65: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic

Broadband plan uploaded to Neo4j.
