In [1]:
import sys
sys.path.append("../")
import pandas as pd
import openpyxl as oxl

from utils.prompts import render
from utils.llm_client import LLMClient
from utils.logging_utils import log_llm_call
from utils.router import pick_model,should_use_reasoning_model
from IPython.display import display, Markdown
from utils.token_utils import count_messages_tokens,fit_within_context
from utils.json_utils import CrisisEvent,pydantic_to_json_schema,parse_json_with_pydantic,validate_json_schema,safe_parse_json

### Load data from text file

In [2]:
file_path = '../data/raw/News feed.txt'
news=[]

with open(file_path, 'r', encoding='utf-8') as f:
    news = [line.strip() for line in f if line.strip()]

print(news)



### Extract JSON data using json_extract.v1

In [3]:
main_schema=pydantic_to_json_schema(CrisisEvent)

prompt_text, spec = render(
    "json_extract.v1",
    schema=main_schema,
    text="{text}"
)

### Validate the JSON data and get the output

In [None]:
model = pick_model('openai', 'general')
client = LLMClient('openai', model)

valid_feed = []

for idx, new in enumerate(news, start=1):
    full_prompt = prompt_text.replace("{text}", new)

    message = [{'role': 'user', 'content': full_prompt}]    
    response = client.chat(message, temperature=0.0)
    
    # Extract raw text from response
    raw_text = response['text'].strip() 
    
    try:
        # Validate using Pydantic model
        validated_obj = CrisisEvent.model_validate_json(raw_text)
        valid_feed.append(validated_obj.model_dump())
        
        display(Markdown(
            f"**Validation Success (ID {idx}):** District: `{validated_obj.district}` | "
            f"Province: `{validated_obj.province}` | "
            f"Flood Level: `{validated_obj.flood_level_meters}` | "
            f"Victims: `{validated_obj.vicm_count}` | "
            f"Main Need: `{validated_obj.main_need}` | "
            f"Status: `{validated_obj.status}`"
        ))
        
    except Exception as e:
        snippet = new[:50] + ("..." if len(new) > 50 else "")
        display(Markdown(
            f"**Validation Error (ID {idx}):** `{snippet}`\n\n"
            f"> Error: {str(e)}"
        ))

**Validation Success (ID 1):** District: `Colombo` | Province: `Western` | Flood Level: `9.5` | Victims: `0` | Main Need: `General Assistance` | Status: `Critical`

**Validation Success (ID 2):** District: `Gampaha` | Province: `Western` | Flood Level: `None` | Victims: `5` | Main Need: `Rescue` | Status: `Critical`

**Validation Success (ID 3):** District: `Kandy` | Province: `Central` | Flood Level: `None` | Victims: `0` | Main Need: `General Assistance` | Status: `Stable`

**Validation Success (ID 4):** District: `Kalutara` | Province: `Western` | Flood Level: `None` | Victims: `0` | Main Need: `Rescue` | Status: `Critical`

**Validation Success (ID 5):** District: `Gampaha` | Province: `Western` | Flood Level: `2.0` | Victims: `0` | Main Need: `Food/Water` | Status: `Critical`

**Validation Success (ID 6):** District: `Colombo` | Province: `Western` | Flood Level: `None` | Victims: `0` | Main Need: `General Assistance` | Status: `Stable`

**Validation Success (ID 7):** District: `Matara` | Province: `Southern` | Flood Level: `None` | Victims: `0` | Main Need: `General Assistance` | Status: `Stable`

**Validation Success (ID 8):** District: `Colombo` | Province: `Western` | Flood Level: `1.5` | Victims: `0` | Main Need: `Rescue` | Status: `Critical`

**Validation Success (ID 9):** District: `Galle` | Province: `Southern` | Flood Level: `None` | Victims: `0` | Main Need: `General Assistance` | Status: `Stable`

**Validation Success (ID 10):** District: `Gampaha` | Province: `Western` | Flood Level: `None` | Victims: `0` | Main Need: `General Assistance` | Status: `Warning`

**Validation Success (ID 11):** District: `Colombo` | Province: `Western` | Flood Level: `None` | Victims: `0` | Main Need: `Evacuation` | Status: `Critical`

ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 10, model: gemini-2.5-flash-lite\nPlease retry in 45.473762552s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerMinutePerProjectPerModel-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemini-2.5-flash-lite'}, 'quotaValue': '10'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '45s'}]}}

### Convert valid objects to a Pandas DataFrame

In [None]:
df = pd.DataFrame(valid_feed)
df.head()

Unnamed: 0,district,province,flood_level_meters,vicm_count,main_need,status
0,Colombo,Western,9.5,0,General Assistance,Critical
1,Gampaha,Western,,0,Rescue,Critical
2,Kandy,Central,,0,General Assistance,Stable
3,Kalutara,Western,,0,Rescue,Critical
4,Gampaha,Western,2.0,500,Food/Water,Critical


### Save the data into excel file

In [None]:
df.to_excel('../data/output/flood_report.xlsx', index=False)