In [5]:
import sys
from pathlib import Path

# Add the parent directory to the import path before importing from `utils`.
sys.path.append("../")

import pandas as pd
import openpyxl as oxl
from IPython.display import display, Markdown

from utils.prompts import render
from utils.llm_client import LLMClient
from utils.logging_utils import log_llm_call
from utils.router import pick_model,should_use_reasoning_model
from utils.token_utils import count_messages_tokens,fit_within_context
from utils.json_utils import CrisisEvent,pydantic_to_json_schema,parse_json_with_pydantic,validate_json_schema,safe_parse_json

### Load the news feed from a text file

In [6]:
file_path = '../data/raw/News feed.txt'

# One news item per line: trim surrounding whitespace and skip blank lines.
# `news` is deliberately not pre-initialised to [] — if the file cannot be
# opened, later cells should not find an empty feed left behind by this cell.
with open(file_path, 'r', encoding='utf-8') as f:
    stripped_lines = (line.strip() for line in f)
    news = [line for line in stripped_lines if line]

# Report the size and show a short preview instead of dumping the whole feed
# into the cell output.
print(f"Loaded {len(news)} news items from {file_path}")
news[:3]



### Build the extraction prompt from the json_extract.v1 template

In [7]:
# JSON schema derived from the CrisisEvent model; handed to the template as `schema`.
main_schema = pydantic_to_json_schema(CrisisEvent)

# The literal "{text}" is passed as the template's `text` argument.
# NOTE(review): presumably the template echoes it into the rendered prompt so
# each news item can be substituted in later — confirm against the template.
template_args = {"schema": main_schema, "text": "{text}"}
prompt_text, spec = render("json_extract.v1", **template_args)

### Run the extraction on each news item and validate the JSON output

In [8]:
# Client for the model the router returns for ('openai', 'general').
model = pick_model('openai', 'general')
client = LLMClient('openai', model)

# Dict dumps of every item whose reply validated as a CrisisEvent.
valid_feed = []

for idx, item in enumerate(news, start=1):
    # Substitute the current news item for the "{text}" placeholder and send
    # the result as a single user message.
    chat_messages = [
        {'role': 'user', 'content': prompt_text.replace("{text}", item)}
    ]
    reply = client.chat(chat_messages, temperature=0.0)

    # The reply's 'text' entry, trimmed, is what gets validated.
    reply_text = reply['text'].strip()

    try:
        # Parse and validate the reply against the CrisisEvent model.
        event = CrisisEvent.model_validate_json(reply_text)
        valid_feed.append(event.model_dump())

        success_md = f"**Validation Success (ID {idx}):** District: `{event.district}`"
        display(Markdown(success_md))

    except Exception as e:
        # Report the failure with the first 50 characters of the item
        # (ellipsis only when truncated) and carry on with the next item.
        snippet = item if len(item) <= 50 else item[:50] + "..."
        error_md = (
            f"**Validation Error (ID {idx}):** `{snippet}`\n\n"
            f"> Error: {e!s}"
        )
        display(Markdown(error_md))

**Validation Success (ID 1):** District: `Colombo`

**Validation Success (ID 2):** District: `Gampaha`

**Validation Success (ID 3):** District: `Kandy`

**Validation Success (ID 4):** District: `Kalutara`

**Validation Success (ID 5):** District: `Gampaha`

**Validation Success (ID 6):** District: `Colombo`

**Validation Success (ID 7):** District: `Matara`

**Validation Success (ID 8):** District: `Colombo`

**Validation Success (ID 9):** District: `Galle`

**Validation Success (ID 10):** District: `Gampaha`

**Validation Success (ID 11):** District: `Colombo`

**Validation Success (ID 12):** District: `None`

**Validation Success (ID 13):** District: `Kandy`

**Validation Success (ID 14):** District: `Gampaha`

**Validation Success (ID 15):** District: `Nuwara Eliya`

**Validation Success (ID 16):** District: `Gampaha`

**Validation Success (ID 17):** District: `Ratnapura`

**Validation Success (ID 18):** District: `Colombo`

**Validation Success (ID 19):** District: `Kalutara`

**Validation Success (ID 20):** District: `Colombo`

**Validation Success (ID 21):** District: `Gampaha`

**Validation Success (ID 22):** District: `Kandy`

**Validation Success (ID 23):** District: `None`

**Validation Success (ID 24):** District: `Kalutara`

**Validation Success (ID 25):** District: `Matara`

**Validation Success (ID 26):** District: `Colombo`

**Validation Success (ID 27):** District: `Galle`

**Validation Success (ID 28):** District: `Gampaha`

**Validation Success (ID 29):** District: `None`

**Validation Success (ID 30):** District: `Kegalle`

### Convert valid objects to a Pandas DataFrame

In [9]:
# One row per validated event; columns come from the keys of the dumped dicts.
df = pd.DataFrame(data=valid_feed)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,district,province,flood_level_meters,vicm_count,main_need,status
0,Colombo,Western,9.5,0,General Assistance,Critical
1,Gampaha,Western,,0,Rescue,Critical
2,Kandy,Central,,0,General Assistance,Stable
3,Kalutara,Western,,0,Rescue,Critical
4,Gampaha,Western,2.0,500,Food/Water,Critical


### Save the data to an Excel file

In [10]:
# pandas does not create missing parent directories when writing, so make sure
# the output folder exists first (it may be absent on a fresh checkout).
report_path = Path('../data/output/flood_report.xlsx')
report_path.parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the DataFrame's row index out of the sheet.
df.to_excel(report_path, index=False)