In [None]:
import sys
import pathlib
import pandas as pd

# Walk from the current working directory up through its ancestors and stop at
# the first directory that contains a "backend/" folder.
start_dir = pathlib.Path.cwd().resolve()
for project_root in (start_dir, *start_dir.parents):
    if (project_root / "backend").is_dir():
        break
else:
    # Reached the filesystem root without ever seeing "backend/".
    raise RuntimeError("Could not find project root containing 'backend/'")

# Put the project root at the front of sys.path (once) so `backend` imports work.
root_str = str(project_root)
if root_str not in sys.path:
    sys.path.insert(0, root_str)

from backend.utils import fetch_metrics, country_data_fetch
from backend import constants, data_retrieval, data_push

# --- Paths (used if we need to look up country/name mappings) ---------------
# Everything below lives under <project_root>/backend.
BACKEND_DIR = project_root.joinpath("backend")
# Directory used as the root of the processed wide World Bank panel.
PROCESSED_DATA = BACKEND_DIR.joinpath("data", "wb_panel_wide")
# Excel workbook with the country reference table.
RAW_DATA_EXCEL = BACKEND_DIR.joinpath("data", "country_data.xlsx")

## Pick One Country

In [8]:
# Load the country reference workbook (backend/data/country_data.xlsx).
data_path = project_root.joinpath("backend", "data", "country_data.xlsx")
country_data_df = pd.read_excel(data_path)
# NOTE(review): pandas' default NA parsing turns the literal text "NA" into NaN,
# which would affect an iso2Code of "NA" (Namibia) if the sheet contains it —
# confirm, and consider the `keep_default_na` / `na_values` options if so.
countries: list = country_data_df["Country_Name"].tolist()

# Country under test: change the index below to analyse a different country.
selected_country = countries[13]
is_selected = country_data_df["Country_Name"] == selected_country
country_iso_code = country_data_df.loc[is_selected, "iso2Code"].tolist()[0]

# Build the indicator panel for the chosen country, report which country was
# picked, and display the resulting frame as the cell output.
country_data = fetch_metrics.build_country_panel(country_iso_code, constants.INDICATORS)
print(f"Country: {selected_country} | Country ISO Code: {country_iso_code}")
country_data

Country: Germany | Country ISO Code: DE


Unnamed: 0,INFLATION,UNEMPLOYMENT,FDI_PCT_GDP,POL_STABILITY,RULE_OF_LAW,CONTROL_CORRUPTION,GINI_INDEX,GDP_PC_GROWTH,INT_PAYM_PCT_REV
1960,1.536612,,,,,,,,
1961,2.293695,,,,,,,3.498576,
1962,2.843270,,,,,,,3.707413,
1963,2.966960,,,,,,,1.788486,
1964,2.335736,,,,,,,5.784319,
...,...,...,...,...,...,...,...,...,...
2020,0.144878,3.881,4.486666,0.645049,1.520128,1.825342,32.4,-4.173453,1.064802
2021,3.066667,3.594,2.934708,0.725788,1.571805,1.782925,,3.626129,0.976158
2022,6.872574,3.120,2.094713,0.628492,1.531995,1.818363,,0.641609,1.476653
2023,5.946437,3.068,1.711092,0.586989,1.551208,1.664166,,-0.389989,


## Ensure Processed World Bank Panel Exists

In [None]:
# Bootstrap the processed World Bank panel store: when PROCESSED_DATA
# (backend/data/wb_panel_wide) is missing *or empty*, create it, build the wide
# indicator panel for `country_iso_code`, and hand it to `ingest_panel_wide`.
# If the directory already holds content, the cell does nothing.
#
# An empty directory counts as "missing" because the directory is created
# before the fetch: if the fetch or ingest fails, an empty folder is left
# behind, and a plain existence check would then skip the bootstrap on every
# later run.
#
# NOTE(review): the guard is per-directory, not per-country. If the directory
# already contains data for a different country, nothing is ingested for the
# currently selected one — confirm whether that is intended.
panel_dir = PROCESSED_DATA
needs_bootstrap = not panel_dir.is_dir() or not any(panel_dir.iterdir())
if needs_bootstrap:
    panel_dir.mkdir(parents=True, exist_ok=True)
    panel = fetch_metrics.build_country_panel(
        country_iso_code,
        indicators=constants.INDICATORS,
        start=None,
        end=None,
        tidy_fetch=True,
    )
    country_data_fetch.ingest_panel_wide(panel, country_iso_code, root=panel_dir)

## Read Country Parquet File and Prepare Data for LLM Inference

In [10]:
# Arguments for the human-readable LLM input payload of `country_iso_code`.
# The (1, 5) deltas appear as the "Δ1y" / "Δ5y" fields in the output below.
payload_kwargs = {
    "country_iso": country_iso_code,
    "indicators": constants.INDICATORS,
    "since": 2015,
    "lookback": 10,
    "deltas": (1, 5),
}
payload = data_retrieval.prepare_llm_payload_pretty(**payload_kwargs)
# Bare last expression so the notebook renders the payload.
payload

{'country': 'DE',
 'latest_year': 2024,
 'indicators': {'Inflation (% y/y)': {'latest': 2.26,
   'Δ1y': -0.621,
   'Δ5y': 0.561,
   'series': {2015: 0.51,
    2016: 0.49,
    2017: 1.51,
    2018: 1.73,
    2019: 1.45,
    2020: 0.14,
    2021: 3.07,
    2022: 6.87,
    2023: 5.95,
    2024: 2.26}},
  'Unemployment (% labour force)': {'latest': 3.41,
   'Δ1y': 0.11,
   'Δ5y': 0.077,
   'series': {2015: 4.61,
    2016: 4.1,
    2017: 3.78,
    2018: 3.38,
    2019: 3.16,
    2020: 3.88,
    2021: 3.59,
    2022: 3.12,
    2023: 3.07,
    2024: 3.41}},
  'FDI inflow (% GDP)': {'latest': 1.02,
   'Δ1y': -0.403,
   'Δ5y': -0.464,
   'series': {2015: 1.82,
    2016: 1.64,
    2017: 2.89,
    2018: 4.0,
    2019: 1.91,
    2020: 4.49,
    2021: 2.93,
    2022: 2.09,
    2023: 1.71,
    2024: 1.02}},
  'Political stability (z-score)': {'latest': None,
   'Δ1y': None,
   'Δ5y': None,
   'series': {2015: 0.69,
    2016: 0.67,
    2017: 0.57,
    2018: 0.58,
    2019: 0.55,
    2020: 0.65,
    2

## Add Country Data and Most Recent Geopolitical/Economic News Links Into LLM Prompt

In [None]:
import sys
import json
import pathlib
from datetime import datetime, timedelta, timezone

# --- Resolve project root so "backend/" is importable ------------------------
# Same lookup as in the first cell: the nearest ancestor of the working
# directory (including the directory itself) that contains "backend/".
search_start = pathlib.Path.cwd().resolve()
for project_root in [search_start, *search_start.parents]:
    if (project_root / "backend").is_dir():
        break
else:
    # No ancestor up to the filesystem root contains "backend/".
    raise RuntimeError("Could not find project root containing 'backend/'")

# Register the root on sys.path only if it is not already there.
project_root_str = str(project_root)
if project_root_str not in sys.path:
    sys.path.insert(0, project_root_str)

# --- Imports after sys.path tweak -------------------------------------------
from backend.ai import langchain_llm
from backend.utils import fetch_links
from backend import constants, data_retrieval

# --- Helpers ----------------------------------------------------------------
def _parse_iso(s: str):
    if not s:
        return None
    try:
        return datetime.fromisoformat(s.replace("Z", "+00:00"))
    except Exception:
        return None

def _clip_words(s: str, max_words: int = 600) -> str:
    parts = s.split()
    return " ".join(parts[:max_words])

# ---------------------------------------------------------------------------
# 1) Macro payload (pretty, JSON-serializable)
# ---------------------------------------------------------------------------
# Built with the same arguments as the earlier payload cell; this is the
# `payload` that is passed to the LLM and merged into the final report below.
macro_payload_args = dict(
    country_iso=country_iso_code,
    indicators=constants.INDICATORS,
    since=2015,
    lookback=10,
    deltas=(1, 5),
)
payload = data_retrieval.prepare_llm_payload_pretty(**macro_payload_args)

# ---------------------------------------------------------------------------
# 2) Google News (expanded with extracted article text) + recency/dedup
# ---------------------------------------------------------------------------
DAYS = 365
query = f'{selected_country} (economy OR politics OR conflict OR sanctions OR inflation OR war)'

# A falsy result from the fetcher is normalised to an empty list.
items = fetch_links.gnews_rss(
    query=query,
    max_results=10,
    expand=True,         # fetch & extract article body
    extract_chars=3500,  # cap per-article text
    lang="en",
    country="US",
    build_summary=True,
    summary_words=240,
) or []

# 2a) Recency filter: keep items published within the last DAYS days.
#     Items whose "published" value is missing or unparseable are kept too.
cutoff = datetime.now(timezone.utc) - timedelta(days=DAYS)
recent_items = []
for item in items:
    published_at = _parse_iso(item.get("published") or "")
    if published_at is None or published_at >= cutoff:
        recent_items.append(item)
items = recent_items

# 2b) Keep the first item for each non-empty (stripped) title and give every
#     kept item a `summary` taken from summary -> text -> snippet, clipped to
#     240 words. The source items are copied, not modified.
dedup = []
seen_titles = set()
for item in items:
    title = (item.get("title") or "").strip()
    if title and title not in seen_titles:
        seen_titles.add(title)
        raw_text = (item.get("summary") or item.get("text") or item.get("snippet") or "").strip()
        dedup.append({**item, "summary": _clip_words(raw_text, max_words=240)})

# Hard cap on the number of articles handed to the LLM.
articles = dedup[:10]

# ---------------------------------------------------------------------------
# 3) LLM: classify qualitative subscores + per-article impacts (schema-locked)
# ---------------------------------------------------------------------------
# Fixed model snapshot, zero temperature and a fixed seed are passed on every run.
llm_output = langchain_llm.country_llm_score(
    country_display=selected_country, payload=payload, articles=articles,
    model="gpt-4o-2024-08-06", temperature=0.0, seed=42,
)

# ---------------------------------------------------------------------------
# 4) Combine & print
# ---------------------------------------------------------------------------
# Copy the macro payload and attach the LLM result under "llm_output";
# `payload` itself is left untouched.
report = dict(payload)
report["llm_output"] = llm_output
# ensure_ascii=False keeps characters such as "Δ" readable in the printed JSON.
print(json.dumps(report, indent=2, ensure_ascii=False))

{
  "country": "DE",
  "latest_year": 2024,
  "indicators": {
    "Inflation (% y/y)": {
      "latest": 2.26,
      "Δ1y": -0.621,
      "Δ5y": 0.561,
      "series": {
        "2015": 0.51,
        "2016": 0.49,
        "2017": 1.51,
        "2018": 1.73,
        "2019": 1.45,
        "2020": 0.14,
        "2021": 3.07,
        "2022": 6.87,
        "2023": 5.95,
        "2024": 2.26
      }
    },
    "Unemployment (% labour force)": {
      "latest": 3.41,
      "Δ1y": 0.11,
      "Δ5y": 0.077,
      "series": {
        "2015": 4.61,
        "2016": 4.1,
        "2017": 3.78,
        "2018": 3.38,
        "2019": 3.16,
        "2020": 3.88,
        "2021": 3.59,
        "2022": 3.12,
        "2023": 3.07,
        "2024": 3.41
      }
    },
    "FDI inflow (% GDP)": {
      "latest": 1.02,
      "Δ1y": -0.403,
      "Δ5y": -0.464,
      "series": {
        "2015": 1.82,
        "2016": 1.64,
        "2017": 2.89,
        "2018": 4.0,
        "2019": 1.91,
        "2020": 4.49,
    