## Updated the code and it still doesn't work; I give up

In [1]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse

# API hosts, in the order the helpers below iterate over them
# (the first entry is tried first, the second acts as the backup).
BASE_URLS = [
    "https://sdg6data.org",
    "https://sdg6-backup.stage.rw1.co.za",
]

# Passed as the `timeout` argument of every requests call in this notebook.
DEFAULT_TIMEOUT = 25
# HTTP headers sent with every request made by the helpers below.
DEFAULT_HEADERS = {
    "User-Agent": "SDG6-Client/1.0 (+https://example.org)",
}

def build_api_url(base, indicator_code, fmt="xml", params=None):
    """
    Construct a SDG6 API indicator URL.
    Example: https://sdg6data.org/api/indicator/6.1.1?_format=xml&per_page=1000&page=0

    Parameters:
        base: Host to query, e.g. "https://sdg6data.org". The scheme of
            `base` is kept; a scheme-less value such as "sdg6data.org"
            is treated as a host name and defaults to https.
        indicator_code: Indicator identifier placed in the URL path. It is
            converted to str and percent-encoded.
        fmt: Value of the `_format` query parameter.
        params: Optional mapping of extra query parameters. A `_format`
            key in `params` overrides `fmt`. Sequence values are expanded
            into repeated parameters.

    Returns:
        The complete URL as a string.
    """
    # Local import so this function does not depend on the file-level import list.
    from urllib.parse import quote

    parsed = urlparse(base)
    if not parsed.netloc:
        # "sdg6data.org" (no scheme) parses with an empty netloc; re-parse it
        # as a network location so the host is not lost.
        parsed = urlparse(f"//{base}")
    scheme = parsed.scheme or "https"

    extra = {} if params is None else params
    query = urlencode({"_format": fmt, **extra}, doseq=True)

    # safe="" also encodes "/" so the code cannot introduce extra path segments.
    path = f"/api/indicator/{quote(str(indicator_code), safe='')}"
    return urlunparse((scheme, parsed.netloc, path, "", query, ""))

def fetch_text_with_fallback(indicator_code, fmt="xml", params=None, timeout=DEFAULT_TIMEOUT):
    """
    Try each host in BASE_URLS in order (primary, then backup).

    Returns (text, final_url, base_used) for the first host that answers
    with a successful HTTP status.

    Raises the last exception if all attempts fail: a
    requests.RequestException for a connection-level failure, or a
    RuntimeError for a non-success HTTP status. Failures from earlier hosts
    are chained onto it through `__cause__`, so the traceback shows every
    attempt rather than only the final one. Raises RuntimeError if
    BASE_URLS is empty.
    """
    failures = []
    for base in BASE_URLS:
        url = build_api_url(base, indicator_code, fmt=fmt, params=params)
        try:
            resp = requests.get(url, headers=DEFAULT_HEADERS, timeout=timeout)
        except requests.RequestException as exc:
            # Only network/HTTP-layer errors trigger the fallback; programming
            # errors propagate immediately instead of being masked.
            failures.append(exc)
            continue
        if resp.ok:
            return resp.text, url, base
        failures.append(RuntimeError(f"{base} returned HTTP {resp.status_code} for {url}"))

    if not failures:
        # Without this guard the function would execute `raise None`.
        raise RuntimeError("BASE_URLS is empty; there is no API host to query.")

    # Link each failure to the previous one so none of them is lost.
    for earlier, later in zip(failures, failures[1:]):
        later.__cause__ = earlier
    raise failures[-1]

def convert_xml_to_dataframe(xml_text):
    """
    Turn an SDG6 XML payload into a DataFrame with the columns
    Region, GeoAreaCode, Indicator, SeriesDescription, TimePeriod, Value, Location.

    Every element of the document (at any depth) that has both a
    `GeoAreaName` and a `Value` direct child becomes one row. Text fields
    are stripped and default to "" when the child is absent; `Value` is
    converted to a number, with unparseable text becoming NaN.

    Raises ValueError when no element qualifies as a row.
    """

    def child_text(element, tag):
        # Stripped text of the first direct child named `tag`, or "".
        return (element.findtext(tag) or "").strip()

    records = []
    for element in ET.fromstring(xml_text).iter():
        # A data record is recognised by having both mandatory children.
        if element.find("GeoAreaName") is None or element.find("Value") is None:
            continue
        records.append(
            {
                "Region": child_text(element, "GeoAreaName"),
                "GeoAreaCode": child_text(element, "GeoAreaCode"),
                "Indicator": child_text(element, "Indicator"),
                "SeriesDescription": child_text(element, "SeriesDescription"),
                "TimePeriod": child_text(element, "TimePeriod"),
                "Value": pd.to_numeric(child_text(element, "Value"), errors="coerce"),
                "Location": child_text(element, "Location"),
            }
        )

    if len(records) == 0:
        raise ValueError("No data rows detected in XML. The response might be pagination-only or empty.")
    return pd.DataFrame(records)

def convert_json_to_dataframe(json_text):
    """
    Parse a JSON payload (given as a string) into a DataFrame.

    When the parsed frame consists of a single column whose first cell is
    a dict or list (for example a payload shaped like
    {"data": [{...}, {...}]}), that column is flattened with
    `pd.json_normalize`. Any other payload is returned as parsed.
    A single-column payload with no rows is returned as an empty frame.
    """
    # Local import so this function does not depend on the file-level import list.
    from io import StringIO

    # Passing a literal JSON string to read_json is deprecated since
    # pandas 2.1; a file-like wrapper works on old and new versions alike.
    df = pd.read_json(StringIO(json_text))

    # `not df.empty` guards the iloc[0, 0] lookup against a frame with no rows.
    if df.shape[1] == 1 and not df.empty and isinstance(df.iloc[0, 0], (list, dict)):
        df = pd.json_normalize(df.iloc[:, 0])
    return df

def convert_to_csv(df, output_file):
    """Write `df` to `output_file` as CSV, leaving out the index column."""
    df.to_csv(path_or_buf=output_file, index=False)


# --- Request settings for this run ---
indicator_code = "6.1.1"
fmt = "xml"  # or "json"
params = dict(per_page=1000, page=0)

# Download the payload; the helper reports which URL and host answered.
data_text, used_url, used_base = fetch_text_with_fallback(
    indicator_code,
    fmt=fmt,
    params=params,
)

# Read the format back from the query string of the URL that was used.
q = parse_qs(urlparse(used_url).query)
data_format = (q.get("_format", ["xml"])[0] or "xml").lower()

# Anything other than "xml" is handed to the JSON converter.
df = (
    convert_xml_to_dataframe(data_text)
    if data_format == "xml"
    else convert_json_to_dataframe(data_text)
)

# Persist the parsed table next to the notebook and report what happened.
output_file = f"data_{indicator_code}.{data_format}.csv"
convert_to_csv(df, output_file)
print(f"Fetched from: {used_base}\nSaved: {output_file}")


# Weight applied to each custom score column when all of them are present.
weights = {
    "WaterScarcityScore": 0.4,
    "ContaminationScore": 0.3,
    "VulnerabilityScore": 0.3,
}

needed_cols = set(weights.keys())
if needed_cols.issubset(df.columns):
    # Weighted sum driven by `weights`, so the dict stays the single source
    # of truth for both the column names and their factors.
    df["WeightedScore"] = sum(df[col] * weight for col, weight in weights.items())
else:
    # Fall back to the raw indicator value when the score columns are absent.
    if "Value" not in df.columns:
        raise KeyError("Neither custom score columns nor 'Value' found in the dataset.")
    df["WeightedScore"] = pd.to_numeric(df["Value"], errors="coerce")

# Keep only the most recent row per region. This needs both columns; a frame
# lacking either one is ranked as-is instead of failing with a KeyError.
if "TimePeriod" in df.columns and "Region" in df.columns:
    df["_Year"] = pd.to_numeric(df["TimePeriod"], errors="coerce")
    # na_position="first": rows whose TimePeriod is not a plain number sort
    # before real years, so tail(1) picks the latest parseable year.
    df_latest = (
        df.sort_values(["Region", "_Year"], na_position="first")
        .groupby("Region", as_index=False)
        .tail(1)
    )
else:
    df_latest = df.copy()

sorted_data = df_latest.sort_values(by="WeightedScore", ascending=False)
# Show only the report columns that actually exist in this dataset.
report_cols = [c for c in ("Region", "TimePeriod", "WeightedScore") if c in sorted_data.columns]
print(sorted_data[report_cols].head(5).to_string(index=False))


RuntimeError: https://sdg6-backup.stage.rw1.co.za returned HTTP 500 for https://sdg6-backup.stage.rw1.co.za/api/indicator/6.1.1?_format=xml&per_page=1000&page=0

In [None]:
import requests

def check_api_status_with_fallback(indicator_code="6.1.1", fmt="xml", params=None):
    """
    Probe each host in BASE_URLS in order and report the HTTP status.

    Each host is probed with HEAD first; if the HEAD answer is not a
    success status the same URL is retried with GET. Probing stops at the
    first host that answers successfully. If a host answers with an error
    status, the next host is tried, so the backup is actually consulted
    when the primary is failing.

    Returns (status_code, url) of the first successful answer, or of the
    last host that answered at all when none succeeded. Returns
    (None, None) when no host could be reached.
    """
    if params is None:
        params = {"per_page": 1, "page": 0}
    final_status = None
    final_url = None

    for base in BASE_URLS:
        url = build_api_url(base, indicator_code, fmt=fmt, params=params)
        try:
            resp = requests.head(url, headers=DEFAULT_HEADERS, allow_redirects=True, timeout=DEFAULT_TIMEOUT)
            if not resp.ok:
                resp.close()
                resp = requests.get(url, headers=DEFAULT_HEADERS, stream=True, timeout=DEFAULT_TIMEOUT)
        except requests.RequestException:
            continue

        final_status = resp.status_code
        final_url = resp.url
        reachable_and_ok = resp.ok
        # The GET is streamed and its body is never read, so the connection
        # must be released explicitly.
        resp.close()
        if reachable_and_ok:
            break

    return final_status, final_url

# Probe the API for indicator 6.1.1 and summarise the outcome.
status_code, error_url = check_api_status_with_fallback("6.1.1", fmt="xml")

if status_code is None:
    # No host produced any HTTP answer.
    print("Could not reach either the primary or the backup API.")
else:
    print(f"API server status code: {status_code}")
    # Name the failing URL only for server-side (5xx) errors.
    if error_url and status_code >= 500:
        print(f"Server error URL: {error_url}")


In [None]:
import requests

def debug_request(indicator_code="6.1.1", fmt="xml", params=None):
    """
    Print the outgoing request and the response status for the first host
    in BASE_URLS that can be reached.

    Each host is contacted with HEAD; if that raises a requests error, the
    same URL is retried with GET. Hosts that fail both ways are reported
    and skipped. For the first host that answers, the request method, URL
    and headers are printed, followed by the response status and final URL.
    The function then stops. Returns None.
    """
    if params is None:
        params = {"per_page": 1, "page": 0}
    for base in BASE_URLS:
        url = build_api_url(base, indicator_code, fmt=fmt, params=params)
        try:
            resp = requests.head(url, headers=DEFAULT_HEADERS, allow_redirects=True, timeout=DEFAULT_TIMEOUT)
        except requests.RequestException:
            try:
                resp = requests.get(url, headers=DEFAULT_HEADERS, stream=True, timeout=DEFAULT_TIMEOUT)
            except requests.RequestException as e:
                print(f"[{base}] Error connecting: {e}")
                continue

        try:
            req = resp.request
            print(f"Request: {req.method} {req.url}")
            print("Request Headers:")
            for name, value in req.headers.items():
                print(f"{name}: {value}")
            print(f"Response status: {resp.status_code}")
            print(f"Final URL (after redirects): {resp.url}\n")
        finally:
            # The body is never read (the GET is streamed), so release the
            # connection explicitly instead of leaking it.
            resp.close()
        break
# Print request/response diagnostics for indicator 6.1.1 requested as XML.
debug_request("6.1.1", fmt="xml")
