In [14]:
"""
########################################################################################
# FAO – Events Visualization in Emergencies (EVE) - Flood monitoring platform.
# EVE Data Extraction Script (Tables and Spatial Outputs)
########################################################################################

# Description:
This script programmatically downloads filtered subsets of the EVE **administrative
polygon feature layer** hosted on the FAO Data in Emergencies Hub (DIEM).
It can export both **tabular data** and **spatial data** in multiple formats.

# Key concept:
- The EVE feature service is queried.
- From that same query, the script can generate:
  - Tabular outputs (CSV, Excel)
  - Spatial outputs (Shapefile, File Geodatabase feature class)

# Functionality:
- Connects to FAO DIEM Hub using DIEM credentials.
- Queries the EVE administrative polygon feature layer.
- Allows filtering by:
  - Country (`ISO3`):
    - A specific ISO3 code (e.g. "MOZ")
    - Empty value to retrieve all countries
  - Period (`PERIOD_NUMBER`):
    - `None`: retrieve **all available periods**
    - Integer: retrieve a **specific biweekly period**
    - `"latest"`: automatically retrieve the **most recent available period**
      - The latest period is dynamically computed (two periods per month,
        starting from January 2024).
      - If the expected latest period has no data, previous periods are checked
        automatically until data is found.
- Automatically builds clear and safe output filenames including:
  - ISO3 code
  - Period number
  - Biweekly group (when available)
  - Timestamp
- Ensures File Geodatabase outputs comply with Esri naming rules.

# Supported outputs (all optional and configurable):
- CSV (tabular, geometry removed)
- Excel (.xlsx, tabular, geometry removed)
- Shapefile (.shp)
- File Geodatabase feature class (.gdb)

# ⚠ Shapefile limitations:
- When exporting to Shapefile (.shp), field names are limited to 10 characters.
- Longer field names will be automatically truncated by the Shapefile format.
- This may result in abbreviated or less readable column names.
- CSV, Excel, and File Geodatabase outputs preserve full field names.

# Output behavior:
- All outputs are derived from the **same feature layer query**.
- The output folder and geodatabase are created automatically if needed.
- Feature class names are sanitized to avoid File GDB errors.

# Data source:
- EVE administrative polygon feature service
- Used for flood monitoring, exposure analysis, and reporting.

# About EVE:
- EVE (Events Visualization in Emergencies) is an FAO tool for flood monitoring
  and impact analysis, supporting evidence-based decision-making.
- Dashboard:
  https://data-in-emergencies.fao.org/apps/22e659f381fa41e5af05a67db001ac26/explore
- More information:
  https://data-in-emergencies.fao.org/pages/diem_eve

# Requirements:
- A DIEM account with access to the EVE feature services.
- Recommended execution from an ArcGIS Pro Python environment
  when exporting to File Geodatabase.

########################################################################################
"""


import os, re
import datetime
from arcgis.gis import GIS

# =============================================================================
# USER PARAMETERS (edit only this section)
# =============================================================================

# --- Authentication ---
# Both USERNAME and PASS must be filled in: get_gis() signs in only when both
# are non-empty; otherwise it prints a reminder and the script stops before
# downloading anything.
PORTAL_URL = "https://hqfao-hub.maps.arcgis.com"
USERNAME = ""  # DIEM account user name
PASS = ""      # DIEM account password

# --- Output location ---
OUTPUT_LOCATION = r"C:\temp"  # for CSV/Excel/Shapefile outputs
GDB_PATH = r"C:\temp\evedownloads.gdb"  # only used if exporting to GDB; it will be created if missing

# --- Data filters ---
ISO3 = "MOZ"              # e.g. "BGD" or "" for all countries
PERIOD_NUMBER = "latest"  # None for all periods, an integer (e.g. 27), or "latest"

# --- What to export (single source layer, multiple outputs) ---
EXPORT_CSV = True
EXPORT_EXCEL = True
EXPORT_SHAPEFILE = True
EXPORT_GDB_FEATURECLASS = True

# =============================================================================
# INTERNAL CONSTANTS (do not edit unless needed)
# =============================================================================

# Portal item ID passed to gis.content.get(); the first layer of that item is
# the one queried by this script.
FEATURES_ITEM_ID = "ced2d931709b4cbeb68d5e89b93fe0d0"

# Timestamp captured once when this code runs (local time, naive datetime).
# It seeds the "latest" period calculation and, formatted as YYYYMMDDHHMMSS,
# is the suffix of every output filename.
current_datetime = datetime.datetime.now()
formatted_datetime = current_datetime.strftime("%Y%m%d%H%M%S")


def get_gis():
    """Sign in to the DIEM portal with the configured credentials.

    Returns:
        A ``GIS`` connection to ``PORTAL_URL`` when both ``USERNAME`` and
        ``PASS`` are non-empty; otherwise ``None``, after printing a reminder
        to fill them in.
    """
    credentials_missing = not (USERNAME and PASS)
    if credentials_missing:
        print(
            "DIEM credentials not provided.\n"
            "Please set DIEM USERNAME and PASS at the top of the script ")
        return None

    return GIS(PORTAL_URL, USERNAME, PASS)



def calculate_current_period(now_dt: datetime.datetime) -> int:
    """Return the biweekly period number that contains ``now_dt``.

    Periods are numbered consecutively from 1, two per calendar month,
    starting with the first half of January 2024.

    The second half of a month starts on the 16th. This matches the period
    labels returned by the service (e.g. period 50 is labelled
    "16-01-2026 to 31-01-2026"), so the 15th still belongs to the first half.

    Args:
        now_dt: The date/time to classify. Only year, month and day are used.

    Returns:
        The 1-based period number. Dates before January 2024 yield values
        below 1; no validation is performed.
    """
    start_year = 2024
    months_since_start = (now_dt.year - start_year) * 12 + (now_dt.month - 1)

    # 1 = first half (days 1-15), 2 = second half (day 16 to month end).
    half_of_month = 2 if now_dt.day >= 16 else 1

    return months_since_start * 2 + half_of_month


def safe_filename(text: str) -> str:
    """Return ``text`` converted to a string usable inside a file name.

    Spaces become underscores; ``/``, ``\\`` and ``:`` become hyphens; the
    characters ``* ? " < > |`` are removed. Everything else is left as is.

    Args:
        text: Any value; it is converted with ``str()`` first.

    Returns:
        The sanitized string.
    """
    # One translation pass: mapping to None deletes the character.
    substitutions = str.maketrans({
        " ": "_",
        "/": "-",
        "\\": "-",
        ":": "-",
        "*": None,
        "?": None,
        '"': None,
        "<": None,
        ">": None,
        "|": None,
    })
    return str(text).translate(substitutions)


def ensure_gdb_exists(gdb_path: str):
    """Create the file geodatabase at ``gdb_path`` if it does not exist yet.

    Nothing is done when the path already exists. Otherwise the parent folder
    is created if needed and the geodatabase is created with
    ``arcpy.management.CreateFileGDB``.

    Args:
        gdb_path: Path of the file geodatabase; must end with ``.gdb`` when
            it has to be created.

    Raises:
        ValueError: The path does not exist and does not end with ``.gdb``.
        RuntimeError: arcpy could not be imported, or arcpy failed to create
            the geodatabase. The underlying error is chained as the cause.
    """
    if os.path.exists(gdb_path):
        return

    if not gdb_path.lower().endswith(".gdb"):
        raise ValueError("GDB_PATH must end with .gdb (file geodatabase path).")

    # A bare relative name such as "out.gdb" has an empty dirname, which
    # os.makedirs rejects; fall back to the current directory.
    parent_dir = os.path.dirname(gdb_path) or os.curdir
    os.makedirs(parent_dir, exist_ok=True)

    # Keep the two failure modes apart so the message names the real problem.
    try:
        import arcpy
    except Exception as e:
        # Caught broadly on purpose: any import failure means arcpy cannot be
        # used here, whatever the reason.
        raise RuntimeError(
            "Cannot create a file geodatabase because arcpy is not available in this environment. "
            "Create the .gdb manually in ArcGIS Pro (or run this script from an ArcGIS Pro Python environment)."
        ) from e

    try:
        arcpy.management.CreateFileGDB(parent_dir, os.path.basename(gdb_path))
    except Exception as e:
        raise RuntimeError(
            "Could not create the file geodatabase automatically. "
            "If you are not running inside an ArcGIS Pro conda environment, "
            "create the .gdb manually in ArcGIS Pro and retry."
        ) from e


def build_where_clause(iso3_value, period_value) -> str:
    """Build the attribute filter for the feature layer query.

    Args:
        iso3_value: Country code matched against ``adm0_iso3``; a falsy value
            (e.g. ``""``) means no country filter.
        period_value: Value matched against ``period_number``; ``None`` means
            no period filter.

    Returns:
        The active filters joined with ``AND``, or ``"1=1"`` when neither
        filter is active. Values are inserted as given, without escaping.
    """
    country_filter = f"adm0_iso3 = '{iso3_value}'" if iso3_value else None
    period_filter = (
        f"period_number = {period_value}" if period_value is not None else None
    )

    active_filters = [f for f in (country_filter, period_filter) if f is not None]
    if not active_filters:
        return "1=1"
    return " AND ".join(active_filters)


def resolve_latest_period(layer, iso3_value) -> int:
    """Find the most recent period number that has data in ``layer``.

    Starts at the period computed from the script's start time and walks
    backwards one period at a time, querying the layer for each candidate,
    until a query returns at least one record.

    Args:
        layer: Feature layer to query.
        iso3_value: Country code used to restrict the search; a falsy value
            searches across all countries.

    Returns:
        The newest period number for which the query returned records.

    Raises:
        ValueError: No period from the computed one down to 1 has data.
    """
    newest = calculate_current_period(current_datetime)
    iso3_clause = f"adm0_iso3 = '{iso3_value}' AND " if iso3_value else ""

    # Period numbering starts at 1, so stop there instead of counting down
    # forever when nothing matches (for example a mistyped ISO3 code).
    for candidate in range(newest, 0, -1):
        print(f"Trying period_number: {candidate}")
        test = layer.query(
            where=f"{iso3_clause}period_number = {candidate}",
            out_fields="period_number",
            return_geometry=False,
            as_df=True
        )
        if not test.empty:
            print(f"Data found for period_number: {candidate}")
            return candidate

        print(f"No data for period_number: {candidate}, trying previous period...")

    scope = f" for adm0_iso3 = '{iso3_value}'" if iso3_value else ""
    raise ValueError(
        f"No data found for any period_number from {newest} down to 1{scope}."
    )


def get_feature_layer(gis):
    """Return the first layer of the portal item ``FEATURES_ITEM_ID``.

    Args:
        gis: Connection object whose ``content.get`` looks up the item.

    Returns:
        The item's first layer (``item.layers[0]``).

    Raises:
        ValueError: The item was not found, or it exposes no layers.
    """
    item = gis.content.get(FEATURES_ITEM_ID)
    if not item:
        raise ValueError(f"Item not found. Check the ID: {FEATURES_ITEM_ID}")

    # Missing attribute, None, and an empty collection are all treated alike.
    layers = getattr(item, "layers", None)
    if not layers:
        raise ValueError("The item does not expose any layers.")

    return layers[0]

def gdb_safe_name(name: str, max_len: int = 150) -> str:
    """Turn ``name`` into a conservative File GDB feature class name.

    The result contains only ASCII letters, digits and underscores, has no
    repeated, leading or trailing underscores from the cleaning step, starts
    with a letter (``fc_`` is prefixed otherwise) and is cut to ``max_len``
    characters.

    Args:
        name: Proposed feature class name.
        max_len: Maximum length of the returned name.

    Returns:
        The sanitized name.
    """
    # Start from the generic file name cleaning, then tighten to [A-Za-z0-9_].
    cleaned = re.sub(r"[^A-Za-z0-9_]", "_", safe_filename(name))

    # Squash runs of underscores and trim them from both ends.
    cleaned = re.sub(r"_+", "_", cleaned).strip("_")

    # An empty slice is not alphabetic, so this also covers an empty result.
    if not cleaned[:1].isalpha():
        cleaned = "fc_" + cleaned

    return cleaned[:max_len]



def export_outputs(layer, where_clause: str, iso3_label: str, period_value):
    """Query the layer once and write every enabled output format.

    The output base name is ``eve_<iso3>_<period>[_<biweekly_group>]_<timestamp>``.
    The ``biweekly_group`` part is added only when the result contains exactly
    one distinct non-null value in that column, so a download spanning several
    periods is not labelled with the group of its first record.

    Args:
        layer: Feature layer to query.
        where_clause: Attribute filter passed to ``layer.query``.
        iso3_label: Country label used in the output names.
        period_value: Period number used in the output names, or ``None``
            for "allperiods".

    Raises:
        ValueError: The query returned no records.
    """
    os.makedirs(OUTPUT_LOCATION, exist_ok=True)

    # Query once: features + attributes
    fs = layer.query(where=where_clause, out_fields="*", return_geometry=True)
    sdf = fs.sdf
    if sdf is None or sdf.empty:
        raise ValueError("No records returned for the selected filters.")

    # Base name: include ISO3 and period clearly
    period_part = f"period_{period_value}" if period_value is not None else "allperiods"
    name_parts = ["eve", iso3_label, period_part]

    # Add biweekly_group as extra context only when it is unambiguous.
    if "biweekly_group" in sdf.columns:
        distinct_groups = sdf["biweekly_group"].dropna().unique()
        if len(distinct_groups) == 1:
            name_parts.append(safe_filename(distinct_groups[0]))

    name_parts.append(formatted_datetime)
    base_name = "_".join(str(part) for part in name_parts)

    # -------------------------------------------------------------------------
    # Tabular exports (from the same feature layer)
    # -------------------------------------------------------------------------
    if EXPORT_CSV or EXPORT_EXCEL:
        # Drop geometry for tabular exports if present; drop() returns a new
        # frame, so the spatial frame used further down is left untouched.
        geometry_cols = [c for c in ("SHAPE", "Shape", "geometry") if c in sdf.columns]
        df = sdf.drop(columns=geometry_cols)

        # Keep original behavior: pop_affected -> pop_exposed rename if needed
        if "pop_affected" in df.columns and "pop_exposed" not in df.columns:
            df = df.rename(columns={"pop_affected": "pop_exposed"})

        if EXPORT_CSV:
            csv_path = os.path.join(OUTPUT_LOCATION, f"{base_name}.csv")
            df.to_csv(csv_path, index=False)
            print(f"Saved CSV: {csv_path}")

        if EXPORT_EXCEL:
            xlsx_path = os.path.join(OUTPUT_LOCATION, f"{base_name}.xlsx")
            df.to_excel(xlsx_path, index=False)
            print(f"Saved Excel: {xlsx_path}")

    # -------------------------------------------------------------------------
    # Spatial exports
    # -------------------------------------------------------------------------
    if EXPORT_SHAPEFILE:
        shp_path = os.path.join(OUTPUT_LOCATION, f"{base_name}.shp")
        sdf.spatial.to_featureclass(location=shp_path)
        print(f"Saved Shapefile: {shp_path}")

    if EXPORT_GDB_FEATURECLASS:
        ensure_gdb_exists(GDB_PATH)

        # The base name may contain characters a feature class name cannot.
        fc_name = gdb_safe_name(base_name, max_len=150)
        fc_path = os.path.join(GDB_PATH, fc_name)

        # Overwrite if a feature class with this name already exists.
        sdf.spatial.to_featureclass(location=fc_path, overwrite=True)

        print(f"Saved GDB feature class: {fc_path}")

    print("Process completed.")


def main():
    """Run the download: sign in, resolve the period, query and export.

    Returns early (doing nothing) when no portal connection is available.

    Raises:
        ValueError: ``PERIOD_NUMBER`` is not ``None``, an integer, a string of
            digits, or ``"latest"``.
    """
    gis = get_gis()
    if gis is None:
        return

    layer = get_feature_layer(gis)

    # Normalize the configured period: "latest" is resolved against the
    # service, a digit string is converted, anything else is checked below.
    period_value = PERIOD_NUMBER
    if isinstance(period_value, str):
        if period_value.strip().lower() == "latest":
            period_value = resolve_latest_period(layer, ISO3)
        elif period_value.isdigit():
            period_value = int(period_value)

    period_is_valid = period_value is None or isinstance(period_value, int)
    if not period_is_valid:
        raise ValueError("PERIOD_NUMBER must be None, an integer, or 'latest'.")

    export_outputs(
        layer,
        build_where_clause(ISO3, period_value),
        ISO3 if ISO3 else "allcountries",
        period_value,
    )


# Run the export only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()




Trying period_number: 51
No data for period_number: 51, trying previous period...
Trying period_number: 50
Data found for period_number: 50
Saved CSV: C:\temp\eve_MOZ_period_50_Period_#50_(16-01-2026_to_31-01-2026)_20260202141141.csv
Saved Excel: C:\temp\eve_MOZ_period_50_Period_#50_(16-01-2026_to_31-01-2026)_20260202141141.xlsx
Saved Shapefile: C:\temp\eve_MOZ_period_50_Period_#50_(16-01-2026_to_31-01-2026)_20260202141141.shp
Saved GDB feature class: C:\temp\evedownloads.gdb\eve_MOZ_period_50_Period_50_16_01_2026_to_31_01_2026_20260202141141
Process completed.
