In [None]:
import csv
import datetime as dt
import json
import sys
from datetime import timedelta
from itertools import islice
from pathlib import Path
from pprint import pp
from pprint import pprint

import pandas as pd
from jinja2 import Environment, FileSystemLoader

import report_utils as ru

# Last expression of the cell, so the notebook displays the running
# interpreter's version.
sys.version_info

In [None]:
# Variant of the ``batched`` recipe from the itertools docs, adapted to
# iterables of pairs (see the docstring for how it differs).
def batched(iterable, n):
    """Yield the second items of ``iterable``'s elements in groups of up to ``n``.

    Unlike the itertools recipe, each element is indexed with ``[1]`` and only
    that item is kept, so the input has to yield indexable pairs (for example
    the ``(index, row)`` tuples produced by ``DataFrame.iterrows()``).

    batched([(0, 'a'), (1, 'b'), (2, 'c')], 2) → (a, b) (c)

    Args:
        iterable: Any iterable whose elements support ``element[1]``.
        n: Maximum number of elements per group; must be at least 1.

    Yields:
        A lazy generator over the ``[1]`` items of one group. The final group
        may hold fewer than ``n`` items.

    Raises:
        ValueError: If ``n`` is less than 1 (raised when iteration starts,
            because this is a generator function).
    """
    if n < 1:
        raise ValueError("n must be at least one")
    source = iter(iterable)
    # iter(callable, sentinel) stops as soon as islice has nothing left and
    # the callable returns the empty tuple.
    for chunk in iter(lambda: tuple(islice(source, n)), ()):
        yield (pair[1] for pair in chunk)

In [2]:
# Set up for local dev of this notebook
# NOTE(review): both roots are absolute paths on one developer's machine;
# presumably they are only meaningful for local development — confirm how
# they are supplied when the notebook runs elsewhere.

# Root from which the shared data, dag-run data and key_obs directories are
# derived in the configuration cell.
test_pipeline_root = Path(
    "/home/john/projects/flowbot-pipelines/hti-preparedness-2023-09-01"
)
# Root from which the static directory (and so the template path) is derived.
# NOTE(review): the last path component is a literal, unrendered cookiecutter
# placeholder — confirm this directory name is intended.
root_pipeline_root = Path(
    "/home/john/projects/flowbot-pipelines/flowbot-pipeline-root/{{ cookiecutter.__project_slug }}"
)

In [3]:
# Run configuration for the report.
# NOTE(review): presumably these are defaults that get overridden when the
# notebook is executed by the pipeline — confirm.

# Location of the report components JSON, relative to dagrun_data_dir.
report_json = "report_components/report_components.json"
# ISO "YYYY-MM-DD" text; a later cell parses it into a dt.date.
execution_date = "2024-08-01"
# Passed through unchanged as pub_date to every page template.
publication_date = "2020-01-01"
# Output folder name, relative to shared_data_dir.
html_out_folder = "html"
shared_data_dir = test_pipeline_root / "data" / "shared"
# NOTE(review): this path repeats the execution date as literal text, so
# changing execution_date above does not change the dag-run directory.
dagrun_data_dir = (
    test_pipeline_root
    / "data/dagruns/2024-08-01/preparedness_report_v2__scheduled__2024-08-01T00:00:00+00:00"
)

static_dir = root_pipeline_root / "static"
# Template folder name, relative to static_dir.
template_dir = "templates"
# Folder searched for the "<execution_date>.html" key observations file.
key_obs_dir = test_pipeline_root / "key_obs"
# Capitalised before being handed to the templates.
# NOTE(review): the pipeline directory name starts with "hti" while the
# country here is "nepal" — confirm this default is intentional.
country = "nepal"
# Partner logo file names. Only the first five are used, one for each
# partner_logo_N template field.
partners = [
    "swiss_sponsor_crop.jpeg",
    "hewlett.png",
    "afd.png",
    "usaid.png",
    "digicel_red.jpeg",
]

In [4]:
# Convert the raw parameter values into the types the rest of the notebook
# uses: a dt.date for the execution date and Path objects for locations.
#
# str() keeps this cell re-runnable. On a second run execution_date is already
# a dt.date, and str() of a date is its ISO "YYYY-MM-DD" text, so
# fromisoformat() receives a string either way instead of raising TypeError.
execution_date_str = str(execution_date)
execution_date = dt.date.fromisoformat(execution_date_str)
report_json = Path(dagrun_data_dir) / report_json
html_out_folder = Path(shared_data_dir) / html_out_folder
template_path = Path(static_dir) / template_dir
partners = [Path(s) for s in partners]

In [5]:
# Make sure the HTML output folder exists: missing parent directories are
# created and an already-existing folder is not an error.
html_out_folder.mkdir(parents=True, exist_ok=True)

In [6]:
class MissingDataError(Exception):
    """Exception for signalling missing data.

    Adds no fields or behaviour on top of ``Exception``.
    """


def get_update_num():
    """Return the update number of the report.

    The value is currently fixed at 1.
    """
    update_num = 1
    return update_num


def get_manual_key_obs():
    """Return the manually written key observations for this report.

    Looks for ``<key_obs_dir>/<execution_date_str>.html``, where both names
    are module-level variables of the notebook. The file is decoded as UTF-8
    explicitly so the result does not depend on the locale's default encoding.

    Returns:
        str: The file's contents, or a fixed fallback sentence when the file
        does not exist.
    """
    key_obs_file = Path(key_obs_dir) / f"{execution_date_str}.html"
    try:
        return key_obs_file.read_text(encoding="utf-8")
    except FileNotFoundError:
        # A report without hand-written observations is still valid.
        return "No key observations written for this report."


def get_last_report_date():
    """Return the date of the last report.

    The value is currently fixed at 1 January 1990.
    """
    return dt.date(year=1990, month=1, day=1)


def get_last_stable_date():
    """Return the last stable date.

    The value is currently fixed at 1 January 1990.
    """
    return dt.date(year=1990, month=1, day=1)


def get_last_stable_report_num():
    """Return the number of the last stable report.

    The value is currently fixed at 1.
    """
    stable_report_num = 1
    return stable_report_num


def get_last_prep_report_num():
    """Return the number of the last preparedness report.

    The value is currently fixed at 1.
    """
    prep_report_num = 1
    return prep_report_num


def stats_box_csv_to_values(csv_path) -> dict:
    """Read the first data row of a CSV file as a dict, rounding numeric values.

    Args:
        csv_path: Path of a CSV file whose first line is the header.

    Returns:
        dict: Maps each header name to the first data row's value. A value
        that parses as a finite number is rounded to the nearest ``int``;
        every other value (text, ``"nan"``, ``"inf"``, or ``None`` for a
        field missing from a short row) is returned unchanged.

    Raises:
        MissingDataError: If the file has no data row.
    """

    def _conditional_round(value):
        try:
            return round(float(value))
        # TypeError: None from a short row; ValueError: non-numeric text or
        # NaN; OverflowError: infinity cannot be rounded to an int.
        except (TypeError, ValueError, OverflowError):
            return value

    with open(csv_path, newline="") as csvfile:
        # The default avoids leaking a bare StopIteration for an empty file.
        first_row = next(csv.DictReader(csvfile), None)

    if first_row is None:
        raise MissingDataError(f"No data rows found in {csv_path}")

    return {key: _conditional_round(value) for key, value in first_row.items()}

In [None]:
# Echo the resolved input and output locations so they show up in the run
# output.
print(report_json)
print(html_out_folder)

# Templates are loaded from the file system, relative to template_path.
# Environment and FileSystemLoader are imported in the notebook's import cell.
jinja_env = Environment(loader=FileSystemLoader(template_path))


# Decode the JSON explicitly as UTF-8 rather than with the locale's default
# encoding.
report_data = json.loads(report_json.read_text(encoding="utf-8"))
report_data

In [8]:
# Pull the reporting period out of report_data. pop() removes both keys, so
# only the per-area entries are left for the path rewrite below.
# NOTE: because the keys are consumed, re-running this cell without first
# re-running the cell that loads report_data raises KeyError.
period_date_format = "%Y-%m-%d"
period_start = dt.datetime.strptime(
    report_data.pop("period_start"), period_date_format
)
period_end = dt.datetime.strptime(report_data.pop("period_end"), period_date_format)


def _as_report_date(moment):
    """Build a ru.ReportDate from the year, month and day of ``moment``."""
    return ru.ReportDate(moment.year, moment.month, moment.day)


date_range_start = _as_report_date(period_start)
date_range_end = _as_report_date(period_end)

# Rewrite every component path of every area as a Path, replacing each
# occurrence of "/opt/airflow/task_data" with the dag-run data directory.
local_report_data = {}
for area_name, area_data in report_data.items():
    local_report_data[area_name] = {
        component: Path(
            recorded_path.replace("/opt/airflow/task_data", str(dagrun_data_dir))
        )
        for component, recorded_path in area_data.items()
    }
report_data = local_report_data

In [None]:
# Take the "national" entry out of report_data; what remains is iterated as
# one entry per region by a later cell.
# NOTE: pop() consumes the key, so re-running this cell without re-running
# the preceding ones raises KeyError.
national_data = report_data.pop("national")

# The "Unnamed: 0" column is dropped from the table read from the CSV.
national_df = pd.read_csv(national_data["TablePopVariation"]).drop(
    columns="Unnamed: 0"
)

# Number of table rows placed on one summary page.
ROWS_PER_PAGE = 74

# One DataFrame per summary page, each rebuilt from up to ROWS_PER_PAGE row
# Series of the national table.
pages_dfs = list(
    map(
        pd.DataFrame.from_records,
        batched(national_df.iterrows(), ROWS_PER_PAGE),
    )
)

In [10]:
# Exactly five partner logos are needed: the unpacking raises ValueError when
# fewer than five partners are configured, and any beyond the fifth are
# ignored.
(
    partner_logo_1,
    partner_logo_2,
    partner_logo_3,
    partner_logo_4,
    partner_logo_5,
) = partners[:5]

# Keyword arguments passed to every page template of the report.
general_args = {
    "pub_date": publication_date,
    "period": ru.ReportPeriod(date_range_start, date_range_end),
    "period_short": ru.ReportPeriod(date_range_start, date_range_end).short(),
    "country": country.capitalize(),
    # One per region + one per summary table page + 1 for back matter
    # + 1 for cover.
    "total_pages": len(report_data) + len(pages_dfs) + 2,
    "partner_logo_1": ru.StaticReportImage(figure_path=partner_logo_1),
    "partner_logo_2": ru.StaticReportImage(figure_path=partner_logo_2),
    "partner_logo_3": ru.StaticReportImage(figure_path=partner_logo_3),
    "partner_logo_4": ru.StaticReportImage(figure_path=partner_logo_4),
    "partner_logo_5": ru.StaticReportImage(figure_path=partner_logo_5),
}

In [None]:
# Order the national table by "Average change", largest first, with missing
# values at the bottom; the cover table is built from it with rows=5.
top_average_change_df = national_df.sort_values(
    "Average change", ascending=False, na_position="last"
)

# Cover-specific fields; the shared ones are added from general_args.
cover_fields = {
    "key_obs": get_manual_key_obs(),
    "top_average_change_table": ru.ReportTable(top_average_change_df, rows=5),
    "period_start": date_range_start,
    "period_end": date_range_end,
    "country_pop_map": ru.ReportImage(national_data["MapResidentsTrendsNational"]),
    "page_num": 1,
}
cover = ru.PreparednessTemplateCover(**cover_fields, **general_args)
pprint(cover)

In [12]:
# Render the cover page to "01_cover.html" in the HTML output folder.
cover_out_path = html_out_folder / "01_cover.html"
ru.render_report(
    cover,
    page_name="preparedness_report/cover.html",
    style="preparedness_report/preparedness.css",
    env=jinja_env,
    out_path=cover_out_path,
)

In [None]:
# Template field -> key of the matching figure path in a region's components.
region_figure_keys = {
    "dept_pop_ts": "TimeSeriesAggregateResidents",
    "outlier_areas_ts": "TimeSeriesTop3Residents",
    "fluctuating_areas_ts": "TimeSeriesTop3FluctuatingResidents",
    "pop_trend_areas_map": "MapResidentsTrendsByRegion",
}

# Page 1 is the cover, so region pages start at 2. The counter carries on
# into the cells that render the summary tables and the back matter.
running_page_num = 2
for region_name, region_data in report_data.items():
    region_figures = {
        field: ru.ReportImage(region_data[component])
        for field, component in region_figure_keys.items()
    }
    region = ru.PreparednessTemplateDepartment(
        department_name=region_name.capitalize(),
        **region_figures,
        period_start=date_range_start,
        period_end=date_range_end,
        page_num=running_page_num,
        **general_args,
    )

    # Output file name: zero-padded page number, then the department name.
    region_out_path = (
        html_out_folder / f"{running_page_num:02}_{region.department_name}.html"
    )
    ru.render_report(
        region,
        page_name="preparedness_report/dept_overview.html",
        style="preparedness_report/preparedness.css",
        env=jinja_env,
        out_path=region_out_path,
    )
    running_page_num += 1
    pprint(region)

In [None]:
# Render one summary-table page per DataFrame in pages_dfs, continuing the
# page numbering where the region pages stopped.
# pp is imported in the notebook's import cell.

# Each page's table is split in two (left and right) at half of ROWS_PER_PAGE;
# the split size is the same for every page, so compute it once.
rows_per_column = ROWS_PER_PAGE // 2

for page_df in pages_dfs:
    full_table = ru.SplitReportTable(page_df, rows_per_column)
    pp(full_table)

    summary = ru.PreparednessTemplateSummary(
        # NOTE(review): region_name is whatever the region loop in the
        # previous cell left behind, i.e. the last region rendered (and a
        # NameError when there were no regions). Confirm which name the
        # summary pages are meant to carry.
        department_name=region_name.capitalize(),
        areas_of_interest_table_left=full_table.left,
        areas_of_interest_table_right=full_table.right,
        page_num=running_page_num,
        **general_args,
    )
    ru.render_report(
        summary,
        page_name="preparedness_report/summary_table.html",
        style="preparedness_report/preparedness.css",
        env=jinja_env,
        out_path=html_out_folder / f"{running_page_num:02}_summary_table.html",
    )
    running_page_num += 1

In [None]:
# Dates of the previous reports, referenced from the back matter.
last_report_date = get_last_report_date()
last_stable_date = get_last_stable_date()

# Back-matter-specific fields; page number and shared fields are added in the
# constructor call.
back_matter_fields = {
    "month": execution_date.month,
    "year": execution_date.year,
    "prep_month": last_report_date.month,
    "prep_year": last_report_date.year,
    "prep_num": get_last_prep_report_num(),
    "strec_month": last_stable_date.month,
    "strec_year": last_stable_date.year,
    "strec_num": get_last_stable_report_num(),
}
back_matter = ru.PreparednessTemplateBackMatter(
    **back_matter_fields,
    page_num=running_page_num,
    **general_args,
)
pprint(back_matter)

In [17]:
# Render the back matter; its file name starts with the zero-padded value of
# the running page counter.
back_matter_out_path = html_out_folder / f"{running_page_num:02}_back_matter.html"
ru.render_report(
    back_matter,
    page_name="preparedness_report/back_matter.html",
    style="preparedness_report/preparedness.css",
    env=jinja_env,
    out_path=back_matter_out_path,
)