In [None]:
import traceback
from typing import List, Optional
import logging
from bokeh.io import output_notebook
import polars as pl
from cfa_analysis.data_retrieval import (
    get_country_mapping,
    get_indicators_data,
    get_all_duplicate_dfs,
    get_imf_data_df,
    get_cfa_and_noncfa_data,
)
from cfa_analysis.data_cleanup import (
    find_outliers_IQR,
    clean_up_indicators_dict,
    find_duplicate_indicators,
    merge_duplicate_dfs,
)
from cfa_analysis.analysis import process_single_indicator
from cfa_analysis.data_classes import Indicator
from cfa_analysis.constants import (
    CFA_FRANC_ZONE,
    WEST_AFRICA,
    MIDDLE_AFRICA,
    SKIP_INDICATORS,
)

%load_ext jupyter_ai_magics
output_notebook()
%env OPENAI_API_KEY=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

In [None]:
def generate_metric_graphs(only_these_indicators: Optional[List[str]] = None) -> None:
    """Fetch the data for each eligible indicator and pass it to ``process_single_indicator``.

    An indicator is skipped when any of the following holds:

    * its abbreviation is in ``SKIP_INDICATORS``;
    * its abbreviation is in the local ``processed_dupes`` set (this set is handed to
      ``get_all_duplicate_dfs``, which presumably records the abbreviations it has
      already merged -- confirm against that helper);
    * ``only_these_indicators`` is non-empty and does not contain the abbreviation;
    * its ``source`` is one of the excluded sources listed below.

    Processing is best-effort: if fetching or processing one indicator raises, the
    error is logged with its traceback and the loop moves on to the next indicator.
    An indicator whose data could not be fetched is never processed.

    Args:
        only_these_indicators: Optional list of indicator abbreviations to restrict
            the run to. ``None`` (or an empty list) means no restriction.

    Returns:
        None. All results come from the side effects of ``process_single_indicator``.
    """
    logging.basicConfig(
        level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
    )
    all_countries, countries = get_country_mapping()
    indicators = clean_up_indicators_dict(get_indicators_data())
    duplicate_indicators = find_duplicate_indicators(indicators)
    # Abbreviations already handled as part of a duplicate group; can also be used
    # to mark any other indicator that should be skipped later in the loop.
    processed_dupes = set()
    # Wang-Jahan ends in 2015 and it's an index; needs more research to see if useful.
    # Capital Flows is omitted because its data appears very incomplete.
    excluded_sources = ("Wang-Jahan Index", "Capital Flows in Developing Economies")

    for indicator_abbrv in indicators:
        indicator_info = indicators.get(indicator_abbrv, {})
        if (
            indicator_abbrv in SKIP_INDICATORS
            or indicator_abbrv in processed_dupes
            or (only_these_indicators and indicator_abbrv not in only_these_indicators)
            # .get() so an entry without a "source" key does not abort the whole run.
            or indicator_info.get("source") in excluded_sources
        ):
            continue

        # Labels/units may carry stray newlines and may be None.
        raw_label = indicator_info.get("label", "")
        raw_unit = indicator_info.get("unit", "")
        indicator = Indicator(
            indicator_abbrv,
            indicator_info.get("description", ""),
            raw_label.strip("\n") if raw_label is not None else None,
            raw_unit.strip("\n") if raw_unit is not None else None,
        )

        # Fetch and process are guarded together: if the fetch fails there is no
        # valid frame for this indicator, so processing must not run. (Previously a
        # failed fetch fell through and processed the previous indicator's frame
        # under this indicator's label, or hit a NameError on the first iteration.)
        try:
            if (indicator.label, indicator.unit) in duplicate_indicators:
                all_data_df = merge_duplicate_dfs(
                    get_all_duplicate_dfs(
                        duplicate_indicators,
                        indicator.label,
                        indicator.unit,
                        processed_dupes,
                        countries,
                        all_countries,
                    ),
                    indicator.label,
                )
            else:
                all_data_df = get_imf_data_df(
                    get_cfa_and_noncfa_data(indicator.abbrv, countries, all_countries),
                    indicator.label,
                )

            # TODO: raise here when there is not enough data, e.g. by counting null
            # values of `indicator.label` for the countries in CFA_FRANC_ZONE,
            # WEST_AFRICA and MIDDLE_AFRICA and comparing against a threshold.

            process_single_indicator(
                all_data_df, indicator.label, indicator.unit, indicator.description
            )
        except Exception as e:
            # logging.exception records the traceback and logs at ERROR level, so the
            # failure stays visible even if basicConfig above did not take effect.
            logging.exception(
                f"issue with indicator {indicator.label}, abbrv: {indicator.abbrv}, exception: {e}"
            )


# Run the pipeline with no abbreviation filter, i.e. for every indicator that is
# not excluded inside generate_metric_graphs itself.
generate_metric_graphs(only_these_indicators=None)