## Detailed results

This notebook produces detailed activity-avoided results for model versions >= 3.1, which include sex and the birth episodes flag in the inpatients data.

Assumes you have already authenticated via `az`. Writes the avoided inpatient activity to a `data/` folder, broken down by HRG, an indicator of whether LOS is 0 or >0, and pod.

Also assumes that the scenario has already been run with `full_model_results = True`.

This notebook uses the new format of the aggregated results (parquet files) rather than the old, very large JSON file.

In [None]:
# Path of the aggregated results folder to process (edit this for each scenario).
# Later cells split this value on "/" and read the model version, dataset,
# scenario name and creation datetime from segments 1 to 4, so keep the
# five-part layout shown in the placeholder below.
agg_results_folder = "aggregated-model-results/vX.X/RXX/scenarioname/datetime"

In [None]:
# We want to be in the nhp_products root folder so that we can load nhpy.az
# NOTE: `%cd ../..` is relative to the current working directory, so running
# this cell a second time moves up another two levels. Restart the kernel
# before re-running it. Later cells use paths relative to this root folder
# (e.g. "notebooks/PRODUCT_detailed_activity-avoided/data/").
%cd ../..

import pandas as pd
import numpy as np
from collections import defaultdict
from dotenv import load_dotenv
import os
from nhpy import az, process_data

# autoreload mode 2 reloads imported modules before each cell execution, so
# edits to the nhpy source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Setup: pull the storage settings from the environment and open both containers

# Populate os.environ from a local .env file before reading the settings
load_dotenv()

# os.getenv returns None for any variable that is not set
account_url, results_container, data_container, api_key = (
    os.getenv(env_name)
    for env_name in (
        "AZ_STORAGE_EP",
        "AZ_STORAGE_RESULTS",
        "AZ_STORAGE_DATA",
        "API_KEY",
    )
)

# One connection per container; both use the same storage endpoint
results_connection = az.connect_to_container(account_url, results_container)
data_connection = az.connect_to_container(account_url, data_container)

In [None]:
# Set variables

# Parameters stored alongside the aggregated results of this scenario
params = az.load_agg_params(results_connection, agg_results_folder)

# Split the folder path once; segment 0 is the leading folder name and
# segments 1-4 identify the model version, dataset, scenario and run datetime.
folder_parts = agg_results_folder.split("/")
model_version = folder_parts[1]
dataset = folder_parts[2]
scenario_name = folder_parts[3]
create_datetime = folder_parts[4]

baseline_year = params["start_year"]

In [None]:
# Resolve the full (patched) model version to use when loading the data.
# The results folder name is truncated (e.g. v3.0) and omits the patch version,
# whereas the data is stored under the full version (e.g. v3.0.1).
model_version_data = az.find_latest_version(
    data_connection,
    model_version,
)
print(f"Using data: {model_version_data}")

In [None]:
# Add Data folder if it doesn't exist

# exist_ok=True creates any missing parent folders and does nothing when the
# folder is already there, so the cell is safe to re-run and there is no gap
# between checking for the folder and creating it.
os.makedirs("notebooks/PRODUCT_detailed_activity-avoided/data/", exist_ok=True)

In [None]:
# Start processing: load the "ip" data file for this dataset and baseline year

original_df = az.load_data_file(
    data_connection,
    model_version_data,
    dataset,
    "ip",
    baseline_year,
)

# Preview the first rows as the cell output
original_df.head()

In [None]:
# Number of model runs to load; run files are numbered from 1.
N_MODEL_RUNS = 256

# We want to use the speldur and classpat from the results, not from the original df.
# Dropping them does not depend on the run, so do it once here instead of on
# every iteration. drop() returns a new frame, so original_df is left untouched
# and no explicit copy is needed.
baseline_df = original_df.drop(columns=["speldur", "classpat"])

# Maps each key of the processed results to a list with one value per model run
model_runs = {}
for run in range(1, N_MODEL_RUNS + 1):
    df = az.load_model_run_results_file(
        results_connection,
        model_version,
        dataset,
        scenario_name,
        create_datetime,
        "ip_avoided",
        run,
    )
    # Inner join on "rn": keep only the baseline rows that appear in this run's results
    merged = baseline_df.merge(df, on="rn", how="inner")
    results_dict = process_data.process_ip_activity_avoided(merged).to_dict()
    # NOTE(review): a key that is absent from some runs ends up with a shorter
    # list than the others - confirm process_model_runs_dict copes with that.
    for k, v in results_dict["value"].items():
        model_runs.setdefault(k, []).append(v)

In [None]:
# Turn the per-run value lists into a single frame.
# NOTE(review): the column names presumably label the parts of each key in
# model_runs - confirm against process_data.process_model_runs_dict.
model_runs_df = process_data.process_model_runs_dict(
    model_runs,
    columns=["pod", "los_group", "sushrg", "measure"],
)

In [None]:
# Write the results to the data folder, with the scenario name in the file name
model_runs_df.to_csv(
    "notebooks/PRODUCT_detailed_activity-avoided/data/"
    f"{scenario_name}_ip_activity_avoided_hrg.csv"
)