# New time profiles

## Background

"Time profiles" is the term for the year-by-year projections of future activity that are output by the NHP model.

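To produce time profiles for a particular model scenario, set the variable `agg_results_folder` to the path of that scenario's `aggregated-model-results` folder on Azure. The time profiles will be output as CSV and Parquet files. Because the notebook changes directory with `%cd ../..` before running, the files are written relative to that working directory (the `nhp_products` root folder) rather than next to this notebook.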

Note that the step_counts currently combine all A&E arrival types (walk-in and ambulance), so the time profiles combine them as well.

[The methodology for this notebook is detailed here](https://connect.strategyunitwm.nhs.uk/nhp/project_information/user_guide/glossary.html#time-profiles)

In [None]:
# Path of the scenario's folder inside the results container; later cells pass
# it to az.load_agg_params and az.load_agg_results.
# NOTE(review): as written nothing follows the `=` except a comment, so this
# line is not valid Python until a path string is supplied.
agg_results_folder = #'aggregated-model-results/vX.X/RXX/scenarioname/datetime/'

In [None]:
# We want to be in the nhp_products root folder so that we can load nhpy.az
%cd ../..

from nhpy import az, process_data, process_results

%load_ext autoreload
%autoreload 2

In [None]:
import os
import numpy as np
import pandas as pd

from dotenv import load_dotenv


# Load environment variables (via python-dotenv) and pick out the Azure storage settings
load_dotenv()
account_url = os.environ.get("AZ_STORAGE_EP")
results_container = os.environ.get("AZ_STORAGE_RESULTS")

# Connect to the results container, then fetch the params of the chosen scenario
results_connection = az.connect_to_container(account_url, results_container)
params = az.load_agg_params(results_connection, agg_results_folder)

In [None]:
# Build the "principal" step counts table, indexed by
# change_factor / activity_type / strategy / sitetret / pod / measure.
# NOTE(review): convert_results_format is expected to provide the "mean" column
# that later cells rename to the horizon year - confirm against process_results.

baseline_year = params["start_year"]
horizon_year = params["end_year"]

step_counts = az.load_agg_results(results_connection, agg_results_folder, "step_counts")

# Baseline rows are used separately further down, so they are left out here
non_baseline_step_counts = step_counts[step_counts["change_factor"] != "baseline"]
metadata_columns = ["dataset", "scenario", "app_version", "create_datetime", "model_runs"]
index_columns = ["change_factor", "activity_type", "strategy", "sitetret", "pod", "measure"]

step_counts_df = (
    process_results.convert_results_format(non_baseline_step_counts, include_baseline=False)
    .drop(columns=metadata_columns)
    .set_index(index_columns)
)
change_factors = step_counts_df.index.get_level_values("change_factor").unique()
step_counts_df.head()

In [None]:
# Get time profile mappings from params. All change_factors in step_counts need to have a time profile type
# Handle cases where time profiles are missing. Could replace with dict.get("linear")?
import copy

# Deep copy: the activity_avoidance entries are nested dicts that get a new key
# added below, and a shallow .copy() would write that key into params itself.
time_profile_mappings = copy.deepcopy(params["time_profile_mappings"])

# Top-level entries set to 'none' fall back to a linear profile
for k, v in time_profile_mappings.items():
    if v == "none":
        time_profile_mappings[k] = "linear"

# Give the activity avoidance interaction term a linear profile for every
# activity type listed under activity_avoidance
if "activity_avoidance" in time_profile_mappings:
    for strategies in time_profile_mappings["activity_avoidance"].values():
        strategies["activity_avoidance_interaction_term"] = "linear"

In [None]:
# For every intermediate year between baseline and horizon, look up the factor
# that applies to each change_factor (non-mitigators) or strategy (mitigators)
factor_dict = {}
mitigator_change_factors = ["activity_avoidance", "efficiencies"]
n_years = horizon_year - baseline_year

for year in range(1, n_years):
    time_profiles_dict = process_results.create_time_profiles(n_years, year)
    for change_factor in change_factors:
        # every change_factor gets an entry in factor_dict, even if it stays empty
        factor_dict.setdefault(change_factor, {})

        if change_factor in mitigator_change_factors:
            # Mitigators are nested one level deeper in time_profile_mappings:
            # change_factor -> (group) -> strategy -> time profile type,
            # and their factors are stored per strategy rather than per change_factor
            for strategies in time_profile_mappings[change_factor].values():
                for strategy, time_profile_type in strategies.items():
                    strategy_factors = factor_dict.setdefault(strategy, {})
                    strategy_factors[year] = process_results.get_time_profiles_factor(
                        time_profile_type, time_profiles_dict, baseline_year
                    )
            continue

        # Non-mitigators: default to linear if not in params.
        # For example model_interaction_term is not in params
        time_profile_type = time_profile_mappings.get(change_factor, "linear")
        factor_dict[change_factor][year] = process_results.get_time_profiles_factor(
            time_profile_type, time_profiles_dict, baseline_year
        )



In [None]:
# The step counts only carry a single "convert_to_tele" strategy, whereas the
# time_profile settings split it into categories such as
# "convert_to_tele_adult_non-surgical" and "convert_to_tele_adult_surgical".
# Average the factors of every key starting with "convert_to_tele", year by
# year, and store the result as the combined "convert_to_tele" factor.

convert_to_tele = {}
for name, yearly_factors in factor_dict.items():
    if not name.startswith('convert_to_tele'):
        continue
    for year, factor in yearly_factors.items():
        convert_to_tele.setdefault(year, []).append(factor)

convert_to_tele = {year: np.mean(factors) for year, factors in convert_to_tele.items()}
factor_dict['convert_to_tele'] = convert_to_tele



In [None]:
# Working copy of the step counts: the "mean" column becomes the horizon-year
# column and the rows are sorted by index

working_step_counts_df = step_counts_df.rename(columns={"mean": horizon_year})
working_step_counts_df = working_step_counts_df.sort_index().copy()

In [None]:
# Get actual values of change for each param and year.
# Each intermediate year's value is the horizon-year value multiplied by that
# year's factor. Whole groups of rows are written per year instead of one cell
# at a time, which gives the same numbers with far fewer .loc assignments.

horizon_values = working_step_counts_df[horizon_year].to_numpy()
change_factor_level = working_step_counts_df.index.get_level_values("change_factor")
strategy_level = working_step_counts_df.index.get_level_values("strategy")

# Non mitigator ones first: factors are keyed by change_factor
for change_factor in change_factors:
    if change_factor in ("activity_avoidance", "efficiencies"):
        continue
    rows = change_factor_level == change_factor
    for year, factor in factor_dict[change_factor].items():
        # plain arrays on the right-hand side, so no index alignment is involved
        working_step_counts_df.loc[rows, baseline_year + year] = horizon_values[rows] * factor

# Mitigators: factors are keyed by strategy; rows whose strategy is "-" are skipped
for strategy in strategy_level.unique():
    if strategy == "-":
        continue
    rows = strategy_level == strategy
    for year, factor in factor_dict[strategy].items():
        working_step_counts_df.loc[rows, baseline_year + year] = horizon_values[rows] * factor

# Put the year columns in chronological order
working_step_counts_df = working_step_counts_df.sort_index(axis=1)

In [None]:
# Template for the time profiles: baseline-year totals per sitetret / pod / measure,
# summed from the rows where change_factor is "baseline" and model_run is 1

is_baseline = step_counts["change_factor"] == "baseline"
is_first_run = step_counts["model_run"] == 1
baseline_rows = step_counts[is_baseline & is_first_run]

baseline_totals = baseline_rows.groupby(["sitetret", "pod", "measure"])[["value"]].sum()
time_profiles_df = baseline_totals.rename(columns={"value": baseline_year})

In [None]:
# Sum the step counts down to the same sitetret / pod / measure index as the
# template df, keeping only the numeric (year) columns in chronological order

flat_step_counts = working_step_counts_df.reset_index()
step_counts_grouped = (
    flat_step_counts.groupby(["sitetret", "pod", "measure"])
    .sum(numeric_only=True)
    .sort_index(axis=1)
)

In [None]:
# Make sure every sitetret / pod / measure found in the step counts also has a
# row in time_profiles_df; rows absent from the baseline start from 0
missing_indices = step_counts_grouped.index.difference(time_profiles_df.index)
if not missing_indices.empty:
    filler_df = pd.DataFrame(0, index=missing_indices, columns=[baseline_year])
    time_profiles_df = pd.concat([time_profiles_df, filler_df])
time_profiles_df = time_profiles_df.sort_index()

In [None]:
# Value for each year = baseline-year value + the summed step-count change for that year
years = step_counts_grouped.columns

# Line the changes up with the template rows. As with the row-by-row lookup this
# replaces, .loc raises a KeyError if a template row has no step-count entry.
changes = step_counts_grouped.loc[time_profiles_df.index]

# One vectorised addition per year column instead of one .loc write per row
for year in years:
    time_profiles_df[year] = time_profiles_df[baseline_year] + changes[year]
time_profiles_df

In [None]:
# Save the time profiles as Parquet and CSV, named after the scenario
# (relative paths, so they land in the current working directory)
output_stem = f'{params["scenario"]}_timeprofile'
time_profiles_df.to_parquet(f'{output_stem}.parquet')
time_profiles_df.to_csv(f'{output_stem}.csv')

In [None]:
# QA: check if the horizon-year time profile aligns with the "default" results
default_results = az.load_agg_results(results_connection, agg_results_folder, "default")
default_df = process_results.convert_results_format(default_results)


def _to_step_counts_measure(measure):
    """Map the "ambulance" and "walk-in" measures onto "arrivals"; leave others as-is."""
    if measure in ("ambulance", "walk-in"):
        return "arrivals"
    return measure


# Account for differences between step_counts measures and default measures
default_df["measure"] = default_df["measure"].apply(_to_step_counts_measure)
default_df = default_df[default_df["measure"] != "procedures"]

# Sum to the same sitetret / pod / measure rows, in the same order, as time_profiles_df
default_df = (
    default_df.groupby(["sitetret", "pod", "measure"])[["mean"]]
    .sum()
    .loc[time_profiles_df.index]
)
# Script needs to print this out for checking
default_df["mean"].compare(time_profiles_df[horizon_year])

In [None]:
# Summarise the QA comparison: how many rows differ between the default results
# and the horizon-year time profile. The count is computed outside the f-string
# because reusing double quotes inside a double-quoted f-string is a
# SyntaxError on Python versions before 3.12.
n_different = len(default_df["mean"].compare(time_profiles_df[horizon_year]))
print(f"{n_different} rows different from total of {len(time_profiles_df)} rows")