In [1]:
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

import json

from model.AaEModel import AaEModel
from model.InpatientsModel import InpatientsModel
from model.OutpatientsModel import OutpatientsModel

# Run Models

In [2]:
# identifiers of the model run to post-process and of the dataset it was run on
model_run = "test/20220110_104353"
dataset = "RL4"

# folder that holds the results of this model run for this dataset
model_path = f"data/{dataset}/results/{model_run}"

## Inpatients

In [3]:
# Build the inpatients frame: baseline rows plus the principal model projection
# (distinguished by "type"), with admission_group, pod and beddays columns added.

# load the baseline data (hsagrp is dropped straight away)
ip = (pq
  .read_pandas(f"data/{dataset}/ip.parquet")
  .to_pandas()
  .drop(["hsagrp"], axis = "columns")
)
# create the model instance
ip_m = InpatientsModel(model_path)
# run the principal projection: gives the change factors and the modelled rows
ip_mr_change_factors, ip_mr = ip_m.run(0)
# some rows need to be shifted to outpatients: these are the model rows with classpat "-1"
ip_op_row_ix = ip_mr["classpat"] == "-1"
# count the shifted rows per group; these counts are appended to the outpatients
# model results in the outpatients cell
ip_op_rows = (ip
  .merge(ip_mr.loc[ip_op_row_ix, ["rn"]], on = "rn")
  .value_counts(["age", "sex", "imd04_decile", "ethnos", "tretspef"])
  .to_frame("attendances")
  .reset_index()
  .assign(is_first = False, has_procedures = True, tele_attendances = 0)
)
# remove the ip to op rows; the baseline classpat/speldur are dropped before the merge
# so the values come from the model results (assumes ip_mr carries both columns)
ip_mr = (ip
  .drop(["classpat", "speldur"], axis = "columns")
  .merge(ip_mr[~ip_op_row_ix], on = "rn")
)
# join the results and the baseline data together
ip["type"] = "baseline"
ip_mr["type"] = "model"
ip = pd.concat([ip, ip_mr])
# create an admission group column: admimeth values starting with "1" are elective
ip["admission_group"] = "non-elective"
# na = False: a missing admimeth would otherwise leave NaN in the mask, which makes the
# .loc assignment raise; such rows keep the default "non-elective" group
ip.loc[ip["admimeth"].str.startswith("1", na = False), "admission_group"] = "elective"
# quick dq fix: convert any "non-elective" daycases to "elective"
ip.loc[ip["classpat"].isin(["2", "3"]), "admission_group"] = "elective"
# create a "pod" column, starting with the admission group and adding a suffix based
# on classpat (rows with any other classpat keep the bare admission group)
ip["pod"] = ip["admission_group"]
ip.loc[ip["classpat"].isin(["1", "4"]), "pod"] += "_admission"
ip.loc[ip["classpat"].isin(["2", "3"]), "pod"] += "_daycase"
ip.loc[ip["classpat"] == "5", "pod"] += "_birth-episode"
# beddays is speldur + 1, so a spell with speldur 0 counts as one bed day
ip["beddays"] = ip["speldur"] + 1

## Outpatients

In [4]:
# Build the outpatients frame: baseline and principal model attendances summed per
# group (distinguished by "type"), with a pod column added.

# load the baseline data (hsagrp is dropped straight away)
op = (pq
  .read_pandas(f"data/{dataset}/op.parquet")
  .to_pandas()
  .drop(["hsagrp"], axis = "columns")
)
# make sure to convert imd04_decile to a string
op["imd04_decile"] = op["imd04_decile"].astype(str)
# create the model instance
op_m = OutpatientsModel(model_path)
# run the principal projection and attach the grouping columns to its rows
op_mr = (op[["rn", "age", "sex", "imd04_decile", "ethnos", "tretspef", "is_first", "has_procedures"]]
  .merge(op_m.run(0), on = "rn")
  .drop("rn", axis = "columns")
)
# add the rows that were shifted from inpatients to outpatients
op_mr = pd.concat([op_mr, ip_op_rows])
# join the results and the baseline data together
op["type"] = "baseline"
op_mr["type"] = "model"
# the baseline is restricted to the model result's columns so both frames line up
op = (pd.concat([op[op_mr.columns.tolist()], op_mr])
  .groupby(["age", "sex", "imd04_decile", "ethnos", "tretspef", "is_first", "has_procedures", "type"], as_index = False)
  # use the string name: passing the builtin `sum` to agg is deprecated in recent pandas
  .agg("sum")
)
# create the pod column: first / follow-up, with procedures overriding both
op.loc[ op["is_first"], "pod"] = "op_first"
op.loc[~op["is_first"], "pod"] = "op_follow-up"
op.loc[op["has_procedures"], "pod"] = "op_procedure"
# repromote imd04_decile to categorical, make sure to use ip's categories
op["imd04_decile"] = pd.Categorical(
  op["imd04_decile"].astype("category"),
  ip["imd04_decile"].cat.categories
)

## A&E

In [5]:
# Build the A&E frame: baseline and principal model results summed per group
# (distinguished by "type"), keeping the pod label of each group.

# load the baseline data (hsagrp is dropped straight away)
aae = (pq
  .read_pandas(f"data/{dataset}/aae.parquet")
  .to_pandas()
  .drop(["hsagrp"], axis = "columns")
)
# create the pod type: "type-<aedepttype>_" followed by the arrival mode
aae["pod"] = "type-" + aae["aedepttype"] + "_"
aae.loc[aae["aearrivalmode"] == 1, "pod"] += "ambulance"
aae.loc[aae["aearrivalmode"] != 1, "pod"] += "walk-in"
# create the model instance
aae_m = AaEModel(model_path)
# run the principal projection and attach the grouping columns to its rows
aae_mr = (aae[["rn", "age", "sex", "imd04_decile", "ethnos", "aedepttype", "aearrivalmode", "pod"]]
  .merge(aae_m.run(0), on = "rn")
  .drop("rn", axis = "columns")
)
# join the results and the baseline data together
aae["type"] = "baseline"
aae_mr["type"] = "model"
# the baseline is restricted to the model result's columns so both frames line up.
# "pod" has to be one of the grouping keys: it is a string column, so leaving it to be
# aggregated would sum (i.e. concatenate) the labels of every row in the group. It is
# fully determined by aedepttype and aearrivalmode, so the groups themselves are unchanged.
aae = (pd.concat([aae[aae_mr.columns.tolist()], aae_mr])
  .groupby(["age", "sex", "imd04_decile", "ethnos", "aedepttype", "aearrivalmode", "pod", "type"], as_index = False)
  # use the string name: passing the builtin `sum` to agg is deprecated in recent pandas
  .agg("sum")
)

# Save results

In [6]:
# write each principal results table to its own parquet file in the model run's folder
principal_outputs = {
  f"{model_path}/ip_principal.parquet": ip,
  f"{model_path}/op_principal.parquet": op,
  f"{model_path}/aae_principal.parquet": aae,
}
for parquet_path, results in principal_outputs.items():
  results.to_parquet(parquet_path)

In [7]:
# handle encoding of numpy values (source: https://stackoverflow.com/a/65151218/4636789)
def np_encoder(object):
  """Fallback serialiser for use as `json.dump(..., default = np_encoder)`.

  Converts numpy scalars to the equivalent Python scalar and numpy arrays to
  (nested) lists.

  Args:
    object: the value that json could not serialise by itself. The name shadows
      the builtin `object`; it is kept so existing callers are unaffected.

  Returns:
    A JSON-serialisable Python value.

  Raises:
    TypeError: for any other type, so that an unsupported value fails loudly
      rather than being silently written to the file as `null`.
  """
  if isinstance(object, np.generic):
    return object.item()
  if isinstance(object, np.ndarray):
    return object.tolist()
  raise TypeError(f"Object of type {type(object).__name__} is not JSON serializable")

# write the inpatients principal change factors as indented json; np_encoder is the
# fallback used for the values json cannot serialise by itself
change_factors_path = f"{model_path}/ip_principal_change_factors.json"
with open(change_factors_path, "w") as f:
  json.dump(ip_mr_change_factors, f, default = np_encoder, indent = 2)

# Model Aggregations

In [8]:
# summarise the inpatients results for each pod, baseline vs model:
#   n = number of rows, speldur = mean speldur, beddays = total beddays
# named aggregation with string function names is used because passing callables such
# as np.mean / np.sum to agg is deprecated in recent pandas; it also names the count
# column directly, so no rename is needed
(ip
  .groupby(["pod", "type"])
  .agg(
    n = ("rn", "size"),
    speldur = ("speldur", "mean"),
    beddays = ("beddays", "sum")
  )
)

Unnamed: 0_level_0,Unnamed: 1_level_0,n,speldur,beddays
pod,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
elective_admission,baseline,7484,4.267905,39425
elective_admission,model,9075,4.321763,48295
elective_daycase,baseline,62207,0.0,62207
elective_daycase,model,72725,0.0,72725
non-elective_admission,baseline,62712,4.30782,332864
non-elective_admission,model,66931,3.96722,332461
