In [516]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [517]:
import reader
from cleaners import COUNTY_FIPS_LOOKUP
import pandas as pd
import math

In [518]:
import altair as alt
import altair_latimes as lat

In [519]:
# Register the theme object exposed by `altair_latimes` under the name "latimes".
alt.themes.register("latimes", lat.theme)
# Activate that theme; the value returned by `enable` is echoed as the cell output.
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

## Import

Read in raw capacity data and locations from HHS

In [520]:
# `raw_csv` is only reachable through the imported `reader` module; the bare name
# is undefined on a fresh kernel. `fips_code` and `zip` are forced to strings
# (presumably so leading zeros are preserved -- confirm against the raw file).
raw_data = reader.raw_csv(
    "hhs/hospital_capacity.csv", dtype={"fips_code": str, "zip": str}
)

💽📖 ./raw/hhs/hospital_capacity.csv ➡️ 92269 records


In [521]:
# Go through the `reader` module: the bare `raw_csv` name is never imported.
# `CCN` and `Zip_Code` are read as strings; `CCN` is zero-filled further down.
raw_locations = reader.raw_csv(
    "hhs/hospital_locations.csv", dtype={"CCN": str, "Zip_Code": str}
)

💽📖 ./raw/hhs/hospital_locations.csv ➡️ 6025 records


Read in county metadata

In [522]:
# Load the metadata table, keeping the `fips` column as text.
metadata_df = reader.raw_csv(
    "latimes/city-metadata.csv",
    dtype=dict(fips=str),
)

💽📖 ./raw/latimes/city-metadata.csv ➡️ 60 records


In [523]:
# Keep only the columns needed for the county/region join.
metadata_df = metadata_df.loc[:, ["county", "fips", "population", "region"]]

## Transform

Filter the hospitals down to just CA

In [524]:
# Work on an independent copy so the raw HHS frame stays untouched.
ca_data = raw_data.copy(deep=True)

In [525]:
# Build the mask from the frame being filtered (not from `raw_data`) and take an
# explicit copy, so the column assignment in the next step writes to a real frame
# instead of a slice (avoids pandas' SettingWithCopyWarning).
ca_data = ca_data.loc[ca_data["state"] == "CA"].copy()

Merge county names and regions

In [526]:
# Characters 2-4 of `fips_code` become the join key `fips`.
ca_data["fips"] = ca_data["fips_code"].str.slice(2, 5)

In [527]:
# Left join keeps every hospital row even when its `fips` has no metadata match.
ca_timeseries = ca_data.merge(metadata_df, how="left", on="fips")

Find min and max dates

In [528]:
# Earliest collection week present in the merged data.
ca_timeseries["collection_week"].min()

'2020-07-31'

In [529]:
# Latest collection week present in the merged data.
ca_timeseries["collection_week"].max()

'2020-12-04'

Trim to just the metrics we need

In [530]:
# Column subset of the merged frame: identifying metadata, the `7_day_sum`
# metrics, and their matching `7_day_coverage` columns. Commented-out names are
# alternatives that are deliberately not selected.
# NOTE(review): `ca_timeseries_trim` is not referenced again in the visible part
# of the notebook -- confirm whether it is still needed.
ca_timeseries_trim = ca_timeseries.loc[
    :,
    [
        # --- metadata ---
        "collection_week",
        # "ccn",
        "hospital_name",
        # "address",
        # "city",
        # "zip",
        "county",
        "fips",
        "region",
        # --- 7-day sums ---
        "total_beds_7_day_sum",  # inpatient and outpatient (what CDPH uses)
        # "all_adult_hospital_beds_7_day_sum",
        # "inpatient_beds_7_day_sum",  # inpatient only
        # "all_adult_hospital_inpatient_beds_7_day_sum",
        "inpatient_beds_used_7_day_sum",
        # "all_adult_hospital_inpatient_bed_occupied_7_day_sum",
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum",
        # "total_adult_patients_hospitalized_confirmed_covid_7_day_sum",
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum",
        # "total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum",
        # "total_icu_beds_7_day_sum",
        "total_staffed_adult_icu_beds_7_day_sum",  # subset of above
        # "icu_beds_used_7_day_sum",
        "staffed_adult_icu_bed_occupancy_7_day_sum",  # subset of above
        "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum",
        # "staffed_icu_adult_patients_confirmed_covid_7_day_sum",
        # --- 7-day coverage ---
        "total_beds_7_day_coverage",
        "inpatient_beds_used_7_day_coverage",
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage",
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage",
        "total_staffed_adult_icu_beds_7_day_coverage",
        "staffed_adult_icu_bed_occupancy_7_day_coverage",
        "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage",
    ],
]

When a field has fewer than four patients, it is redacted to -999999.0. Replace those sentinel values with 0 so they don't distort the sums and averages below.

In [531]:
# Swap the -999999.0 redaction sentinel for 0 everywhere in the frame.
ca_timeseries = ca_timeseries.replace(to_replace=-999999.0, value=0)

### Hospitalizations

Convert the `7_day_sum` columns to `7_day_lat_avg` columns by dividing each sum by its matching `7_day_coverage` value

In [532]:
# Stems of the weekly metrics that are converted from 7-day sums to averages.
metric_stems = [
    "total_beds",
    "inpatient_beds_used",
    "total_adult_patients_hospitalized_confirmed_and_suspected_covid",
    "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid",
    "total_staffed_adult_icu_beds",
    "staffed_adult_icu_bed_occupancy",
    "staffed_icu_adult_patients_confirmed_and_suspected_covid",
]

# Parallel column-name lists: position i of each list refers to the same metric.
numerators = [f"{stem}_7_day_sum" for stem in metric_stems]
denominators = [f"{stem}_7_day_coverage" for stem in metric_stems]
lat_avgs = [f"{stem}_7_day_lat_avg" for stem in metric_stems]

# One division per metric: average = 7-day sum / 7-day coverage.
# The new columns are created in the same order as the lists above.
# NOTE(review): a coverage of 0 yields inf/NaN here -- confirm that is acceptable.
for sum_col, coverage_col, avg_col in zip(numerators, denominators, lat_avgs):
    ca_timeseries[avg_col] = ca_timeseries[sum_col] / ca_timeseries[coverage_col]

Add up total covid patients (adult and pediatric), using `fillna(0)` to account for children's hospitals and facilities with no pediatric cases

In [542]:
# Adult + pediatric covid patients; a missing value on either side counts as 0.
ca_timeseries["total_covid_patients_7_day_avg_lat"] = (
    ca_timeseries[
        [
            "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg",
            "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg",
        ]
    ]
    .fillna(0)
    .sum(axis=1)
)

Calculate percent of beds occupied by covid patients

Note: I've chosen to use `total_beds_7_day_sum`, which matches the definition the state puts out for its hospital metrics

In [543]:
# Share of total beds taken by covid patients (adult + pediatric).
ca_timeseries["pct_beds_occupied_covid"] = ca_timeseries[
    "total_covid_patients_7_day_avg_lat"
].div(ca_timeseries["total_beds_7_day_lat_avg"])

Calculate percent of beds occupied

In [544]:
# Share of total beds in use: inpatient beds used over total beds.
ca_timeseries["pct_beds_occupied"] = ca_timeseries[
    "inpatient_beds_used_7_day_lat_avg"
].div(ca_timeseries["total_beds_7_day_lat_avg"])

In [545]:
# Preview: rows from the most recent week, highest covid bed share first.
latest_week = ca_timeseries["collection_week"].max()
(
    ca_timeseries.loc[ca_timeseries["collection_week"] == latest_week]
    .sort_values("pct_beds_occupied_covid", ascending=False)
    .head()
)

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,...,total_beds_7_day_lat_avg,inpatient_beds_used_7_day_lat_avg,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg,total_staffed_adult_icu_beds_7_day_lat_avg,staffed_adult_icu_bed_occupancy_7_day_lat_avg,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_lat_avg,total_covid_patients_7_day_avg_lat,pct_beds_occupied_covid,pct_beds_occupied
237,50298,2020-12-04,CA,50298,BARSTOW COMMUNITY HOSPITAL,820 E MOUNTAIN VIEW STREET,BARSTOW,92311,Short Term,6071,...,41.0,30.0,34.285714,0.0,8.0,7.428571,13.0,34.285714,0.836237,0.731707
51,51301,2020-12-04,CA,51301,ADVENTIST HEALTH TEHACHAPI VALLEY,1100 MAGELLAN,TEHACHAPI,93561,Critical Access Hospitals,6029,...,19.428571,16.714286,11.428571,0.0,4.0,2.571429,2.285714,11.428571,0.588235,0.860294
233,50758,2020-12-04,CA,50758,MONTCLAIR HOSPITAL MEDICAL CENTER,5000 SAN BERNARDINO ST,MONTCLAIR,91763,Short Term,6071,...,54.857143,43.142857,30.142857,0.0,10.0,9.571429,9.571429,30.142857,0.549479,0.786458
242,50709,2020-12-04,CA,50709,DESERT VALLEY HOSPITAL,16850 BEAR VALLEY RD,VICTORVILLE,92395,Short Term,6071,...,159.857143,123.571429,85.142857,0.0,21.285714,20.857143,16.714286,85.142857,0.532618,0.773012
56,50121,2020-12-04,CA,50121,ADVENTIST HEALTH HANFORD,115 MALL DRIVE,HANFORD,93230,Short Term,6031,...,147.285714,132.285714,73.0,0.0,19.142857,16.142857,8.714286,73.0,0.495635,0.898157


### Chart

In [546]:
# hospitalization_pivot = (
#     ca_latest_regions_metrics[
#         [
#             "hospital_name",
#             "county",
#             "total_covid_patients_7_day_avg_lat",
#             # "non_covid_patients_7_day_avg_lat",
#             "total_available_beds_7_day_avg_lat",
#         ]
#     ]
#     .set_index(["hospital_name", "county"])
#     .stack()
#     .reset_index()
#     .rename(columns={"level_1": "type", 0: "patients"})
# )

In [547]:
# segments = [
#     "total_covid_patients_7_day_avg_lat",
#     # "non_covid_patients_7_day_avg_lat",
#     "total_available_beds_7_day_avg_lat",
# ]

In [548]:
# segment_order = dict((v, i + 1) for i, v in enumerate(segments))

In [549]:
# hospitalization_pivot["segment_order"] = hospitalization_pivot.level_2.map(
#     segment_order
# )

In [550]:
# hospitalization_pivot_sorted = hospitalization_pivot.sort_values(
#     ["county", "hospital_name", "segment_order"]
# )

In [551]:
# alt.Chart(
#     hospitalization_pivot_sorted[
#         hospitalization_pivot_sorted.county == "San Bernardino"
#     ]
# ).mark_bar().encode(
#     x=alt.X("sum(patients)", stack="normalize"),
#     y="hospital_name",
#     color=alt.Color(
#         "level_2",
#         sort=alt.EncodingSortField("segment_order", order="descending"),
#     ),
#     order="segment_order",
#     tooltip=[
#         alt.Tooltip("hospital_name"),
#     ],
# ).properties(
#     title="Percentage of covid patients occupying inpatient beds of county hospitals"
# )

### ICUs

Percent of ICU beds occupied by covid patients

In [573]:
# Covid ICU patients as a share of staffed adult ICU beds.
ca_timeseries["pct_icu_beds_occupied_covid"] = ca_timeseries[
    "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_lat_avg"
].div(ca_timeseries["total_staffed_adult_icu_beds_7_day_lat_avg"])

Percent of ICU beds occupied

In [574]:
# Occupied staffed adult ICU beds as a share of all staffed adult ICU beds.
ca_timeseries["pct_icu_beds_occupied"] = ca_timeseries[
    "staffed_adult_icu_bed_occupancy_7_day_lat_avg"
].div(ca_timeseries["total_staffed_adult_icu_beds_7_day_lat_avg"])

In [554]:
# icu_pivot = (
#     ca_latest_regions_metrics[
#         [
#             "hospital_name",
#             "county",
#             "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_avg_lat",
#             # "non_covid_icu_patients_7_day_avg_lat",
#             "total_available_icu_beds_7_day_avg_lat",
#         ]
#     ]
#     .set_index(["hospital_name", "county"])
#     .stack()
#     .reset_index()
#     .rename(columns={"level_1": "type", 0: "icu_patients"})
# )

In [555]:
# icu_segments = [
#     "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_avg_lat",
#     "non_covid_icu_patients_7_day_avg_lat",
#     "total_available_icu_beds_7_day_avg_lat",
# ]

In [556]:
# icu_segment_order = dict((v, i + 1) for i, v in enumerate(icu_segments))

In [557]:
# icu_pivot["segment_order"] = icu_pivot.level_2.map(icu_segment_order)

In [558]:
# icu_pivot_sorted = icu_pivot.sort_values(["county", "hospital_name", "segment_order"])

In [559]:
# alt.Chart(icu_pivot_sorted[icu_pivot_sorted.county == "Los Angeles"]).mark_bar().encode(
#     x=alt.X("sum(icu_patients)", stack="normalize"),
#     y="hospital_name",
#     color=alt.Color(
#         "level_2",
#         sort=alt.EncodingSortField("segment_order", order="descending"),
#     ),
#     order="segment_order",
#     tooltip=[
#         alt.Tooltip("hospital_name"),
#     ],
# ).properties(
#     title="Percentage of covid patients occupying ICU beds of county hospitals"
# )

### Pediatric patients

In [560]:
# Hospital name alongside its pediatric covid patient average.
pediatric_patients = ca_timeseries.loc[
    :,
    [
        "hospital_name",
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg",
    ],
]

In [561]:
# Drop rows with no pediatric figure, then rank from most to fewest patients.
pediatric_patients_sorted = pediatric_patients.dropna(
    subset=[
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg"
    ]
).sort_values(
    "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg",
    ascending=False,
)

### Group by county

In [562]:
# `ca_latest_metrics` was never defined in this notebook (it only existed as
# leftover kernel state), so build it here from the most recent collection week.
# NOTE(review): "latest week" is inferred from the variable name -- confirm.
ca_latest_metrics = ca_timeseries[
    ca_timeseries["collection_week"] == ca_timeseries["collection_week"].max()
]

# Sum the numeric metrics per county; `numeric_only=True` keeps text columns
# (hospital names, addresses, ...) out of the aggregation on every pandas version.
counties_df = (
    ca_latest_metrics.groupby(["county"]).sum(numeric_only=True).reset_index()
)

In [563]:
# County-level covid ICU patients as a share of staffed adult ICU beds.
counties_df["pct_icu_covid"] = counties_df[
    "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_lat_avg"
].div(counties_df["total_staffed_adult_icu_beds_7_day_lat_avg"])

In [564]:
# County-level ICU occupancy over staffed ICU beds.
# NOTE(review): the column is named "available" but the numerator is the
# occupancy column, so this is the occupied share -- confirm the intended name.
counties_df["pct_icu_available_7_day_lat_avg"] = counties_df[
    "staffed_adult_icu_bed_occupancy_7_day_lat_avg"
].div(counties_df["total_staffed_adult_icu_beds_7_day_lat_avg"])

### Group by region

In [565]:
# Sum the numeric metrics per region; `numeric_only=True` is explicit so text
# columns are never summed/concatenated on newer pandas versions.
# NOTE(review): this aggregates every collection week in `ca_timeseries`, not
# just the latest one -- confirm that is intended.
regions_df = ca_timeseries.groupby(["region"]).sum(numeric_only=True).reset_index()

In [566]:
# Region-level covid ICU patients as a share of staffed adult ICU beds.
regions_df["pct_icu_covid"] = regions_df[
    "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_lat_avg"
].div(regions_df["total_staffed_adult_icu_beds_7_day_lat_avg"])

In [567]:
# Region-level ICU occupancy over staffed ICU beds.
# NOTE(review): named "available" but computed from the occupancy column, so it
# holds the occupied share -- confirm the intended name.
regions_df["pct_icu_available_7_day_avg_lat"] = regions_df[
    "staffed_adult_icu_bed_occupancy_7_day_lat_avg"
].div(regions_df["total_staffed_adult_icu_beds_7_day_lat_avg"])

In [568]:
# regional_icu_pivot = (
#     regions_df[
#         [
#             "region",
#             "staffed_adult_icu_bed_occupancy_7_day_lat_avg",
#             # "non_covid_icu_patients_7_day_avg_lat",
#             "total_staffed_adult_icu_beds_7_day_lat_avg",
#         ]
#     ]
#     .set_index(["region"])
#     .stack()
#     .reset_index()
#     .rename(columns={"level_1": "type", 0: "icu_patients"})
# )

In [569]:
# icu_segments = [
#     "staffed_adult_icu_bed_occupancy_7_day_lat_avg",
#     # "non_covid_icu_patients_7_day_avg_lat",
#     "total_staffed_adult_icu_beds_7_day_lat_avg",
# ]
# icu_segment_order = dict((v, i + 1) for i, v in enumerate(icu_segments))
# regional_icu_pivot["segment_order"] = regional_icu_pivot.type.map(icu_segment_order)
# regional_icu_pivot_sorted = regional_icu_pivot.sort_values(["region", "segment_order"])

In [570]:
# alt.Chart(regional_icu_pivot_sorted).mark_bar().encode(
#     x=alt.X("sum(icu_patients)", stack="normalize"),
#     y="region",
#     color=alt.Color(
#         "type",
#         sort=alt.EncodingSortField("segment_order", order="descending"),
#     ),
#     order="segment_order",
#     tooltip=[
#         alt.Tooltip("region"),
#     ],
# ).properties(
#     title="Percentage of covid patients occupying ICU beds of county hospitals"
# )

## Trim

In [575]:
# Final column selection: identifiers first, then bed metrics, then ICU metrics.
timeseries_trimmed = ca_timeseries.loc[
    :,
    [
        # identifiers
        "hospital_name",
        "collection_week",
        "ccn",
        "county",
        "region",
        # beds
        "total_beds_7_day_lat_avg",
        "inpatient_beds_used_7_day_lat_avg",
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg",
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg",
        "total_covid_patients_7_day_avg_lat",
        "pct_beds_occupied_covid",
        "pct_beds_occupied",
        # ICU
        "total_staffed_adult_icu_beds_7_day_lat_avg",
        "staffed_adult_icu_bed_occupancy_7_day_lat_avg",
        "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_lat_avg",
        "pct_icu_beds_occupied_covid",
        "pct_icu_beds_occupied",
    ],
]

In [576]:
# Shorten the column names for export.
# The original mapping listed "total_staffed_adult_icu_beds_7_day_lat_avg" twice;
# in a dict literal the later entry wins, so the "total_covid_patients" rename was
# silently dropped. It is keyed here on the column it was meant for.
timeseries_trimmed = timeseries_trimmed.rename(
    columns={
        "collection_week": "week",
        "total_beds_7_day_lat_avg": "total_beds",
        "inpatient_beds_used_7_day_lat_avg": "beds_used",
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg": "adult_patients_confirmed_and_suspected_covid",
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_lat_avg": "pediatric_patients_confirmed_and_suspected_covid",
        "total_covid_patients_7_day_avg_lat": "total_covid_patients",
        "total_staffed_adult_icu_beds_7_day_lat_avg": "total_staffed_icu_beds",
        "staffed_adult_icu_bed_occupancy_7_day_lat_avg": "icu_bed_occupancy",
        "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_lat_avg": "icu_adult_patients_confirmed_and_suspected_covid",
    }
)

Get the latest

In [577]:
# Start the "latest" frame as an independent copy of the trimmed time series.
ca_latest = timeseries_trimmed.copy(deep=True)

In [578]:
# Keep only the most recent week. The date column was renamed from
# `collection_week` to `week` by the rename step above, so filter on `week`;
# the old attribute no longer exists on this frame.
ca_latest = ca_latest[ca_latest["week"] == ca_latest["week"].max()]

## Export

In [None]:
# Options for the export step: `week` is listed under `serialize_dates`.
export_options = {"serialize_dates": ["week"]}

In [None]:
# Build the location lookup first: the merge below needs `ca_locations`, which
# previously was only defined in the cell *after* the merge.
ca_locations = raw_locations.copy()

# zero fill the CCN field to match the data
ca_locations["CCN"] = ca_locations["CCN"].str.zfill(6)

# make it match the casing of the data
ca_locations = ca_locations.rename(columns={"CCN": "ccn"})

# filter down to CA
ca_locations = ca_locations[ca_locations["State"] == "CA"]

# we only need a few fields for joining
ca_locations = ca_locations[["ccn", "latitude", "longitude"]]

# for whatever reason the CCN of this hospital has a typo in it, so we
# make a duplicate of it with the bum ID so both will successfully match
# in the event they fix it later
long_beach_memorial_row = ca_locations[ca_locations.ccn == "050485"].copy()
long_beach_memorial_row["ccn"] = "05T485"
# `DataFrame.append` is deprecated (removed in pandas 2.0); `pd.concat` is the
# drop-in equivalent and keeps the original index labels.
ca_locations = pd.concat([ca_locations, long_beach_memorial_row])

# Attach latitude/longitude to every CA hospital row; the left join keeps
# hospitals that have no match in the locations file.
df = pd.merge(ca_data, ca_locations, how="left", on="ccn")