# Analyzing HHS hospital occupancy data

#### An analysis of HHS hospital data, by @datagraphics and @stiles.

#### **Questions?** [matt.stiles@latimes.com](matt.stiles@latimes.com) \\ 310.529.8749

---

### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt

%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat

alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

### Define cities and regions metadata

In [3]:
metadata_df = pd.read_csv("input/city-metadata.csv", dtype={"fips": str})

In [4]:
metadata_df = metadata_df[["county", "fips", "population", "region"]]

In [5]:
socal = ["Los Angeles", "Orange", "Ventura", "San Bernardino", "Riverside"]
bayarea = [
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma",
]

### Get the latest url from the HHS API and read in the latest dataframe

In [6]:
# Data dictionary: https://healthdata.gov/covid-19-reported-patient-impact-and-hospital-capacity-facility-data-dictionary
# Data source: https://healthdata.gov/dataset/covid-19-reported-patient-impact-and-hospital-capacity-facility

In [7]:
metadata = pd.read_json(
# TODO this API will keep working with the addition of the legacy in the domain, but the code needs to be ported to use the new API
    "https://legacy.healthdata.gov/api/3/action/package_show?id=d475cc4e-83cd-4c16-be57-9105f300e0bc&page=0"
).result[0]["resources"]

In [8]:
src = pd.read_csv(
    metadata[0]["url"],
    dtype={"fips_code": str, "zip": str},
    infer_datetime_format=True,
    parse_dates=True,
)

In [9]:
# src = pd.read_csv('https://healthdata.gov/sites/default/files/reported_hospital_capacity_admissions_facility_level_weekly_average_timeseries_20201228.csv',\
# dtype={'fips_code':str, 'zip':str}, infer_datetime_format=True, parse_dates=True)

In [10]:
df = src.copy()

In [11]:
ca_data = pd.DataFrame(df.loc[df.state == "CA"])

In [12]:
ca_data["fips"] = ca_data["fips_code"].str[2:5]

In [13]:
ca_timeseries = pd.merge(
    ca_data, metadata_df, how="left", left_on="fips", right_on="fips"
)

### What's the most recent collection week?

In [14]:
ca_timeseries["collection_week"] = pd.to_datetime(ca_timeseries["collection_week"])

In [15]:
ca_timeseries["collection_week"].max()

Timestamp('2021-01-01 00:00:00')

### Get rid of the -999999 suppressed values

In [16]:
ca_timeseries = ca_timeseries.replace([-999999.0], [0])

In [17]:
ca_timeseries["hospital_name"] = (ca_timeseries["hospital_name"]).str.title()
ca_timeseries["hospital_name"] = (
    (ca_timeseries["hospital_name"])
    .str.replace(" Of ", " of ")
    .str.replace("Hlth", "Health")
    .str.replace(" La", " LA")
)

---

## Calculate some hospitalization/covid rates

In [18]:
ca_timeseries.head()

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,is_metro_micro,total_beds_7_day_avg,all_adult_hospital_beds_7_day_avg,all_adult_hospital_inpatient_beds_7_day_avg,inpatient_beds_used_7_day_avg,all_adult_hospital_inpatient_bed_occupied_7_day_avg,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg,total_adult_patients_hospitalized_confirmed_covid_7_day_avg,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg,total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg,inpatient_beds_7_day_avg,total_icu_beds_7_day_avg,total_staffed_adult_icu_beds_7_day_avg,icu_beds_used_7_day_avg,staffed_adult_icu_bed_occupancy_7_day_avg,...,previous_day_admission_adult_covid_confirmed_40-49_7_day_sum,previous_day_admission_adult_covid_confirmed_50-59_7_day_sum,previous_day_admission_adult_covid_confirmed_60-69_7_day_sum,previous_day_admission_adult_covid_confirmed_70-79_7_day_sum,previous_day_admission_adult_covid_confirmed_80+_7_day_sum,previous_day_admission_adult_covid_confirmed_unknown_7_day_sum,previous_day_admission_pediatric_covid_confirmed_7_day_sum,previous_day_covid_ED_visits_7_day_sum,previous_day_admission_adult_covid_suspected_7_day_sum,previous_day_admission_adult_covid_suspected_18-19_7_day_sum,previous_day_admission_adult_covid_suspected_20-29_7_day_sum,previous_day_admission_adult_covid_suspected_30-39_7_day_sum,previous_day_admission_adult_covid_suspected_40-49_7_day_sum,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admission_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum,fips,county,population,region
0,50283,2021-01-01,CA,50283,Stanford Health Care - Valleycare,5555 WEST LAS POSITAS BOULEVARD,PLEASANTON,94588,Short Term,6001,True,257.0,253.0,237.0,93.4,93.4,33.1,29.7,0.0,0.0,241.1,38.0,22.0,20.0,16.9,...,0.0,0.0,0.0,7.0,12.0,0.0,0.0,59.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,508.0,0.0,1,Alameda,1643700,Bay Area
1,50305,2021-01-01,CA,50305,Alta Bates Summit Medical Center - Alta Bates ...,2450 ASHBY AVENUE,BERKELEY,94705,Short Term,6001,True,403.0,348.0,341.0,144.1,143.7,38.0,36.4,0.0,0.0,396.0,30.0,30.0,15.1,15.1,...,5.0,0.0,6.0,10.0,10.0,0.0,0.0,232.0,19.0,0.0,5.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,558.0,0.0,1,Alameda,1643700,Bay Area
2,50320,2021-01-01,CA,50320,Highland Hospital,1411 EAST 31ST STREET,OAKLAND,94602,Short Term,6001,True,432.0,424.0,424.0,169.0,168.0,37.6,35.1,0.0,0.0,432.0,41.0,33.0,24.9,23.9,...,4.0,5.0,7.0,5.0,4.0,0.0,0.0,602.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1362.0,0.0,1,Alameda,1643700,Bay Area
3,50488,2021-01-01,CA,50488,Eden Medical Center,20103 LAKE CHABOT ROAD,CASTRO VALLEY,94546,Short Term,6001,True,150.4,150.0,139.3,139.6,139.1,44.1,43.4,0.0,0.0,139.7,24.0,24.0,22.7,22.7,...,0.0,0.0,7.0,6.0,6.0,0.0,0.0,323.0,17.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,669.0,0.0,1,Alameda,1643700,Bay Area
4,50002,2021-01-01,CA,50002,St Rose Hospital,27200 CALAROGA AVE,HAYWARD,94545,Short Term,6001,True,95.0,95.0,95.0,56.6,56.6,24.7,22.9,0.0,0.0,95.0,15.0,15.0,10.6,10.6,...,0.0,0.0,0.0,4.0,0.0,0.0,0.0,58.0,13.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,413.0,0.0,1,Alameda,1643700,Bay Area


#### How many beds

In [19]:
ca_timeseries["total_beds"] = round(
    (
        ca_timeseries["total_beds_7_day_sum"]
        / ca_timeseries["total_beds_7_day_coverage"]
    ),
    0,
)

#### Calculate daily average of total staffed ICU beds

In [20]:
ca_timeseries["total_staffed_adult_icu_beds"] = round(
    (
        ca_timeseries["total_staffed_adult_icu_beds_7_day_sum"]
        / ca_timeseries["total_staffed_adult_icu_beds_7_day_coverage"]
    ),
    0,
)

#### Calculate daily average of occupied ICU beds

In [21]:
ca_timeseries["total_occupied_adult_icu_beds"] = round(
    (
        ca_timeseries["staffed_adult_icu_bed_occupancy_7_day_sum"]
        / ca_timeseries["staffed_adult_icu_bed_occupancy_7_day_coverage"]
    ),
    0,
)

#### Calculate daily average of COVID patients in the ICU

In [22]:
ca_timeseries["total_covid_icu_patients"] = round(
    (
        ca_timeseries[
            "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum"
        ]
        / ca_timeseries[
            "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage"
        ]
    ),
    0,
)

#### Number of ICU beds available

In [23]:
ca_timeseries["total_available_adult_icu_beds"] = round(
    (
        ca_timeseries["total_staffed_adult_icu_beds"]
        - ca_timeseries["total_occupied_adult_icu_beds"]
    ),
    0,
)

#### Calculate daily ICU occupancy as percentage

In [24]:
ca_timeseries["pct_occupied_adult_icu_beds"] = round(
    (
        ca_timeseries["total_occupied_adult_icu_beds"]
        / ca_timeseries["total_staffed_adult_icu_beds"]
    ),
    2,
)

#### If a hospital reports 0 staffed adult ICU beds, drop them from the dataframe

In [25]:
filtered_timeseries = ca_timeseries[
    (ca_timeseries.total_staffed_adult_icu_beds_7_day_sum.notnull())
    & (ca_timeseries.total_staffed_adult_icu_beds_7_day_sum != 0)
].sort_values("total_staffed_adult_icu_beds_7_day_sum")

#### How many patients?

In [26]:
ca_timeseries["all_patients"] = round(
    (
        ca_timeseries["inpatient_beds_used_7_day_sum"]
        / ca_timeseries["inpatient_beds_used_7_day_coverage"]
    ),
    0,
)

#### How many Covid patients

In [27]:
ca_timeseries["total_adult_covid_patients"] = (
    ca_timeseries[
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum"
    ]
    / ca_timeseries[
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage"
    ]
)

In [28]:
ca_timeseries["total_pediatric_covid_patients"] = (
    ca_timeseries[
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum"
    ]
    / ca_timeseries[
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage"
    ]
)

In [29]:
ca_timeseries["total_covid_patients"] = round(
    ca_timeseries["total_adult_covid_patients"]
    + ca_timeseries["total_pediatric_covid_patients"],
    0,
).fillna(0)

#### What's the rate of Covid patients?

In [30]:
ca_timeseries["covid_patients_share"] = round(
    (ca_timeseries["total_covid_patients"] / ca_timeseries["all_patients"]), 2
)

In [31]:
ca_timeseries = ca_timeseries[ca_timeseries["all_patients"] > 0]

---

### Trim to the columns we want

In [32]:
trimmed_timeseries = ca_timeseries[
    [
        "hospital_name",
        "hospital_subtype",
        "ccn",
        "collection_week",
        "county",
        "fips",
        "total_beds",
        "total_staffed_adult_icu_beds",
        "total_covid_icu_patients",
        "total_available_adult_icu_beds",
        "pct_occupied_adult_icu_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
        "region",
    ]
].rename(
    columns={
        "hospital_name": "hospital",
        "collection_week": "week",
    }
)

In [33]:
trimmed_timeseries.covid_patients_share.mean()

0.15842891203413226

---

### Filter the dataframe for Los Angeles County facilities

In [34]:
la = trimmed_timeseries[trimmed_timeseries["fips"] == "037"]
oc = trimmed_timeseries[trimmed_timeseries["fips"] == "059"]

### Filter the California dataframe to the most recent collection week

In [35]:
current_ca = trimmed_timeseries[
    trimmed_timeseries["week"] == trimmed_timeseries["week"].max()
].sort_values("covid_patients_share", ascending=False)

In [36]:
current_la = la[la["week"] == la["week"].max()].sort_values(
    "covid_patients_share", ascending=False
)

In [37]:
current_oc = oc[oc["week"] == oc["week"].max()].sort_values(
    "covid_patients_share", ascending=False
)

In [38]:
current_ca.to_csv("output/current_ca.csv", index=False)

### Specific place?

In [39]:
current_ca[current_ca["hospital"].str.contains("Methodist Hospital")].iloc[0]

hospital                          Methodist Hospital of Southern Ca
hospital_subtype                                         Short Term
ccn                                                          050238
week                                            2021-01-01 00:00:00
county                                                  Los Angeles
fips                                                            037
total_beds                                                   256.00
total_staffed_adult_icu_beds                                  21.00
total_covid_icu_patients                                      21.00
total_available_adult_icu_beds                                 1.00
pct_occupied_adult_icu_beds                                    0.95
total_covid_patients                                         143.00
all_patients                                                 211.00
covid_patients_share                                           0.68
region                                          

### Which CA hospitals have the greatest share of covid patients? 

In [118]:
current_ca[
    (current_ca["hospital_subtype"] != "Long Term")
    & (current_ca["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
        "region",
        "county",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).tail(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share,region,county
8,Alta Bates Summit Medical Center,296.0,25.0,191.0,0.13,Bay Area,Alameda
234,Loma Linda University Children'S Hospital,363.0,29.0,217.0,0.13,Southern California,San Bernardino
273,Chinese Hospital,92.0,5.0,42.0,0.12,Bay Area,San Francisco
266,California Pacific Medical Center- Van Ness Ca...,301.0,23.0,188.0,0.12,Bay Area,San Francisco
26,Sutter Coast Hospital,51.0,3.0,31.0,0.1,Northern California,Del Norte
58,Adventist Health Clearlake,37.0,2.0,21.0,0.1,Northern California,Lake
88,City of Hope Helford Clinical Research Hospital,208.0,19.0,189.0,0.1,Southern California,Los Angeles
324,Sonoma Valley Hospital,44.0,2.0,26.0,0.08,Bay Area,Sonoma
246,Hi-Desert Medical Center,133.0,8.0,129.0,0.06,Southern California,San Bernardino
41,St Joseph Hospital,182.0,6.0,117.0,0.05,Northern California,Humboldt


### Which facilities in LA have the greatest share of covid patients? 

In [41]:
current_la[
    (current_la["hospital_subtype"] != "Long Term")
    & (current_la["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).head(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share
102,Kaiser Foundation Hospital - Downey,447.0,231.0,256.0,0.9
120,Kaiser Foundation Hospital - Baldwin Park,311.0,179.0,202.0,0.89
125,Palmdale Regional Medical Center,184.0,117.0,137.0,0.85
67,Community Hospital of Huntington Park,65.0,36.0,44.0,0.82
112,Kaiser Foundation Hospital - West LA,318.0,133.0,167.0,0.8
92,Kaiser Foundation Hospital - Panorama City,293.0,152.0,202.0,0.75
69,San Gabriel Valley Medical Center,111.0,63.0,84.0,0.75
89,"Martin Luther King, Jr. Community Hospital",233.0,156.0,209.0,0.75
133,San Dimas Community Hospital,164.0,49.0,65.0,0.75
122,Alhambra Hospital Medical Center,101.0,51.0,68.0,0.75


### Which places have the lowest share? 

In [42]:
current_la[
    (current_la["hospital_subtype"] != "Long Term")
    & (current_la["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).tail(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share
104,St Mary Medical Center,406.0,91.0,221.0,0.41
137,Cedars-Sinai Medical Center,928.0,336.0,830.0,0.4
68,Santa Monica - Ucla Med Ctr & Orthopaedic Hosp...,302.0,95.0,243.0,0.39
96,Encino Hospital Medical Center,70.0,25.0,70.0,0.36
116,Memorial Hospital of Gardena,186.0,59.0,165.0,0.36
74,Keck Hospital of Usc,401.0,62.0,202.0,0.31
107,Ronald Reagan U C L A Medical Center,457.0,110.0,385.0,0.29
139,College Medical Center,235.0,38.0,129.0,0.29
140,Lac/Rancho Los Amigos National Rehabilitation ...,166.0,27.0,145.0,0.19
88,City of Hope Helford Clinical Research Hospital,208.0,19.0,189.0,0.1


### Which facilities in OC have the greatest share of covid patients? 

In [43]:
current_oc[
    (current_oc["hospital_subtype"] != "Long Term")
    & (current_oc["total_covid_icu_patients"] > 0)
][
    ["hospital", "total_covid_patients", "all_patients", "covid_patients_share"]
].sort_values(
    "covid_patients_share", ascending=False
).head(
    5
)

Unnamed: 0,hospital,total_covid_patients,all_patients,covid_patients_share
178,Garden Grove Hospital & Medical Center,48.0,57.0,0.84
185,Kaiser Foundation Hospital - Orange County - A...,254.0,351.0,0.72
172,Ahmc Anaheim Regional Medical Center,96.0,137.0,0.7
193,Providence St. Jude Medical Center,210.0,305.0,0.69
184,Huntington Beach Hospital,44.0,64.0,0.69


In [44]:
current_oc[
    (current_oc["hospital_subtype"] != "Long Term")
    & (current_oc["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).tail(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share
169,West Anaheim Medical Center,166.0,58.0,116.0,0.5
176,Foothill Regional Medical Center,125.0,34.0,75.0,0.45
183,Memorialcare Saddleback Medical Center,234.0,69.0,153.0,0.45
179,South Coast Global Medical Center,180.0,48.0,111.0,0.43
175,Providence Mission Hospital,337.0,113.0,274.0,0.41
168,University of California Irvine Medical Center,439.0,167.0,405.0,0.41
182,Hoag Memorial Hospital Presbyterian,441.0,172.0,426.0,0.4
166,Chapman Global Medical Center,78.0,23.0,62.0,0.37
181,Anaheim Global Medical Center,189.0,37.0,131.0,0.28
186,Orange County Global Medical Center,425.0,83.0,304.0,0.27


In [45]:
current_la["covid_patients_share"] = round(
    (current_la["covid_patients_share"] * 100), 2
)

### Output top LA hospitals for CMS table

In [46]:
current_la[
    (current_la["hospital_subtype"] != "Long Term")
    & (current_la["total_covid_icu_patients"] > 0)
][["hospital", "total_covid_patients", "covid_patients_share"]].sort_values(
    "covid_patients_share", ascending=False
).head(
    10
).to_csv(
    "output/current_la_hospitals.csv", index=False
)

### Which facilities in LA have the highest ICU capacity rates? 

In [47]:
current_la[(current_la["total_covid_icu_patients"] > 0)].sort_values(
    "pct_occupied_adult_icu_beds", ascending=False
).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_beds,total_staffed_adult_icu_beds,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share,region
137,Cedars-Sinai Medical Center,Short Term,50625,2021-01-01,Los Angeles,37,928.0,107.0,80.0,-39.0,1.36,336.0,830.0,40.0,Southern California
102,Kaiser Foundation Hospital - Downey,Short Term,50139,2021-01-01,Los Angeles,37,447.0,58.0,52.0,0.0,1.0,231.0,256.0,90.0,Southern California
96,Encino Hospital Medical Center,Short Term,50158,2021-01-01,Los Angeles,37,70.0,6.0,3.0,0.0,1.0,25.0,70.0,36.0,Southern California
120,Kaiser Foundation Hospital - Baldwin Park,Short Term,50723,2021-01-01,Los Angeles,37,311.0,32.0,26.0,0.0,1.0,179.0,202.0,89.0,Southern California
111,Providence Little Company of Mary Med Ctr Torr...,Short Term,50353,2021-01-01,Los Angeles,37,334.0,33.0,23.0,0.0,1.0,164.0,279.0,59.0,Southern California
75,Whittier Hospital Medical Center,Short Term,50735,2021-01-01,Los Angeles,37,183.0,25.0,17.0,0.0,1.0,69.0,127.0,54.0,Southern California
70,Beverly Hospital,Short Term,50350,2021-01-01,Los Angeles,37,182.0,25.0,19.0,0.0,1.0,81.0,136.0,60.0,Southern California
95,Northridge Hospital Medical Center,Short Term,50116,2021-01-01,Los Angeles,37,325.0,45.0,33.0,0.0,1.0,130.0,257.0,51.0,Southern California
65,Pacifica Hospital of The Valley,Short Term,50378,2021-01-01,Los Angeles,37,118.0,13.0,4.0,0.0,1.0,32.0,64.0,50.0,Southern California
115,Usc Verdugo Hills Hospital,Short Term,50124,2021-01-01,Los Angeles,37,130.0,15.0,10.0,0.0,1.0,63.0,126.0,50.0,Southern California


### How many LA County hospitals (with Covid patients) are in our dataframe? 

In [48]:
len(current_la[(current_la["total_covid_icu_patients"] > 0)])

73

### How many have 'high' ICU occupancy rates? 

In [49]:
len(current_la[current_la["pct_occupied_adult_icu_beds"] > 0.90])

49

### Top 10 Bay Area hospotals by Covid share? 

In [50]:
current_ca[current_ca["county"].isin(bayarea)].sort_values(
    "covid_patients_share", ascending=False
).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_beds,total_staffed_adult_icu_beds,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share,region
302,Kaiser Foundation Hospital-San Jose,Short Term,50604,2021-01-01,Santa Clara,85,175.0,20.0,16.0,2.0,0.9,97.0,159.0,0.61,Bay Area
299,Regional Medical Center of San Jose,Short Term,50125,2021-01-01,Santa Clara,85,286.0,47.0,29.0,0.0,1.0,118.0,242.0,0.49,Bay Area
289,Kaiser Foundation Hospital - South San Francisco,Short Term,50070,2021-01-01,San Mateo,81,98.0,13.0,6.0,1.0,0.92,43.0,89.0,0.48,Bay Area
6,Alameda Hospital,Short Term,50211,2021-01-01,Alameda,1,90.0,8.0,4.0,1.0,0.88,20.0,43.0,0.47,Bay Area
291,Seton Medical Center,Short Term,50289,2021-01-01,San Mateo,81,92.0,16.0,6.0,6.0,0.62,33.0,73.0,0.45,Bay Area
4,St Rose Hospital,Short Term,50002,2021-01-01,Alameda,1,95.0,15.0,7.0,4.0,0.73,25.0,57.0,0.44,Bay Area
11,Kaiser Foundation Hospital - San Leandro,Short Term,50777,2021-01-01,Alameda,1,182.0,19.0,13.0,1.0,0.95,73.0,166.0,0.44,Bay Area
287,San Mateo Medical Center,Short Term,50113,2021-01-01,San Mateo,81,54.0,7.0,5.0,0.0,1.0,17.0,40.0,0.42,Bay Area
23,Sutter Delta Medical Center,Short Term,50523,2021-01-01,Contra Costa,13,148.0,12.0,5.0,0.0,1.0,43.0,103.0,0.42,Bay Area
321,Healdsburg District Hospital,Critical Access Hospitals,51321,2021-01-01,Sonoma,97,28.0,5.0,0.0,5.0,0.0,4.0,10.0,0.4,Bay Area


---

## Get medical facilities' geographic data

In [51]:
# From HHS: https://maps3.arcgisonline.com/arcgis/rest/services/A-16/HHS_IOM_Health_Resources/MapServer

In [52]:
hospitals = gpd.read_file(
    "/Users/mhustiles/data/github/AGStoShapefile/backupdir/HHS_Hospital/HHS_Hospital_1608139617293.geojson"
)

In [53]:
medical_centers = gpd.read_file(
    "/Users/mhustiles/data/github/AGStoShapefile/backupdir/HHS_Medical_Center/HHS_Medical_Center_1608139616289.geojson"
)

### Clean up the column names so we can merge the facilities dataframes

In [54]:
facilities = [hospitals, medical_centers]

In [55]:
hospitals.rename(
    columns={
        "Name_new": "name",
        "Address_1": "address",
        "City": "city",
        "State_1": "state",
        "ZipCode": "zipcode",
        "PhoneNum": "phone",
        "County_Nam": "county",
        "Provider_N": "provider_id",
        "Hospital_T": "type",
        "Hospital_O": "operation",
        "Emergency_": "emergency",
    },
    inplace=True,
)

In [56]:
medical_centers.rename(
    columns={
        "Hospital_N": "name",
        "Address1": "address",
        "City_1": "city",
        "State_1": "state",
        "ZipCode": "zipcode",
        "PhoneNum": "phone",
        "County_Nam": "county",
        "Provider_N": "provider_id",
        "Hospital_T": "type",
        "Hospital_O": "operation",
        "Emergency_": "emergency",
    },
    inplace=True,
)

### Concatenate the different facility types into one dataframe, and also filter that just to CA

In [57]:
all_medical_geo = pd.concat(facilities)

In [58]:
all_medical_geo_ca = all_medical_geo[all_medical_geo["state"] == "CA"]

### We might be better off using HHS' own locations data with CCNs

In [59]:
locations = pd.read_csv(
    "input/hospital_locations.csv",
    dtype={"latitude": float, "longitude": float, "CCN": str},
)

In [60]:
locations.columns = (
    locations.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.replace("-", "_", regex=False)
)

### Even though it has the wrong location for White Memorial. Let's fix.

In [61]:
locations.loc[
    (locations.facility_name == "WHITE MEMORIAL MEDICAL CENTER"), "longitude"
] = -118.2176219

In [62]:
locations.loc[
    (locations.facility_name == "WHITE MEMORIAL MEDICAL CENTER"), "latitude"
] = 34.0493044

### Convert the lon/lat fields into a geodataframe

In [63]:
locations = gpd.GeoDataFrame(
    locations, geometry=gpd.points_from_xy(locations.longitude, locations.latitude)
)

### And then confine it to California

In [64]:
locationsca = gpd.GeoDataFrame(locations[locations["state"] == "CA"])

### Make the CCN string match the hospital capacity data

In [65]:
locationsca["ccn"] = locationsca["ccn"].str.zfill(6)

---

### Merge with our CA medical facilities

In [66]:
current_ca_geo = locationsca.merge(current_ca, left_on="ccn", right_on="ccn")

In [67]:
len(current_ca_geo)

347

In [68]:
current_ca_geo.rename(
    columns={
        "county_x": "county",
        "city_x": "city",
        "address_x": "address",
        "state_x": "state",
    },
    inplace=True,
)

In [69]:
# current_ca_geo.drop(['county_y', 'hospital_subtype', 'provider_id', 'ccn', 'emergency', 'address', 'OBJECTID', 'state', 'name', 'phone', 'zipcode'], axis=1, inplace=True)

---

### Filter to just LA County hospitals

In [70]:
current_la_geo = current_ca_geo[current_ca_geo["fips"] == "037"]

### How many facilities in California?

In [71]:
len(current_ca_geo)

347

### How many in L.A. County?

In [72]:
len(current_la_geo)

84

---

### Export hospital specific data for maps and tables

In [73]:
current_ca_geo.to_file("output/current_ca_geo.geojson", driver="GeoJSON")

In [74]:
current_ca_geo.to_csv("output/current_ca_geo.csv", index=False)

In [75]:
current_la_geo.to_file("output/current_la_geo.geojson", driver="GeoJSON")

In [76]:
current_la_geo.to_csv("output/current_la_geo.csv", index=False)

---

## Aggregate

### What's the average share of Covid patients in hospitals statewide?

In [77]:
state = (
    current_ca.groupby(["week"])
    .agg({"total_covid_patients": "sum", "all_patients": "sum"})
    .reset_index()
)

In [78]:
state["covid_patients_share"] = round(
    (state["total_covid_patients"] / state["all_patients"]), 2
)

In [79]:
state.head()

Unnamed: 0,week,total_covid_patients,all_patients,covid_patients_share
0,2021-01-01,21970.0,51255.0,0.43


### Covid patients by county

In [80]:
counties = (
    trimmed_timeseries.groupby(["county", "week", "region"])
    .agg({"total_covid_patients": "sum", "all_patients": "sum"})
    .reset_index()
)

In [81]:
counties["covid_patients_share"] = round(
    (counties["total_covid_patients"] / counties["all_patients"]), 2
)

#### Which counties have the most Covid patients (in most recent week)?

In [82]:
counties[counties["week"] == counties["week"].max()].sort_values(
    "total_covid_patients", ascending=False
).head(5)

Unnamed: 0,county,week,region,total_covid_patients,all_patients,covid_patients_share
409,Los Angeles,2021-01-01,Southern California,8200.0,15660.0,0.52
662,Orange,2021-01-01,Southern California,2185.0,4533.0,0.48
799,San Bernardino,2021-01-01,Southern California,1781.0,3230.0,0.55
731,Riverside,2021-01-01,Southern California,1637.0,2860.0,0.57
822,San Diego,2021-01-01,Southern California,1618.0,4302.0,0.38


#### Which counties have the highest percentage of Covid patients (in most recent week)?

In [83]:
counties[counties["week"] == counties["week"].max()].sort_values(
    "total_covid_patients", ascending=False
).head(5)

Unnamed: 0,county,week,region,total_covid_patients,all_patients,covid_patients_share
409,Los Angeles,2021-01-01,Southern California,8200.0,15660.0,0.52
662,Orange,2021-01-01,Southern California,2185.0,4533.0,0.48
799,San Bernardino,2021-01-01,Southern California,1781.0,3230.0,0.55
731,Riverside,2021-01-01,Southern California,1637.0,2860.0,0.57
822,San Diego,2021-01-01,Southern California,1618.0,4302.0,0.38


### How has this changed over time? 

In [84]:
counties_grouped = (
    counties.groupby(["county", "week", "region"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [85]:
la_grouped = pd.DataFrame(counties_grouped[counties_grouped["county"] == "Los Angeles"])

In [86]:
la_grouped.tail()

Unnamed: 0,county,week,region,total_covid_patients,covid_patients_share
405,Los Angeles,2020-12-04,Southern California,3666.0,0.26
406,Los Angeles,2020-12-11,Southern California,5038.0,0.34
407,Los Angeles,2020-12-18,Southern California,6497.0,0.43
408,Los Angeles,2020-12-25,Southern California,7584.0,0.5
409,Los Angeles,2021-01-01,Southern California,8200.0,0.52


In [87]:
la_grouped.to_csv("output/la_grouped.csv", index=False)

In [88]:
alt.Chart(la_grouped).mark_area().encode(
    x=alt.X("week:T", axis=alt.Axis(format="", title=" ", tickCount=3)),
    y=alt.Y(
        "covid_patients_share",
        title="Covid patients in LA County hospitals",
        axis=alt.Axis(format="%", tickCount=6),
    ),
).properties(width=500, height=500)

### Just southern California counties? 

In [89]:
socal_grouped = (
    counties_grouped[counties_grouped["county"].isin(socal)]
    .groupby(["county", "week"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [90]:
alt.Chart(socal_grouped).mark_area().encode(
    x=alt.X("week:T", axis=alt.Axis(format="", title=" ", tickCount=3)),
    y=alt.Y(
        "covid_patients_share",
        title=" ",
        axis=alt.Axis(format="%", title="", tickCount=6),
    ),
    facet=alt.Facet("county"),
).properties(
    width=200, height=200, title="Share of all patients with Covid in SoCal Counties"
)

### Bay Area? 

In [91]:
bayarea_grouped = (
    counties_grouped[counties_grouped["county"].isin(bayarea)]
    .groupby(["week"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [92]:
bayarea_grouped.tail()

Unnamed: 0,week,total_covid_patients,covid_patients_share
18,2020-12-04,1183.0,0.146667
19,2020-12-11,1515.0,0.182222
20,2020-12-18,1806.0,0.21
21,2020-12-25,2001.0,0.235556
22,2021-01-01,2163.0,0.254444


In [93]:
alt.Chart(bayarea_grouped).mark_area().encode(
    x=alt.X("week:T", axis=alt.Axis(format="", title=" ", tickCount=3)),
    y=alt.Y(
        "covid_patients_share",
        title=" ",
        axis=alt.Axis(
            format="%",
            title="Share of all patients with Covid in Bay Area",
            tickCount=6,
        ),
    ),
).properties(width=500, height=500)

---

### What's going on in the most-populous counties 

In [94]:
counties_pop = pd.read_csv(
    "../census/processed/acs5_2018_population_counties.csv",
    dtype={"geoid": str, "population": int, "state": str, "county": str},
)

In [95]:
counties_pop.rename(columns={"universe": "population"}, inplace=True)

In [96]:
counties_pop.drop(
    ["universe_annotation", "universe_moe", "universe_moe_annotation"],
    axis=1,
    inplace=True,
)

In [97]:
ca_counties_pop = counties_pop[counties_pop["state"] == "06"]

In [98]:
big_ca_counties_pop = ca_counties_pop.sort_values("population", ascending=False).head(
    10
)

In [99]:
big_ca_counties_pop["name"] = big_ca_counties_pop["name"].str.replace(
    " County, California", ""
)

In [100]:
big_counties = list(big_ca_counties_pop.name)

In [101]:
big_counties_grouped = (
    counties_grouped[counties_grouped["county"].isin(big_counties)]
    .groupby(["county", "week", "region"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [102]:
chart = (
    alt.Chart(big_counties_grouped)
    .mark_area()
    .encode(
        x=alt.X(
            "week:T", axis=alt.Axis(grid=False, format="%b.", title=" ", tickCount=4)
        ),
        y=alt.Y(
            "covid_patients_share",
            title=" ",
            axis=alt.Axis(
                format="%",
                title="",
                tickCount=5,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
            ),
        ),
        facet=alt.Facet(
            "county",
            columns=5,
            title=" ",
            sort=alt.EncodingSortField(
                "covid_patients_share", op="max", order="descending"
            ),
        ),
        color=alt.Color("region", legend=alt.Legend(orient="top", title=" ")),
    )
    .properties(
        width=200,
        height=200,
        title="Share of all hospital patients with COVID-19 in large California counties",
    )
)

chart.configure_view(strokeOpacity=0)

---

### Regions

In [103]:
regions_timeseries = (
    trimmed_timeseries.groupby(["week", "region"])
    .agg({"total_covid_patients": "sum", "all_patients": "sum", "total_beds": "sum"})
    .reset_index()
)

In [104]:
regions_timeseries["covid_patients_share"] = round(
    (regions_timeseries["total_covid_patients"] / regions_timeseries["all_patients"]), 2
)

In [105]:
regions_timeseries.tail(5)

Unnamed: 0,week,region,total_covid_patients,all_patients,total_beds,covid_patients_share
110,2021-01-01,Bay Area,2442.0,9117.0,12932.0,0.27
111,2021-01-01,Greater Sacramento,969.0,3680.0,4829.0,0.26
112,2021-01-01,Northern California,119.0,785.0,1269.0,0.15
113,2021-01-01,San Joaquin Valley,2211.0,5243.0,6461.0,0.42
114,2021-01-01,Southern California,16229.0,32430.0,44439.0,0.5


In [106]:
regions_timeseries[
    regions_timeseries["week"] == "2021-01-01"
].covid_patients_share.mean()

0.32

In [107]:
chart = (
    alt.Chart(regions_timeseries)
    .mark_area()
    .encode(
        x=alt.X(
            "week:T", axis=alt.Axis(grid=False, format="%b.", title=" ", tickCount=4)
        ),
        y=alt.Y(
            "covid_patients_share",
            title=" ",
            axis=alt.Axis(
                format="%",
                title="",
                tickCount=5,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
            ),
        ),
        facet=alt.Facet(
            "region",
            columns=5,
            title=" ",
            sort=alt.EncodingSortField(
                "covid_patients_share", op="max", order="descending"
            ),
        ),
        #     color=alt.Color('region', legend=alt.Legend(orient="top", title=' '))
    )
    .properties(
        width=200,
        height=200,
        title="Share of all hospital patients with COVID-19 by region",
    )
)

chart.configure_view(strokeOpacity=0)

---

### Hospital overcapacity

In [108]:
before = pd.read_csv("input/hospital_utilization.csv", dtype={"fac_no": str})

In [109]:
before[before["fac_name"].str.contains("KING")]

Unnamed: 0,fac_no,fac_name,fac_city,county,license_no,fac_zip,icu_beds,tot_lic_beds
185,106191230,"MARTIN LUTHER KING, JR. COMMUNITY HOSPITAL",LOS ANGELES,Los Angeles,60000132,90059,20,131


In [110]:
current_ca[current_ca["ccn"] == "050779"]

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_beds,total_staffed_adult_icu_beds,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share,region
89,"Martin Luther King, Jr. Community Hospital",Short Term,50779,2021-01-01,Los Angeles,37,233.0,28.0,14.0,1.0,0.96,156.0,209.0,0.75,Southern California


In [111]:
before[["fac_name"]].sort_values("fac_name", ascending=False).to_csv(
    "output/before_names.csv", index=False
)

In [112]:
current_ca[["hospital"]].sort_values("hospital", ascending=False).to_csv(
    "output/current_names.csv", index=False
)