In [None]:
import json
from collections import defaultdict

import numpy as np

from covidcaremap.constants import *
from covidcaremap.data import published_data_path, processed_data_path

## Compute Quantile Breaks

**Note:** This is a work in progress. Currently it computes five breaks for each column: the minimum, the 25th, 50th, and 75th percentiles, and the maximum.

In [None]:
# Read each published CovidCareMap GeoJSON file (county, state, HRR and
# facility) into its own dict. json.load parses directly from the open handle.
with open(published_data_path('us_healthcare_capacity-county-CovidCareMap.geojson')) as county_file:
    by_county = json.load(county_file)

with open(published_data_path('us_healthcare_capacity-state-CovidCareMap.geojson')) as state_file:
    by_state = json.load(state_file)

with open(published_data_path('us_healthcare_capacity-hrr-CovidCareMap.geojson')) as hrr_file:
    by_hrr = json.load(hrr_file)

with open(published_data_path('us_healthcare_capacity-facility-CovidCareMap.geojson')) as facility_file:
    by_facility = json.load(facility_file)

In [None]:
def get_prop_values(features, columns):
    """Gather the usable values of each requested property across features.

    Args:
        features: Iterable of feature dicts, each with a 'properties' mapping
            that contains every name listed in ``columns``.
        columns: Iterable of property names to collect.

    Returns:
        A plain dict mapping each property name to the list of its values, in
        feature order. ``None`` and NaN values are skipped, so a property with
        no usable value in any feature has no key in the result. Values of
        properties whose name contains 'Occupancy' are capped at 1.0.

    Raises:
        KeyError: If a feature lacks 'properties' or one of the columns.
    """
    collected = defaultdict(list)
    for feature in features:
        for column in columns:
            value = feature['properties'][column]

            # Nothing to record for missing (None) or NaN entries.
            if value is None or np.isnan(value):
                continue

            # An occupancy value should never exceed 1, so cap it.
            if 'Occupancy' in column and value > 1.0:
                value = 1.0

            collected[column].append(value)

    return dict(collected)

def compute_breaks(features, columns, quantiles=(0, 0.25, 0.5, 0.75, 1)):
    """Compute quantile breaks for each requested property.

    Args:
        features: Iterable of feature dicts, passed through to
            ``get_prop_values``.
        columns: Iterable of property names to compute breaks for.
        quantiles: Quantiles to evaluate, each in the range [0, 1]. The
            default yields the minimum, the three quartiles and the maximum.

    Returns:
        A dict mapping each property name to the list of break values, one
        per entry in ``quantiles``. Properties for which ``get_prop_values``
        collected no values are absent from the result.
    """
    prop_values = get_prop_values(features, columns)
    # Build a fresh dict instead of overwriting the value lists in place.
    return {
        prop: np.quantile(values, quantiles, axis=0).tolist()
        for prop, values in prop_values.items()
    }

In [None]:
# Facility-level breaks cover the facility count columns and the occupancy columns.
facility_columns = CCM_FACILITY_COUNT_COLUMNS + list(CCM_FACILITY_OCCUPANCY_COLUMNS.keys())
facility_breaks = compute_breaks(by_facility['features'], columns=facility_columns)

# Write the breaks as indented JSON to the path given by processed_data_path.
with open(processed_data_path('ccm_facility_breaks.json'), 'w') as breaks_file:
    serialized_breaks = json.dumps(facility_breaks, indent=4)
    breaks_file.write(serialized_breaks)

In [None]:
# County-level breaks cover the count, occupancy and per-capita columns.
county_columns = (
    CCM_FACILITY_COUNT_COLUMNS
    + list(CCM_FACILITY_OCCUPANCY_COLUMNS.keys())
    + CCM_PER_CAPITA_COLUMNS
)
county_breaks = compute_breaks(by_county['features'], columns=county_columns)

# Write the breaks as indented JSON to the path given by processed_data_path.
with open(processed_data_path('ccm_county_breaks.json'), 'w') as breaks_file:
    serialized_breaks = json.dumps(county_breaks, indent=4)
    breaks_file.write(serialized_breaks)

In [None]:
# State-level breaks cover the count, occupancy and per-capita columns.
state_columns = (
    CCM_FACILITY_COUNT_COLUMNS
    + list(CCM_FACILITY_OCCUPANCY_COLUMNS.keys())
    + CCM_PER_CAPITA_COLUMNS
)
state_breaks = compute_breaks(by_state['features'], columns=state_columns)

# Write the breaks as indented JSON to the path given by processed_data_path.
with open(processed_data_path('ccm_state_breaks.json'), 'w') as breaks_file:
    serialized_breaks = json.dumps(state_breaks, indent=4)
    breaks_file.write(serialized_breaks)

In [None]:
# HRR-level breaks cover the count, occupancy and per-capita columns.
hrr_columns = (
    CCM_FACILITY_COUNT_COLUMNS
    + list(CCM_FACILITY_OCCUPANCY_COLUMNS.keys())
    + CCM_PER_CAPITA_COLUMNS
)
hrr_breaks = compute_breaks(by_hrr['features'], columns=hrr_columns)

# Write the breaks as indented JSON to the path given by processed_data_path.
with open(processed_data_path('ccm_hrr_breaks.json'), 'w') as breaks_file:
    serialized_breaks = json.dumps(hrr_breaks, indent=4)
    breaks_file.write(serialized_breaks)