# Test DataIO Python Library routines

This Jupyter notebook can be used to test the recently written dataIO python functions, which have been collected into the data_IO library.

In [65]:
# Enable IPython's autoreload extension so that external library files are
# re-imported automatically whenever they change on disk.
# Useful when developing external libraries, since otherwise Jupyter will not
# re-import a library without restarting the python kernel.
# Re-running this cell in a live kernel only prints a harmless
# "already loaded" notice.

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [66]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd

# Import COVID data IO routines from external python library
import COVIDlib.data_IO as COVID_IO

## Notebook-wide settings: data locations used by the cells below
data_dir = 'our_data/'    # Data directory for the COVID datafiles
test_dir = 'test_data/'   # Data directory for storing test datafiles (prefix for every TEST_*.csv path below)

## FIPS codes for the local areas of interest.
## County codes are 5 digits (2-digit state prefix + 3-digit county); state codes are the 2-digit prefix.
## ClayFIPS and MNFIPS are the county/state pair queried by the cells below.
ClayFIPS = 27027
CassFIPS = 38017
MNFIPS = 27
NDFIPS = 38

## Test Johns Hopkins DataIO

Execute and test the Johns Hopkins DataIO routines, first authored by Luke.

In [3]:
# ---------------------------------------------------------------------------
# Johns Hopkins data: load the real files first, then the TEST files
# ---------------------------------------------------------------------------
# NOTE(review): the saved output of this cell is
#   ValueError: invalid literal for int() with base 10: '842.0'
# Which call raises it is not visible from this notebook -- check the library
# and the TEST csv files.

# Real data: no file arguments are passed, so the library's own defaults apply.
JH_state_df, JH_cnty_df = COVID_IO.GetCDRDataFrames()
# State-level data for Minnesota
MN_df = COVID_IO.GetCDRState(MNFIPS, JH_state_df)
# County-level data for Clay County
CLAY_df = COVID_IO.GetCDRCounty(ClayFIPS, JH_cnty_df)

# TEST data: same loader, but both input files are overridden with the
# files kept under test_dir.
test_cntyfile = test_dir + "TEST_countylevel_combinedCDR.csv"
test_statefile = test_dir + "TEST_statelevel_combinedCDR.csv"
testJH_state_df, testJH_cnty_df = COVID_IO.GetCDRDataFrames(
    stateFile=test_statefile,
    countyFile=test_cntyfile,
)
# TEST state-level data for Minnesota
TESTMN_df = COVID_IO.GetCDRState(MNFIPS, testJH_state_df)
# TEST county-level data for Clay County
TESTCLAY_df = COVID_IO.GetCDRCounty(ClayFIPS, testJH_cnty_df)

ValueError: invalid literal for int() with base 10: '842.0'

In [None]:
##
## The Clay county test data should show a steady 10 new confirmed cases per day starting March 22, 2020,
## so the confirmed count rises by 10 each day (i.e. dConfirmed is 10/day).
## The first deaths occur 14 days later (I "kill" 10% of the infected at the end of 14 days; the other 90% are marked recovered).  Check this!

##
## The Minnesota data should have an unreal situation of 10 additional new cases a day from March 22 to June 1.  We still "kill" 10% of the infected 14 days later and mark the
## other 90% "recovered."  Check this.

## Test Apple and Google Mobility DataIO

Execute and test the Apple and Google Mobility DataIO routines first authored by Dio

In [None]:
# ---------------------------------------------------------------------------
# Apple mobility data: load the real files first, then the TEST files
# ---------------------------------------------------------------------------

# Real data: no file arguments are passed, so the library's own defaults apply.
# Note the return order: county frame first, state frame second.
aapl_cnty_df, aapl_state_df = COVID_IO.initAaplMobilityDataframes()
# Real mobility data for Clay county and for Minnesota
aapl_CLAY_df = COVID_IO.getAaplCountyMobility(ClayFIPS, aapl_cnty_df)
aapl_MN_df = COVID_IO.getAaplStateMobility(MNFIPS, aapl_state_df)

# TEST data: same loader, pointed at the files kept under test_dir.
test_cntyfile = test_dir + "TEST_aapl_mobility_cnty.csv"
test_statefile = test_dir + "TEST_aapl_mobility_state.csv"
testaapl_cnty_df, testaapl_state_df = COVID_IO.initAaplMobilityDataframes(
    countyFile=test_cntyfile,
    stateFile=test_statefile,
)
# TEST mobility data for Clay county and for Minnesota
testaapl_CLAY_df = COVID_IO.getAaplCountyMobility(ClayFIPS, testaapl_cnty_df)
testaapl_MN_df = COVID_IO.getAaplStateMobility(MNFIPS, testaapl_state_df)

# ---------------------------------------------------------------------------
# Google mobility data: load the real files first, then the TEST files
# ---------------------------------------------------------------------------

# Real data: no file arguments are passed, so the library's own defaults apply.
# Note the return order: county frame first, state frame second.
goog_cnty_df, goog_state_df = COVID_IO.initgoogMobilityDataframes()
# Real mobility data for Clay county and for Minnesota
goog_CLAY_df = COVID_IO.getGoogleCountyMobility(ClayFIPS, goog_cnty_df)
goog_MN_df = COVID_IO.getGoogleStateMobility(MNFIPS, goog_state_df)

# TEST data: same loader, pointed at the files kept under test_dir.
# (test_cntyfile / test_statefile are deliberately re-bound here.)
test_cntyfile = test_dir + "TEST_goog_mobility_cnty.csv"
test_statefile = test_dir + "TEST_goog_mobility_state.csv"
testgoog_cnty_df, testgoog_state_df = COVID_IO.initgoogMobilityDataframes(
    countyFile=test_cntyfile,
    stateFile=test_statefile,
)
# TEST mobility data for Clay county and for Minnesota
testgoog_CLAY_df = COVID_IO.getGoogleCountyMobility(ClayFIPS, testgoog_cnty_df)
testgoog_MN_df = COVID_IO.getGoogleStateMobility(MNFIPS, testgoog_state_df)

In [None]:
##
## Test Apple Mobility Data should be checked
##  - For Clay county, the test data is a sawtooth pattern from -30 to +30 with a 7 day period starting on Feb. 15, 2020
##  - For Minnesota, the test data is a boxcar pattern: four days at 30 followed by four days at -30 and back again, starting on Feb. 15, 2020

In [None]:
##
## Test Google Mobility Data should be checked
##  - For Clay county, the test data is a sine wave with an amplitude of 20 and wavelength of 30 days starting on Feb. 15, 2020
##  - For Minnesota, the test data is a cosine wave with an amplitude of 20 and wavelength of 30 days starting on Feb. 15, 2020

## Test IMHE DataIO

Execute and test the IMHE DataIO routines, first authored by Luke.

In [67]:
# ---------------------------------------------------------------------------
# IMHE data: load the real files first, then the TEST files
# ---------------------------------------------------------------------------

# Real data: no file arguments are passed, so the library's own defaults apply.
summary_df, hospitalization_df = COVID_IO.GetIMHEDataFrames()

# Minnesota values taken from the summary frame ...
equip_df = COVID_IO.GetEquipData(MNFIPS, summary_df)
icu_beds = COVID_IO.GetNumICUBeds(MNFIPS, summary_df)
all_beds = COVID_IO.GetNumAllBeds(MNFIPS, summary_df)
icu_usage = COVID_IO.GetICUBedUsage(MNFIPS, summary_df)
allbed_usage = COVID_IO.GetAllBedUsage(MNFIPS, summary_df)
# ... and from the hospitalization frame.
hospital_df = COVID_IO.GetHospitalizationData(MNFIPS, hospitalization_df)

# TEST data: same loader, pointed at the files kept under test_dir.
testsummaryfile = test_dir + "TEST_imhe_summary.csv"
testhospitalizationfile = test_dir + "TEST_imhe_hospitalizations.csv"
testsummary_df, testhospitalization_df = COVID_IO.GetIMHEDataFrames(
    summaryFile=testsummaryfile,
    hospitalFile=testhospitalizationfile,
)

# The same Minnesota lookups, this time against the TEST frames.
testequip_df = COVID_IO.GetEquipData(MNFIPS, testsummary_df)
testicu_beds = COVID_IO.GetNumICUBeds(MNFIPS, testsummary_df)
testall_beds = COVID_IO.GetNumAllBeds(MNFIPS, testsummary_df)
testicu_usage = COVID_IO.GetICUBedUsage(MNFIPS, testsummary_df)
testallbed_usage = COVID_IO.GetAllBedUsage(MNFIPS, testsummary_df)
testhospital_df = COVID_IO.GetHospitalizationData(MNFIPS, testhospitalization_df)

 

In [68]:
##
## Test IMHE data for Minnesota should be checked.
##  All summary dates were set to May 15, 2020 (lower May 1, upper June 15)
##  Bed capacity was set to 2000 and ICU capacity to 200. 
##  The test data assumes 100% usage.

In [78]:
# Check the TEST IMHE summary values for Minnesota against the known test data.
#
# The expected values follow from how the test files were built: ICU capacity
# 200, bed capacity 2000, 100% usage, and every summary date set to
# May 15, 2020.  Results are printed rather than asserted, so every check
# reports its outcome even when an earlier one fails.
print("Testing Functions for IMHE Summary Data\n----------------------------------------\n")

# Each entry: (function being checked, expected value, actual value,
#              text appended after the "Expected value" line).
summary_value_checks = [
    ("GetNumICUBeds", 200, testicu_beds, ""),
    ("GetNumAllBeds", 2000, testall_beds, "\n"),
    ("GetICUBedUsage", 200, testicu_usage, ""),
    # testallbed_usage comes from GetAllBedUsage, so it must be reported under
    # that name (not "GetNumAllBeds") or a failure would blame the wrong function.
    ("GetAllBedUsage", 2000, testallbed_usage, "\n"),
]
for check_label, expected_value, actual_value, trailer in summary_value_checks:
    outcome = "Passed" if actual_value == expected_value else "Failed"
    print("{0} : {1}".format(check_label, outcome))
    print("Expected value: {0} - Actual value: {1}".format(expected_value, actual_value) + trailer)

# Peak-day checks: the first row of each date column in the TEST equipment
# frame should equal the single date used throughout the test summary file.
expected_peak_day = np.datetime64('2020-05-15')
# Each entry: (label to print, column of testequip_df holding the date).
peak_day_checks = [
    ("Peak Bed Day Mean", 'peak_bed_day_mean'),
    ("Peak ICU Bed Day Mean", 'peak_icu_bed_day_mean'),
    ("Peak Vent Day Mean", 'peak_vent_day_mean'),
]
for check_label, column in peak_day_checks:
    outcome = "Passed" if testequip_df[column].values[0] == expected_peak_day else "Failed"
    print("{0} - {1}".format(check_label, outcome))
    


Testing Functions for IMHE Summary Data
----------------------------------------

GetNumICUBeds : Passed
Expected value: 200 - Actual value: 200
GetNumAllBeds : Passed
Expected value: 2000 - Actual value: 2000

GetICUBedUsage : Passed
Expected value: 200 - Actual value: 200
GetNumAllBeds : Passed
Expected value: 2000 - Actual value: 2000

Peak Bed Day Mean - Passed
Peak ICU Bed Day Mean - Passed
Peak Vent Day Mean - Passed


In [36]:
# The hospitalization data starts on March 22, 2020
# test_imhe_hospitalizations['allbed_mean'] is increasing at 1000 per day (10% margins of error on lower/upper)
# test_imhe_hospitalizations['ICUbed_mean'] is increasing at 100 per day (10% margins of error on lower/upper)
# test_imhe_hospitalizations['InvVen_mean'] is increasing at 50 per day (10% margins of error on lower/upper)
# For the hospitalization deaths/admits/ICU use, the test data has a constant 10 deaths/100 admits/20 ICU a day 
# (with 10% margins of error on lower/upper)