In [1]:
import pandas as pd
import pyarrow.parquet as pq

In [63]:
locations = pd.read_csv("data/unique_lists/locations.csv")
locations = dict(locations.dropna()[['location_name', 'location']].to_dict('split')['data'])
quantiles = (0.99, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0.01, 0.975, 0.025)

In [3]:
test = pd.read_parquet("data/2020-04-06.parquet")

In [4]:
test

Unnamed: 0,model,timezero,season,unit,target,class,value,cat,prob,sample,quantile,family,param1,param2,param3
0,LANL-GrowthRate,2020-04-06,2019-2020,40,7 wk ahead cum death,point,1173.6948628651,,,,,,,,
1,LANL-GrowthRate,2020-04-06,2019-2020,39,7 wk ahead cum death,point,1968.45553176896,,,,,,,,
2,LANL-GrowthRate,2020-04-06,2019-2020,38,7 wk ahead cum death,point,35.3911128655193,,,,,,,,
3,LANL-GrowthRate,2020-04-06,2019-2020,37,7 wk ahead cum death,point,503.147110564119,,,,,,,,
4,LANL-GrowthRate,2020-04-06,2019-2020,36,7 wk ahead cum death,point,55382.2767418851,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223837,COVIDhub-baseline,2020-04-06,2019-2020,01,1 wk ahead cum death,quantile,72.0,,,,0.15,,,,
223838,COVIDhub-baseline,2020-04-06,2019-2020,01,1 wk ahead cum death,quantile,64.0,,,,0.1,,,,
223839,COVIDhub-baseline,2020-04-06,2019-2020,01,1 wk ahead cum death,quantile,56.0,,,,0.05,,,,
223840,COVIDhub-baseline,2020-04-06,2019-2020,01,1 wk ahead cum death,quantile,52.0,,,,0.025,,,,


In [48]:
# Forecast Series getter
def getFS(timezero, type="all", model="all", state="all"):
    """Gets the weekly forecasted series by model, state and forecast date

    Parameters
    ----------
    timezero : str or datetime
        The date when the forecast was performed. If a string, provide the format '%Y-%m-%d'. This argument is compulsory.
    type : str
        'cum case' for cumulative cases.
        'cum death' for cumulative deaths.
        'inc case' for incidental cases.
        'inc death' for incidental deaths.
        'all' for all types
    model : str
        The model of the forecast. Choose 'all' for returning every model.
    state : str
        The target state of the forecast (full name). Choose 'all' for returning every state.

    Returns
    -------
    pandas.DataFrame
        a data frame containing 5 series:
           - point series
           - 2.5% quantile
           - 25% quantile
           - 75% quantile
           - 97.5% quantile
        Indexes are of class pandas.DatetimeIndex.
    """

    data = pd.read_parquet("data/"+str(timezero)+".parquet")
    n = len(data)
    c1 = data['target'].apply(str.endswith, args=(type, 0)) if type != "all" else pd.Series([True]*n)
    c2 = data['model'] == model if model != "all" else pd.Series([True]*n)
    c3 = data['unit'] == locations[state] if state != "all" else pd.Series([True]*n)
    
    data = data[c1 & c2 & c3]

    if (data.empty):
        return None
    out = pd.DataFrame(data.iloc[:, -5:].values,
                       columns=data.columns[-5:],
                       index=pd.to_datetime(data.loc[:,'timezero'], format="%Y-%m-%d")
                       )

    return data


# Example: getFS(type="inc case", model="all", state="all", timezero="2020-04-06")
# Example: getFS(type="cum death", model="LANL-GrowthRate", state="Alabama", timezero="2020-04-06")

In [104]:
out = pd.DataFrame()
out[['target', 'value']] = test[test['class']=='point'].loc[:,['target', 'value']]
out['value'] = out['value'].astype(float).astype(int)
for q in quantiles:
    out['quantile' + str(q)] = test[(test['class']=='quantile') & (test['quantile']==str(q))].loc[:,'value'].reset_index(drop=True).astype(float).astype(int)
out

Unnamed: 0,target,value,quantile0.99,quantile0.95,quantile0.9,quantile0.85,quantile0.8,quantile0.75,quantile0.7,quantile0.65,...,quantile0.35,quantile0.3,quantile0.25,quantile0.2,quantile0.15,quantile0.1,quantile0.05,quantile0.01,quantile0.975,quantile0.025
0,7 wk ahead cum death,1173,1284.0,428.0,213,124.0,79.0,53,40.0,2790.0,...,1469.0,1351.0,1223,1104.0,989.0,869,723.0,556.0,370603.0,2572.0
1,7 wk ahead cum death,1968,73815.0,36607.0,19205,12364.0,8804.0,6453,3192.0,3583.0,...,1040.0,857.0,705,572.0,461.0,346,237.0,132.0,45.0,0.0
2,7 wk ahead cum death,35,10088.0,3819.0,1867,1123.0,802.0,589,4673.0,355.0,...,107.0,85.0,71,59.0,49.0,40,31.0,25.0,895.0,0.0
3,7 wk ahead cum death,503,7002.0,2447.0,1308,861.0,607.0,460,442.0,279.0,...,82.0,67.0,55,45.0,33.0,25,18.0,12.0,21.0,0.0
4,7 wk ahead cum death,55382,154878.0,47214.0,23419,15392.0,10858.0,7939,348.0,5001.0,...,1746.0,1504.0,1267,1050.0,873.0,698,537.0,343.0,103.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26414,1 wk ahead cum death,468,,,27,,,26,,,...,,,20,,,73,,,,
26415,1 wk ahead cum death,23,,,105,,,93,,,...,,,85,,,61,,,,
26416,1 wk ahead cum death,89,,,9,,,38,,,...,,,37,,,35,,,,
26417,1 wk ahead cum death,8,,,40,,,8,,,...,,,7,,,6,,,,


In [99]:
q = 0.15
test[(test['class']=='quantile') & (test['quantile']==str(q))].loc[:,'value'].astype(float).astype(int).reset_index(drop=True)

0      989
1      461
2       49
3       33
4      873
      ... 
822     18
823     76
824     37
825      6
826     72
Name: value, Length: 827, dtype: int32

In [92]:
out.isna().sum()

point                0
quantile0.99     26419
quantile0.95     26419
quantile0.9      26419
quantile0.85     26419
quantile0.8      26419
quantile0.75     26419
quantile0.7      26419
quantile0.65     26419
quantile0.6      26419
quantile0.55     26419
quantile0.5      26419
quantile0.45     26419
quantile0.4      26419
quantile0.35     26419
quantile0.3      26419
quantile0.25     26419
quantile0.2      26419
quantile0.15     26419
quantile0.1      26419
quantile0.05     26419
quantile0.01     26419
quantile0.975    26419
quantile0.025    26419
dtype: int64