# Notebook to update data for the forecast

In [1]:
import numpy as np
import pandas as pd
from epiweeks import Week
import matplotlib.pyplot as plt
import os
os.getcwd()

'/Users/eduardoaraujo/Documents/Github/transfer-learning-forecast/forecast'

## LSTM models for the states and "Macroregionais de saúde"

### Path where the cases and climate data are saved:  

In [2]:
# Root folder of the input data, given relative to the working directory of the notebook.
PATH = '../data'

### The dataframe below is used by the functions to link each municipality geocode to its health macroregion code:
    

In [3]:
# Table linking each municipality geocode to its health region, health macroregion and state
# (see the columns displayed below).
dfs = pd.read_csv('../macro_saude.csv')

dfs.head()

Unnamed: 0.1,Unnamed: 0,geocode,name_muni,name_region,code_region,name_macro,code_macro,state
0,0,1100015,Alta Floresta D'Oeste,Zona da Mata,11005,Cacoal,1101,RO
1,1,1100023,Ariquemes,Vale do Jamari,11001,Porto Velho,1102,RO
2,2,1100031,Cabixi,Cone Sul,11006,Cacoal,1101,RO
3,3,1100049,Cacoal,Café,11002,Cacoal,1101,RO
4,4,1100056,Cerejeiras,Cone Sul,11006,Cacoal,1101,RO


In [4]:
def add_new_columns(df):
    '''
    Add calendar features and the first difference of the case columns to the table.

    The returned table contains every column of `df` plus:

    * `month`: month number taken from the index;
    * `SE`: number of the epidemiological week of each index date;
    * `diff_<col>`: first difference of every column whose name starts with `casos`
      (missing in the first row, which has no previous value).

    :param df: pd.DataFrame. Must be indexed by dates (the index must expose `.month`).
    :returns: pd.DataFrame. A new table; the table received as input is not modified.
    '''

    # work on a copy so that the table passed by the caller is left untouched
    df = df.copy()

    df['month'] = df.index.month

    # the week tuple is (year, week); only the week number is kept
    df['SE'] = [Week.fromdate(date).weektuple()[1] for date in df.index]

    # NOTE(review): hard-coded override kept from the original code; the reason for forcing
    # week 15 on this date is not documented in the notebook -- TODO confirm
    df.loc[df.index == '2018-04-04', 'SE'] = 15

    case_columns = df.columns[df.columns.str.startswith('casos')]

    # np.diff drops the first element, hence the index starting at the second row
    diff_series = [df] + [
        pd.DataFrame(data = np.diff(df[col], 1), index = df.index[1:], columns = [f'diff_{col}'])
        for col in case_columns
    ]

    return pd.concat(diff_series, axis = 1, join = 'outer')
    
    
    

In [5]:
def get_geocodes_and_state(macro): 
    '''
    Look up the municipalities and the state of a health macroregion.

    The lookup table is read from `../macro_saude.csv` on every call.

    :param macro: int. Four-digit code of the health macroregion.
    :returns: tuple. Array with the unique geocodes of the macroregion and the state
              abbreviation taken from the first matching row of the table.
    '''

    macro_table = pd.read_csv('../macro_saude.csv')

    # rows of the lookup table that belong to the requested macroregion
    macro_rows = macro_table.loc[macro_table.code_macro == macro]

    state = macro_rows.state.values[0]
    geocodes = macro_rows.geocode.unique()

    return geocodes, state

In [6]:
def split_geocodes(geocodes):

    '''
    Split the geocodes between the cities with population above and up to 30k in 2022.

    The population table is read from `poptcu2010-2022_rgi.csv` (in the working directory)
    on every call.

    :param geocodes: list or array of int. Seven-digit IBGE codes of Brazilian cities.
    :returns: tuple `(g_up, g_low)` of arrays. `g_low` holds the codes found in the population
              table with `POP22 <= 30000`; `g_up` holds every other code of `geocodes`
              (codes that are absent from the population table included).
    '''

    # accept plain lists as well as arrays (the size check below relies on `.shape`)
    geocodes = np.asarray(geocodes)

    dfpop = pd.read_csv('poptcu2010-2022_rgi.csv')

    is_small = dfpop.CODMUN7.isin(geocodes) & (dfpop.POP22 <= 30000)
    g_low = dfpop.loc[is_small].CODMUN7.unique()

    # setdiff1d returns the sorted unique codes of `geocodes` that are not in g_low
    g_up = np.setdiff1d(geocodes, g_low)

    # both groups hold unique codes, so the sizes can only disagree when `geocodes`
    # contains repeated codes
    if geocodes.shape[0] != g_low.shape[0] + g_up.shape[0]:

        print('Error subtracting geocodes')

    return g_up, g_low

In [7]:
def transform_data(df, geocode, geo_col = 'municipio_geocodigo'): 
    '''
    Select the rows of one region and tag the remaining columns with the region code.

    :param df: pd.DataFrame. Table that contains the column geo_col.
    :param geocode: Code of the region to keep. Must have the same type as the values of geo_col.
    :param geo_col: str. Name of the column of df used to filter the geocode value.
    :returns: pd.DataFrame. The rows where geo_col equals geocode, without the geo_col column
              and with `_{geocode}` appended to the name of every other column.
    '''

    region_rows = df.loc[df[geo_col] == geocode]

    # the region code moves from a column of its own into the column names
    region_data = region_rows.drop(columns = geo_col)
    region_data.columns = region_data.columns + f'_{geocode}'

    return region_data

In [8]:
# Names of the climate predictors: minimum/maximum readings, precipitation totals, rainy days
# and the mean/amplitude features.
predictors_clim = ['temp_min', 'temp_max', 'umid_min', 'umid_max',
                   'pressao_min', 'pressao_max', 'precip_tot', 'rainy_days',
                   'temp_mean', 'temp_amp','umid_mean','umid_amp',
                   'pressao_mean']

def predictors_ep_macro(macro): 
    '''
    Organize in a table the epidemiological predictors related to a specific health macroregion.

    The table is indexed by week (`W-SUN` resampling) and holds:

    * the epidemiological columns of every city that `split_geocodes` does not classify as
      small (population up to 30k), suffixed with the city geocode;
    * `casos_est_{macro}`: weekly total of the estimated cases of all the cities of the macroregion;
    * `casos_est_small`, `p_rt1_small` and `Rt_small`: weekly sum of the cases and weekly mean
      of the other two columns over the small cities.

    :params macro: int. A four digit number
    :returns: pd.DataFrame. Columns that are entirely NaN are removed.
    '''

    geocodes, state = get_geocodes_and_state(macro)

    # get epidemiological factors 
    df_ep = pd.read_parquet(f'{PATH}/cases/{state}_dengue.parquet',
                           columns = ['data_iniSE', 'casos_est', 'municipio_geocodigo', 'p_rt1', 'Rt', 'p_inc100k'])

    # select only the geocodes included in the health macroregion
    df_ep = df_ep.loc[df_ep.municipio_geocodigo.isin(geocodes)]

    df_ep = df_ep.sort_index()

    # split the geocodes between cities with population up and below 30k in 2022
    g_up, g_low = split_geocodes(geocodes)

    # one group of columns for each city with population above 30k
    list_data_ep = [transform_data(df_ep, g) for g in g_up]

    # total weekly cases of this health macroregion
    data_macro_ep = df_ep[['casos_est']].resample('W-SUN').sum()
    data_macro_ep.columns = data_macro_ep.columns + f'_{macro}'
    list_data_ep.append(data_macro_ep)

    # aggregate the data from small cities; string aliases are used because passing
    # np.sum / np.mean to agg is deprecated in recent pandas versions
    small_rows = df_ep.loc[df_ep.municipio_geocodigo.isin(g_low), ['casos_est', 'p_rt1', 'Rt']]
    data_small_cities = small_rows.resample('W-SUN').agg({'casos_est': 'sum',
                                                          'p_rt1': 'mean',
                                                          'Rt': 'mean'})
    data_small_cities.columns = data_small_cities.columns + '_small'
    list_data_ep.append(data_small_cities)

    data_ep = pd.concat(list_data_ep, axis=1, join='outer')

    # remove columns with all values nan
    data_ep = data_ep.dropna(axis =1, how = 'all')

    return data_ep


def predictors_clim_macro(macro):
    '''
    Organize in a table the climate predictors related to a specific health macroregion.

    The climate records from 2010 onwards of the cities of the macroregion are aggregated by
    city and week (`W-SUN`): sums for `precip_tot` and `rainy_days`, means for the other
    predictors. The table holds:

    * the weekly predictors of every city that `split_geocodes` does not classify as small
      (population up to 30k), suffixed with the city geocode;
    * the mean of the weekly predictors over the small cities, suffixed with `_small`.

    :params macro: int. A four digit number
    :returns: pd.DataFrame. Missing values are forward filled and columns that remain
              entirely NaN are removed.
    '''
    geocodes, state = get_geocodes_and_state(macro)

    # get climate factors. Every column of the file is read on purpose: `index` and
    # `precip_max`, used below, are not predictors, and the mean/amplitude predictors are
    # computed below. (No `columns` argument: building it with `list.append` yields None
    # and would grow the shared predictor list on every call.)
    df_clim = pd.read_parquet(f'../data/climate/{state}_climate.parquet')

    # select only the geocodes included in the health macroregion
    df_clim = df_clim.loc[df_clim.geocodigo.isin(geocodes)]

    df_clim = df_clim.loc[df_clim.index.year >= 2010]

    del df_clim['index']

    # compute other climate features 
    df_clim['temp_mean'] = (df_clim.temp_max+df_clim.temp_min)/2

    df_clim['pressao_mean'] = (df_clim.pressao_max+df_clim.pressao_min)/2

    df_clim['umid_mean'] = (df_clim.umid_max+df_clim.umid_min)/2

    df_clim['temp_amp'] = df_clim.temp_max-df_clim.temp_min
    # Rainy days: boolean flag per record, summed by week below
    df_clim['rainy_days'] = df_clim.precip_max > 0
    # Humidity amplitude
    df_clim['umid_amp'] = df_clim.umid_max - df_clim.umid_min

    # agg data by weekly since that's the time scale of the cases; string aliases are used
    # because passing np.sum / np.mean to agg is deprecated in recent pandas versions
    weekly_agg = {'temp_min': 'mean', 'temp_max': 'mean',
                  'umid_min': 'mean', 'umid_max': 'mean',
                  'pressao_min': 'mean', 'pressao_max': 'mean',
                  'precip_tot': 'sum', 'rainy_days': 'sum',
                  'temp_mean': 'mean', 'temp_amp': 'mean',
                  'umid_mean': 'mean', 'umid_amp': 'mean',
                  'pressao_mean': 'mean'}

    df_clim = df_clim.groupby('geocodigo').resample('W-SUN').agg(weekly_agg).reset_index().set_index('date')

    # split the geocodes between cities with population up and below 30k in 2022
    g_up, g_low = split_geocodes(geocodes)

    # get the predictors of each city with population above 30k
    list_data_clim = [transform_data(df_clim, g, 'geocodigo') for g in g_up]

    # aggregate the data from small cities and save the mean as predictor
    # (the keys of weekly_agg are the predictor columns, in the original order)
    small_rows = df_clim.loc[df_clim.geocodigo.isin(g_low), list(weekly_agg)]
    data_small_cities = small_rows.resample('W-SUN').mean()

    data_small_cities.columns = data_small_cities.columns + '_small'

    list_data_clim.append(data_small_cities)

    # ffill() replaces the deprecated fillna(method='ffill')
    data_clim = pd.concat(list_data_clim, axis=1, join='outer').ffill()

    # remove columns with all values nan
    data_clim = data_clim.dropna(axis =1, how = 'all')

    return data_clim


def get_data_macro(macro):
    '''
    Build the table with the climate and epidemiological predictors of a health macroregion.

    The epidemiological and the climate tables are placed side by side, aligned on their
    index (outer join), and the result is passed through `add_new_columns`. Rows with
    missing values are kept.

    :params macro: int. A four-digit number
    :returns: pd.DataFrame.
    '''

    tables = [predictors_ep_macro(macro), predictors_clim_macro(macro)]

    merged = pd.concat(tables, axis = 1, join = 'outer')

    return add_new_columns(merged)


def predictors_ep_state(state): 

    '''
    Organize in a table the epidemiological predictors related to a specific state.

    The cities are linked to their health macroregion through the module-level table `dfs`
    and aggregated by macroregion and week (`W-SUN`): sum of `casos_est`, mean of `p_rt1`
    and `Rt`. Each macroregion contributes one group of columns, suffixed with its code.
    The column `casos_est_{state}` holds the weekly total of cases of the whole state.

    :params state: str. Two letters code
    :returns: pd.DataFrame. Columns that are entirely NaN are removed.
    '''

    # get epidemiological factors 
    df_ep = pd.read_parquet(f'../data/cases/{state}_dengue.parquet',
                           columns = ['data_iniSE', 'casos_est', 'municipio_geocodigo', 'p_rt1', 'Rt'])

    df_ep = df_ep.sort_index()

    # total weekly cases of the state (it will be used as target). It is computed before the
    # merge below, which is an inner join and keeps only the cities listed in `dfs`
    data_state_ep = df_ep[['casos_est']].resample('W-SUN').sum()
    data_state_ep.columns = data_state_ep.columns + f'_{state}'

    # link the geocode and the health macroregion code 
    macro_lookup = dfs[['code_macro', 'geocode']].rename(columns = {'geocode':'municipio_geocodigo'})
    df_ep = df_ep.reset_index().merge(macro_lookup, on = 'municipio_geocodigo').set_index('data_iniSE')

    del df_ep['municipio_geocodigo']

    # resample the data based on the macroregion; string aliases are used because passing
    # np.sum / np.mean to agg is deprecated in recent pandas versions
    df_ep = df_ep.groupby('code_macro').resample('W-SUN').agg({'casos_est': 'sum',
                                                               'p_rt1': 'mean',
                                                               'Rt': 'mean'}).reset_index().set_index('data_iniSE')

    df_ep.index = pd.to_datetime(df_ep.index)

    # transform in column the data of each predictor by macroregion
    list_data_ep = [transform_data(df_ep, m, 'code_macro') for m in df_ep.code_macro.unique()]

    # the state total goes last, after the columns of the macroregions
    list_data_ep.append(data_state_ep)

    # final dataframe
    data_ep = pd.concat(list_data_ep, axis=1, join='outer')

    # remove columns with all values nan
    data_ep = data_ep.dropna(axis =1, how = 'all')

    return data_ep

def predictors_clim_state(state):
    '''
    Organize in a table the climate predictors related to a specific state.

    The climate records from 2010 onwards are linked to the health macroregions through the
    module-level table `dfs` and aggregated by macroregion and week (`W-SUN`): sums for
    `precip_tot` and `rainy_days`, means for the other predictors. Each macroregion
    contributes one group of columns, suffixed with its code.

    :params state: str. Two letters code
    :returns: pd.DataFrame. Missing values are forward filled and columns that remain
              entirely NaN are removed.
    '''

    # get climate factors. Every column of the file is read on purpose: `index` and
    # `precip_max`, used below, are not predictors, and the mean/amplitude predictors are
    # computed below. (No `columns` argument: building it with `list.append` yields None
    # and would grow the shared predictor list on every call.)
    df_clim = pd.read_parquet(f'../data/climate/{state}_climate.parquet')

    df_clim = df_clim.loc[df_clim.index.year >= 2010]

    del df_clim['index']

    # compute other climate features
    df_clim['temp_mean'] = (df_clim.temp_max+df_clim.temp_min)/2

    df_clim['pressao_mean'] = (df_clim.pressao_max+df_clim.pressao_min)/2

    df_clim['umid_mean'] = (df_clim.umid_max+df_clim.umid_min)/2

    df_clim['temp_amp'] = df_clim.temp_max-df_clim.temp_min
    # Rainy days: boolean flag per record, summed by week below
    df_clim['rainy_days'] = df_clim.precip_max > 0
    # Humidity amplitude
    df_clim['umid_amp'] = df_clim.umid_max - df_clim.umid_min

    # link the geocode and the health macroregion code 
    macro_lookup = dfs[['code_macro', 'geocode']].rename(columns = {'geocode':'geocodigo'})
    df_clim = df_clim.reset_index().merge(macro_lookup, on = 'geocodigo').set_index('date')

    del df_clim['geocodigo']

    # resample the data based on the macroregion; string aliases are used because passing
    # np.sum / np.mean to agg is deprecated in recent pandas versions
    weekly_agg = {'temp_min': 'mean', 'temp_max': 'mean',
                  'umid_min': 'mean', 'umid_max': 'mean',
                  'pressao_min': 'mean', 'pressao_max': 'mean',
                  'precip_tot': 'sum', 'rainy_days': 'sum',
                  'temp_mean': 'mean', 'temp_amp': 'mean',
                  'umid_mean': 'mean', 'umid_amp': 'mean',
                  'pressao_mean': 'mean'}

    df_clim = df_clim.groupby('code_macro').resample('W-SUN').agg(weekly_agg).reset_index().set_index('date')

    # transform in column the data of each predictor by macroregion
    list_data_clim = [transform_data(df_clim, m, 'code_macro') for m in df_clim.code_macro.unique()]

    # final dataframe
    data_clim = pd.concat(list_data_clim, axis=1, join='outer').ffill()

    # remove columns with all values nan
    data_clim = data_clim.dropna(axis =1, how = 'all')

    return data_clim


def get_data_state(state):
    '''
    Build the table with the climate and epidemiological predictors of a state.

    The epidemiological and the climate tables are placed side by side, aligned on their
    index (outer join), the result is passed through `add_new_columns` and every row with
    a missing value is dropped.

    :params state: str. Two letters code
    :returns: pd.DataFrame. Table without missing values.
    '''

    tables = [predictors_ep_state(state), predictors_clim_state(state)]

    merged = pd.concat(tables, axis = 1, join = 'outer')

    return add_new_columns(merged).dropna()


Get the data for a single health macroregion (code 3524), then for all the health macroregions: 

In [15]:
macro = 3524
# complete rows only
df1 = get_data_macro(macro).dropna()
# keep the weeks up to 2024-03-17
df1 = df1.loc[df1.index <= '2024-03-17']
df1.tail()

Unnamed: 0,casos_est_3509007,p_rt1_3509007,Rt_3509007,p_inc100k_3509007,casos_est_3509205,p_rt1_3509205,Rt_3509205,p_inc100k_3509205,casos_est_3516309,p_rt1_3516309,...,umid_amp_3528502,pressao_mean_3528502,month,SE,diff_casos_est_3509007,diff_casos_est_3509205,diff_casos_est_3516309,diff_casos_est_3516408,diff_casos_est_3528502,diff_casos_est_3524
2024-02-18,210.0,0.12573,0.896899,212.98175,226.0,0.977364,1.218578,222.6601,121.0,0.991223,...,25.826656,1.002624,2,8,-26.0,121.0,1.0,118.0,192.0,406.0
2024-02-25,99.0,6.67244e-13,0.440525,100.40568,287.0,1.0,1.582338,282.7586,104.0,0.42233,...,31.091501,1.000352,2,9,-111.0,61.0,-17.0,96.0,247.5,276.5
2024-03-03,57.0,0.0,0.312117,57.80933,344.0,1.0,1.58172,338.91626,151.0,0.98551,...,33.230786,1.001952,3,10,-42.0,57.0,47.0,-211.0,73.5,-75.5
2024-03-10,68.5,4.975072e-05,0.562886,69.47262,450.0,1.0,1.587417,443.34976,97.0,0.063349,...,30.157973,1.001821,3,11,11.5,106.0,-54.0,-133.5,295.0,225.0
2024-03-17,50.0,0.005800248,0.637856,50.709938,972.5,1.0,2.554004,958.12805,136.0,0.785357,...,37.898952,0.751208,3,12,-18.5,522.5,39.0,-79.5,470.5,934.0


In [11]:
# number of missing values per column, showing only the columns that have any
null_counts = df1.isnull().sum()
null_counts[null_counts > 0]

Series([], dtype: int64)

In [16]:

# Build and save the table of every health macroregion listed in `dfs`.
# (To restrict the run to one state, iterate over
#  dfs.loc[dfs.state=='MG'].code_macro.unique() instead.)
for macro in dfs.code_macro.unique():

    # complete rows only
    df1 = get_data_macro(macro).dropna()

    # keep the weeks up to 2024-03-17
    df1 = df1.loc[df1.index <= '2024-03-17']

    df1.to_csv(f'../data/dengue_{macro}.csv.gz')

In [13]:
# last rows of the table currently held in df1
df1.tail()

Unnamed: 0,casos_est_5300108,p_rt1_5300108,Rt_5300108,p_inc100k_5300108,casos_est_5302,temp_min_5300108,temp_max_5300108,umid_min_5300108,umid_max_5300108,pressao_min_5300108,...,rainy_days_5300108,temp_mean_5300108,temp_amp_5300108,umid_mean_5300108,umid_amp_5300108,pressao_mean_5300108,month,SE,diff_casos_est_5300108,diff_casos_est_5302
2024-02-11,20380.0,1.374456e-13,0.931367,697.14087,20380.0,19.359485,25.761157,68.678619,95.35821,1.000935,...,10.0,22.560321,6.401672,82.018414,26.679591,1.002718,2,7,-1336.0,-1336.0
2024-02-18,21404.0,0.02745462,0.981699,732.16895,21404.0,19.206797,27.725295,55.924935,91.821284,1.0006,...,9.0,23.466046,8.518497,73.87311,35.896349,1.002764,2,8,1024.0,1024.0
2024-02-25,20446.5,0.001106183,0.970408,699.41565,20446.5,19.440655,26.274437,66.258477,94.653935,0.99882,...,8.0,22.857546,6.833782,80.456206,28.395457,1.000956,2,9,-957.5,-957.5
2024-03-03,17725.0,0.0,0.858048,606.321,17725.0,19.100067,29.371403,45.660863,88.992815,0.999035,...,6.0,24.235735,10.271335,67.326839,43.331952,1.00125,3,10,-2721.5,-2721.5
2024-03-10,14927.5,0.0,0.76041,510.62662,14927.5,19.795797,27.24115,59.117753,92.778031,0.999569,...,9.0,23.518473,7.445353,75.947892,33.660278,1.001874,3,11,-2797.5,-2797.5


In [17]:

# Build and save the table of every state listed in `dfs`
# (get_data_state already drops the rows with missing values).
for state in dfs.state.unique():
    df2 = get_data_state(state)

    # keep the weeks up to 2024-03-17
    df2 = df2.loc[df2.index <= '2024-03-17']

    df2.to_csv(f'../data/dengue_{state}.csv.gz')

In [18]:
# last rows of the table currently held in df2
df2.tail()

Unnamed: 0,casos_est_5302,p_rt1_5302,Rt_5302,casos_est_DF,temp_min_5302,temp_max_5302,umid_min_5302,umid_max_5302,pressao_min_5302,pressao_max_5302,...,rainy_days_5302,temp_mean_5302,temp_amp_5302,umid_mean_5302,umid_amp_5302,pressao_mean_5302,month,SE,diff_casos_est_5302,diff_casos_est_DF
2024-02-18,21404.0,0.027455,0.981699,21404.0,19.206797,27.725295,55.924935,91.821284,1.0006,1.004928,...,9,23.466046,8.518497,73.87311,35.896349,1.002764,2,8,1024.0,1024.0
2024-02-25,20446.5,0.001106,0.970408,20446.5,19.440655,26.274437,66.258477,94.653935,0.99882,1.003091,...,8,22.857546,6.833782,80.456206,28.395457,1.000956,2,9,-957.5,-957.5
2024-03-03,17725.0,0.0,0.858048,17725.0,19.100067,29.371403,45.660863,88.992815,0.999035,1.003466,...,6,24.235735,10.271335,67.326839,43.331952,1.00125,3,10,-2721.5,-2721.5
2024-03-10,14927.5,0.0,0.76041,14927.5,19.795797,27.24115,59.117753,92.778031,0.999569,1.004178,...,9,23.518473,7.445353,75.947892,33.660278,1.001874,3,11,-2797.5,-2797.5
2024-03-17,12494.0,0.0,0.71174,12494.0,-52.865364,27.846359,59.117753,92.778031,0.749358,0.752646,...,7,-12.509503,80.711723,75.947892,40.302713,0.751002,3,12,-2433.5,-2433.5


In [16]:
# Read back one of the saved tables to inspect it.
macro = 1101 

filename_data = f'../data/dengue_{macro}.csv.gz'

# the first column of the file has no header (pandas names it 'Unnamed: 0'); it is used as the index
df = pd.read_csv(filename_data, index_col='Unnamed: 0')

df

Unnamed: 0,casos_est_1100049,p_rt1_1100049,Rt_1100049,p_inc100k_1100049,casos_est_1100189,p_rt1_1100189,Rt_1100189,p_inc100k_1100189,casos_est_1100288,p_rt1_1100288,...,umid_amp_small,pressao_mean_small,month,SE,diff_casos_est_1100049,diff_casos_est_1100189,diff_casos_est_1100288,diff_casos_est_1100304,diff_casos_est_1101,diff_casos_est_small
2010-01-10,161.0,0.000000,0.000000,187.442520,118.0,0.000000,0.000000,319.947940,210.0,0.000000e+00,...,25.645254,0.996053,1,2,-3.0,66.0,24.0,28.0,171.0,56.0
2010-01-17,136.0,0.000000,0.000000,158.336530,192.0,0.000000,0.000000,520.593260,171.0,0.000000e+00,...,23.866842,0.997954,1,3,-25.0,74.0,-39.0,11.0,22.0,1.0
2010-01-24,75.0,0.000000,0.000000,87.317940,166.0,0.000000,0.000000,450.096250,183.0,0.000000e+00,...,25.236053,0.996823,1,4,-61.0,-26.0,12.0,3.0,-9.0,63.0
2010-01-31,71.0,0.000000,0.000000,82.660990,129.0,0.000000,0.000000,349.773600,142.0,0.000000e+00,...,22.315170,0.995985,1,5,-4.0,-37.0,-41.0,-17.0,-147.0,-48.0
2010-02-07,48.0,0.000084,0.521628,55.883484,107.0,0.001141,0.684689,290.122280,84.0,9.276549e-08,...,25.328183,0.996109,2,6,-23.0,-22.0,-58.0,9.0,-78.0,16.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-28,31.0,0.999611,3.047740,33.621830,18.0,0.988946,2.584326,48.046124,6.0,7.289441e-01,...,28.899478,0.998104,1,5,19.0,10.0,2.0,8.0,68.0,29.0
2024-02-04,50.0,0.999977,2.869008,54.228760,2.0,0.011027,0.223301,5.338458,31.0,9.999913e-01,...,28.764047,0.998627,2,6,19.0,-16.0,25.0,10.0,21.0,-17.0
2024-02-11,47.0,0.971758,1.547105,50.975033,1.0,0.001075,0.101399,2.669229,39.0,9.998764e-01,...,29.139454,0.998042,2,7,-3.0,-1.0,8.0,-1.0,66.0,63.0
2024-02-18,53.0,0.863190,1.250976,57.482483,0.0,0.000000,0.000000,0.000000,201.0,1.000000e+00,...,28.582970,0.998960,2,8,6.0,-1.0,162.0,1.0,285.0,117.0


In [17]:
# number of rows of the table loaded above
df.shape[0]

736

In [18]:
# NOTE(review): two-letter state codes; how this subset was chosen is not shown in the notebook -- TODO document
s = ['AC', 'AL', 'AP', 'DF', 'RN', 'RO', 'RR', 'SE', 'TO']

In [19]:
# check whether a state code is part of the list `s`
'AC' in s

True

In [15]:
# inspect the saved forecast table of macroregion 5302 (path relative to the working directory)
pd.read_csv('forecast_tables/forecast_5302.csv.gz')

Unnamed: 0.1,Unnamed: 0,date,lower_2_5,lower_25,forecast,upper_75,upper_97_5,macroregion,prob_high,prob_low,HT,LT,HTinc,LTinc
0,0,2024-02-04,8392.25998,10541.763995,11164.928782,11749.171762,12911.138032,5302,100.0,0.0,658.800198,252.089879,21.563603,8.251312
1,1,2024-02-11,8764.017637,10505.634319,11326.954672,12074.482518,13420.374424,5302,100.0,0.0,666.253411,239.835754,21.807559,7.850215
2,2,2024-02-18,8977.330297,10390.177082,11399.544155,12325.275662,13396.019853,5302,100.0,0.0,787.000132,260.607695,25.759795,8.530114
3,3,2024-02-25,8958.071949,10279.996686,11047.885728,11608.991509,13148.232289,5302,100.0,0.0,908.66331,331.099791,29.742029,10.837435
4,4,2024-03-03,8427.867945,10234.003049,11027.127584,12017.441482,13755.418635,5302,100.0,0.0,984.478794,388.902864,32.223593,12.729424
5,5,2024-03-10,7801.468246,9295.84437,10424.235618,11170.054217,12463.743739,5302,100.0,0.0,1065.511249,441.752748,34.875918,14.459287
6,6,2024-03-17,7289.096145,8862.899973,10040.468176,10724.493884,12162.54129,5302,100.0,0.0,1378.318612,520.621707,45.114612,17.040796
7,7,2024-03-24,6917.312025,8499.924577,9082.556561,9687.321029,10947.436401,5302,100.0,0.0,1588.558081,577.431732,51.996092,18.900281
8,8,2024-03-31,6307.542885,7556.473094,8390.133066,9165.049009,10114.666764,5302,100.0,0.0,1784.461285,650.966682,58.408323,21.307199
9,9,2024-04-07,5636.194779,6930.381885,7577.106821,8243.608063,9495.361902,5302,100.0,0.0,1978.927462,715.63191,64.773517,23.423797


In [None]:
# 2.375.151
# NOTE(review): stray number left in an unexecuted cell. As code it is not valid Python and
# would stop a "Run All"; TODO confirm what it refers to, or delete the cell.