In [None]:
import numpy as np
import pandas as pd

# Mount Google Drive inside the Colab runtime (force_remount refreshes an existing mount)
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

# Root folder of the raw Synthea export; the loading cell expects `scenario0N/csv/<table>.csv` below it
PATH = '/content/drive/MyDrive/MC936/P2/data/raw/synthea'

Mounted at /content/drive


# Preparação dos dados

Como pode ser notado nas discussões anteriores, o Synthea gera um conjunto de dados amplamente rico e detalhado, o que possibilita os mais diversos tipos de análise. Focando no nosso objetivo inicial de prever a morte, até um mês após a data de diagnóstico, de indivíduos infectados pelo COVID-19, acreditamos que algumas das tabelas fornecidas não se fazem necessárias. Desse modo, decidimos eliminar do modelo as tabelas: `allergies`, `careplans`, `claims`, `claims_transactions`, `devices`, `imaging_studies`, `medications`, `observations`, `organizations`, `payer_transitions`, `payers`, `providers` e `supplies`.

Assim, faremos uso de somente 5 tabelas: `conditions`, `encounters`, `immunizations`, `patients`, e `procedures`.


In [None]:
# Exclusive upper bound of the scenarios to process: every loop below iterates range(1, scenario_limit)
scenario_limit = 4 # Must be one more than the last scenario wanted: scenario_limit=4 processes scenarios 1 to 3

In [None]:
# Load every table of interest for every scenario in range
selected_data = ['conditions',
                 'encounters',
                 'immunizations',
                 'patients',
                 'procedures']

# Table name -> list of DataFrames, one per scenario (position 0 holds scenario 1)
all_data = {data_source: list() for data_source in selected_data}

# Walk the scenarios in order and read each table's CSV export
for scenario in range(1, scenario_limit):
  scenario_dir = f'{PATH}/scenario0{scenario}/csv'
  for data_source in selected_data:
    table = pd.read_csv(f'{scenario_dir}/{data_source}.csv')
    all_data[data_source].append(table)

Partindo da tabela `conditions`, que apresenta os diagnósticos e demais condições dos pacientes, selecionamos os IDs dos pacientes com COVID-19, chegando a 86 indivíduos com esse quadro clínico no primeiro cenário, 96 no segundo e 950 no terceiro cenário.

In [None]:
# Collect, per scenario, the ids of all patients that have a COVID-19 diagnosis
covid_patients_ids = dict()
for scenario in range(1, scenario_limit):
  covid_conditions = all_data['conditions'][scenario-1].query('DESCRIPTION == "COVID-19"')
  # A patient may have several COVID-19 rows; keep each id once, in order of first appearance
  covid_patients_ids[f'scenario0{scenario}'] = covid_conditions['PATIENT'].unique().tolist()

In [None]:
# Report how many distinct COVID-19 patients each scenario contains
for scenario in range(1, scenario_limit):
  patient_count = len(covid_patients_ids[f'scenario0{scenario}'])
  print(f'Números de pacientes diagnosticados com COVID-19 no cenário 0{scenario}: %d' % patient_count)

Números de pacientes diagnosticados com COVID-19 no cenário 01: 86
Números de pacientes diagnosticados com COVID-19 no cenário 02: 96
Números de pacientes diagnosticados com COVID-19 no cenário 03: 950


Tendo conhecimento desses pacientes, filtramos os dados presentes nas demais tabelas visando reduzir o escopo a somente esses indivíduos.

In [None]:
# Keep, in every table, only the rows that belong to COVID-19 patients
covid_patients_data = {data_source: list() for data_source in selected_data}

for data_source in selected_data:
  # The `patients` table keys people by `Id`; every other table uses `PATIENT`
  id_column = 'Id' if data_source == 'patients' else 'PATIENT'
  for scenario in range(1, scenario_limit):
    ids = covid_patients_ids[f'scenario0{scenario}']
    table = all_data[data_source][scenario-1]
    # In DataFrame.query, `column == @list` behaves as a membership test
    covid_patients_data[data_source].append(table.query(f'{id_column} == @ids'))


Mesmo com esse número reduzido de datasets, acreditamos que uma parcela significativa dos dados presentes em cada tabela não irá agregar ao modelo proposto. Dessa forma, realizamos a exclusão de algumas das colunas das tabelas. Como resultado final, temos que as tabelas `conditions`, `immunizations` e `procedures` devem apresentar somente as colunas PATIENT e DESCRIPTION. Já a tabela `patients` deve trazer os dados:
Id, BIRTHDATE, DEATHDATE, RACE, ETHNICITY, GENDER, CITY, STATE. Por fim, da tabela de `encounters` devemos utilizar somente os dados de START, PATIENT e DESCRIPTION.


In [None]:
# Columns to discard from each table because the model does not use them
remove = {
  'conditions': ['START', 'STOP', 'ENCOUNTER', 'CODE'],
  'encounters': ['Id', 'STOP', 'CODE','ORGANIZATION', 'PROVIDER', 'PAYER', 'ENCOUNTERCLASS', 'BASE_ENCOUNTER_COST', 'TOTAL_CLAIM_COST', 'PAYER_COVERAGE', 'REASONCODE', 'REASONDESCRIPTION'],
  'immunizations': ['BASE_COST', 'DATE', 'ENCOUNTER', 'CODE'],
  'patients': ['SSN', 'DRIVERS', 'PASSPORT', 'PREFIX', 'FIRST', 'LAST', 'SUFFIX', 'MAIDEN', 'MARITAL', 'BIRTHPLACE', 'ADDRESS', 'COUNTY', 'ZIP', 'LAT', 'LON', 'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE'],
  'procedures': ['START', 'STOP', 'ENCOUNTER', 'CODE', 'REASONCODE', 'REASONDESCRIPTION', 'BASE_COST'],
}

for data_source in selected_data:
  tables = covid_patients_data[data_source]
  for scenario in range(1, scenario_limit):
    # drop() returns a new frame; the index is rebuilt because the earlier filtering left gaps in it
    trimmed = tables[scenario-1].drop(columns = remove[data_source])
    tables[scenario-1] = trimmed.reset_index(drop = True)

Também, julgamos prático existir uma coluna contendo a idade dos pacientes, desse modo, partindo do ano de nascimento e do ano atual geramos uma nova coluna para a tabela de `patients`, apresentando a idade do indivíduo.

In [None]:
# Add an AGE column to the patients table
import datetime

# NOTE: AGE is a plain difference of calendar years measured against the day the
# notebook runs, so it changes from one year to the next and ignores month/day
today = datetime.datetime.today()

for scenario in range(1, scenario_limit):
  patients = covid_patients_data['patients'][scenario-1]
  patients['BIRTHDATE'] = pd.to_datetime(patients['BIRTHDATE'], format = '%Y-%m-%d')
  patients['AGE'] = today.year - patients['BIRTHDATE'].dt.year

Como nosso objetivo visa analisar a morte de pacientes diagnosticados com COVID-19 até um mês após o diagnóstico, serão adicionadas à tabela `patients` algumas colunas para auxiliar em análises envolvendo esse tema. Entre elas, uma coluna contendo a data do diagnóstico, uma coluna contendo a data de morte ou a data da última consulta realizada pelo paciente e uma coluna booleana, apresentando valor 1 se o paciente morreu dentro do prazo de um mês e 0 caso contrário. Além dessas colunas, também vamos adicionar uma coluna TIME (DAYS), contendo o número de dias entre o diagnóstico de COVID-19 e a morte de um determinado paciente; para aqueles que não vieram a óbito, usamos a diferença entre o diagnóstico de COVID-19 e a data da sua última consulta registrada, limitando o valor máximo a 30 dias. Todas essas adições visam permitir a realização de uma análise de sobrevivência para validação do modelo.

In [None]:
# Add the date of the COVID-19 diagnosis to the patients table
for scenario in range(1, scenario_limit):
  covid_conditions = all_data['conditions'][scenario-1].query('DESCRIPTION == "COVID-19"')

  # Reduce to (diagnosis date, patient) and keep the earliest diagnosis of each patient
  first_diagnosis = (
    covid_conditions
    .drop(columns=['STOP', 'ENCOUNTER', 'CODE', 'DESCRIPTION'])
    .assign(START=lambda frame: pd.to_datetime(frame['START'], format = '%Y-%m-%d'))
    .sort_values('START')
    .drop_duplicates('PATIENT', keep='first')
    .rename(columns = {'START':'COVID DIAGNOSIS DATE'})
  )

  # `patients` names its key `Id`; rename it so both frames share the `PATIENT` merge key
  patients = covid_patients_data['patients'][scenario-1].rename(columns = {'Id':'PATIENT'})
  covid_patients_data['patients'][scenario-1] = pd.merge(patients, first_diagnosis, how = 'outer', on = ['PATIENT'])

In [None]:
# Add the date of each patient's most recent encounter to the patients table
for scenario in range(1, scenario_limit):
  encounters = covid_patients_data['encounters'][scenario-1]

  # Only the first 10 characters of START (the YYYY-MM-DD part) are parsed
  start_day = encounters['START'].astype(str).str[0:10]
  encounters['START'] = pd.to_datetime(start_day, format = '%Y-%m-%d')

  # One row per patient: the latest encounter; this replaces the stored encounters table
  last_encounter = (
    encounters
    .sort_values('START')
    .drop_duplicates('PATIENT', keep='last')
    .rename(columns = {'START':'LAST ENCOUNTERS OR DIED DATE'})
  )
  covid_patients_data['encounters'][scenario-1] = last_encounter

  patients = covid_patients_data['patients'][scenario-1]
  covid_patients_data['patients'][scenario-1] = pd.merge(patients, last_encounter.drop(columns=['DESCRIPTION']), how = 'outer', on = ['PATIENT'])

In [None]:
# Add the 'DIED IN ONE MONTH' column: 1 if the patient died of COVID-19 within
# 30 days of the COVID-19 diagnosis, 0 otherwise. Also overwrite
# 'LAST ENCOUNTERS OR DIED DATE' with the death date for patients who died of COVID-19.
covid_patients_death = dict()

for scenario in range(1, scenario_limit):
  patients = covid_patients_data['patients'][scenario-1]

  # Death certificates whose recorded reason is COVID-19 (taken from the unfiltered encounters table)
  death_certificates = all_data['encounters'][scenario-1].query('DESCRIPTION == "Death Certification" & REASONDESCRIPTION == "COVID-19"')
  covid_patients_death[f'scenario0{scenario}'] = death_certificates

  # Row mask over the patients table; DEATHDATE is parsed separately so the stored column is left untouched
  died_of_covid = patients['PATIENT'].isin(death_certificates['PATIENT'])
  death_date = pd.to_datetime(patients['DEATHDATE'], format = '%Y-%m-%d')

  # For patients who died of COVID-19 the follow-up ends at the death date, not at the last encounter
  patients.loc[died_of_covid, 'LAST ENCOUNTERS OR DIED DATE'] = death_date[died_of_covid]

  # Flag a death only when it happened at most 30 days after the diagnosis.
  # A COVID-19 death certificate alone is not enough: later deaths must stay 0.
  # Comparisons against missing dates evaluate to False, so those rows stay 0 as well.
  time_to_death = death_date - patients['COVID DIAGNOSIS DATE']
  died_in_one_month = died_of_covid & (time_to_death <= pd.Timedelta(days=30))
  patients['DIED IN ONE MONTH'] = died_in_one_month.astype(int)

  display(patients)

Unnamed: 0,PATIENT,BIRTHDATE,DEATHDATE,RACE,ETHNICITY,GENDER,CITY,STATE,AGE,COVID DIAGNOSIS DATE,LAST ENCOUNTERS OR DIED DATE,DIED IN ONE MONTH
0,c87c02ef-6b7a-224c-4513-1b85e19573b9,1998-11-21,,white,nonhispanic,F,Everett,Massachusetts,24,2020-11-25,2021-11-13,0
1,4868d84d-7a09-477a-da7c-3fbb8edf3e19,1989-10-28,,white,nonhispanic,M,Lowell,Massachusetts,33,2021-01-01,2021-04-03,0
2,f2e5bd39-dc31-0471-1028-adee47891760,1976-05-17,,white,nonhispanic,F,South Hadley,Massachusetts,46,2020-11-26,2021-10-05,0
3,77e53fde-d641-fa26-5792-7a92af4fa260,2018-10-19,,white,nonhispanic,M,Mansfield,Massachusetts,4,2021-02-17,2022-03-25,0
4,850c346b-9bed-1a8b-c452-165705841a8b,1995-01-10,,white,nonhispanic,M,Revere,Massachusetts,27,2020-11-26,2020-11-26,0
...,...,...,...,...,...,...,...,...,...,...,...,...
81,3bf00271-066d-4e4a-3dbe-11dad8f3d685,1984-08-31,,white,nonhispanic,F,Stoughton,Massachusetts,38,2021-01-17,2022-01-28,0
82,393030d3-4eb2-9ecd-a4c3-4770ea39da00,1954-09-03,,white,nonhispanic,F,Worcester,Massachusetts,68,2021-01-02,2021-08-14,0
83,9d24509c-42e9-b71a-6466-fbe64f52855c,1947-10-18,2021-02-06,white,nonhispanic,F,Revere,Massachusetts,75,2021-01-18,2021-02-06,1
84,b4625a05-a191-6a29-0f4c-4dfda1c96a9f,1996-10-27,,white,hispanic,M,Cambridge,Massachusetts,26,2021-04-07,2022-01-02,0


Unnamed: 0,PATIENT,BIRTHDATE,DEATHDATE,RACE,ETHNICITY,GENDER,CITY,STATE,AGE,COVID DIAGNOSIS DATE,LAST ENCOUNTERS OR DIED DATE,DIED IN ONE MONTH
0,f7add965-c9db-08c8-d5af-ab7077806c8f,1971-07-24,,hawaiian,nonhispanic,F,Juneau,Alaska,51,2021-03-19,2022-03-19,0
1,a9db9737-d8bb-a995-2686-2e915848c8b0,1973-08-27,,white,nonhispanic,M,Juneau,Alaska,49,2020-06-29,2022-04-11,0
2,33e93efd-d739-814d-1a1e-d64d4f4aba11,1989-04-17,,white,nonhispanic,F,Juneau,Alaska,33,2021-02-16,2022-04-04,0
3,cd3ea872-5043-f706-cb3a-cdbd16407452,1963-04-08,,native,nonhispanic,F,Juneau,Alaska,59,2020-08-22,2022-01-24,0
4,65ba83d6-8042-7789-f9cb-a60c0c4e7daa,1967-01-08,,white,nonhispanic,F,Juneau,Alaska,55,2020-05-21,2022-02-06,0
...,...,...,...,...,...,...,...,...,...,...,...,...
91,31a2418e-0501-045f-9471-c27f0d649034,2006-04-21,,white,nonhispanic,M,Juneau,Alaska,16,2020-07-20,2021-08-26,0
92,c157e973-3a07-ef2b-cced-01eea3351ccd,1938-07-27,2021-02-18,native,nonhispanic,F,Juneau,Alaska,84,2021-02-01,2021-02-18,1
93,2e8b6a1b-cc23-85d9-de8f-88f00f32a508,1963-06-06,,white,nonhispanic,M,Juneau,Alaska,59,2020-11-09,2021-07-22,0
94,4323a18a-a47c-c670-98a9-be3f3ab86484,1969-07-18,,white,nonhispanic,F,Juneau,Alaska,53,2021-01-30,2021-07-30,0


Unnamed: 0,PATIENT,BIRTHDATE,DEATHDATE,RACE,ETHNICITY,GENDER,CITY,STATE,AGE,COVID DIAGNOSIS DATE,LAST ENCOUNTERS OR DIED DATE,DIED IN ONE MONTH
0,2e5444e7-9172-5042-9a0a-d279ce78a299,1958-04-17,,white,nonhispanic,M,Longview Heights,Washington,64,2020-06-14,2021-07-01,0
1,97d16c67-23b2-5fd8-e976-6f66b7a33456,1971-01-10,,white,nonhispanic,M,Tacoma,Washington,51,2020-12-02,2022-04-03,0
2,5aacafce-7bf1-c124-c1d1-a21f5497d572,1983-10-06,,white,nonhispanic,M,Seattle,Washington,39,2021-09-08,2022-03-24,0
3,42a4de55-9855-cf3f-3b5d-314b3d0f5dbb,1976-09-13,,native,nonhispanic,F,Burlington,Washington,46,2020-12-08,2021-10-15,0
4,24c13445-12aa-9efc-ae8b-66fa5aa46d19,1959-11-16,,native,nonhispanic,M,Silverdale,Washington,63,2021-03-23,2021-07-05,0
...,...,...,...,...,...,...,...,...,...,...,...,...
945,e6dea0dd-d981-fa6d-be80-11a10ac3d0e7,1964-08-21,,white,nonhispanic,M,Vancouver,Washington,58,2021-09-03,2021-10-01,0
946,4b8582c3-2173-d038-d48e-3c112f005dbb,2014-06-13,,white,nonhispanic,M,Spokane,Washington,8,2020-12-02,2021-06-04,0
947,a46d104f-a2a7-2d88-0315-0a5b946cce27,2003-07-04,,white,nonhispanic,F,Snohomish,Washington,19,2021-01-02,2021-09-14,0
948,aa4276a8-263e-f172-22f7-c1fe1a2ad8f3,1968-09-14,,asian,nonhispanic,M,Seattle,Washington,54,2021-01-07,2022-03-05,0


In [None]:
# Compute the number of days between the COVID-19 diagnosis and the death / last encounter, capped at 30
for scenario in range(1, scenario_limit):
  patients = covid_patients_data['patients'][scenario-1]
  elapsed_days = (patients['LAST ENCOUNTERS OR DIED DATE'] - patients['COVID DIAGNOSIS DATE']).dt.days

  # Values of 30 or more (and missing values, which fail the `< 30` test) become 30.
  # The result is assigned back explicitly: calling where(..., inplace=True) on a
  # column selection is chained assignment and does not update the frame under copy-on-write.
  patients['TIME (DAYS)'] = elapsed_days.where(elapsed_days < 30, 30)



Essa etapa consiste somente em uma preparação inicial dos dados, facilitando o tratamento a ser realizado usando o software [Orange](https://orangedatamining.com/). Para os modelos usados, tabelas com colunas booleanas são mais amigáveis. Desse modo, para cada cenário será gerada uma grande tabela, em que cada coluna corresponde a uma situação específica do paciente, como a aplicação de uma determinada vacina, e apresenta o valor 0 (o paciente não recebeu a vacina) ou 1 (o paciente recebeu a vacina).

In [None]:
# Convert the event tables to a boolean layout: one row per patient, one 0/1 column per DESCRIPTION value
selected_data_bool = ["conditions",
                      "immunizations",
                      "procedures"]

# Table name -> list of boolean DataFrames, one per scenario
aux_data = {data_source: list() for data_source in selected_data_bool}

for data_source in selected_data_bool:
  for scenario in range(1, scenario_limit):
    records = covid_patients_data[data_source][scenario-1]

    # One indicator column per distinct DESCRIPTION; taking the max per patient
    # yields 1 when the patient has at least one matching row and 0 otherwise
    indicators = records['DESCRIPTION'].str.get_dummies()
    flags = indicators.groupby(records['PATIENT'], sort=False).max()

    # Order rows like the scenario's COVID-19 id list, skipping patients without rows in this table
    ids = pd.Index(covid_patients_ids[f'scenario0{scenario}'])
    flags = flags.reindex(ids[ids.isin(flags.index)])

    # PATIENT becomes the first column again; built in one pass instead of appending
    # row by row (DataFrame.append no longer exists in pandas 2.0). An empty table
    # still yields a frame with a PATIENT column, so the later merge keeps working.
    aux_data[data_source].append(flags.astype(np.int64).rename_axis('PATIENT').reset_index())

In [None]:
# Join the boolean tables of each scenario and attach the patient attributes
final_all_data = list()

for scenario in range(1, scenario_limit):
  # Exact-label rename: only a column literally named `Id` becomes `PATIENT`
  # (a substring replace would also rewrite any other label containing "Id")
  patients = covid_patients_data['patients'][scenario-1].rename(columns = {'Id': 'PATIENT'})
  covid_patients_data['patients'][scenario-1] = patients

  # Outer-merge the boolean tables so a patient missing from one table is still kept
  merged = aux_data[selected_data_bool[0]][scenario-1]
  for data_source in selected_data_bool[1:]:
    merged = pd.merge(merged, aux_data[data_source][scenario-1], how = 'outer', on = ['PATIENT'])

  # Cells left empty by the outer merges mean "no such event": store them as 0
  merged = merged.fillna(0)

  # Cast through astype with a dtype mapping; assigning a cast block back through
  # .loc[:, cols] may keep the previous (float) dtype in recent pandas versions
  flag_columns = merged.columns.drop('PATIENT')
  merged = merged.astype({column: np.int64 for column in flag_columns})

  final_all_data.append(pd.merge(merged, patients, how = 'outer', on = ['PATIENT']))

As tabelas finais devem ter o formato mostrado abaixo:

In [None]:
# Preview the final table built for the first scenario (position 0 holds scenario 1)
final_all_data[0]

Unnamed: 0,PATIENT,Acquired coagulation disorder (disorder),Acute allergic reaction,Acute bacterial sinusitis (disorder),Acute bronchitis (disorder),Acute deep venous thrombosis (disorder),Acute myeloid leukemia disease (disorder),Acute pulmonary embolism (disorder),Acute respiratory distress syndrome (disorder),Acute respiratory failure (disorder),...,RACE,ETHNICITY,GENDER,CITY,STATE,AGE,COVID DIAGNOSIS DATE,LAST ENCOUNTERS OR DIED DATE,DIED IN ONE MONTH,TIME (DAYS)
0,c87c02ef-6b7a-224c-4513-1b85e19573b9,0,0,0,0,0,1,0,0,0,...,white,nonhispanic,F,Everett,Massachusetts,24,2020-11-25,2021-11-13,0,30
1,4868d84d-7a09-477a-da7c-3fbb8edf3e19,0,0,0,0,0,0,0,0,0,...,white,nonhispanic,M,Lowell,Massachusetts,33,2021-01-01,2021-04-03,0,30
2,f2e5bd39-dc31-0471-1028-adee47891760,0,0,0,0,0,0,0,0,0,...,white,nonhispanic,F,South Hadley,Massachusetts,46,2020-11-26,2021-10-05,0,30
3,77e53fde-d641-fa26-5792-7a92af4fa260,0,0,0,0,0,1,0,0,0,...,white,nonhispanic,M,Mansfield,Massachusetts,4,2021-02-17,2022-03-25,0,30
4,850c346b-9bed-1a8b-c452-165705841a8b,0,0,0,1,0,0,0,0,0,...,white,nonhispanic,M,Revere,Massachusetts,27,2020-11-26,2020-11-26,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,3bf00271-066d-4e4a-3dbe-11dad8f3d685,0,0,0,0,0,0,1,0,0,...,white,nonhispanic,F,Stoughton,Massachusetts,38,2021-01-17,2022-01-28,0,30
82,393030d3-4eb2-9ecd-a4c3-4770ea39da00,0,0,0,1,0,0,1,0,0,...,white,nonhispanic,F,Worcester,Massachusetts,68,2021-01-02,2021-08-14,0,30
83,9d24509c-42e9-b71a-6466-fbe64f52855c,1,0,0,1,0,0,1,1,1,...,white,nonhispanic,F,Revere,Massachusetts,75,2021-01-18,2021-02-06,1,19
84,b4625a05-a191-6a29-0f4c-4dfda1c96a9f,0,0,0,0,0,0,0,0,0,...,white,hispanic,M,Cambridge,Massachusetts,26,2021-04-07,2022-01-02,0,30


In [None]:
# # Save the processed tables locally (disabled; uncomment to download one CSV per scenario)
# from google.colab import files
# for scenario in range(1, scenario_limit):
#   final_all_data[scenario-1].to_csv(f'concatenated_data_scenario0{scenario}.csv', encoding = 'utf-8-sig', index=False)
#   files.download(f'concatenated_data_scenario0{scenario}.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# # Save the processed tables to Google Drive (disabled; uncomment to write one CSV per scenario)
# SAVE_PATH = '/content/drive/MyDrive/MC936/P2/data/'


# for scenario in range(1, scenario_limit):
#   final_all_data[scenario-1].to_csv(f'{SAVE_PATH}/processed/scenario0{scenario}/concatenated_data_scenario0{scenario}.csv',
#                       encoding = 'utf-8-sig', index = False)