In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Fetch the repository that holds the datasets, then switch into its `dados` folder so the
# relative CSV paths used by the loading cell resolve.
# NOTE(review): re-running this cell in the same session will likely fail (the clone target
# already exists and the relative %cd no longer resolves) — confirm before relying on Run All twice.
! git clone https://github.com/Klemersoncastro/google_trends_covid
%cd google_trends_covid/dados

Cloning into 'google_trends_covid'...
remote: Enumerating objects: 49, done.[K
remote: Counting objects: 100% (49/49), done.[K
remote: Compressing objects: 100% (37/37), done.[K
remote: Total 86 (delta 20), reused 38 (delta 12), pack-reused 37[K
Unpacking objects: 100% (86/86), done.
/content/google_trends_covid/dados


In [11]:
# Load the four "Covid + Trends" tables (daily and weekly, for Brazil and for Amazonas).
# The first CSV column is used as the row index of each frame.
(
  df_casosCovidDiario_br,
  df_casosCovidSemanal_br,
  df_casosCovidDiario_am,
  df_casosCovidSemanal_am,
) = [
  pd.read_csv(caminho_csv, index_col=0)
  for caminho_csv in (
    'sub-dados/2. trends + casos covid/Covid + Trends diário - Brasil.csv',
    'sub-dados/2. trends + casos covid/Covid + Trends semanais - Brasil.csv',
    'sub-dados/2. trends + casos covid/Covid + Trends diário - Amazonas.csv',
    'sub-dados/2. trends + casos covid/Covid + Trends semanais - Amazonas.csv',
  )
]

In [12]:
# Display the daily COVID + Trends data for Brazil as loaded (before normalization).
df_casosCovidDiario_br

Unnamed: 0,date,covid,coronavirus,falta_de_ar,febre,tosse,perda_paladar,perda_olfato,new_confirmed,new_deaths
0,2020-01-01,0,0,18.0,26.0,25.0,0.0,0.0,0,0
1,2020-01-02,0,0,23.0,32.0,30.0,0.0,0.0,0,0
2,2020-01-03,0,0,23.0,27.0,25.0,0.0,5.0,0,0
3,2020-01-04,0,0,21.0,39.0,29.0,0.0,5.0,0,0
4,2020-01-05,0,0,20.0,33.0,34.0,0.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...
419,2021-02-23,83,56,59.0,88.0,75.0,17.0,26.0,63588,1358
420,2021-02-24,85,61,61.0,87.0,81.0,35.0,34.0,65387,1446
421,2021-02-25,88,69,51.0,94.0,75.0,41.0,38.0,68141,1576
422,2021-02-26,96,65,50.0,73.0,73.0,29.0,18.0,63530,1340


### Normalizando as colunas _new_confirmed_ e _new_deaths_

$$x' = \frac{x - \min(x)}{\max(x) - \min(x)} \times 100$$

In [13]:
def normalizar(min, max, dataframe, colunas):
  '''
  Min-max scale selected columns of a dataframe to the range [min, max].

  Parameters:
    min: lower bound of the normalized range
    max: upper bound of the normalized range
    dataframe: dataframe holding the data to be normalized
    colunas: list of str with the names of the columns to normalize
  Return:
    a new dataframe containing only the normalized columns, named as in
    `colunas` and carrying the same index as `dataframe`
  '''
  # `min` / `max` shadow the builtins inside this function; the names are kept so that
  # existing keyword callers keep working.
  scaler = MinMaxScaler(feature_range=(min, max))
  scaled_data = scaler.fit_transform(dataframe[colunas])
  # fit_transform returns a bare array, so the original index is re-attached here.
  # Without it the result gets a fresh 0..n-1 index, and assigning it back into a frame
  # whose index differs would align on labels and fill the new columns with NaN.
  return pd.DataFrame(scaled_data, columns=colunas, index=dataframe.index)

In [14]:
# Append 0-100 min-max scaled versions of new_confirmed and new_deaths to each of the four
# frames, stored as new_confirmed_n and new_deaths_n.
df_casosCovidDiario_br[['new_confirmed_n','new_deaths_n']] = normalizar(
  min=0, max=100, dataframe=df_casosCovidDiario_br, colunas=['new_confirmed','new_deaths'],
)
df_casosCovidSemanal_br[['new_confirmed_n','new_deaths_n']] = normalizar(
  min=0, max=100, dataframe=df_casosCovidSemanal_br, colunas=['new_confirmed','new_deaths'],
)
df_casosCovidDiario_am[['new_confirmed_n','new_deaths_n']] = normalizar(
  min=0, max=100, dataframe=df_casosCovidDiario_am, colunas=['new_confirmed','new_deaths'],
)
df_casosCovidSemanal_am[['new_confirmed_n','new_deaths_n']] = normalizar(
  min=0, max=100, dataframe=df_casosCovidSemanal_am, colunas=['new_confirmed','new_deaths'],
)

In [21]:
# Write each frame (now including the normalized columns) to its own CSV file under /content.
df_casosCovidDiario_br.to_csv(
  path_or_buf='/content/Covid + trend diário Brasil - normalizado.csv',
)
df_casosCovidSemanal_br.to_csv(
  path_or_buf='/content/Covid + trend semanal Brasil - normalizado.csv',
)
df_casosCovidDiario_am.to_csv(
  path_or_buf='/content/Covid + trend diário Amazonas - normalizado.csv',
)
df_casosCovidSemanal_am.to_csv(
  path_or_buf='/content/Covid + trend semanal Amazonas - normalizado.csv',
)

In [16]:
# Display the Brazil daily frame again to inspect the added new_confirmed_n / new_deaths_n columns.
df_casosCovidDiario_br

Unnamed: 0,date,covid,coronavirus,falta_de_ar,febre,tosse,perda_paladar,perda_olfato,new_confirmed,new_deaths,new_confirmed_n,new_deaths_n
0,2020-01-01,0,0,18.0,26.0,25.0,0.0,0.0,0,0,0.000000,0.000000
1,2020-01-02,0,0,23.0,32.0,30.0,0.0,0.0,0,0,0.000000,0.000000
2,2020-01-03,0,0,23.0,27.0,25.0,0.0,5.0,0,0,0.000000,0.000000
3,2020-01-04,0,0,21.0,39.0,29.0,0.0,5.0,0,0,0.000000,0.000000
4,2020-01-05,0,0,20.0,33.0,34.0,0.0,0.0,0,0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
419,2021-02-23,83,56,59.0,88.0,75.0,17.0,26.0,63588,1358,70.835144,86.167513
420,2021-02-24,85,61,61.0,87.0,81.0,35.0,34.0,65387,1446,72.839176,91.751269
421,2021-02-25,88,69,51.0,94.0,75.0,41.0,38.0,68141,1576,75.907050,100.000000
422,2021-02-26,96,65,50.0,73.0,73.0,29.0,18.0,63530,1340,70.770533,85.025381


In [19]:
# Display the Amazonas daily frame with the normalized columns.
# NOTE(review): rows with new_deaths == 0 show new_deaths_n ≈ 1.32 rather than 0, which suggests
# the new_deaths column contains negative values (minimum below zero) — verify the source data.
df_casosCovidDiario_am

Unnamed: 0,date,covid,coronavirus,falta_de_ar,febre,tosse,perda_paladar,perda_olfato,new_confirmed,new_deaths,new_confirmed_n,new_deaths_n
0,2020-01-01,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.000000,1.315789
1,2020-01-02,0,0,0.0,15.0,0.0,0.0,0.0,0,0,0.000000,1.315789
2,2020-01-03,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.000000,1.315789
3,2020-01-04,0,0,0.0,33.0,16.0,0.0,0.0,0,0,0.000000,1.315789
4,2020-01-05,0,0,0.0,0.0,36.0,0.0,0.0,0,0,0.000000,1.315789
...,...,...,...,...,...,...,...,...,...,...,...,...
419,2021-02-23,24,5,23.0,23.0,0.0,0.0,0.0,2147,51,42.862847,23.684211
420,2021-02-24,24,5,0.0,23.0,0.0,77.0,0.0,1608,69,32.102216,31.578947
421,2021-02-25,22,16,0.0,12.0,12.0,0.0,0.0,1420,86,28.348972,39.035088
422,2021-02-26,22,16,0.0,22.0,12.0,0.0,0.0,1572,56,31.383510,25.877193


In [17]:
# Show the first four and last four rows of the Brazil weekly frame.
df_casosCovidSemanal_br.iloc[[0,1,2,3,-4,-3,-2,-1]]

Unnamed: 0,date,covid,coronavirus,falta_de_ar,febre,tosse,perda_paladar,perda_olfato,new_confirmed,new_deaths,new_confirmed_n,new_deaths_n
0,2020-01-05,0,0,26,39,32,1,1,0,0,0.0,0.0
1,2020-01-12,0,0,24,38,30,2,4,0,0,0.0,0.0
2,2020-01-19,0,3,25,48,24,3,0,0,0,0.0,0.0
3,2020-01-26,0,14,27,43,23,1,2,0,0,0.0,0.0
56,2021-01-31,68,2,33,38,24,36,34,322189,7116,84.699428,85.838359
57,2021-02-07,65,2,24,39,25,25,30,314475,7573,82.671514,91.351025
58,2021-02-14,75,3,32,42,28,33,29,327047,7345,85.976535,88.600724
59,2021-02-21,92,3,34,47,35,43,35,380391,8290,100.0,100.0


In [20]:
# Show the first four and last four rows of the Amazonas weekly frame.
df_casosCovidSemanal_am.iloc[[0,1,2,3,-4,-3,-2,-1]]

Unnamed: 0,date,covid,coronavirus,falta_de_ar,febre,tosse,perda_paladar,perda_olfato,new_confirmed,new_deaths,new_confirmed_n,new_deaths_n
0,2020-01-05,0,0,15,45,34,0,0,0,0,0.0,0.0
1,2020-01-12,0,0,23,20,44,0,0,0,0,0.0,0.0
2,2020-01-19,0,5,5,40,9,0,0,0,0,0.0,0.0
3,2020-01-26,0,20,10,9,46,0,0,0,0,0.0,0.0
56,2021-01-31,56,4,41,60,19,0,10,15760,916,82.108992,94.725957
57,2021-02-07,47,4,16,36,46,0,0,12568,885,65.478795,91.520165
58,2021-02-14,36,3,13,40,20,10,11,10928,590,56.934459,61.013444
59,2021-02-21,27,2,24,28,34,9,10,10349,422,53.917891,43.640124
