# Juntando as Peças 🧩

## (Fontes de Dados 🚰 ➕ Manipulação 🪡 ➕ Análise de Dados 🕵🏽) <sup>Alto Desempenho 🚀 🧞‍♂️</sup>

### Funções Auxiliares

In [1]:
!pip install --upgrade pandas --quiet


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m


In [2]:
import os, json, sqlite3, pandas as pd

In [3]:
def build_path(subfolder = 'raw'):
    """Return the absolute path of a project data folder, creating it if needed.

    The folder is resolved as ``<cwd>/../project/data/<subfolder>`` and then
    normalized with ``os.path.abspath``.

    Args:
        subfolder: Folder name under ``project/data``. Defaults to ``'raw'``.

    Returns:
        str: Absolute path of the folder, which exists when the call returns.
    """
    folderpath = os.path.abspath(
        os.path.join(os.getcwd(), os.pardir, 'project', 'data', subfolder))
    # exist_ok=True replaces the separate exists() check, which could race with
    # another process creating the folder between the check and makedirs().
    os.makedirs(folderpath, exist_ok=True)
    return folderpath

#### csv

In [4]:
def get_estados_georreferenciamento(
        filename='estados_georreferenciamento.csv'):
    """Read the states georeferencing CSV from the raw data folder.

    Args:
        filename: CSV file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: The file parsed by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(os.path.join(build_path(), filename))

def get_municipios_georreferenciamento(
        filename='municipios_georreferenciamento.csv'):
    """Read the municipalities georeferencing CSV from the raw data folder.

    Args:
        filename: CSV file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: The file parsed by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(os.path.join(build_path(), filename))

def get_covid_infections_and_deaths(
        filename='ALL_HIST_PAINEL_COVID.csv'):
    """Read the Covid-19 cases and deaths CSV from the raw data folder.

    The file is parsed as UTF-8 with ``;`` as separator, the ``data`` column is
    parsed as dates, and the columns listed below are read with the nullable
    ``Int64`` dtype.

    Args:
        filename: CSV file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: The parsed file.
    """
    nullable_int_columns = (
        'codmun',
        'codRegiaoSaude',
        'populacaoTCU2019',
        'casosAcumulado',
        'Recuperadosnovos',
        'emAcompanhamentoNovos',
        'interior/metropolitana',
    )
    column_dtypes = {column: 'Int64' for column in nullable_int_columns}

    return pd.read_csv(
        os.path.join(build_path(), filename),
        sep=';',
        parse_dates=['data'],
        dtype=column_dtypes,
        encoding='utf-8',
    )

#### json

In [5]:
#fail
def get_estados_codigos(filename='estados_codigos.json'):
    """Naive loader: hand the JSON file straight to ``pd.read_json``.

    Kept as the notebook's "#fail" example. It is shadowed by the
    redefinition of the same name later in this cell, so callers never
    reach this version.

    Args:
        filename: JSON file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: Whatever ``pd.read_json`` produces for the file.
    """
    return pd.read_json(os.path.join(build_path(), filename))

#solution
def get_estados_codigos(filename='estados_codigos.json'):
    """Load the states codes JSON and flatten it into a DataFrame.

    The parsed JSON is passed to ``pd.json_normalize`` with default options.

    Args:
        filename: JSON file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: The flattened records.
    """
    filepath = os.path.join(build_path(), filename)

    # Decode explicitly as UTF-8 so accented text does not depend on the
    # platform's default encoding.
    with open(filepath, encoding='utf-8') as jsonfile:
        return pd.json_normalize(json.load(jsonfile))

#fail
def get_estados_caracteristicas(filename='estados_caracteristicas.json'):
    """Naive loader: hand the JSON file straight to ``pd.read_json``.

    Kept as the notebook's "#fail" example. It is shadowed by the
    redefinition of the same name later in this cell, so callers never
    reach this version.

    Args:
        filename: JSON file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: Whatever ``pd.read_json`` produces for the file.
    """
    return pd.read_json(os.path.join(build_path(), filename))

#solution
def get_estados_caracteristicas(filename='estados_caracteristicas.json'):
    """Load the states characteristics JSON as one row per characteristic.

    Each entry of the ``characteristics`` list becomes a row whose columns are
    prefixed with ``characteristics_``; the ``state`` field is carried along as
    metadata in the ``state_state`` column.

    Args:
        filename: JSON file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: The flattened records.
    """
    filepath = os.path.join(build_path(), filename)

    # Decode explicitly as UTF-8 so accented text does not depend on the
    # platform's default encoding.
    with open(filepath, encoding='utf-8') as jsonfile:
        return pd.json_normalize(json.load(jsonfile),
                                 record_path='characteristics',
                                 record_prefix='characteristics_',
                                 meta='state',
                                 meta_prefix='state_')

#big fail
def get_estados_vacinacao(filename='estados_vacinacao.json'):
    """Naive loader: hand the JSON file straight to ``pd.read_json``.

    Kept as the notebook's "#big fail" example. It is shadowed by the
    redefinition of the same name later in this cell, so callers never
    reach this version.

    Args:
        filename: JSON file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: Whatever ``pd.read_json`` produces for the file.
    """
    return pd.read_json(os.path.join(build_path(), filename))

#solution
def get_estados_vacinacao(filename='estados_vacinacao.json'):
    """Load the per-state vaccination JSON as one row per (state, date) bucket.

    Reads the list under ``['Paciente_Estado']['buckets']`` and expands each
    item's ``Data_Aplicacao_Vacina`` -> ``buckets`` records into rows. Record
    columns are prefixed with ``vacinacao_``; the outer ``key`` and
    ``doc_count`` are carried as ``estado_key`` and ``estado_doc_count``.

    Args:
        filename: JSON file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: Rows after ``convert_dtypes()``, with
        ``vacinacao_key`` converted from epoch milliseconds to datetime.
    """
    filepath = os.path.join(build_path(), filename)

    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(filepath, encoding='utf-8') as jsonfile:
        buckets = json.load(jsonfile)['Paciente_Estado']['buckets']

    df = pd.json_normalize(buckets,
                           record_path=['Data_Aplicacao_Vacina', ['buckets']],
                           meta=['key', 'doc_count'],
                           record_prefix='vacinacao_',
                           meta_prefix='estado_').convert_dtypes()
    # The bucket key is read as an epoch timestamp in milliseconds.
    df['vacinacao_key'] = pd.to_datetime(df['vacinacao_key'], unit='ms')
    return df

#big fail
def get_municipios_vacinacao(filename='municipios_vacinacao.json'):
    """Naive loader: hand the JSON file straight to ``pd.read_json``.

    Kept as the notebook's "#big fail" example. It is shadowed by the
    redefinition of the same name later in this cell, so callers never
    reach this version.

    Args:
        filename: JSON file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: Whatever ``pd.read_json`` produces for the file.
    """
    return pd.read_json(os.path.join(build_path(), filename))

#solution
def get_municipios_vacinacao(filename='municipios_vacinacao.json'):
    """Load the per-municipality vaccination JSON as one row per (municipality, date) bucket.

    Reads the list under ``['Paciente_Municipio']['buckets']`` and expands each
    item's ``Data_Aplicacao_Vacina`` -> ``buckets`` records into rows. Record
    columns are prefixed with ``vacinacao_``; the outer ``key`` and
    ``doc_count`` are carried as ``municipio_key`` and ``municipio_doc_count``.

    Args:
        filename: JSON file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: Rows after ``convert_dtypes()``, with
        ``vacinacao_key`` converted from epoch milliseconds to datetime.
    """
    filepath = os.path.join(build_path(), filename)

    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(filepath, encoding='utf-8') as jsonfile:
        buckets = json.load(jsonfile)['Paciente_Municipio']['buckets']

    df = pd.json_normalize(buckets,
                           record_path=['Data_Aplicacao_Vacina', ['buckets']],
                           meta=['key', 'doc_count'],
                           record_prefix='vacinacao_',
                           meta_prefix='municipio_').convert_dtypes()
    # The bucket key is read as an epoch timestamp in milliseconds.
    df['vacinacao_key'] = pd.to_datetime(df['vacinacao_key'], unit='ms')
    return df

#### sqlite

In [6]:
def get_municipios_codigos(filename='municipios_codigos.db'):
    """Load the ``ibge`` table from the municipalities SQLite database.

    Args:
        filename: SQLite file name inside the folder returned by ``build_path()``.

    Returns:
        pandas.DataFrame: All rows of the ``ibge`` table, indexed by ``id``.
    """
    filepath = os.path.join(build_path(), filename)

    conn = sqlite3.connect(filepath)
    try:
        return pd.read_sql_query('select * from ibge', conn, index_col='id')
    finally:
        # Close explicitly: the connection was previously left open, and using
        # it as a context manager would only commit/rollback, not close.
        conn.close()

### Descrição dos Dados 🎲🕵️‍♀️

#### Dados de Georreferenciamento

##### Unidades Federativas

In [7]:
estados_georreferenciamento = get_estados_georreferenciamento()

In [8]:
estados_georreferenciamento.head()

Unnamed: 0,codigo_uf,uf,nome,latitude,longitude,regiao
0,11,RO,Rondônia,-10.83,-63.34,Norte
1,12,AC,Acre,-8.77,-70.55,Norte
2,13,AM,Amazonas,-3.47,-65.1,Norte
3,14,RR,Roraima,1.99,-61.33,Norte
4,15,PA,Pará,-3.79,-52.48,Norte


In [9]:
estados_georreferenciamento.dtypes

codigo_uf      int64
uf            object
nome          object
latitude     float64
longitude    float64
regiao        object
dtype: object

In [10]:
estados_georreferenciamento.describe()

Unnamed: 0,codigo_uf,latitude,longitude
count,27.0,27.0,27.0
mean,29.111111,-12.381111,-48.41037
std,13.024631,8.476122,9.291479
min,11.0,-30.17,-70.55
25%,19.0,-18.645,-52.99
50%,27.0,-10.57,-48.26
75%,38.0,-6.205,-41.025
max,53.0,1.99,-36.59


##### Municípios

In [11]:
municipios_georreferenciamento = get_municipios_georreferenciamento()

In [12]:
municipios_georreferenciamento.head()

Unnamed: 0,codigo_ibge,nome,latitude,longitude,capital,codigo_uf,siafi_id,ddd,fuso_horario
0,5200050,Abadia de Goiás,-16.7573,-49.4412,0,52,1050,62,America/Sao_Paulo
1,3100104,Abadia dos Dourados,-18.4831,-47.3916,0,31,4001,34,America/Sao_Paulo
2,5200100,Abadiânia,-16.197,-48.7057,0,52,9201,62,America/Sao_Paulo
3,3100203,Abaeté,-19.1551,-45.4444,0,31,4003,37,America/Sao_Paulo
4,1500107,Abaetetuba,-1.72183,-48.8788,0,15,401,91,America/Sao_Paulo


In [13]:
municipios_georreferenciamento.dtypes

codigo_ibge       int64
nome             object
latitude        float64
longitude       float64
capital           int64
codigo_uf         int64
siafi_id          int64
ddd               int64
fuso_horario     object
dtype: object

In [14]:
municipios_georreferenciamento.describe()

Unnamed: 0,codigo_ibge,latitude,longitude,capital,codigo_uf,siafi_id,ddd
count,5570.0,5570.0,5570.0,5570.0,5570.0,5570.0,5570.0
mean,3253591.0,-16.449101,-46.231012,0.004847,32.377738,4519.878276,57.099461
std,984910.3,8.287274,6.408578,0.069461,9.833862,3050.156056,25.42275
min,1100015.0,-33.6866,-72.8997,0.0,11.0,1.0,11.0
25%,2512126.0,-22.843875,-50.878525,0.0,25.0,1595.5,35.0
50%,3146280.0,-18.0943,-46.5232,0.0,31.0,4382.0,55.0
75%,4119190.0,-8.496445,-41.410775,0.0,41.0,7180.5,82.0
max,5300108.0,4.60314,-32.4107,1.0,53.0,9997.0,99.0


#### Dados de Casos e Óbitos por Covid-19 no Brasil

In [15]:
covid_infections_and_deaths = get_covid_infections_and_deaths()

In [16]:
covid_infections_and_deaths.head()

Unnamed: 0,regiao,estado,municipio,coduf,codmun,codRegiaoSaude,nomeRegiaoSaude,data,semanaEpi,populacaoTCU2019,casosAcumulado,casosNovos,obitosAcumulado,obitosNovos,Recuperadosnovos,emAcompanhamentoNovos,interior/metropolitana
0,Brasil,,,76,,,,2020-08-01,31,210147125,2707877,45392,93563,1088,1865729,748585,
1,Brasil,,,76,,,,2020-08-02,32,210147125,2733677,25800,94104,541,1883677,755896,
2,Brasil,,,76,,,,2020-08-03,32,210147125,2750318,16641,94665,561,1912319,743334,
3,Brasil,,,76,,,,2020-08-04,32,210147125,2801921,51603,95819,1154,1970767,735335,
4,Brasil,,,76,,,,2020-08-05,32,210147125,2857597,55676,97240,1421,2020637,741180,


In [17]:
covid_infections_and_deaths.dtypes

regiao                            object
estado                            object
municipio                         object
coduf                              int64
codmun                             Int64
codRegiaoSaude                     Int64
nomeRegiaoSaude                   object
data                      datetime64[ns]
semanaEpi                          int64
populacaoTCU2019                   Int64
casosAcumulado                     Int64
casosNovos                         int64
obitosAcumulado                    int64
obitosNovos                        int64
Recuperadosnovos                   Int64
emAcompanhamentoNovos              Int64
interior/metropolitana             Int64
dtype: object

In [18]:
covid_infections_and_deaths.describe()

Unnamed: 0,coduf,codmun,codRegiaoSaude,data,semanaEpi,populacaoTCU2019,casosAcumulado,casosNovos,obitosAcumulado,obitosNovos,Recuperadosnovos,emAcompanhamentoNovos,interior/metropolitana
count,7271854.0,7234754.0,7207580.0,7271854,7271854.0,7244680.0,7271854.0,7271854.0,7271854.0,7271854.0,1324.0,1324.0,7207580.0
mean,32.3602,325258.01413,32403.123698,2022-01-02 10:06:07.585595392,26.73846,114403.984855,12088.290272,15.61497,264.0036,0.2913186,21099156.385952,512267.851964,0.0693
min,11.0,110000.0,11001.0,2020-02-25 00:00:00,1.0,781.0,0.0,-336837.0,0.0,-9114.0,0.0,-6206.0,0.0
25%,25.0,251200.0,25010.0,2021-02-13 00:00:00,15.0,5474.0,286.0,0.0,5.0,0.0,7502146.25,121936.25,0.0
50%,31.0,314610.0,31059.0,2022-01-02 00:00:00,27.0,11695.0,884.0,0.0,15.0,0.0,21473002.0,395619.5,0.0
75%,41.0,411915.0,41015.0,2022-11-22 00:00:00,38.0,25765.0,2443.0,1.0,42.0,0.0,34126544.75,738947.25,0.0
max,76.0,530010.0,53001.0,2023-10-11 00:00:00,53.0,210147125.0,37849919.0,336959.0,706142.0,9115.0,37104058.0,11232608.0,1.0
std,9.874132,98535.03101,9836.341989,,14.27105,2969868.733786,363200.396715,721.6812,7759.715,16.68868,13325444.310171,584092.153503,0.253963


#### Características das Unidades da Federação

In [19]:
estados_caracteristicas = get_estados_caracteristicas()

In [20]:
estados_caracteristicas.head()

Unnamed: 0,characteristics_label,characteristics_value,characteristics_measure,state_state
0,Governador,MARCOS JOSÉ ROCHA DOS SANTOS,,ro
1,Capital,Porto Velho,,ro
2,Gentílico,rondoniense ou rondoniano,,ro
3,Área Territorial,"237.754,172",km²,ro
4,População residente,1.581.016,pessoas,ro


In [21]:
estados_caracteristicas.dtypes

characteristics_label      object
characteristics_value      object
characteristics_measure    object
state_state                object
dtype: object

In [22]:
estados_caracteristicas.describe()

Unnamed: 0,characteristics_label,characteristics_value,characteristics_measure,state_state
count,324,324.0,324.0,324
unique,12,320.0,8.0,27
top,Governador,1.01,,ro
freq,27,2.0,108.0,12


#### Dados de Vacinação

##### Unidades Federativas

In [23]:
estados_vacinacao = get_estados_vacinacao()

In [24]:
estados_vacinacao.head()

Unnamed: 0,vacinacao_key_as_string,vacinacao_key,vacinacao_doc_count,estado_key,estado_doc_count
0,2023-10-11T00:00:00.000Z,2023-10-11,1996,SP,167920785
1,2023-10-10T00:00:00.000Z,2023-10-10,2723,SP,167920785
2,2023-10-09T00:00:00.000Z,2023-10-09,3186,SP,167920785
3,2023-10-08T00:00:00.000Z,2023-10-08,28,SP,167920785
4,2023-10-07T00:00:00.000Z,2023-10-07,19734,SP,167920785


In [25]:
estados_vacinacao.dtypes

vacinacao_key_as_string    string[python]
vacinacao_key              datetime64[ms]
vacinacao_doc_count                 Int64
estado_key                 string[python]
estado_doc_count                    Int64
dtype: object

In [26]:
estados_vacinacao.describe()

Unnamed: 0,vacinacao_key,vacinacao_doc_count,estado_doc_count
count,30764,30764.0,30764.0
mean,2021-04-17 22:37:25.845000,18841.48404,23492394.759069
min,1899-12-30 00:00:00,1.0,1665428.0
25%,2021-06-05 00:00:00,244.0,7395969.0
50%,2022-03-17 00:00:00,3438.0,11228172.0
75%,2022-12-27 00:00:00,16077.25,29897599.0
max,2023-10-11 00:00:00,1418737.0,167920785.0
std,,51165.719939,33642764.602491


##### Municípios

In [27]:
municipios_vacinacao = get_municipios_vacinacao()

In [28]:
municipios_vacinacao.head()

Unnamed: 0,vacinacao_key_as_string,vacinacao_key,vacinacao_doc_count,municipio_key,municipio_doc_count
0,2023-10-11T00:00:00.000Z,2023-10-11,451,355030,46503474
1,2023-10-10T00:00:00.000Z,2023-10-10,711,355030,46503474
2,2023-10-09T00:00:00.000Z,2023-10-09,982,355030,46503474
3,2023-10-08T00:00:00.000Z,2023-10-08,18,355030,46503474
4,2023-10-07T00:00:00.000Z,2023-10-07,4278,355030,46503474


In [29]:
municipios_vacinacao.dtypes

vacinacao_key_as_string    string[python]
vacinacao_key              datetime64[ms]
vacinacao_doc_count                 Int64
municipio_key              string[python]
municipio_doc_count                 Int64
dtype: object

In [30]:
municipios_vacinacao.describe()

Unnamed: 0,vacinacao_key,vacinacao_doc_count,municipio_doc_count
count,62159,62159.0,62159.0
mean,2022-02-20 21:04:25.313000,3483.797841,3820674.17748
min,1900-08-21 00:00:00,1.0,1154510.0
25%,2021-08-01 00:00:00,81.0,1383084.0
50%,2022-04-17 00:00:00,983.0,1963638.0
75%,2023-01-06 00:00:00,3436.0,3138390.0
max,2023-10-11 00:00:00,316423.0,46503474.0
std,,10329.912266,6623652.504356


#### Dados de Códigos do IBGE

##### Unidades Federativas

In [31]:
estados_codigos = get_estados_codigos()

In [32]:
estados_codigos.head()

Unnamed: 0,id,sigla,nome,regiao.id,regiao.sigla,regiao.nome
0,11,RO,Rondônia,1,N,Norte
1,12,AC,Acre,1,N,Norte
2,13,AM,Amazonas,1,N,Norte
3,14,RR,Roraima,1,N,Norte
4,15,PA,Pará,1,N,Norte


In [33]:
estados_codigos.dtypes

id               int64
sigla           object
nome            object
regiao.id        int64
regiao.sigla    object
regiao.nome     object
dtype: object

In [34]:
estados_codigos.describe()

Unnamed: 0,id,regiao.id
count,27.0,27.0
mean,29.111111,2.555556
std,13.024631,1.395965
min,11.0,1.0
25%,19.0,1.5
50%,27.0,2.0
75%,38.0,3.5
max,53.0,5.0


##### Municípios

In [35]:
municipios_codigos = get_municipios_codigos()

In [36]:
municipios_codigos.head()

Unnamed: 0_level_0,state,city
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1100015,RO,Alta Floresta D''Oeste
1100379,RO,Alto Alegre dos Parecis
1100403,RO,Alto Paraíso
1100346,RO,Alvorada D''Oeste
1100023,RO,Ariquemes


In [37]:
municipios_codigos.dtypes

state    object
city     object
dtype: object

In [38]:
municipios_codigos.describe()

Unnamed: 0,state,city
count,5570,5570
unique,27,5298
top,MG,Bom Jesus
freq,853,5
