# Etapa de Extração
 1. Acesso às informações da Campanha Nacional de Vacinação contra Covid-19
 2. Mais informações da API em [manual_api_vacina_covid-19.pdf](https://opendatasus.saude.gov.br/dataset/8e0c325d-2586-4b11-8925-4ba51acd6e6d/resource/84707ab9-8497-4f2f-8a0d-b873489a63bf/download/manual_api_vacina_covid-19.pdf)
 3. Dicionário de Dados em [dicionario-de-dados-vacinacao.pdf](https://opendatasus.saude.gov.br/dataset/8e0c325d-2586-4b11-8925-4ba51acd6e6d/resource/a8308b58-8898-4c6d-8119-400c722c71b5/download/dicionario-de-dados-vacinacao.pdf)
 

In [3]:
from pyspark.sql.functions import col,coalesce,year, month
from pyspark.sql import SparkSession
from notebookutils import mssparkutils
import requests
import json


# Fetch the API secret from Azure Key Vault and split it on ';'.
# Later cells read the parts by position: [0] = URL, [1] = user, [2] = password.
auth = mssparkutils.credentials.getSecret("psbcs-key","ApiDataVacin")
auths = auth.split(';')

# Fail fast with a clear message (without echoing the secret itself) when the
# secret is malformed, instead of an IndexError in the request cell.
if len(auths) < 3:
    raise ValueError(
        "Secret 'ApiDataVacin' must contain at least 3 ';'-separated parts "
        f"(url;user;password), found {len(auths)}"
    )


StatementMeta(sparksmall, 13, 4, Finished, Available)

#### Requisição HTTP

In [11]:
# The secret parts are positional: [0] = URL, [1] = user, [2] = password.
# Without a timeout, requests waits indefinitely on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 60

response = requests.get(
    auths[0],
    auth=(auths[1], auths[2]),
    timeout=REQUEST_TIMEOUT_SECONDS,
)

# Surface HTTP errors (401, 5xx, ...) here rather than as a confusing KeyError
# below when an error payload has no 'hits' key.
response.raise_for_status()

# The documents are read from the nested 'hits' -> 'hits' list of the payload.
json_data = response.json()['hits']['hits']

StatementMeta(sparksmall, 13, 12, Finished, Available)

### Create Dataframe

In [5]:
# Build a Spark DataFrame from the list of hits returned by the API.
# No schema is passed, so Spark infers it from the records.
df = spark.createDataFrame(data=json_data)

StatementMeta(sparksmall, 13, 6, Finished, Available)

In [12]:
# Render the DataFrame built from the API response for a quick visual check.
display(df)

StatementMeta(sparksmall, 13, 13, Finished, Available)

SynapseWidget(Synapse.DataFrame, c4dc55d8-04b3-42bf-bafa-94082dae9716)

In [6]:
# Persist the DataFrame as JSON in the bronze folder, replacing any previous output.
bronze_json_path = "abfss://synapse@wspdatalake2.dfs.core.windows.net/bronze/datasus_data.json"
(
    df.write
    .mode("overwrite")
    .json(bronze_json_path)
)

StatementMeta(sparksmall, 13, 7, Finished, Available)

#### Table

In [7]:
# Vaccination table: selected `_source` fields renamed to the cd_/dt_/nr_/ds_
# naming convention, de-duplicated, with year/month columns derived for partitioning.
#
# NOTE(review): dt_vacinacao is taken from '@timestamp'; confirm this is the
# intended date field for the vaccination event.

# Cast once and reuse for both derived partition columns.
vaccination_ts = col("dt_vacinacao").cast("timestamp")

df_tb = (
    df.select(
        col("_source.document_id").alias("cd_registro"),
        col("_source.@timestamp").alias("dt_vacinacao"),
        col("_source.vacina_codigo").alias("cd_vacina"),
        # Fixed key: the source field is 'paciente_id'. The previous
        # 'pacient_id' does not match it, so the lookup could only produce
        # NULLs that were then all replaced by -1 below.
        col("_source.paciente_id").alias("cd_paciente"),
        col("_source.vacina_categoria_codigo").alias("cd_categ_vacina"),
        col("_source.dt_deleted").alias("dt_deletado"),
        col("_source.id_sistema_origem").alias("cd_sys_org"),
        col("_source.estabelecimento_valor").alias("cd_estabelecimento"),
        col("_source.vacina_lote").alias("nr_lote_vacina"),
        col("_source.vacina_descricao_dose").alias("ds_dose"),
    )
    .dropDuplicates()
    # Rows without a patient id get the sentinel -1.
    .fillna({'cd_paciente': -1})
    .withColumn("year", year(vaccination_ts))
    .withColumn("month", month(vaccination_ts))
)


StatementMeta(sparksmall, 13, 8, Finished, Available)

##### Write Json

In [8]:
# Persist the curated table as JSON in the silver folder, replacing any previous output.
silver_json_path = "abfss://synapse@wspdatalake2.dfs.core.windows.net/silver/datasus_data.json"
(
    df_tb.write
    .mode("overwrite")
    .json(silver_json_path)
)

StatementMeta(sparksmall, 13, 9, Finished, Available)

#### Write Delta Table

In [9]:
# Save the curated table as the Delta table 'data.datasus', replacing existing contents.
(
    df_tb.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('data.datasus')
)

StatementMeta(sparksmall, 13, 10, Finished, Available)

##### Write Parquet

In [10]:
# Write the curated table as Parquet in the silver folder, one directory per
# (year, month) pair, replacing any previous output.
silver_parquet_path = "abfss://synapse@wspdatalake2.dfs.core.windows.net/silver/datasus_data"
(
    df_tb.write
    .partitionBy("year", "month")
    .mode("overwrite")
    .parquet(silver_parquet_path)
)

StatementMeta(sparksmall, 13, 11, Finished, Available)

#### Analítico Estabelecimento

In [14]:
# Establishment table: distinct combinations of the establishment fields below.
# Each pair is (field under `_source`, output column name).
estabel_fields = [
    ("estabelecimento_municipio_codigo", "cd_municipio"),
    ("estabelecimento_valor", "cd_estabelecimento"),
    ("estabelecimento_uf", "ds_uf"),
    ("estabelecimento_municipio_nome", "ds_municipio"),
]
estabel_columns = [
    col("_source." + source_field).alias(target_name)
    for source_field, target_name in estabel_fields
]
df_Estabel = df.select(*estabel_columns).dropDuplicates()

StatementMeta(sparksmall, 13, 15, Finished, Available)

In [15]:
# Save the establishment table as the Delta table 'data.estabel', replacing existing contents.
(
    df_Estabel.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('data.estabel')
)

StatementMeta(sparksmall, 13, 16, Finished, Available)

#### Dados da Vacina

In [13]:
# Vaccine table: distinct rows of the vaccine-related fields below.
# Each pair is (field under `_source`, output column name).
vac_fields = [
    ("document_id", "cd_registro"),
    ("vacina_codigo", "cd_vacina"),
    ("vacina_nome", "ds_vacina"),
    ("vacina_categoria_codigo", "cd_categ_vacina"),
    ("vacina_lote", "nr_lote_vacina"),
    ("vacina_descricao_dose", "ds_dose"),
]
vac_columns = [
    col("_source." + source_field).alias(target_name)
    for source_field, target_name in vac_fields
]
df_vac = df.select(*vac_columns).dropDuplicates()


StatementMeta(sparksmall, 13, 14, Finished, Available)

In [16]:
# Save the vaccine table as the Delta table 'data.vac_dat', replacing existing contents.
(
    df_vac.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('data.vac_dat')
)

StatementMeta(sparksmall, 13, 17, Finished, Available)