### Visualização 3 - Óbitos Confirmados

In [9]:
from pyspark.sql.functions import *

In [10]:
from pyspark.sql.types import *

In [11]:
# Read every CSV matching the glob into one DataFrame: fields are separated by
# ';', the first line of each file is the header, and column types are inferred.
df_covid = (
    spark.read
    .option("sep", ";")
    .option("header", True)
    .option("inferSchema", True)
    .csv("/user/jessica/dados_covid/*.csv")
)

In [12]:
# Display the largest value of the 'data' column found in the dataset.
max_data_df = df_covid.agg({"data": "max"})
max_data_df.show()

+-------------------+
|          max(data)|
+-------------------+
|2021-07-06 00:00:00|
+-------------------+



In [13]:
# Keep the largest 'data' value as a plain Python object so it can be used
# in the filter below; the aggregate yields a single row with a single column.
max_data_row = df_covid.agg({"data": "max"}).first()
last_date = max_data_row[0]
print(last_date)

2021-07-06 00:00:00


In [14]:
# Keep only the rows whose 'regiao' is 'Brasil' and whose 'data' equals last_date.
df_covid_filter_date = (
    df_covid
    .filter(col("regiao") == "Brasil")
    .filter(col("data") == last_date)
)

In [15]:
# Inspect the filtered rows (show's defaults spelled out: up to 20 rows, long cells truncated).
df_covid_filter_date.show(n=20, truncate=True)

+------+------+---------+-----+------+--------------+---------------+-------------------+---------+----------------+--------------+----------+---------------+-----------+----------------+---------------------+----------------------+
|regiao|estado|municipio|coduf|codmun|codRegiaoSaude|nomeRegiaoSaude|               data|semanaEpi|populacaoTCU2019|casosAcumulado|casosNovos|obitosAcumulado|obitosNovos|Recuperadosnovos|emAcompanhamentoNovos|interior/metropolitana|
+------+------+---------+-----+------+--------------+---------------+-------------------+---------+----------------+--------------+----------+---------------+-----------+----------------+---------------------+----------------------+
|Brasil|  null|     null|   76|  null|          null|           null|2021-07-06 00:00:00|       27|       210147125|      18855015|     62504|         526892|       1780|        17262646|              1065477|                  null|
+------+------+---------+-----+------+--------------+---------------

In [16]:
# Letalidade: accumulated deaths as a percentage of accumulated cases.
df_covid_lethality = df_covid_filter_date.withColumn(
    "Letalidade",
    (col("obitosAcumulado") / col("casosAcumulado")) * 100,
)

In [17]:
# Mortalidade: accumulated deaths per 100,000 of the populacaoTCU2019 column.
df_covid_lethality = df_covid_lethality.withColumn(
    "Mortalidade",
    (col("obitosAcumulado") / col("populacaoTCU2019")) * 100000,
)

In [18]:
# Keep only the columns that make up the confirmed-deaths summary.
df_covid_lethality = df_covid_lethality.select(
    "regiao",
    "obitosAcumulado",
    "obitosNovos",
    "Letalidade",
    "Mortalidade",
)

In [19]:
# Inspect the summary before renaming and rounding (show's defaults spelled out).
df_covid_lethality.show(n=20, truncate=True)

+------+---------------+-----------+-------------+------------------+
|regiao|obitosAcumulado|obitosNovos|   Letalidade|       Mortalidade|
+------+---------------+-----------+-------------+------------------+
|Brasil|         526892|       1780|2.79443957000|250.72529543290204|
+------+---------------+-----------+-------------+------------------+



In [20]:
# Give the summary columns their presentation names.
df_covid_lethality = (
    df_covid_lethality
    .withColumnRenamed("regiao", "Regiao")
    .withColumnRenamed("obitosAcumulado", "Obitos_Acumulado")
    .withColumnRenamed("obitosNovos", "Obitos_Novos")
)

In [21]:
# Round Letalidade to one decimal place while keeping the column numeric.
# format_number() was replaced because it returns a string (with thousands
# grouping), so the metric would leave Spark as text instead of a number; the
# intermediate cast to a 32-bit float is no longer needed either.
# NOTE: `round` here is pyspark.sql.functions.round, brought in by the star
# import at the top of the notebook, not Python's builtin round.
df_covid_lethality = df_covid_lethality.withColumn(
    "Letalidade",
    round(col("Letalidade"), 1),
)

In [22]:
# Round Mortalidade to one decimal place while keeping the column numeric.
# format_number() was replaced because it returns a string (with thousands
# grouping, e.g. "1,234.5"), so the metric would leave Spark as text instead of
# a number; the intermediate cast to a 32-bit float is no longer needed either.
# NOTE: `round` here is pyspark.sql.functions.round, brought in by the star
# import at the top of the notebook, not Python's builtin round.
df_covid_lethality = df_covid_lethality.withColumn(
    "Mortalidade",
    round(col("Mortalidade"), 1),
)

In [23]:
# Display the final confirmed-deaths summary (show's defaults spelled out).
df_covid_lethality.show(n=20, truncate=True)

+------+----------------+------------+----------+-----------+
|Regiao|Obitos_Acumulado|Obitos_Novos|Letalidade|Mortalidade|
+------+----------------+------------+----------+-----------+
|Brasil|          526892|        1780|       2.8|      250.7|
+------+----------------+------------+----------+-----------+



### 8. Salvar a visualização do exercício 6 em um índice no Elastic

In [2]:
# Install the elasticsearch Python package, pinned to version 7.17.
!pip install elasticsearch==7.17



In [5]:
# Instalando o pocote requisitado para fazer a conexão.
!pip install --upgrade requests

Collecting requests
  Downloading requests-2.27.1-py2.py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 812 kB/s eta 0:00:011
Collecting charset-normalizer~=2.0.0; python_version >= "3"
  Downloading charset_normalizer-2.0.12-py3-none-any.whl (39 kB)
Installing collected packages: charset-normalizer, requests
  Attempting uninstall: requests
    Found existing installation: requests 2.18.4
    Uninstalling requests-2.18.4:
      Successfully uninstalled requests-2.18.4
Successfully installed charset-normalizer-2.0.12 requests-2.27.1


In [6]:
# Importando a biblioteca.
from elasticsearch import Elasticsearch

In [7]:
# Open the connection to Elasticsearch.
es = Elasticsearch(hosts=["host.docker.internal:9200"])

In [8]:
# Confirm that the connection is active by printing what the cluster reports about itself.
cluster_info = es.info()
print(cluster_info)

{'name': 'node1', 'cluster_name': 'my_cluster', 'cluster_uuid': 'Ffu8-3xxTGiQHmcxvV6VPw', 'version': {'number': '7.9.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'd34da0ea4a966c4e49417f2da2f244e3e97b4e6e', 'build_date': '2020-09-23T00:45:33.626720Z', 'build_snapshot': False, 'lucene_version': '8.6.2', 'minimum_wire_compatibility_version': '6.8.0', 'minimum_index_compatibility_version': '6.0.0-beta1'}, 'tagline': 'You Know, for Search'}


In [34]:
# Read configuration: Elasticsearch node, port and the resource to read from.
es_read_config = {
    "es.nodes": "host.docker.internal",
    "es.port": "9200",
    "es.resource": "dados_covid",
}

In [35]:
# Write configuration: Elasticsearch node, port and the resource to write to.
es_write_config = {
    "es.nodes": "host.docker.internal",
    "es.port": "9200",
    "es.resource": "dados_covid",
}

In [38]:
# Send the summary DataFrame to Elasticsearch through the
# "org.elasticsearch.spark.sql" data source.
# The node, port and resource come from es_write_config (defined in an earlier
# cell) so they are declared in one place only, instead of being repeated inline.
# "es.nodes.wan.only" is not part of that dict, so it is still set here.
(
    df_covid_lethality.write
    .format("org.elasticsearch.spark.sql")
    .options(**es_write_config)
    .option("es.nodes.wan.only", "true")
    .save()
)