In [2]:
#imports
from pyspark.sql import SparkSession
from pyspark.sql.functions import * 
from pyspark.sql.types import *

In [3]:
#instanciando spark 
spark = SparkSession.builder \
    .master('local[2]') \
    .appName('analise_bovespa') \
    .config('spark.ui.port', '4050') \
    .getOrCreate()

In [4]:
#work directories
stage_data = 'D://caiof//Documents//GIT_Repos//Programs//analise_acoes_ibovespa//stage_data//bovespa_concat.csv'

In [5]:
ibov_df = (
    spark
    .read
    .format('csv')
    .option('header','True')
    .option('inferschema', 'True')
    .load(stage_data)
    
)

ibov_df.show()

+-----------+----------+------------+--------------+------------+------------+----------------+-----------+--------------+
|data_pregao|sigla_acao|   nome_acao|preco_abertura|preco_maximo|preco_minimo|preco_fechamento|qtd_negocio|volume_negocio|
+-----------+----------+------------+--------------+------------+------------+----------------+-----------+--------------+
| 2020-01-02|     AALR3|      ALLIAR|         18.29|        19.0|       18.28|            19.0|   585800.0|   1.0946196E9|
| 2020-01-02|    AAPL34|       APPLE|         120.0|      121.34|       120.0|          121.34|    12700.0|     1.53338E8|
| 2020-01-02|     ABCB4|  ABC BRASIL|          20.0|        20.3|       19.82|            20.3|   870400.0|   1.7457878E9|
| 2020-01-02|     ABEV3|   AMBEV S/A|         18.86|       19.25|       18.78|            19.2|  1.60113E7| 3.06234791E10|
| 2020-01-02|     ADHM3| ADVANCED-DH|          2.55|        2.55|        2.35|            2.48|   193400.0|     4.71927E7|
| 2020-01-02|   

In [12]:
ibov_df.createOrReplaceTempView('ibov_df')

Data maxima e minima do dataset

In [29]:
(
    spark
    .sql(""" 
    select 
        min(data_pregao) as data_minima,
        max(data_pregao) as data_maxima 
    from ibov_df;              

    """)
    .show(truncate=False)
)

+-----------+-----------+
|data_minima|data_maxima|
+-----------+-----------+
|2020-01-02 |2023-06-14 |
+-----------+-----------+



Top 10 ações mais negociadas

In [28]:
(
    spark
    .sql(""" 
    select 
        sigla_acao,
        count(*) as total 
    from ibov_df 
    group by(sigla_acao)
    
    order by(total) desc 
    limit 10;              

    """)
    .show(truncate=False)
)

+----------+-----+
|sigla_acao|total|
+----------+-----+
|ITUB3     |858  |
|MILS3     |858  |
|MYPK3     |858  |
|BRKM5     |858  |
|PRIO3     |858  |
|GGBR4     |858  |
|ENBR3     |858  |
|ELET3     |858  |
|SANB4     |858  |
|KLBN3     |858  |
+----------+-----+



Qual o preço medio de fechamento das ações de itau (ações ordinarias e ações preferenciais)
no primeiro semestre de 2023 

In [55]:
(
    ibov_df
    .filter((col('data_pregao') >= '2023-01-01'))
    .filter((col('sigla_acao') == 'ITUB3') | (col('sigla_acao') == 'ITUB4') )
    .groupBy('sigla_acao')
    .agg(
        round(mean(col('preco_fechamento'))).alias('preco_fechamento_medio')
    )
    .show()
)

+----------+----------------------+
|sigla_acao|preco_fechamento_medio|
+----------+----------------------+
|     ITUB3|                  22.0|
|     ITUB4|                  26.0|
+----------+----------------------+

