# Pandas

In [38]:
import pandas as pd
import numpy as np
import math

def destaca(texto, br=False):
    """Print ``texto`` as a three-line banner that is 60 characters wide.

    The banner is a row of ``=``, then ``texto`` padded with ``~`` on both
    sides, then another row of ``=``. When the free space cannot be split
    evenly, the extra ``~`` goes on the right. Text longer than 60
    characters is printed without padding.

    Args:
        texto: Heading shown in the middle row.
        br: When true, an empty line is printed before the banner.

    Returns:
        None. The banner is written to standard output.
    """
    largura = 60
    borda = '=' * largura

    # Left padding is half of the free space, rounded down; ljust then fills
    # whatever is still missing on the right up to the full width.
    sobra_esquerda = (largura - len(texto)) // 2
    miolo = ('~' * sobra_esquerda + texto).ljust(largura, '~')

    if br:
        print('')
    for parte in (borda, miolo, borda):
        print(parte)



In [4]:
def gerador_serie_historica(valor_inicial, volatilidade, periodos, dia_inicial, frequencia = 'M', seed = None):
    """Simulate a price-like series as a multiplicative random walk.

    The first value is ``valor_inicial``. Every later value is the previous
    one multiplied by ``1 + e``, where ``e`` is drawn from a normal
    distribution with mean 0 and standard deviation ``volatilidade``.

    Args:
        valor_inicial: First value of the series.
        volatilidade: Standard deviation of each period's relative change.
        periodos: Number of points to generate. ``0`` yields an empty series.
        dia_inicial: Start date forwarded to ``pd.date_range``.
        frequencia: Frequency alias forwarded to ``pd.date_range``.
            NOTE(review): pandas 2.2 deprecated the ``'M'`` alias in favour
            of ``'ME'``; pass ``frequencia='ME'`` on newer pandas versions.
        seed: Optional seed. When given, the draws come from a private
            ``np.random.default_rng(seed)`` so the result is reproducible.
            When ``None`` the global ``np.random`` state is used.

    Returns:
        A float ``pd.Series`` with ``periodos`` values indexed by the
        generated dates.
    """
    # Build the index first: it validates `periodos` and `frequencia`, and
    # it is the only thing needed for the empty case.
    indice = pd.date_range(dia_inicial, periods=periodos, freq=frequencia)
    if periodos == 0:
        return pd.Series([], index=indice, dtype=float)

    gerador = np.random if seed is None else np.random.default_rng(seed)
    choques = gerador.normal(0, volatilidade, size=periodos - 1)

    # The cumulative product of [v0, 1 + e1, 1 + e2, ...] gives
    # v0, v0 * (1 + e1), v0 * (1 + e1) * (1 + e2), ...
    fatores = np.concatenate(([valor_inicial], 1 + choques))
    return pd.Series(np.cumprod(fatores), index=indice)

In [5]:
# Two simulated 5-period series over the same dates. The earnings series is
# generated first, so the order of the random draws is lucro, then cotacoes.
serie_lucro = gerador_serie_historica(
    valor_inicial=2, volatilidade=0.5, periodos=5, dia_inicial="2022-01-01"
)
serie_cotacoes = gerador_serie_historica(
    valor_inicial=20, volatilidade=0.05, periodos=5, dia_inicial="2022-01-01"
)

# PL (price / earnings per share) is derived row by row from the two series.
df = pd.DataFrame(
    {"cotacao": serie_cotacoes, "lucro_por_acao": serie_lucro}
).assign(PL=lambda quadro: quadro["cotacao"] / quadro["lucro_por_acao"])
df

Unnamed: 0,cotacao,lucro_por_acao,PL
2022-01-31,20.0,2.0,10.0
2022-02-28,20.757738,2.395118,8.666686
2022-03-31,19.238825,4.241611,4.535736
2022-04-30,17.284073,5.159049,3.350244
2022-05-31,16.029491,1.655408,9.683108


In [6]:
# Column-oriented sample data: one list per column, one position per ticker.
dicionario = dict(
    nomes=["Quero Quero", "Alpargatas", "Alpargatas", "Magazine Luiza"],
    preco_sobre_lucro=[12, 6, 12, 100],
    volume=[5000, 1000, 4000, 7000],
)

# The tickers label the rows, in the same order as the lists above.
empresas = pd.DataFrame(
    data=dicionario,
    index=["LJQQ3", "ALPA3", "ALPA4", "MGLU3"],
)

empresas

Unnamed: 0,nomes,preco_sobre_lucro,volume
LJQQ3,Quero Quero,12,5000
ALPA3,Alpargatas,6,1000
ALPA4,Alpargatas,12,4000
MGLU3,Magazine Luiza,100,7000


In [39]:
# sort_index: order the rows by their index labels.

# Ascending order on the ticker index.
destaca('Sort Crescente', br=True)
empresas = empresas.sort_index(ascending=True)
print(empresas)

# Ascending order on the date index.
destaca('Sort Crescente Datas', br=True)
df = df.sort_index(ascending=True)
print(df)

# Descending order; `empresas` is left bound to the descending frame.
destaca('Sort Decrescente', br=True)
empresas = empresas.sort_index(ascending=False)
print(empresas)



~~~~~~~~~~~~~~~~~~~~~~~Sort Crescente~~~~~~~~~~~~~~~~~~~~~~~
                nomes  preco_sobre_lucro  volume
ALPA3      Alpargatas                  6    1000
ALPA4      Alpargatas                 12    4000
LJQQ3     Quero Quero                 12    5000
MGLU3  Magazine Luiza                100    7000

~~~~~~~~~~~~~~~~~~~~Sort Crescente Datas~~~~~~~~~~~~~~~~~~~~
              cotacao  lucro_por_acao         PL
2022-01-31  20.000000        2.000000  10.000000
2022-02-28  20.757738        2.395118   8.666686
2022-03-31  19.238825        4.241611   4.535736
2022-04-30  17.284073        5.159049   3.350244
2022-05-31  16.029491        1.655408   9.683108

~~~~~~~~~~~~~~~~~~~~~~Sort Decrescente~~~~~~~~~~~~~~~~~~~~~~
                nomes  preco_sobre_lucro  volume
MGLU3  Magazine Luiza                100    7000
LJQQ3     Quero Quero                 12    5000
ALPA4      Alpargatas                 12    4000
ALPA3      Alpargatas                  6    1000


    - `axis=1` muda o eixo em que o método opera (em `sort_index`, ordena os rótulos das colunas em vez dos das linhas)
    - o padrão é `axis=0`

In [41]:
# Sort the column labels (not the rows) in descending alphabetical order.
destaca('sorteando colunas')
empresas = empresas.sort_index(axis='columns', ascending=False)
empresas

~~~~~~~~~~~~~~~~~~~~~sorteando colunas~~~~~~~~~~~~~~~~~~~~~~


Unnamed: 0,volume,preco_sobre_lucro,nomes
MGLU3,7000,100,Magazine Luiza
LJQQ3,5000,12,Quero Quero
ALPA4,4000,12,Alpargatas
ALPA3,1000,6,Alpargatas


In [42]:
# Small frame used to try out the `axis` argument.

dicionario = dict(cotacao=[20, 30, 40], volume=[100, 200, 300])
teste_axis = pd.DataFrame(data=dicionario)

teste_axis

Unnamed: 0,cotacao,volume
0,20,100
1,30,200
2,40,300


In [50]:
# Summing along the index (the default axis) gives one total per column.
destaca('somando as colunas')
print(teste_axis.sum(axis='index'))
# Summing along the columns gives one total per row.
destaca('somando as linhas', br = True)
print(teste_axis.sum(axis='columns'))

~~~~~~~~~~~~~~~~~~~~~somando as colunas~~~~~~~~~~~~~~~~~~~~~
cotacao     90
volume     600
dtype: int64

~~~~~~~~~~~~~~~~~~~~~somando as linhas~~~~~~~~~~~~~~~~~~~~~~
0    120
1    230
2    340
dtype: int64


In [51]:
# Rows ordered from the largest index label to the smallest.
df = df.sort_index(axis='index', ascending=False)
df

Unnamed: 0,cotacao,lucro_por_acao,PL
2022-05-31,16.029491,1.655408,9.683108
2022-04-30,17.284073,5.159049,3.350244
2022-03-31,19.238825,4.241611,4.535736
2022-02-28,20.757738,2.395118,8.666686
2022-01-31,20.0,2.0,10.0


In [52]:
# sort_values orders the rows by the values of a chosen column (here PL,
# smallest first).
df = df.sort_values("PL", ascending=True)
df

Unnamed: 0,cotacao,lucro_por_acao,PL
2022-04-30,17.284073,5.159049,3.350244
2022-03-31,19.238825,4.241611,4.535736
2022-02-28,20.757738,2.395118,8.666686
2022-05-31,16.029491,1.655408,9.683108
2022-01-31,20.0,2.0,10.0


In [54]:
# sort_values with several keys: rows are ordered by preco_sobre_lucro, and
# volume breaks ties between equal preco_sobre_lucro values.
empresas = empresas.sort_values(
    ['preco_sobre_lucro', 'volume'],
    ascending=True,
)
empresas

Unnamed: 0,volume,preco_sobre_lucro,nomes
ALPA3,1000,6,Alpargatas
ALPA4,4000,12,Alpargatas
LJQQ3,5000,12,Quero Quero
MGLU3,7000,100,Magazine Luiza


In [55]:
# Ranking a single column with Series.rank.

# Default method ('average'): tied values share the mean of their positions.
empresas = empresas.assign(
    ranking_padrao=empresas['preco_sobre_lucro'].rank(method='average')
)
empresas

Unnamed: 0,volume,preco_sobre_lucro,nomes,ranking_padrao
ALPA3,1000,6,Alpargatas,1.0
ALPA4,4000,12,Alpargatas,2.5
LJQQ3,5000,12,Quero Quero,2.5
MGLU3,7000,100,Magazine Luiza,4.0


In [56]:
# The same column ranked with the 'max', 'min' and 'dense' tie-breaking
# methods, added as three new columns in that order.
empresas = empresas.assign(
    ranking_max=lambda tabela: tabela['preco_sobre_lucro'].rank(method='max'),
    ranking_min=lambda tabela: tabela['preco_sobre_lucro'].rank(method='min'),
    ranking_dense=lambda tabela: tabela['preco_sobre_lucro'].rank(method='dense'),
)

empresas

Unnamed: 0,volume,preco_sobre_lucro,nomes,ranking_padrao,ranking_max,ranking_min,ranking_dense
ALPA3,1000,6,Alpargatas,1.0,1.0,1.0,1.0
ALPA4,4000,12,Alpargatas,2.5,3.0,2.0,2.0
LJQQ3,5000,12,Quero Quero,2.5,3.0,2.0,2.0
MGLU3,7000,100,Magazine Luiza,4.0,4.0,4.0,3.0


### Desafio

In [57]:
# Base frame for the challenge.
dict_desafio = {
    "tickers" : ["WEGE3", "PETR3", "VALE3", "PETR4", "LREN3"],
    "cotacoes": [20, 30, 40, 12, 35],
    "nomes": ['Weg', 'Petro', 'Vale', 'Petro', 'Lojas Renner'],
    "preco_sobre_lucro": [25, 6, 12, 7, 25],
    "volume": [5000, 1000, 4000, 7000, 1200]
    }

# Exchange rate used to convert `preco` into dollars.
dolar = 5.25

# No throwaway integer index is passed: `set_index` replaces it right away.
# `cotacoes` is renamed by name rather than by overwriting `df.columns`
# positionally, so a change in the dict's key order cannot mislabel columns.
df = (
    pd.DataFrame(dict_desafio)
    .set_index("tickers")
    .rename(columns={"cotacoes": "preco"})
    .assign(
        lucro_por_acao=lambda tabela: tabela["preco"] / tabela["preco_sobre_lucro"],
        preco_em_dolar=lambda tabela: tabela["preco"] / dolar,
    )
)
df

Unnamed: 0_level_0,preco,nomes,preco_sobre_lucro,volume,lucro_por_acao,preco_em_dolar
tickers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
WEGE3,20,Weg,25,5000,0.8,3.809524
PETR3,30,Petro,6,1000,5.0,5.714286
VALE3,40,Vale,12,4000,3.333333,7.619048
PETR4,12,Petro,7,7000,1.714286,2.285714
LREN3,35,Lojas Renner,25,1200,1.4,6.666667


In [58]:
# Rows in ascending order of their ticker label.
df = df.sort_index(ascending=True)
df

Unnamed: 0_level_0,preco,nomes,preco_sobre_lucro,volume,lucro_por_acao,preco_em_dolar
tickers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LREN3,35,Lojas Renner,25,1200,1.4,6.666667
PETR3,30,Petro,6,1000,5.0,5.714286
PETR4,12,Petro,7,7000,1.714286,2.285714
VALE3,40,Vale,12,4000,3.333333,7.619048
WEGE3,20,Weg,25,5000,0.8,3.809524


In [59]:
# Highest preco_sobre_lucro first.
df = df.sort_values('preco_sobre_lucro', ascending=False)
df

Unnamed: 0_level_0,preco,nomes,preco_sobre_lucro,volume,lucro_por_acao,preco_em_dolar
tickers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LREN3,35,Lojas Renner,25,1200,1.4,6.666667
WEGE3,20,Weg,25,5000,0.8,3.809524
VALE3,40,Vale,12,4000,3.333333,7.619048
PETR4,12,Petro,7,7000,1.714286,2.285714
PETR3,30,Petro,6,1000,5.0,5.714286


In [61]:
# Ascending by preco_sobre_lucro; volume breaks ties between equal values.
df = df.sort_values(
    ['preco_sobre_lucro', 'volume'],
    ascending=True,
)
df

Unnamed: 0_level_0,preco,nomes,preco_sobre_lucro,volume,lucro_por_acao,preco_em_dolar
tickers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PETR3,30,Petro,6,1000,5.0,5.714286
PETR4,12,Petro,7,7000,1.714286,2.285714
VALE3,40,Vale,12,4000,3.333333,7.619048
LREN3,35,Lojas Renner,25,1200,1.4,6.666667
WEGE3,20,Weg,25,5000,0.8,3.809524


In [62]:
# Dense rank of earnings per share: the smallest value gets rank 1 and ranks
# grow by one per distinct value.
df = df.assign(rank_lpa=df['lucro_por_acao'].rank(method='dense'))
df

Unnamed: 0_level_0,preco,nomes,preco_sobre_lucro,volume,lucro_por_acao,preco_em_dolar,rank_lpa
tickers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
PETR3,30,Petro,6,1000,5.0,5.714286,5.0
PETR4,12,Petro,7,7000,1.714286,2.285714,3.0
VALE3,40,Vale,12,4000,3.333333,7.619048,4.0
LREN3,35,Lojas Renner,25,1200,1.4,6.666667,2.0
WEGE3,20,Weg,25,5000,0.8,3.809524,1.0
