In [145]:
import pandas as pd
from sqlalchemy import create_engine, MetaData, Table
import math
from datetime import datetime, date, timedelta

# pd.set_option('display.max_rows', None)     
# pd.set_option('display.max_columns', None)


In [146]:
engine = create_engine(
    'postgresql://postgres:admin@192.168.88.61:5432/yield_debentures')

In [147]:
"""
O que estamos fazendo? 

Calculando o spread da empresa, utilizando o IPCA

Como vamos fazer? 

Pegar os dados para cada empresa (dados_debenture). Pegar os dados da IPCA (curvas_juros). Dividir um pelo outro 
para obter o spread da empresas, em cada um dos vértices
"""

'\nO que estamos fazendo? \n\nCalculando o spread da empresa, utilizando o IPCA\n\nComo vamos fazer? \n\nPegar os dados para cada empresa (dados_debenture). Pegar os dados da IPCA (curvas_juros). Dividir um pelo outro \npara obter o spread da empresas, em cada um dos vértices\n'

In [148]:
# Etapa 1 - Criar coluna dos spreads
empresas = pd.read_sql("SELECT codigo_ativo, data_referencia, taxa_emissao, taxa_indicativa, duration, emissor FROM dados_debenture WHERE grupo = 'IPCA SPREAD' ORDER BY data_referencia DESC", engine)
soberano = pd.read_sql("SELECT maturity, ipca, data_referencia FROM curvas_juros ORDER BY data_referencia DESC", engine)

In [149]:
soberano.loc[1]

maturity           [0.08333333333333333, 0.16666666666666666, 0.2...
ipca               [nan, nan, nan, 4.6984, 6.6196, 7.2503, 7.4622...
data_referencia                                           2025-02-21
Name: 1, dtype: object

In [150]:
empresas.head(4)

Unnamed: 0,codigo_ativo,data_referencia,taxa_emissao,taxa_indicativa,duration,emissor
0,TBLE26,2025-02-24,"6,2515%",7.1347,213.0,ENGIE BRASIL ENERGIA S.A.(*) (**)
1,TCII11,2025-02-24,"6,0594%",8.2124,1615.0,AES TUCANO HOLDING II S.A (*)
2,TEPA12,2025-02-24,"10,0924%",10.7406,892.0,BRASIL TECNOLOGIA E PARTICIPACAO S.A. (*)
3,TIET18,2025-02-24,"6,0215%",7.4309,688.0,AES TIETE ENERGIA S/A


In [151]:
datas_referencia = empresas['data_referencia'].to_list()
datas_referencia = list(dict.fromkeys(datas_referencia))

In [152]:
def interpolar(duration, maturity_menor, maturity_maior, ipca_menor, ipca_maior):
    # Esta função retorna o IPCA interpolado para a determinada maturity da debenture
    x1 = maturity_menor
    x2 = maturity_maior
    y1 = ipca_menor
    y2 = ipca_maior
    
    part1 = (1 + y1 / 100)
    part2 = (1 + y2 / 100) / (1 + y1 / 100)
    exponent = (duration - x1) / (x2 - x1)

    result = ((part1 * (part2 ** exponent)) - 1) * 100
    return result

In [180]:
for data in datas_referencia:
    curva_juros = soberano.loc[soberano['data_referencia'] == data]
    # print(curva_juros)

    if curva_juros.empty:
        continue  # Skip if no data for this date

    cleaned_maturities = []
    cleaned_ipcas = []

    # Iterate through each row of the DataFrame
    for _, row in curva_juros.iterrows():
        maturity_list = row['maturity']
        ipca_list = row['ipca']

        # Ensure they are lists (avoid errors)
        if not (isinstance(maturity_list, list) and isinstance(ipca_list, list)):
            continue

        # Remove NaN values and their corresponding maturity values
        cleaned = [(m, i) for m, i in zip(maturity_list, ipca_list) if not math.isnan(i)]

        if cleaned:
            maturity_clean, ipca_clean = zip(*cleaned)
            cleaned_maturities.append(list(maturity_clean))
            cleaned_ipcas.append(list(ipca_clean))
        else:
            cleaned_maturities.append([])
            cleaned_ipcas.append([])

    maturity = cleaned_maturities[0]
    ipca = cleaned_ipcas[0]
    
    # print(maturity)
    # print(ipca)
    
    empresas_dia = empresas.loc[empresas['data_referencia'] == data]

    taxa_empresas = empresas_dia['taxa_indicativa'].to_list()
    duration = empresas_dia['duration'].to_list()

    # print(len(duration))
    # print(len(empresas_dia))
    spread_empresa = list()
    duration_round = list()

    for i in range(0,len(empresas_dia)):
        d = duration[i]
        # print(d)
        if math.isnan(d):
            spread_empresa.append(None)
            duration_round.append(None)
            continue
            
        taxa_indicativa = taxa_empresas[i]
        
        d100 = round(d/100)*100
        D = d/252
        # d = round(D*2)/2
        indice = duration.index(d)
        # print(indice)

        # print('duration: ',d)

        m1 = [x for x in maturity if x <= D]
        m2 = [x for x in maturity if x > D]
        # print(m1)
        # print(m2)
        maturity1 = max(m1, default = None)
        maturity2 = min(m2, default = None)

        if maturity1 is None or maturity2 is None:
            spread_empresa.append(None)
            duration_round.append(None)
            continue
        
        im1 = maturity.index(maturity1)
        im2 = maturity.index(maturity2)
        ipca1 = ipca[im1]
        ipca2 = ipca[im2]

        # print(maturity1, ipca1)
        # print(maturity2, ipca2)
    
        ipca_especifica = interpolar(D, maturity2, maturity1, ipca1, ipca2)

        spread_e = 100*((100+taxa_indicativa)/(100+ipca_especifica)-1)
        spread_empresa.append(spread_e)
        duration_r = round(d/100)*100
        duration_round.append(duration_r)

        # print(spread_empresa)
        # print(duration_round)
        
    

    # print(empresas_dia) 
    empresas_dia['spread_empresa'] = spread_empresa
    empresas_dia['duration_round'] = duration_round

    empresas_dia.to_sql(name='debenture_spread', con=engine, if_exists='append', index=False)

    #popular o novo banco de dados

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empresas_dia['spread_empresa'] = spread_empresa
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empresas_dia['duration_round'] = duration_round
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empresas_dia['spread_empresa'] = spread_empresa
A value is trying to be set on a copy of a slice from a DataF

KeyboardInterrupt: 

In [177]:
empresas_dia

Unnamed: 0,codigo_ativo,data_referencia,taxa_emissao,taxa_indicativa,duration,emissor,spread_empresa,duration_round
0,TBLE26,2025-02-24,"6,2515%",7.1347,213.0,ENGIE BRASIL ENERGIA S.A.(*) (**),1.510714,200.0
1,TCII11,2025-02-24,"6,0594%",8.2124,1615.0,AES TUCANO HOLDING II S.A (*),0.488365,1600.0
2,TEPA12,2025-02-24,"10,0924%",10.7406,892.0,BRASIL TECNOLOGIA E PARTICIPACAO S.A. (*),2.842709,900.0
3,TIET18,2025-02-24,"6,0215%",7.4309,688.0,AES TIETE ENERGIA S/A,-0.194234,700.0
4,TIET29,2025-02-24,"4,7133%",7.4386,697.0,AES TIETE ENERGIA S/A (*) (**),-0.185120,700.0
...,...,...,...,...,...,...,...,...
508,ECHP11,2025-02-24,"6,9000%",7.3800,650.0,ECHOENERGIA PARTICIPACOES S/A (*),-0.249795,600.0
509,ECHP12,2025-02-24,"4,7500%",7.3758,680.0,ECHOENERGIA PARTICIPACOES S/A (*),-0.247165,700.0
510,ECHP22,2025-02-24,"4,7500%",7.5007,680.0,ECHOENERGIA PARTICIPACOES S/A (*),-0.131132,700.0
511,ECOV16,2025-02-24,"6,0950%",7.7093,1348.0,CONCESSIONÁRIA ECOVIAS DOS IMIGRANTES S/A (*),0.018358,1300.0


In [None]:
## retirar o break e fazer para todas as datas e popular o banco de dados