# Studying the Nubank ETF methodology LVOL11


## Import Libraries

In [2]:
import pandas as pd
import numpy as np

import yfinance as yf
import os

## Uploading data

In [3]:
script_directory = os.getcwd() #getting the script directory path
directory_path = os.path.join(script_directory,'IBOVDia_22-07-24.csv') # Ibovespa index composition downloaded from B3 website

#### Reading Ibovespa composition

In [4]:

df_ibov = pd.read_csv(os.path.join(script_directory,'IBOVDia_22-07-24.csv'), 
                      encoding = 'cp1252',
                      sep = ';',
                      skiprows=1,
                      skipfooter=2,
                      index_col = False,
                      thousands='.',
                      decimal=',',
                      dtype={'Qtde. Teórica': float,  'Part. (%)':float})

df_ibov['tickers'] = df_ibov['Código']+'.SA'

df_ibov.head()

  df_ibov = pd.read_csv(os.path.join(script_directory,'IBOVDia_22-07-24.csv'),


Unnamed: 0,Código,Ação,Tipo,Qtde. Teórica,Part. (%),tickers
0,RRRP3,3R PETROLEUM,ON NM,238441700.0,0.309,RRRP3.SA
1,ALOS3,ALLOS,ON NM,532616600.0,0.551,ALOS3.SA
2,ALPA4,ALPARGATAS,PN N1,166362000.0,0.07,ALPA4.SA
3,ABEV3,AMBEV S/A,ON ATZ,4394246000.0,2.458,ABEV3.SA
4,ARZZ3,AREZZO CO,ON ED NM,62305890.0,0.155,ARZZ3.SA


#### Reading ibovespa tickers historical prices

In [5]:
# setting window period
start_date = '2023-07-15'
end_date = '2024-07-15'
data = yf.download(df_ibov['tickers'].to_list(), start = start_date, end = end_date)['Adj Close']
data['time'] = data.index

data1 = pd.melt(data, id_vars = ['time'], var_name = 'asset', value_name = 'price') # change dataframe wide to long

data1['returns'] = data1.groupby('asset')['price'].pct_change() #calculating daily returns 
data1.head()

[*********************100%%**********************]  86 of 86 completed


Unnamed: 0,time,asset,price,returns
0,2023-07-17,ABEV3.SA,14.083839,
1,2023-07-18,ABEV3.SA,13.941291,-0.010121
2,2023-07-19,ABEV3.SA,13.903277,-0.002727
3,2023-07-20,ABEV3.SA,14.178872,0.019822
4,2023-07-21,ABEV3.SA,14.539996,0.025469


## Calculating the EWMA volatility over the returns

In [13]:
data3 = data1.groupby('asset')['returns'].ewm(span = 252 , adjust=False).std().to_frame().reset_index(level = 0).rename(columns = {'returns':'std_ewma'}) 

data4 = data3.groupby('asset')['std_ewma'].sum().to_frame().reset_index(level = 0).sort_values(by = ['std_ewma']) 

data4['std_ewma_anual'] = data4['std_ewma']* np.sqrt(252) #annualized

percentiles = [0, 0.33, 0.66, 1]

# Use pd.qcut to split the data into 3 parts and assign the part labels
data4['percentile_group'] = pd.qcut(data4['std_ewma_anual'], q=percentiles, labels=["Low", "Medium", "High"])
data4.reset_index(drop = True, inplace = True)

data4['asset'] = data4['asset'].str.replace('.SA','',regex = False) # removint '.SA'

print('Top')
print(data4.head(10))
print('Tail')
print(data4.tail(10))

Top
    asset  std_ewma  std_ewma_anual percentile_group
0  TAEE11  1.652667       26.235267              Low
1   CPFE3  2.248234       35.689607              Low
2   EGIE3  2.253965       35.780591              Low
3   ALOS3  2.334193       37.054169              Low
4   ITSA4  2.381760       37.809268              Low
5   BBAS3  2.582916       41.002524              Low
6   EQTL3  2.605553       41.361873              Low
7  KLBN11  2.726458       43.281181              Low
8   BBSE3  2.749195       43.642122              Low
9  SANB11  2.825068       44.846567              Low
Tail
    asset   std_ewma  std_ewma_anual percentile_group
76  IRBR3   6.942225      110.204407             High
77  MRVE3   6.988044      110.931763             High
78  AZUL4   7.391771      117.340732             High
79  PETZ3   7.492250      118.935783             High
80  COGN3   7.614309      120.873405             High
81  ALPA4   7.922608      125.767507             High
82  CVCB3   8.607674      136.

## Comparison with the LVOL11 composition

In [18]:
df_lvol11 = pd.read_excel(os.path.join(script_directory,'Cesta LVOL11 20240722.xlsx'),
                         skiprows=10)  # reading LVOL11 official tickers composition from the B3
df_lvol11.rename(columns = {'Ativo':'asset','Quantidade':'quantity','Valor de Abertura (R$)':'open_value_reais','Participação no Total da Cesta (%)':'percentage_composition'}, inplace = True)
df_lvol11.head()

Unnamed: 0,asset,quantity,open_value_reais,percentage_composition
0,ABEV3,15758,184842.27,3.736
1,BBAS3,6543,177371.4,3.585
2,BBSE3,6770,230854.93,4.666
3,BRAP4,7765,146993.14,2.971
4,CCRO3,13021,161588.56,3.266


#### Merge the official LVOL11 portifolio composition with the calculated from ibovespa tickers

In [19]:
df_lvol11 = pd.merge(df_lvol11,data4[['asset','percentile_group']], on = 'asset', how = 'left')
df_lvol11

Unnamed: 0,asset,quantity,open_value_reais,percentage_composition,percentile_group
0,ABEV3,15758,184842.27,3.736,Low
1,BBAS3,6543,177371.4,3.585,Low
2,BBSE3,6770,230854.93,4.666,Low
3,BRAP4,7765,146993.14,2.971,Low
4,CCRO3,13021,161588.56,3.266,Low
5,CPFE3,5820,191373.1,3.868,Low
6,CPLE6,17412,175342.88,3.544,Medium
7,EGIE3,4801,218931.22,4.425,Low
8,ENGI11,3301,152831.31,3.089,Low
9,EQTL3,5808,188701.4,3.814,Low
