In [3]:
!pip install dune_client

Collecting dune_client
  Downloading dune_client-1.7.0-py3-none-any.whl (36 kB)
Collecting types-setuptools>=68.2.0.0
  Downloading types_setuptools-69.2.0.20240317-py3-none-any.whl (58 kB)
     -------------------------------------- 58.7/58.7 kB 768.7 kB/s eta 0:00:00
Collecting ndjson>=0.3.1
  Downloading ndjson-0.3.1-py2.py3-none-any.whl (5.3 kB)
Collecting types-python-dateutil>=2.8.19
  Downloading types_python_dateutil-2.9.0.20240316-py3-none-any.whl (9.7 kB)
Collecting Deprecated>=1.2.0
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Collecting types-PyYAML>=6.0.11
  Downloading types_PyYAML-6.0.12.20240311-py3-none-any.whl (15 kB)
Collecting types-Deprecated>=1.2.9.3
  Downloading types_Deprecated-1.2.9.20240311-py3-none-any.whl (3.5 kB)
Collecting types-requests>=2.28.0
  Downloading types_requests-2.31.0.20240402-py3-none-any.whl (15 kB)
  Downloading types_requests-2.31.0.20240311-py3-none-any.whl (14 kB)
  Downloading types_requests-2.31.0.20240310-py3-none-an

In [6]:
!pip install dataclasses_json


Collecting dataclasses_json
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting marshmallow<4.0.0,>=3.18.0
  Downloading marshmallow-3.21.1-py3-none-any.whl (49 kB)
     -------------------------------------- 49.4/49.4 kB 832.4 kB/s eta 0:00:00
Collecting typing-inspect<1,>=0.4.0
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Installing collected packages: typing-inspect, marshmallow, dataclasses_json
Successfully installed dataclasses_json-0.6.4 marshmallow-3.21.1 typing-inspect-0.9.0


In [3]:
import streamlit as st
import pandas as pd
import requests
import numpy as np
import yfinance as yf
from dune_client.client import DuneClient

In [4]:
@st.cache_data()
def fetch_data_from_api(api_url, params=None):
    response = requests.get(api_url, params=params)
    if response.status_code == 200:
        data = response.json()
        if 'rows' in data['result']:
            return pd.DataFrame(data['result']['rows'])
        return data
    else:
        print(f"Failed to retrieve data: {response.status_code}")
        return pd.DataFrame()  # or an empty dict



In [5]:
def fetch_historical_data(api_url, api_key):
    # Use the API key either as a query parameter or in the headers
    params = {'vs_currency': 'usd', 'days': 'max', 'interval': 'daily', 'x_cg_demo_api_key': api_key}
    headers = {'x-cg-demo-api-key': api_key}  # Alternatively, use this header

    response = requests.get(api_url, params=params, headers=headers)

    if response.status_code == 200:
        # Parse the JSON response
        historical_pricedata = response.json()
        # Extract the 'prices' and 'market_caps' data
        historical_price = historical_pricedata['prices']
        market_cap = pd.DataFrame(historical_pricedata['market_caps'], columns=['date', 'marketcap'])

        # Convert the 'timestamp' column from UNIX timestamps in milliseconds to datetime objects
        history = pd.DataFrame(historical_price, columns=['timestamp', 'price'])
        history['date'] = pd.to_datetime(history['timestamp'], unit='ms')
        history.set_index('date', inplace=True)
        history.drop(columns='timestamp', inplace=True)

        vol = pd.DataFrame(historical_pricedata['total_volumes'], columns=['date', 'volume'])
        vol['date'] = pd.to_datetime(vol['date'], unit='ms')
        vol.set_index('date', inplace=True)
        
        return history, market_cap, vol
    else:
        print(f"Failed to retrieve data: {response.status_code}")
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

In [6]:
api_key_dune = st.secrets["api_key"]
api_key_cg = st.secrets["api_key_cg"]
api_key_FRED = st.secrets["FRED_API_KEY"]

In [7]:
dune = DuneClient(api_key_dune)

In [8]:
def fetch_dune_data(num):
    result = dune.get_latest_result(num)
    return pd.DataFrame(result.result.rows)

In [9]:
pd.options.display.float_format = '{:,.2f}'.format

### First, lets get MakerDAO Financial Statements from https://dune.com/steakhouse/makerdao

Balance Sheet

In [10]:
# Balance Sheet
#bs_raw = dune.get_latest_result(2840463)

In [11]:
#bs_df = pd.DataFrame(bs_raw.result.rows)
#bs_df['period'] = pd.to_datetime(bs_df['period'])
#bs_df.set_index('period', inplace=True)
#bs_df.index = bs_df.index.normalize()
#bs_df = bs_df.sort_index()

In [12]:
#bs_df

In [13]:
bs_path = '../data/csv/bs.csv'

In [14]:
#bs_df.to_csv(bs_path)

In [15]:
bs_csv = pd.read_csv(bs_path, index_col='period', parse_dates=True)

In [16]:
#categorizing items as asset, liability, or equity
def categorize_item(item):
    if item in ['Crypto-Loans', 'Real-World Assets', 'Others assets', 'Stablecoins']:
        return 'Assets'
    elif item in ['DAI','DSR']:  # Assuming DAI represents a liability here; adjust according to your accounting rules
        return 'Liabilities'
    elif item == 'Equity':
        return 'Equity'
    else:
        return 'Other'  # For any item not explicitly categorized

# Assuming 'df' is your DataFrame
bs_csv['category'] = bs_csv['item'].apply(categorize_item)



In [17]:
bs_csv = bs_csv.iloc[::-1]

In [441]:
bs_csv.describe()

Unnamed: 0,balance,normalized
count,8549.0,8549.0
mean,0.0,0.0
std,2676252421.26,0.47
min,-9951190219.65,-0.99
25%,-81183521.56,-0.01
50%,46794.81,0.0
75%,1333547688.4,0.3
max,6135551191.72,1.0


In [19]:
pivoted_balance_sheet = bs_csv.pivot(columns='item', values='balance')

In [20]:
# Percent Changes in account balances 
pivoted_balance_sheet['Crypto-Loans_pct_chg'] = pivoted_balance_sheet['Crypto-Loans'].pct_change()
pivoted_balance_sheet['DAI_pct_chg'] = pivoted_balance_sheet['DAI'].pct_change()
pivoted_balance_sheet['DSR_pct_chg'] = pivoted_balance_sheet['DSR'].pct_change()
pivoted_balance_sheet['Equity_pct_chg'] = pivoted_balance_sheet['Equity'].pct_change()
pivoted_balance_sheet['Others_assets_pct_chg'] = pivoted_balance_sheet['Others assets'].pct_change()  # Assuming this is the correct column name
pivoted_balance_sheet['Real-World_Assets_pct_chg'] = pivoted_balance_sheet['Real-World Assets'].pct_change()
pivoted_balance_sheet['Stablecoins_pct_chg'] = pivoted_balance_sheet['Stablecoins'].pct_change()


In [21]:
# Rolling Averages, Standard Deviation
# Define the window size for rolling calculation
window_size = 7  # for example, a 7-day rolling window

# Calculate rolling averages
pivoted_balance_sheet['Crypto-Loans_7d_rolling_avg'] = pivoted_balance_sheet['Crypto-Loans'].rolling(window=window_size).mean()
pivoted_balance_sheet['DAI_7d_rolling_avg'] = pivoted_balance_sheet['DAI'].rolling(window=window_size).mean()
pivoted_balance_sheet['DSR_7d_rolling_avg'] = pivoted_balance_sheet['DSR'].rolling(window=window_size).mean()
pivoted_balance_sheet['Equity_7d_rolling_avg'] = pivoted_balance_sheet['Equity'].rolling(window=window_size).mean()
pivoted_balance_sheet['Others_assets_7d_rolling_avg'] = pivoted_balance_sheet['Others assets'].rolling(window=window_size).mean()
pivoted_balance_sheet['Real-World_Assets_7d_rolling_avg'] = pivoted_balance_sheet['Real-World Assets'].rolling(window=window_size).mean()
pivoted_balance_sheet['Stablecoins_7d_rolling_avg'] = pivoted_balance_sheet['Stablecoins'].rolling(window=window_size).mean()

# Calculate volatility (standard deviation)
pivoted_balance_sheet['Crypto-Loans_7d_rolling_volatility'] = pivoted_balance_sheet['Crypto-Loans_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['DAI_7d_rolling_volatility'] = pivoted_balance_sheet['DAI_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['DSR_7d_rolling_volatility'] = pivoted_balance_sheet['DSR_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['Equity_7d_rolling_volatility'] = pivoted_balance_sheet['Equity_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['Others_assets_7d_rolling_volatility'] = pivoted_balance_sheet['Others_assets_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['Real-World_Assets_7d_rolling_volatility'] = pivoted_balance_sheet['Real-World_Assets_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['Stablecoins_7d_rolling_volatility'] = pivoted_balance_sheet['Stablecoins_pct_chg'].rolling(window=window_size).std()



In [22]:
# Assuming you've already calculated percent changes (_pct_chg)
window_size = 30  # Adjust based on your analysis needs

# Calculate rolling averages and volatilities based on percent changes
for col in ['Crypto-Loans', 'DAI', 'DSR', 'Equity', 'Others_assets', 'Real-World_Assets', 'Stablecoins']:
    pct_chg_col = f'{col}_pct_chg'  # The column names for percent changes you've calculated
    pivoted_balance_sheet[f'{col}_30d_rolling_avg_pct_chg'] = pivoted_balance_sheet[pct_chg_col].rolling(window=window_size).mean()
    pivoted_balance_sheet[f'{col}_30d_volatility_pct_chg'] = pivoted_balance_sheet[pct_chg_col].rolling(window=window_size).std()


In [23]:
pivoted_balance_sheet.columns = [f'b_s_{col}' if col != 'period' else col for col in pivoted_balance_sheet.columns]

In [24]:
pivoted_balance_sheet.tail()

Unnamed: 0_level_0,b_s_Crypto-Loans,b_s_DAI,b_s_DSR,b_s_Equity,b_s_Others assets,b_s_Real-World Assets,b_s_Stablecoins,b_s_Crypto-Loans_pct_chg,b_s_DAI_pct_chg,b_s_DSR_pct_chg,...,b_s_DSR_rolling_avg_pct_chg,b_s_DSR_volatility_pct_chg,b_s_Equity_rolling_avg_pct_chg,b_s_Equity_volatility_pct_chg,b_s_Others_assets_rolling_avg_pct_chg,b_s_Others_assets_volatility_pct_chg,b_s_Real-World_Assets_rolling_avg_pct_chg,b_s_Real-World_Assets_volatility_pct_chg,b_s_Stablecoins_rolling_avg_pct_chg,b_s_Stablecoins_volatility_pct_chg
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-03-17 00:00:00+00:00,2567732340.42,-3225717987.93,-1405341920.91,-68936169.75,48533.09,1082824203.9,1049391001.19,-0.02,-0.01,0.03,...,0.01,0.05,0.01,0.04,0.0,0.0,-0.02,0.03,0.03,0.17
2024-03-18 00:00:00+00:00,2487739660.28,-3236587097.37,-1440743920.84,-67073216.58,48533.09,1084684721.77,1171931319.66,-0.03,0.0,0.03,...,0.01,0.05,0.01,0.04,0.0,0.0,-0.02,0.03,0.03,0.17
2024-03-19 00:00:00+00:00,2442828415.48,-3237504446.8,-1399485314.96,-66776806.16,48533.09,1084527447.54,1176362171.82,-0.02,0.0,-0.03,...,0.01,0.05,0.01,0.04,0.0,0.0,-0.02,0.03,0.03,0.17
2024-03-20 00:00:00+00:00,2358443170.28,-3225358948.44,-1373045029.84,-67160889.57,48533.09,1083706110.61,1223367053.87,-0.03,-0.0,-0.02,...,0.01,0.05,0.01,0.04,0.0,0.0,-0.02,0.03,0.03,0.17
2024-03-21 00:00:00+00:00,2358355394.97,-3223114058.66,-1384700394.26,-67156270.08,48533.09,1083706110.61,1232860684.32,-0.0,-0.0,0.01,...,0.01,0.05,0.01,0.04,0.0,0.0,-0.02,0.03,0.03,0.17


In [25]:
pivoted_balance_sheet.shape[0]

1360

MONTHLY Income Statement/PnL (also includes more detailed balance sheet)

In [26]:
#is_df = fetch_dune_data(2641549) 

In [27]:
#is_df.head()

In [28]:
#is_df_wide = is_df.pivot_table(index='period', columns='item', values='value', aggfunc='sum').reset_index()
#is_df_wide = is_df_wide.iloc[::-1]

In [29]:
#is_df_wide.head()

In [30]:
is_path = '../data/csv/is.csv'

In [31]:
#is_df.to_csv(is_path)

In [32]:
is_csv = pd.read_csv(is_path, index_col='period', parse_dates=True)

In [386]:
cleaned_is = is_csv.drop(columns=['Unnamed: 0','year'])

In [442]:
cleaned_is.describe()

Unnamed: 0,expenses,lending_income,liquidation_income,month,net_income,trading_income,value
count,106.0,106.0,106.0,954.0,106.0,106.0,689.0
mean,-2138542.9,4914952.32,851312.42,6.43,3835126.96,207405.12,1346134004.51
std,2317856.06,5635119.85,3083911.34,3.6,6165521.06,387439.0,2368584436.46
min,-13632119.6,2828.77,-5787399.49,1.0,-5601718.87,0.0,-13632119.6
25%,-2990860.0,993839.65,4623.88,3.0,36867.28,0.0,0.0
50%,-1820097.71,2965648.32,42700.73,6.0,2136663.16,9359.24,3044390.27
75%,0.0,7786060.64,205622.81,10.0,6547116.47,229736.22,2038968283.83
max,0.0,25123953.08,18025406.62,12.0,23542837.91,1556244.79,9429355721.67


In [34]:
pivoted_income_statement = is_csv.pivot_table(index='period', 
                            columns='item', 
                            values='value', 
                            aggfunc='sum').reset_index()

In [35]:
pivoted_income_statement['Total Revenues']= pivoted_income_statement[['1 - PnL', '1.1 - Lending Revenues', '1.2 - Liquidations Revenues', '1.3 - Trading Revenues']].sum(axis=1)
pivoted_income_statement['Total Expenses'] = pivoted_income_statement[['1.4 - Lending Expenses', '1.5 - Liquidations Expenses', '1.6 - Workforce Expenses']].sum(axis=1)
pivoted_income_statement['profit_margin'] = pivoted_income_statement['1.9 - Net Income'] / pivoted_income_statement['2.9 - Total Assets']
pivoted_income_statement['ROA'] = pivoted_income_statement['1.9 - Net Income'] / pivoted_income_statement['2.9 - Total Assets']
pivoted_income_statement['ROE'] = pivoted_income_statement['1.9 - Net Income'] / pivoted_income_statement['3.7 - Equity (Surplus Buffer)']
pivoted_income_statement['debt_to_equity'] = pivoted_income_statement['3.1 - Liabilities (DAI)'] / pivoted_income_statement['3.7 - Equity (Surplus Buffer)']
pivoted_income_statement['debt_ratio'] = pivoted_income_statement['3.1 - Liabilities (DAI)'] / pivoted_income_statement['2.9 - Total Assets'] 
pivoted_income_statement['cumulative_revenues'] = pivoted_income_statement['Total Revenues'].cumsum()
pivoted_income_statement['cumulative_expenses'] = pivoted_income_statement['Total Expenses'].cumsum()
pivoted_income_statement['cumulative_net_income'] = pivoted_income_statement['1.9 - Net Income'].cumsum()


In [36]:
pivoted_income_statement.tail()

item,period,1 - PnL,1.1 - Lending Revenues,1.2 - Liquidations Revenues,1.3 - Trading Revenues,1.4 - Lending Expenses,1.5 - Liquidations Expenses,1.6 - Workforce Expenses,1.9 - Net Income,2 - Assets,...,Total Revenues,Total Expenses,profit_margin,ROA,ROE,debt_to_equity,debt_ratio,cumulative_revenues,cumulative_expenses,cumulative_net_income
48,2023-11-01,0.0,29666040.79,455.03,0.0,-6376789.6,0.0,-2974302.25,20315403.96,0.0,...,29666495.81,-9351091.85,0.0,0.0,0.35,90.89,0.99,291977550.62,-133751913.34,158225637.28
49,2023-12-01,0.0,14270261.48,0.0,0.0,-6484200.84,0.0,-2658930.56,5127130.08,0.0,...,14270261.48,-9143131.4,0.0,0.0,0.1,98.65,0.99,306247812.1,-142895044.74,163352767.36
50,2024-01-01,0.0,20751255.62,27376.3,0.0,-5943924.23,0.0,-5669555.33,9165152.35,0.0,...,20778631.92,-11613479.57,0.0,0.0,0.17,91.96,0.99,327026444.02,-154508524.31,172517919.71
51,2024-02-01,0.0,29691662.12,0.0,0.0,-4567709.04,0.0,-1581115.17,23542837.91,0.0,...,29691662.12,-6148824.21,0.0,0.0,0.35,74.02,0.99,356718106.13,-160657348.51,196060757.62
52,2024-03-01,0.0,18130771.84,4623.88,0.0,-7076544.94,0.0,-3857879.46,7200971.32,0.0,...,18135395.72,-10934424.4,0.0,0.0,0.11,68.66,0.99,374853501.85,-171591772.91,203261728.94


In [37]:
# Percent Changes in account balances
window_size = 3  # Three months
# Calculate rolling averages and volatilities based on percent changes
for col in ['Total Revenues', 'Total Expenses', '1.9 - Net Income']:
    pct_chg_col = f'{col}_pct_chg'  # Define the percent change column name
    pivoted_income_statement[pct_chg_col] = pivoted_income_statement[col].pct_change()
    # Use the pct_chg_col variable correctly now
    pivoted_income_statement[f'{col}_3m_rolling_avg_pct_chg'] = pivoted_income_statement[pct_chg_col].rolling(window=window_size).mean()
    pivoted_income_statement[f'{col}_3m_volatility_pct_chg'] = pivoted_income_statement[pct_chg_col].rolling(window=window_size).std()
    for lag in range(1,13):
        pivoted_income_statement[f'{col}_3m_rolling_avg_pct_chg_lag_{lag}'] = pivoted_income_statement[f'{col}_3m_rolling_avg_pct_chg'].shift(lag)
        pivoted_income_statement[f'{col}_3m_volatility_pct_chg_lag_{lag}'] = pivoted_income_statement[f'{col}_3m_volatility_pct_chg'].shift(lag)


In [38]:

# Generate lagged features
for lag in range(1, 13):  # From 1 to 12 months
    pivoted_income_statement[f'Total_Revenues_Lag_{lag}m'] = pivoted_income_statement['Total Revenues'].shift(lag)
    pivoted_income_statement[f'Total_Expenses_Lag_{lag}m'] = pivoted_income_statement['Total Expenses'].shift(lag)
    pivoted_income_statement[f'Net_Income_Lag_{lag}m'] = pivoted_income_statement['1.9 - Net Income'].shift(lag)
    pivoted_income_statement[f'profit_margin_Lag_{lag}m'] = pivoted_income_statement['profit_margin'].shift(lag)
    pivoted_income_statement[f'ROA_Lag_{lag}m'] = pivoted_income_statement['ROA'].shift(lag)
    pivoted_income_statement[f'ROE_Lag_{lag}m'] = pivoted_income_statement['ROE'].shift(lag)
    pivoted_income_statement[f'debt_to_equity_Lag_{lag}m'] = pivoted_income_statement['debt_to_equity'].shift(lag)
    pivoted_income_statement[f'debt_ratio_Lag_{lag}m'] = pivoted_income_statement['debt_ratio'].shift(lag)


  pivoted_income_statement[f'debt_ratio_Lag_{lag}'] = pivoted_income_statement['debt_ratio'].shift(lag)
  pivoted_income_statement[f'Total_Revenues_Lag_{lag}'] = pivoted_income_statement['Total Revenues'].shift(lag)
  pivoted_income_statement[f'Total_Expenses_Lag_{lag}'] = pivoted_income_statement['Total Expenses'].shift(lag)
  pivoted_income_statement[f'Net_Income_Lag_{lag}'] = pivoted_income_statement['1.9 - Net Income'].shift(lag)
  pivoted_income_statement[f'profit_margin_Lag_{lag}'] = pivoted_income_statement['profit_margin'].shift(lag)
  pivoted_income_statement[f'ROA_Lag_{lag}'] = pivoted_income_statement['ROA'].shift(lag)
  pivoted_income_statement[f'ROE_Lag_{lag}'] = pivoted_income_statement['ROE'].shift(lag)
  pivoted_income_statement[f'debt_to_equity_Lag_{lag}'] = pivoted_income_statement['debt_to_equity'].shift(lag)
  pivoted_income_statement[f'debt_ratio_Lag_{lag}'] = pivoted_income_statement['debt_ratio'].shift(lag)
  pivoted_income_statement[f'Total_Revenues_Lag_{lag}'] 

In [39]:
pivoted_income_statement.shape[0]

53

Assets/Revenue Per Type
Coinbase asset type: http://forum.makerdao.com/t/mip81-coinbase-usdc-institutional-rewards/17703/254?u=sebventures


In [40]:
#assets_raw = dune.get_latest_result(58495)

In [41]:
#assets_p_t_ts = pd.DataFrame(assets_raw.result.rows)
#assets_p_t_ts['dt'] = pd.to_datetime(assets_p_t_ts['dt'])
#assets_p_t_ts.set_index('dt', inplace=True)

In [42]:
#assets_p_t_ts.head()

In [43]:
#assets_p_t_ts.to_csv(as_path)

In [44]:
as_path = '../data/csv/as.csv'

In [45]:
as_csv = pd.read_csv(as_path, index_col='dt', parse_dates=True)

In [46]:
as_csv = as_csv.drop(columns=['total_asset'])

Daily Interest Revenues By Vault

In [47]:
#ir_v = fetch_dune_data(3567939) 

In [48]:
#ir_v['period'] = pd.to_datetime(ir_v['period'])
#ir_v.set_index('period', inplace=True)

In [49]:
#ir_v.head()

In [50]:
daily_int_path = '../data/csv/d_int.csv'

In [51]:
#ir_v.to_csv(daily_int_path)

In [52]:
ir_csv = pd.read_csv(daily_int_path, index_col='period', parse_dates=True)

In [53]:
ir_csv.shape[0]

22031

In [54]:

ir_csv = ir_csv.rename_axis('day')

 

In [55]:
ir_csv = ir_csv.rename(columns={'collateral':'ilk'})

In [56]:
ir_csv['ilk'].unique()

array(['ETH-A', 'ETH-B', 'ETH-C', 'WBTC-A', 'WBTC-B', 'WBTC-C',
       'WSTETH-A', 'WSTETH-B', 'RWA002-A', 'RWA013-A', 'DIRECT-SPARK-DAI',
       'RWA014-A', 'RWA005-A', 'RWA012-A', 'RWA015-A', 'RWA007-A',
       'RETH-A', 'RWA003-A', 'GUNIV3DAIUSDC2-A', 'CRVV1ETHSTETH-A',
       'USDC-B', 'LINK-A', 'MATIC-A', 'UNIV2USDCETH-A', 'GNO-A',
       'UNIV2DAIUSDC-A', 'YFI-A', 'RWA004-A', 'GUNIV3DAIUSDC1-A',
       'GUSD-A', 'PAXUSD-A', 'USDC-A', 'DIRECT-AAVEV2-DAI',
       'DIRECT-COMPV2-DAI', 'RWA008-A', 'RENBTC-A', 'MANA-A', 'RWA009-A',
       'RWA001-A', 'UNI-A', 'UNIV2DAIETH-A', 'UNIV2WBTCETH-A',
       'UNIV2WBTCDAI-A', 'RWA-001', 'UNIV2UNIETH-A', 'TUSD-A', 'USDP-A',
       'BAT-A', 'BAL-A', 'ZRX-A', 'COMP-A', 'PSM-GUSD-A', 'AAVE-A',
       'UNIV2LINKETH-A', 'KNC-A', 'LRC-A', 'PSM-USDC-A', 'UNIV2AAVEETH-A',
       'UNIV2DAIUSDT-A', 'UNIV2ETHUSDT-A', 'USDT-A', 'PSM-PAX-A',
       'RWA006-A', 'PAX-A', nan, 'USDC', 'SAI'], dtype=object)

In [57]:
top_vaults = ir_csv.groupby('ilk').sum().sort_values('daily_revenues', ascending=False)

In [58]:
#10 Most Revenue Generating Vaults
top_10_vaults = top_vaults.head(10)

In [59]:
top_10_vaults

Unnamed: 0_level_0,daily_revenues
ilk,Unnamed: 1_level_1
ETH-A,110769338.09
WBTC-A,41088135.6
WSTETH-A,22414721.27
ETH-C,17297706.33
ETH-B,15374386.83
WSTETH-B,11554717.49
USDC-A,7351192.96
RWA013-A,3241981.74
WBTC-C,2697915.32
RWA012-A,2120499.16


Dai Maturity Profile

Step-by-Step Process:
Tracking DAI Movements:

First, all transactions involving DAI are tracked to understand how DAI moves in and out of wallets. This includes both inflows (adding DAI to a wallet) and outflows (removing DAI from a wallet).
Defining Maturity Buckets:

Maturity buckets are predefined categories based on time durations, such as "1-day", "1-week", "1-month", "1-year", etc. Each bucket represents a hypothesis about how long DAI tends to stay put before being moved again.
Assigning Weights to Buckets:

Weights are assigned to each maturity bucket to reflect assumptions or historical observations about the distribution of DAI across these buckets. For example, if historically 30% of DAI is moved or used within a day, then the "1-day" bucket might get a weight of 0.30 (or 30%).
Applying Weights Based on Wallet Types:

DAI can be held in different types of wallets or contracts, each with its own expected behavior. For example, DAI in a savings contract (like DSR) might be considered more long-term ("1-year"), while DAI in a regular wallet might be more liquid ("1-day" or "1-week"). The weights applied to the DAI in these wallets reflect these expectations.
Calculating DAI Amounts per Bucket:

For each wallet (or DAI holding), the total amount of DAI is distributed across the maturity buckets based on the assigned weights. This means if a wallet has 100 DAI and the "1-day" bucket weight is 30%, then 30 DAI is considered to have a 1-day maturity.
The process is repeated for each wallet and each maturity bucket, based on the specific weights for that wallet type and the total DAI it holds.
Aggregating Across the Ecosystem:

Finally, to get the ecosystem-wide view, the amounts of DAI in each maturity bucket from all wallets are aggregated. This provides a snapshot of how much DAI is considered to be in each maturity bucket across the entire MakerDAO system at any given time.

In [60]:
#d_m = fetch_dune_data(907852)

In [61]:
#d_m['dt'] = pd.to_datetime(d_m['dt'])

In [62]:
#d_m

In [63]:

#d_m.to_csv(d_m_path)


In [396]:
d_m_path = '../data/csv/d_m.csv'

In [397]:
d_m_csv = pd.read_csv(d_m_path, index_col='dt', parse_dates=True)

In [400]:
print(d_m_csv.describe())

       Unnamed: 0          outflow  outflow_dai_only  outflow_surplus_buffer  \
count    9,528.00         9,528.00          9,528.00                9,456.00   
mean     4,763.50   730,298,227.76    723,044,786.34            7,308,670.67   
std      2,750.64 1,172,359,348.27  1,153,238,025.26           20,455,276.75   
min          0.00             0.00              0.00           -5,674,219.65   
25%      2,381.75   107,656,317.16    107,636,256.59                    0.00   
50%      4,763.50   323,980,629.04    323,980,629.04                    0.00   
75%      7,145.25   628,916,174.70    628,916,174.70                    0.00   
max      9,527.00 6,014,959,678.79  5,952,992,110.30           83,550,327.25   

           total_period  
count          9,528.00  
mean   4,381,789,366.59  
std    2,882,217,947.64  
min                0.00  
25%    1,093,258,152.24  
50%    5,189,669,321.25  
75%    6,203,283,988.26  
max   10,118,832,636.48  


In [391]:
clean_dm = d_m_csv.drop(columns=['Unnamed: 0'])

In [449]:
clean_dm.describe()

Unnamed: 0,outflow,outflow_dai_only,outflow_surplus_buffer,total_period
count,9528.0,9528.0,9456.0,9528.0
mean,730298227.76,723044786.34,7308670.67,4381789366.59
std,1172359348.27,1153238025.26,20455276.75,2882217947.64
min,0.0,0.0,-5674219.65,0.0
25%,107656317.16,107636256.59,0.0,1093258152.24
50%,323980629.04,323980629.04,0.0,5189669321.25
75%,628916174.7,628916174.7,0.0,6203283988.26
max,6014959678.79,5952992110.3,83550327.25,10118832636.48


In [67]:
dai_maturity_df = d_m_csv.pivot_table(
    index='dt',  # or 'period' if your DataFrame's time column is named 'period'
    columns='maturity',
    values=['outflow', 'outflow_dai_only', 'outflow_surplus_buffer'],
    aggfunc='sum'  # or any other aggregation function that fits your needs
)

# Flatten the MultiIndex columns (optional, for cleaner column names)
dai_maturity_df.columns = ['_'.join(col).strip() for col in dai_maturity_df.columns.values]

# Reset the index if you want 'dt' back as a regular column
dai_maturity_df.reset_index(inplace=True)



In [392]:
dai_maturity_df.tail()

Unnamed: 0_level_0,outflow_1-block,outflow_1-day,outflow_1-month,outflow_1-week,outflow_1-year,outflow_3-months,outflow_dai_only_1-block,outflow_dai_only_1-day,outflow_dai_only_1-month,outflow_dai_only_1-week,outflow_dai_only_1-year,outflow_dai_only_3-months,outflow_surplus_buffer_1-block,outflow_surplus_buffer_1-day,outflow_surplus_buffer_1-month,outflow_surplus_buffer_1-week,outflow_surplus_buffer_1-year,outflow_surplus_buffer_3-months
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2024-03-16 00:00:00+00:00,453757132.17,453757132.17,136277964.3,591913771.82,2850066310.4,201065848.96,453757132.17,453757132.17,136277964.3,591913771.82,2781519501.85,201065848.96,0.0,0.0,0.0,0.0,68546808.55,0.0
2024-03-17 00:00:00+00:00,455307007.7,455307007.7,137425908.66,596032698.73,2849349624.78,202759537.36,455307007.7,455307007.7,137425908.66,596032698.73,2780461988.11,202759537.36,0.0,0.0,0.0,0.0,68887636.67,0.0
2024-03-18 00:00:00+00:00,461095932.81,461095932.81,140439203.63,607501207.95,2863252281.53,207205382.4,461095932.81,461095932.81,140439203.63,607501207.95,2796227598.03,207205382.4,0.0,0.0,0.0,0.0,67024683.5,0.0
2024-03-19 00:00:00+00:00,455524838.32,455524838.32,137117144.96,595167015.97,2854314452.33,202303984.36,455524838.32,455524838.32,137117144.96,595167015.97,2787586179.26,202303984.36,0.0,0.0,0.0,0.0,66728273.07,0.0
2024-03-20 00:00:00+00:00,451479837.89,451479837.89,135641898.57,589090233.01,2833931375.49,200127391.33,451479837.89,451479837.89,135641898.57,589090233.01,2766819019.01,200127391.33,0.0,0.0,0.0,0.0,67112356.48,0.0


In [68]:
dai_maturity_df['dt'] = pd.to_datetime(dai_maturity_df['dt'])
dai_maturity_df.set_index('dt', inplace=True)


start_date = dai_maturity_df.index.min()
end_date = dai_maturity_df.index.max()
date_range = pd.date_range(start=start_date, end=end_date, freq='D')  # 'D' for daily frequency




In [69]:
dai_maturity_df_reindexed = dai_maturity_df.reindex(date_range)

# Forward-fill missing values
dai_maturity_df_reindexed.ffill(inplace=True)

# Optionally, reset the index if you want 'dt' back as a column
dai_maturity_df_reindexed.reset_index(inplace=True)
dai_maturity_df_reindexed.rename(columns={'index': 'day'}, inplace=True)

In [70]:
dai_maturity_df_reindexed.columns = [f'dai_maturity_{col}' if col != 'day' and not col.startswith('dai_maturity_') else col for col in dai_maturity_df_reindexed.columns]

In [71]:
dai_maturity_df_reindexed.tail()

Unnamed: 0,day,dai_maturity_outflow_1-block,dai_maturity_outflow_1-day,dai_maturity_outflow_1-month,dai_maturity_outflow_1-week,dai_maturity_outflow_1-year,dai_maturity_outflow_3-months,dai_maturity_outflow_dai_only_1-block,dai_maturity_outflow_dai_only_1-day,dai_maturity_outflow_dai_only_1-month,dai_maturity_outflow_dai_only_1-week,dai_maturity_outflow_dai_only_1-year,dai_maturity_outflow_dai_only_3-months,dai_maturity_outflow_surplus_buffer_1-block,dai_maturity_outflow_surplus_buffer_1-day,dai_maturity_outflow_surplus_buffer_1-month,dai_maturity_outflow_surplus_buffer_1-week,dai_maturity_outflow_surplus_buffer_1-year,dai_maturity_outflow_surplus_buffer_3-months
1585,2024-03-16 00:00:00+00:00,453757132.17,453757132.17,136277964.3,591913771.82,2850066310.4,201065848.96,453757132.17,453757132.17,136277964.3,591913771.82,2781519501.85,201065848.96,0.0,0.0,0.0,0.0,68546808.55,0.0
1586,2024-03-17 00:00:00+00:00,455307007.7,455307007.7,137425908.66,596032698.73,2849349624.78,202759537.36,455307007.7,455307007.7,137425908.66,596032698.73,2780461988.11,202759537.36,0.0,0.0,0.0,0.0,68887636.67,0.0
1587,2024-03-18 00:00:00+00:00,461095932.81,461095932.81,140439203.63,607501207.95,2863252281.53,207205382.4,461095932.81,461095932.81,140439203.63,607501207.95,2796227598.03,207205382.4,0.0,0.0,0.0,0.0,67024683.5,0.0
1588,2024-03-19 00:00:00+00:00,455524838.32,455524838.32,137117144.96,595167015.97,2854314452.33,202303984.36,455524838.32,455524838.32,137117144.96,595167015.97,2787586179.26,202303984.36,0.0,0.0,0.0,0.0,66728273.07,0.0
1589,2024-03-20 00:00:00+00:00,451479837.89,451479837.89,135641898.57,589090233.01,2833931375.49,200127391.33,451479837.89,451479837.89,135641898.57,589090233.01,2766819019.01,200127391.33,0.0,0.0,0.0,0.0,67112356.48,0.0


MakerDAO Stablecoin Ratio
This can give insights into the proportion of assets held in stablecoins (including DAI) relative to other assets. A higher stablecoin ratio might suggest a preference for stability within the MakerDAO system, which can have implications for DAI's stability.

In [72]:
#stablecoin_ratio_df = fetch_dune_data(58136)

In [73]:
#stablecoin_ratio_df['dt'] = pd.to_datetime(stablecoin_ratio_df['dt'])
#stablecoin_ratio_df.set_index('dt', inplace=True)

In [74]:
#stablecoin_ratio_df.to_csv(stablecoin_ratio_path)

In [75]:
stablecoin_ratio_path = '../data/csv/srp.csv'

In [76]:
stablecoin_ratio_csv = pd.read_csv(stablecoin_ratio_path, index_col='dt', parse_dates=True)

In [77]:
stablecoin_ratio_csv.head()

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 2_level_0,assets_size,assets_stablecoins,assets_usdc,stablecoins_ratio,usdc_ratio
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-03-21 00:00:00+00:00,0,1.0,4692077547.49,510492745.88,509792133.98,0.11,0.11
2024-03-20 00:00:00+00:00,1,1.0,4665516334.77,451367053.87,450666441.97,0.1,0.1
2024-03-19 00:00:00+00:00,2,1.0,4703718034.84,404362171.82,403655435.55,0.09,0.09
2024-03-18 00:00:00+00:00,3,1.0,4744355701.71,399931319.66,399224583.39,0.08,0.08
2024-03-17 00:00:00+00:00,4,1.0,4699947545.51,632391001.19,631684264.92,0.13,0.13


In [78]:
stable_coin_ratios = stablecoin_ratio_csv[['stablecoins_ratio','usdc_ratio']]

In [79]:
stable_coin_ratios = stable_coin_ratios.rename_axis('day')

In [80]:
stable_coin_ratios.index

DatetimeIndex(['2024-03-21 00:00:00+00:00', '2024-03-20 00:00:00+00:00',
               '2024-03-19 00:00:00+00:00', '2024-03-18 00:00:00+00:00',
               '2024-03-17 00:00:00+00:00', '2024-03-16 00:00:00+00:00',
               '2024-03-15 00:00:00+00:00', '2024-03-14 00:00:00+00:00',
               '2024-03-13 00:00:00+00:00', '2024-03-12 00:00:00+00:00',
               ...
               '2019-11-27 00:00:00+00:00', '2019-11-26 00:00:00+00:00',
               '2019-11-25 00:00:00+00:00', '2019-11-24 00:00:00+00:00',
               '2019-11-23 00:00:00+00:00', '2019-11-22 00:00:00+00:00',
               '2019-11-21 00:00:00+00:00', '2019-11-20 00:00:00+00:00',
               '2019-11-19 00:00:00+00:00', '2019-11-18 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='day', length=1586, freq=None)

Maker Peg Stability Module Stats
Data on the Peg Stability Module, which helps maintain DAI's peg to the USD, can be vital. Insights into the inflows, outflows, and balances within the PSM can directly indicate efforts to stabilize DAI.

In [81]:
#psm_stats_df = fetch_dune_data(17216)

In [82]:
#psm_stats_df['date'] = pd.to_datetime(psm_stats_df['date'])

In [83]:
psm_stats_path = '../data/csv/psm.csv'

In [84]:
#psm_stats_df.to_csv(psm_stats_path)

In [85]:
psm_stats_csv = pd.read_csv(psm_stats_path, index_col='date', parse_dates=True)

In [86]:
psm_stats_csv.columns = [f'psm_{col}' if col != 'dt' and not col.startswith('psm_') else col for col in psm_stats_csv.columns]


In [87]:
psm_stats_csv = psm_stats_csv.drop(columns=['psm_Unnamed: 0'])

In [88]:
psm_stats_csv = psm_stats_csv.rename_axis('day')

In [403]:
psm_stats_csv.columns

Index(['psm_change', 'psm_change_excl_rwa', 'psm_change_excl_rwa_30d_avg',
       'psm_change_excl_rwa_7d_avg', 'psm_fees', 'psm_inflow',
       'psm_inflow_exl_rwa', 'psm_lifetime_fees', 'psm_lifetime_turnover',
       'psm_outflow', 'psm_balance', 'psm_turnover'],
      dtype='object')

In [443]:
psm_stats_csv[['psm_balance','psm_fees','psm_inflow','psm_outflow','psm_turnover']].describe()

Unnamed: 0,psm_balance,psm_fees,psm_inflow,psm_outflow,psm_turnover
count,1179.0,1102.0,1102.0,1102.0,1102.0
mean,2495919311.99,4037.44,45752972.38,45289730.32,91042702.7
std,1628328770.12,17575.39,71460076.01,62717895.8,116889284.8
min,1.0,0.0,0.0,0.0,1.0
25%,673188300.21,0.0,12117870.98,9329900.97,28952809.74
50%,3041213026.5,0.0,26787127.56,29106242.25,59685795.04
75%,3652875373.77,0.32,54102189.62,60721834.75,114257782.07
max,5965535187.98,329095.83,1399943587.76,828699035.76,2005261976.79


Where is dai lets us know how dai is being used; dai in lending could be considered to add to stability 

In [90]:
#where_is_dai_df = fetch_dune_data(54599)

In [91]:
#where_is_dai_df['dt'] = pd.to_datetime(where_is_dai_df['dt'])

In [92]:
wid_path = '../data/csv/wid.csv'

In [93]:
#where_is_dai_df.to_csv(wid_path)

In [94]:
where_is_dai_csv = pd.read_csv(wid_path, index_col='dt', parse_dates=True)

In [95]:
where_is_dai_csv = where_is_dai_csv.rename_axis('day')

In [406]:
print(where_is_dai_csv.describe())

       where_is_dai_balance  where_is_dai_total_balance
count             11,112.00                   11,112.00
mean         623,630,370.48            4,989,042,963.87
std          885,053,602.91            2,513,963,972.77
min                    0.00              113,631,008.03
25%           37,929,286.64            4,426,586,805.76
50%          252,756,050.89            5,195,896,461.42
75%          686,266,921.16            6,378,627,017.74
max        4,078,699,922.32           10,080,785,059.41


In [96]:
where_is_dai_csv.columns = [f'where_is_dai_{col}' if col != 'day' and not col.startswith('where_is_dai_') else col for col in where_is_dai_csv.columns]

In [97]:
where_is_dai_csv = where_is_dai_csv.drop(columns=['where_is_dai_Unnamed: 0'])

In [98]:
where_is_dai_csv.shape[0]

11112

In [99]:
where_is_dai_csv_table = where_is_dai_csv.pivot_table(values='where_is_dai_balance', index='day', columns='where_is_dai_wallet', aggfunc='sum')

In [100]:
where_is_dai_csv_table.describe()

where_is_dai_wallet,Bridge,CeFi,Dai Savings,Dex,EOA,Lending,Other,Treasury
count,1389.0,1389.0,1389.0,1389.0,1389.0,1389.0,1389.0,1389.0
mean,519571584.34,97793318.99,311612990.83,759685160.26,2313033791.12,583815304.12,384965889.61,18564924.6
std,479488038.35,78415911.28,518564427.53,660249624.37,1133804050.66,565104617.62,199111750.5,22963663.97
min,91.54,6032365.7,1167096.23,5969545.93,70539903.52,2382557.63,7624088.2,0.0
25%,254354292.48,22517650.67,12215114.08,242906003.63,1797171880.58,86534442.33,256075295.2,0.0
50%,452459560.65,83466578.34,90963507.53,563571678.17,2869541236.0,379804255.54,384559660.5,19322535.41
75%,611193769.11,161210994.77,253531898.08,1035302796.12,3126200483.96,1051192009.32,539753467.21,27500346.82
max,2131833654.66,311873211.38,1750756886.52,2603174883.49,4078699922.32,1854438972.6,829237365.55,155646703.53


In [101]:
where_is_dai_csv_table.columns = [f'where_is_dai_{col}' if col != 'day' and not col.startswith('where_is_dai_') else col for col in where_is_dai_csv_table.columns]

In [102]:
where_is_dai_csv_table.shape[0]

1389

Daily surplus buffer
Provides information on the surplus buffer in MakerDAO, which is a key financial metric. The surplus buffer acts as a reserve to cover potential system shortfalls and ensures the stability and solvency of the system. This data could be valuable for understanding the financial health and risk management strategies of MakerDAO over time

In [103]:
#daily_surplus_buffer = fetch_dune_data(3567837)

In [104]:
#daily_surplus_buffer['period'] = pd.to_datetime(daily_surplus_buffer['period'])

In [105]:
dsb_path = '../data/csv/dsb.csv'
#daily_surplus_buffer.to_csv(dsb_path)
daily_surplus_buffer_csv = pd.read_csv(dsb_path, index_col='period', parse_dates=True)

In [106]:
daily_surplus_buffer_csv = daily_surplus_buffer_csv.drop(columns=['Unnamed: 0'])

In [444]:
daily_surplus_buffer_csv.describe()

Unnamed: 0,daily_surplus_buffer_delta_30d,daily_surplus_buffer_delta_90d,daily_surplus_buffer
count,1609.0,1609.0,1609.0
mean,11130093.66,10451707.83,42392130.19
std,52938741.6,38867971.04,30073000.8
min,-210344528.52,-93067840.04,-5674219.91
25%,-10263499.22,-9109403.83,4001728.36
50%,2130021.51,2024616.7,50260545.4
75%,41086534.67,30897105.18,69818697.29
max,154842096.11,122533492.58,83553170.97


In [108]:
daily_surplus_buffer_csv.columns = [f'daily_surplus_buffer_{col}' if col != 'period' and not col.startswith('surplus_buffer') else col for col in daily_surplus_buffer_csv.columns]

In [109]:
daily_surplus_buffer_csv = daily_surplus_buffer_csv.rename(columns={'surplus_buffer':'daily_surplus_buffer'})


In [110]:
daily_surplus_buffer_csv = daily_surplus_buffer_csv.rename_axis('day')

In [111]:
daily_surplus_buffer_csv.head()

Unnamed: 0_level_0,daily_surplus_buffer_delta_30d,daily_surplus_buffer_delta_90d,daily_surplus_buffer
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-03-27 00:00:00+00:00,-118286288.53,-23073880.4,42679254.97
2024-03-26 00:00:00+00:00,-117968788.93,-22860798.3,42972541.24
2024-03-25 00:00:00+00:00,-114559431.4,-21963371.46,43449261.66
2024-03-24 00:00:00+00:00,-110361924.94,-20375056.07,44097497.35
2024-03-23 00:00:00+00:00,-105856419.4,-19278637.62,44733525.24


Stability Fee history and Vault History - Rates Set by DAO
dart = debt balance

In [112]:
#sf_df = fetch_dune_data(3551110)

In [113]:
#sf_df['period'] = pd.to_datetime(sf_df['period'])

In [114]:
sf_path = '../data/csv/sf.csv'
#sf_df.to_csv(sf_path)
sf_history_csv = pd.read_csv(sf_path, index_col='period', parse_dates=True)

In [115]:
sf_history_csv

Unnamed: 0_level_0,Unnamed: 0,annualized,annualized_revenues,dart,ilk,total_ann_revenues
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-03-21 00:00:00+00:00,0,0.05,0.00,0.00,GNO-A,176643660.32
2024-03-21 00:00:00+00:00,1,0.00,42545.93,70910068.44,GUNIV3DAIUSDC2-A,176643660.32
2024-03-21 00:00:00+00:00,2,0.03,-0.00,-0.00,LINK-A,176643660.32
2024-03-21 00:00:00+00:00,3,0.01,-0.00,-0.00,YFI-A,176643660.32
2024-03-21 00:00:00+00:00,4,0.01,0.00,0.00,COMP-A,176643660.32
...,...,...,...,...,...,...
2019-11-01 00:00:00+00:00,95411,,,,WBTC-A,
2019-11-01 00:00:00+00:00,95412,,,,UNIV2UNIETH-A,
2019-11-01 00:00:00+00:00,95413,,,,RWA005-A,
2019-11-01 00:00:00+00:00,95414,,,,WSTETH-B,


In [116]:
sf_history_csv_reset = sf_history_csv.reset_index()

In [117]:
sf_history_csv_clean = sf_history_csv_reset.drop_duplicates(subset=['period', 'ilk'], keep='last')

In [118]:
sf_history_csv_clean = sf_history_csv_clean.drop(columns='Unnamed: 0')

In [119]:
sf_history_csv_clean['period'] = sf_history_csv_clean['period'].dt.date

In [120]:
sf_history_csv_clean = sf_history_csv_clean.rename(columns={'period':'day'})

DAI Savings Rate Historical - Set by DAO

In [121]:
#dsr_rate = fetch_dune_data(3581248)

In [122]:
dsr_rate_path = '../data/csv/dsr.csv'
#dsr_rate.to_csv(dsr_rate_path)
dsr_rate_csv = pd.read_csv(dsr_rate_path, index_col='dt', parse_dates=True)

In [123]:
dsr_rate_csv['dsr_rate'].describe()

count   1,552.00
mean        0.01
std         0.03
min         0.00
25%         0.00
50%         0.00
75%         0.01
max         0.15
Name: dsr_rate, dtype: float64

In [124]:
dsr_rate_csv = dsr_rate_csv.drop(columns=['Unnamed: 0'])

In [125]:
dsr_rate_csv['dai_percent_in_dsr'] = dsr_rate_csv['dsr_balance'] / dsr_rate_csv['total_balance']
dsr_rate_csv['dai_percent_out_dsr'] = dsr_rate_csv['non_dsr_balance'] / dsr_rate_csv['total_balance']

In [126]:
dsr_rate_csv = dsr_rate_csv.rename_axis('day')

In [408]:
print(dsr_rate_csv.describe())

           dsr_balance  dsr_rate  non_dsr_balance     total_balance  \
count         1,552.00  1,552.00         1,552.00          1,552.00   
mean    293,268,678.28      0.01 4,181,101,606.37  4,474,370,284.65   
std     508,781,220.45      0.03 2,795,421,701.77  2,786,126,412.57   
min       1,167,096.23      0.00    42,257,176.08     72,614,957.58   
25%      13,831,047.69      0.00 1,494,884,857.88  1,501,681,100.28   
50%      78,372,158.21      0.00 4,361,860,196.87  5,130,678,028.50   
75%     208,537,359.71      0.01 6,149,889,671.04  6,183,615,643.32   
max   1,750,756,886.52      0.15 9,944,328,573.43 10,056,865,067.98   

       dai_percent_in_dsr  dai_percent_out_dsr  
count            1,552.00             1,552.00  
mean                 0.13                 0.87  
std                  0.16                 0.16  
min                  0.00                 0.38  
25%                  0.01                 0.72  
50%                  0.02                 0.98  
75%              

dsr flows

In [128]:
#dsr_flows = fetch_dune_data(1753750)

In [129]:
dsr_flows_path='../data/csv/dsr_flows.csv'
#dsr_flows.to_csv(dsr_flows_path)
dsr_flows_csv = pd.read_csv(dsr_flows_path, index_col='period', parse_dates=True)

In [130]:
dsr_flows_csv.shape[0]

581

In [131]:
dsr_flows_csv = dsr_flows_csv.drop(columns=['Unnamed: 0','balance'])
dsr_flows_csv = dsr_flows_csv.rename_axis('day')

In [132]:
dsr_flows_csv.columns = [f'dsr_{col}' if col != 'day' and not col.startswith('surplus_buffer') else col for col in dsr_flows_csv.columns]

In [133]:
dsr_df = dsr_flows_csv.merge(dsr_rate_csv, on=['day'], how='inner')

In [134]:
dsr_df = dsr_df.rename(columns={'total_balance':'dai_total_balance'})
dsr_df = dsr_df.rename(columns={'non_dsr_balance':'dai_circulating'})

In [135]:
dsr_df.describe()

Unnamed: 0,dsr_inflow,dsr_interest,dsr_outflow,dsr_balance,dsr_rate,dai_circulating,dai_total_balance,dai_percent_in_dsr,dai_percent_out_dsr
count,199.0,420.0,126.0,581.0,581.0,581.0,581.0,581.0,581.0
mean,18211028.03,128169.53,17498729.02,632212219.96,0.03,4584444424.95,5216656644.91,0.12,0.88
std,34118088.22,126667.16,43686010.07,689782183.06,0.03,895024557.86,495795966.59,0.13,0.13
min,0.0,0.12,0.01,1167096.23,0.0,3182053005.07,4336565083.61,0.0,0.67
25%,1995736.75,3895.16,625555.75,35775627.77,0.0,3756055150.83,4854191351.0,0.01,0.72
50%,8698729.54,156761.36,6453848.96,109586897.55,0.01,4542041297.79,5174925449.48,0.02,0.98
75%,19085676.64,210194.81,14916349.44,1434744378.05,0.05,5136076330.01,5391627556.52,0.28,0.99
max,298378513.73,591988.67,337868152.03,1750756886.52,0.15,6549164445.71,6550365936.14,0.33,1.0


In [136]:
#cum_bal_and_safetyprice_and_safetyvalue = vault_stats_6_20_through_6_21 

In [137]:
cumbal_stats_path ='../data/csv/cumbal.csv'
#cum_bal_and_safetyprice_and_safetyvalue.to_csv(cumbal_stats_path)
cumbal_csv = pd.read_csv(cumbal_stats_path, index_col = 'day', parse_dates=True)

In [138]:
#debtbal_lpenalty_lratio = fetch_dune_data(3568425)

In [139]:
debtbal_lpenalty_lratio_path = '../data/csv/debtbal_lpenalty_lratio.csv'
#debtbal_lpenalty_lratio.to_csv(debtbal_lpenalty_lratio_path)
debtbal_lpenalty_lratio_path_csv = pd.read_csv(debtbal_lpenalty_lratio_path, index_col = 'day', parse_dates=True)

In [140]:
debtbal_lpenalty_lratio_path_csv.head()

Unnamed: 0_level_0,Unnamed: 0,debt_balance,ilk,liquidation_penalty,liquidation_ratio
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-03-27,0,0.0,BAL-A,0.0,7.48
2024-03-27,1,-0.0,KNC-A,0.0,12.28
2024-03-27,2,300484322.01,ETH-C,0.13,1.7
2024-03-27,3,0.0,GUSD-A,0.13,15.0
2024-03-27,4,0.0,BAT-A,0.0,47.5


In [141]:
debtbal_lpenalty_lratio_path_csv_reset = debtbal_lpenalty_lratio_path_csv.reset_index()
debtbal_lpenalty_lratio_path_csv_clean = debtbal_lpenalty_lratio_path_csv_reset.drop_duplicates(subset=['day', 'ilk'], keep='last')

In [142]:
debtbal_lpenalty_lratio_path_csv_clean = debtbal_lpenalty_lratio_path_csv_clean.drop(columns='Unnamed: 0')

In [143]:
#dceiling_dfloor_scratio = fetch_dune_data(3568438)

In [144]:
dceiling_dfloor_scratio_path = '../data/csv/dceiling_dfloor_scratio.csv'
#dceiling_dfloor_scratio.to_csv(dceiling_dfloor_scratio_path)
dceiling_dfloor_scratio_csv = pd.read_csv(dceiling_dfloor_scratio_path, index_col = 'day', parse_dates=True)

In [145]:
#vault_market_price = fetch_dune_data(3568453)

In [146]:
vault_market_price_path = '../data/csv/vault_market_price.csv'
#vault_market_price.to_csv(vault_market_price_path)
vault_market_price_csv = pd.read_csv(vault_market_price_path, index_col = 'day', parse_dates=True)

In [147]:
vault_market_price_csv.head()

Unnamed: 0_level_0,Unnamed: 0,ilk,market_price
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-03-27,0,GUNIV3DAIUSDC2-A,203.39
2024-03-27,1,BAL-A,21.64
2024-03-27,2,LINK-A,6.28
2024-03-27,3,UNIV2WBTCDAI-A,31062885.0
2024-03-27,4,WBTC-A,69767.86


In [148]:
vault_market_price_csv_reset = vault_market_price_csv.reset_index()
vault_market_price_csv_clean = vault_market_price_csv_reset.drop_duplicates(subset=['day', 'ilk'], keep='last')

In [149]:
vault_market_price_csv_clean = vault_market_price_csv_clean.drop(columns=['Unnamed: 0'])

In [150]:
cumbal_csv_reset = cumbal_csv.reset_index()
dceiling_dfloor_scratio_csv_reset = dceiling_dfloor_scratio_csv.reset_index()

In [151]:
cumbal_csv_clean = cumbal_csv_reset.drop_duplicates(subset=['day', 'ilk'], keep='last')
dceiling_dfloor_scratio_csv_clean = dceiling_dfloor_scratio_csv_reset.drop_duplicates(subset=['day', 'ilk'], keep='last')

In [152]:
cumbal_csv_clean = cumbal_csv_clean.drop(columns=['Unnamed: 0'])

In [153]:
dceiling_dfloor_scratio_csv_clean = dceiling_dfloor_scratio_csv_clean.drop(columns=['Unnamed: 0'])

In [154]:
comprehensive_vault_stats = pd.merge(cumbal_csv_clean, dceiling_dfloor_scratio_csv_clean, on=['day', 'ilk'], how='inner')

In [155]:
comprehensive_vault_stats = comprehensive_vault_stats.merge(vault_market_price_csv_clean, on=['day', 'ilk'], how='inner' )

In [156]:
comprehensive_vault_stats = comprehensive_vault_stats.merge(debtbal_lpenalty_lratio_path_csv_clean, on=['day', 'ilk'], how='inner')

In [157]:
comprehensive_vault_stats['day'] = pd.to_datetime(comprehensive_vault_stats['day'])
sf_history_csv_clean['day'] = pd.to_datetime(sf_history_csv_clean['day'])

In [158]:
ir_csv.columns

Index(['ilk', 'daily_revenues'], dtype='object')

In [159]:
comprehensive_vault_stats.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio'],
      dtype='object')

In [160]:
comprehensive_vault_stats = comprehensive_vault_stats.merge(ir_csv, on=['day', 'ilk'], how='inner')

In [161]:
comprehensive_vault_stats = comprehensive_vault_stats.merge(sf_history_csv_clean, on=['day', 'ilk'], how='inner')

In [163]:
comprehensive_vault_stats['day'] = comprehensive_vault_stats['day'].dt.tz_convert('UTC')
dai_maturity_df_reindexed['day'] = dai_maturity_df_reindexed['day'].dt.tz_convert('UTC')

In [164]:
# Ensure both 'day' columns are timezone-aware and in UTC
comprehensive_vault_stats['day'] = comprehensive_vault_stats['day'].dt.tz_convert('UTC')
dai_maturity_df_reindexed['day'] = dai_maturity_df_reindexed['day'].dt.tz_convert('UTC')


In [165]:
stable_coin_ratios.index

DatetimeIndex(['2024-03-21 00:00:00+00:00', '2024-03-20 00:00:00+00:00',
               '2024-03-19 00:00:00+00:00', '2024-03-18 00:00:00+00:00',
               '2024-03-17 00:00:00+00:00', '2024-03-16 00:00:00+00:00',
               '2024-03-15 00:00:00+00:00', '2024-03-14 00:00:00+00:00',
               '2024-03-13 00:00:00+00:00', '2024-03-12 00:00:00+00:00',
               ...
               '2019-11-27 00:00:00+00:00', '2019-11-26 00:00:00+00:00',
               '2019-11-25 00:00:00+00:00', '2019-11-24 00:00:00+00:00',
               '2019-11-23 00:00:00+00:00', '2019-11-22 00:00:00+00:00',
               '2019-11-21 00:00:00+00:00', '2019-11-20 00:00:00+00:00',
               '2019-11-19 00:00:00+00:00', '2019-11-18 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='day', length=1586, freq=None)

In [166]:
comprehensive_vault_stats.tail()

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,liquidation_penalty,liquidation_ratio,daily_revenues,annualized,annualized_revenues,dart,total_ann_revenues
21747,2019-11-15 00:00:00+00:00,0.42,ETH-A,119.74,50.29,0.0,20.0,0.0,,,,,0.0,0.04,,,
21748,2019-11-15 00:00:00+00:00,10.0,BAT-A,0.17,1.66,0.0,20.0,0.0,,,,,0.0,0.04,,,
21749,2019-11-13 00:00:00+00:00,,SAI,0.0,,0.0,0.0,0.0,,,,,,0.0,,,
21750,2019-11-13 00:00:00+00:00,0.42,ETH-A,0.0,0.0,0.0,20.0,0.0,,,,,0.0,0.04,,,
21751,2019-11-13 00:00:00+00:00,10.0,BAT-A,0.0,0.0,0.0,20.0,0.0,,,,,0.0,0.04,,,


In [167]:
comprehensive_vault_stats.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized', 'annualized_revenues', 'dart', 'total_ann_revenues'],
      dtype='object')

In [168]:
comprehensive_vault_stats = comprehensive_vault_stats.rename(columns={'annualized':'annualized stability fee'})

In [169]:
def determine_status(row):
    # The vault is considered closed if 'dai_ceiling' is 0
    # This includes the first day if 'dai_ceiling' starts at 0 or if it drops to 0 from a nonzero value
    if pd.isnull(row['prev_dai_ceiling']) and row['dai_ceiling'] == 0:
        return 'Closed'
    elif row['prev_dai_ceiling'] >= 0 and row['dai_ceiling'] == 0:
        return 'Closed'
    # Check if 'safety_price' is 0
    elif row['safety_price'] == 0:
        return 'Closed'
    else:
        return 'Open'

In [170]:
comprehensive_vault_stats['market_price'] = np.where(comprehensive_vault_stats['liquidation_ratio'].isnull(), comprehensive_vault_stats['safety_price'], comprehensive_vault_stats['market_price'])

In [171]:
comprehensive_vault_stats = comprehensive_vault_stats.sort_values('day')
comprehensive_vault_stats['prev_dai_ceiling'] = comprehensive_vault_stats.groupby('ilk')['dai_ceiling'].shift(1)

In [172]:
comprehensive_vault_stats['status'] = comprehensive_vault_stats.apply(determine_status, axis=1)


In [173]:
comprehensive_vault_stats.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status'],
      dtype='object')

In [174]:
comprehensive_vault_stats['market_collateral_ratio'] = np.where(comprehensive_vault_stats['status']=='Open',((comprehensive_vault_stats['usd_safety_value'] *comprehensive_vault_stats['liquidation_ratio']) / comprehensive_vault_stats['debt_balance']), np.nan)
comprehensive_vault_stats['market_collateral_ratio'] = np.where(comprehensive_vault_stats['debt_balance']==0,0, comprehensive_vault_stats['market_collateral_ratio'])

comprehensive_vault_stats['safety_collateral_ratio'] = np.where(comprehensive_vault_stats['status']=='Open',comprehensive_vault_stats['safety_collateral_ratio'], np.nan)
comprehensive_vault_stats['safety_collateral_ratio'] = np.where(comprehensive_vault_stats['status'] == 'Closed', 0, comprehensive_vault_stats['safety_collateral_ratio'])
comprehensive_vault_stats['safety_collateral_ratio'] = np.where(comprehensive_vault_stats['debt_balance'] <= 0, 0, comprehensive_vault_stats['safety_collateral_ratio'])
comprehensive_vault_stats[['annualized_revenues', 'dart']] = np.where(comprehensive_vault_stats[['annualized_revenues', 'dart']] <= 0, 0, comprehensive_vault_stats[['annualized_revenues', 'dart']])

comprehensive_vault_stats['collateral_usd'] = comprehensive_vault_stats['usd_safety_value'] * comprehensive_vault_stats['liquidation_ratio']
comprehensive_vault_stats['hypothetical_dai_ceiling'] = comprehensive_vault_stats['collateral_usd']* (comprehensive_vault_stats['liquidation_ratio'] / 2)

In [175]:
#replace values when vault is closed to 0?

In [176]:
def clean_small_values(value, threshold=1e-8):
    """
    Set small negative and positive values within a specified threshold to zero.

    Parameters:
    - value: The value to be cleaned.
    - threshold: Values within +/- this threshold will be set to zero.

    Returns:
    - The cleaned value.
    """
    if -threshold < value < threshold:
        return 0.0
    else:
        return value

# Apply this function to the entire DataFrame
comprehensive_vault_stats[['cumulative_collateral','usd_safety_value','collateral_usd','hypothetical_dai_ceiling','debt_balance']] = comprehensive_vault_stats[['cumulative_collateral','usd_safety_value','collateral_usd','hypothetical_dai_ceiling','debt_balance']].applymap(lambda x: clean_small_values(x))


  comprehensive_vault_stats[['cumulative_collateral','usd_safety_value','collateral_usd','hypothetical_dai_ceiling','debt_balance']] = comprehensive_vault_stats[['cumulative_collateral','usd_safety_value','collateral_usd','hypothetical_dai_ceiling','debt_balance']].applymap(lambda x: clean_small_values(x))


In [177]:
comprehensive_vault_stats = comprehensive_vault_stats[comprehensive_vault_stats['ilk'] != 'SAI']

UNIV2ETHUSDT-A is an outlier and will be removed from dataset.  

In [178]:
comprehensive_vault_stats[comprehensive_vault_stats['safety_collateral_ratio'] < 0 ]

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
12071,2021-08-08 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,137635170.15,0.0,2000000.0,10000.0,-12.95,192689238.21,0.0,...,4.87,0.02,0.0,0.0,38032976.74,2006530.97,Open,-18.13,0.0,0.0
12038,2021-08-09 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,139168140.36,0.0,2000000.0,10000.0,-13.09,194835396.5,0.0,...,4.75,0.02,0.0,0.0,38121293.97,2000000.0,Open,-18.33,0.0,0.0
11980,2021-08-10 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,140063017.89,0.0,2000000.0,10000.0,-13.18,196088225.05,0.0,...,4.75,0.02,0.0,0.0,38691927.83,2000000.0,Open,-18.45,0.0,0.0
11963,2021-08-11 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,141384297.67,0.0,2000000.0,10000.0,-13.3,197938016.73,0.0,...,4.73,0.02,0.0,0.0,38773987.68,2000000.0,Open,-18.62,0.0,0.0
11918,2021-08-12 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,137867966.0,0.0,2000000.0,10000.0,-12.97,193015152.4,0.0,...,4.74,0.02,0.0,0.0,40333600.87,2000000.0,Open,-18.16,0.0,0.0
11864,2021-08-13 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,141650852.35,0.0,2000000.0,10000.0,-13.33,198311193.29,0.0,...,4.74,0.02,0.0,0.0,39855556.08,2000000.0,Open,-18.66,0.0,0.0
11843,2021-08-14 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,142284082.28,0.0,2000000.0,10000.0,-13.39,199197715.19,0.0,...,4.74,0.02,0.0,0.0,40109465.52,2000000.0,Open,-18.74,0.0,0.0
11807,2021-08-15 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,139997139.81,0.0,2000000.0,10000.0,-13.17,195995995.74,0.0,...,4.75,0.02,0.0,0.0,40161597.87,2000000.0,Open,-18.44,0.0,0.0
11765,2021-08-16 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,142001139.88,0.0,2000000.0,10000.0,-13.36,198801595.84,0.0,...,4.74,0.02,0.0,0.0,40275499.78,2000000.0,Open,-18.7,0.0,0.0
11701,2021-08-17 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,140896250.11,0.0,2000000.0,10000.0,-13.26,197254750.15,0.0,...,4.78,0.02,0.0,0.0,40012479.05,2000000.0,Open,-18.56,0.0,0.0


In [179]:
comprehensive_vault_stats = comprehensive_vault_stats[comprehensive_vault_stats['ilk'] != 'UNIV2ETHUSDT-A']

In [180]:
comprehensive_vault_stats['debt_balance'].describe()

count          21,138.00
mean      138,522,602.01
std       387,265,830.49
min            -2,528.51
25%         1,403,187.58
50%        15,818,062.08
75%        88,412,143.20
max     3,361,006,014.83
Name: debt_balance, dtype: float64

In [181]:
#pd.set_option('display.max_columns', None)  # Show all columns
#pd.set_option('display.max_rows', None)  # Show all rows
#pd.set_option('display.max_colwidth', None)  # Show full content of each column

In [182]:
comprehensive_vault_stats['market_collateral_ratio'].describe()

count                    19,378.00
mean        160,876,285,659,620.75
std       7,377,037,911,158,396.00
min     -32,429,545,705,439,232.00
25%                           2.11
50%                           3.14
75%                           4.01
max     324,295,457,054,392,320.00
Name: market_collateral_ratio, dtype: float64

In [183]:
comprehensive_vault_stats['safety_collateral_ratio'].describe()

count                    21,501.00
mean        144,095,140,219,423.31
std       6,659,657,583,570,546.00
min                           0.00
25%                           1.41
50%                           1.94
75%                           2.50
max     308,852,816,242,278,400.00
Name: safety_collateral_ratio, dtype: float64

In [184]:
comprehensive_vault_stats[['day','ilk','market_collateral_ratio']][comprehensive_vault_stats['market_collateral_ratio'] > 3.31 ].sort_values('market_collateral_ratio')

Unnamed: 0,day,ilk,market_collateral_ratio
6980,2022-02-10 00:00:00+00:00,RENBTC-A,3.31
13243,2021-07-06 00:00:00+00:00,RENBTC-A,3.31
20676,2020-08-24 00:00:00+00:00,KNC-A,3.31
845,2023-12-07 00:00:00+00:00,ETH-B,3.31
2408,2023-05-19 00:00:00+00:00,RETH-A,3.31
...,...,...,...
7720,2022-01-05 00:00:00+00:00,RWA003-A,324295457054392320.00
8489,2021-11-30 00:00:00+00:00,RWA003-A,324295457054392320.00
8141,2021-12-18 00:00:00+00:00,RWA003-A,324295457054392320.00
7463,2022-01-15 00:00:00+00:00,RWA003-A,324295457054392320.00


In [185]:
comprehensive_vault_stats[['day','ilk','safety_collateral_ratio']][comprehensive_vault_stats['safety_collateral_ratio'] > 100 ]

Unnamed: 0,day,ilk,safety_collateral_ratio
15803,2021-04-21 00:00:00+00:00,RWA002-A,149.50
15727,2021-04-23 00:00:00+00:00,RWA001-A,32616554.14
15494,2021-04-30 00:00:00+00:00,RWA001-A,32616554.14
15464,2021-05-01 00:00:00+00:00,RWA001-A,32616554.14
15421,2021-05-02 00:00:00+00:00,RWA001-A,32616554.14
...,...,...,...
6015,2022-05-12 00:00:00+00:00,YFI-A,114.43
5992,2022-05-13 00:00:00+00:00,YFI-A,111.95
5637,2022-06-13 00:00:00+00:00,RWA003-A,113.28
4516,2022-09-30 00:00:00+00:00,RWA003-A,9651650507571200.00


In [186]:
comprehensive_vault_stats[['day','ilk','debt_balance']][comprehensive_vault_stats['debt_balance'] < 0 ]

Unnamed: 0,day,ilk,debt_balance
3051,2023-03-11 00:00:00+00:00,DIRECT-AAVEV2-DAI,-2528.51


In [187]:
comprehensive_vault_stats = comprehensive_vault_stats[comprehensive_vault_stats['ilk'] != 'DIRECT-AAVEV2-DAI']

In [188]:
comprehensive_vault_stats[['day','ilk','debt_balance']][comprehensive_vault_stats['debt_balance'] < 0 ]

Unnamed: 0,day,ilk,debt_balance


In [189]:
comprehensive_vault_stats['status'].describe()

count     21499
unique        2
top        Open
freq      19896
Name: status, dtype: object

In [190]:
comprehensive_vault_stats[['liquidation_ratio','liquidation_penalty','annualized stability fee',
       'annualized_revenues', 'dart', 'total_ann_revenues',
       'prev_dai_ceiling']].describe()

Unnamed: 0,liquidation_ratio,liquidation_penalty,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling
count,21004.0,15665.0,21493.0,21136.0,21136.0,21491.0,21445.0
mean,1.49,-0.16,0.04,3167088.52,138541671.93,72830970.89,173188464.6
std,2.2,0.49,0.07,10267730.84,387325542.99,62927201.18,470887247.03
min,0.0,-1.0,0.0,0.0,0.0,767.83,0.0
25%,1.25,-1.0,0.01,4979.85,1402720.7,29094709.62,3306266.02
50%,1.5,0.13,0.03,172814.73,15890305.95,52913177.34,20000000.0
75%,1.75,0.13,0.05,1678781.65,88457397.17,99900414.71,120000000.0
max,100.0,0.13,0.5,142209506.83,3361006014.83,351700458.61,4361006014.83


In [191]:
closed_vaults = comprehensive_vault_stats[comprehensive_vault_stats['status'] == 'Closed']

In [192]:
zero_balances = comprehensive_vault_stats[comprehensive_vault_stats['cumulative_collateral'] == 0]

In [193]:
usdt_a = comprehensive_vault_stats[comprehensive_vault_stats['ilk']=='USDT-A']

In [194]:
usdt_a.head()

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
20599,2020-09-08 00:00:00+00:00,368.9,USDT-A,0.67,246.01,10000000.0,100.0,1.0,1.0,245.99,...,0.02,0.06,14.76,245.99,1855497.42,,Open,1.5,369.02,276.76
20588,2020-09-09 00:00:00+00:00,0.0,USDT-A,0.67,0.0,10000000.0,100.0,0.0,1.0,0.0,...,0.0,0.06,0.0,0.0,1763209.76,10000000.0,Open,0.0,0.0,0.0
20567,2020-09-12 00:00:00+00:00,200.0,USDT-A,0.67,133.38,10000000.0,100.0,0.0,1.0,0.0,...,0.12,0.06,0.0,0.0,2227239.39,10000000.0,Open,0.0,200.07,150.05
20561,2020-09-13 00:00:00+00:00,426826.22,USDT-A,0.67,284680.54,10000000.0,100.0,1.5,1.0,189858.91,...,20.63,0.06,11391.53,189858.91,2223945.55,10000000.0,Open,2.25,427020.81,320265.61
20558,2020-09-14 00:00:00+00:00,531480.11,USDT-A,0.67,354757.69,10000000.0,100.0,1.42,1.0,249920.0,...,44.86,0.06,14995.2,249920.0,3317027.83,10000000.0,Open,2.13,532136.54,399102.4


In [195]:
zero_balances['ilk'].unique()

array(['USDT-A', 'UNIV2WBTCDAI-A', 'UNIV2DAIUSDT-A', 'RWA008-A'],
      dtype=object)

In [196]:
closed_vaults['ilk'].unique()

array(['BAT-A', 'ETH-A', 'LRC-A', 'COMP-A', 'LINK-A', 'ETH-B', 'BAL-A',
       'GUSD-A', 'TUSD-A', 'UNIV2DAIUSDC-A', 'UNIV2AAVEETH-A',
       'UNIV2WBTCDAI-A', 'UNIV2DAIUSDT-A', 'USDC-A', 'USDT-A', 'PAXUSD-A',
       'USDC-B', 'KNC-A', 'RWA006-A', 'GUNIV3DAIUSDC1-A',
       'UNIV2LINKETH-A', 'ZRX-A', 'AAVE-A', 'CRVV1ETHSTETH-A',
       'UNIV2DAIETH-A', 'UNIV2WBTCETH-A', 'UNI-A', 'RWA009-A', 'RETH-A',
       'RENBTC-A', 'GNO-A', 'GUNIV3DAIUSDC2-A', 'UNIV2USDCETH-A',
       'MATIC-A', 'RWA003-A', 'RWA005-A'], dtype=object)

In [197]:
wbtc = comprehensive_vault_stats[comprehensive_vault_stats['ilk']=='WBTC-A']
eth_a = comprehensive_vault_stats[comprehensive_vault_stats['ilk']=='ETH-A']

In [198]:
eth_a['status']

21750    Closed
21747    Closed
21745    Closed
21743    Closed
21742      Open
          ...  
37         Open
28         Open
18         Open
16         Open
7          Open
Name: status, Length: 1564, dtype: object

In [199]:
eth_a_df = eth_a['dai_ceiling'].to_frame('eth_a_dai_ceiling')

In [200]:
eth_a_zero_dai_ceiling = eth_a[eth_a['dai_ceiling'] == 0]

In [201]:
eth_a_zero_dai_ceiling.head()

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
21750,2019-11-13 00:00:00+00:00,0.42,ETH-A,0.0,0.0,0.0,20.0,0.0,0.0,,...,0.0,0.04,,,,,Closed,,,
21747,2019-11-15 00:00:00+00:00,0.42,ETH-A,119.74,50.29,0.0,20.0,0.0,119.74,,...,0.0,0.04,,,,0.0,Closed,,,
21745,2019-11-16 00:00:00+00:00,0.42,ETH-A,121.69,51.11,0.0,20.0,0.0,121.69,,...,0.0,0.04,,,,0.0,Closed,,,
21743,2019-11-17 00:00:00+00:00,0.43,ETH-A,123.31,53.02,0.0,20.0,0.0,123.31,,...,0.0,0.04,,,,0.0,Closed,,,


In [202]:
eth_a_df.head()

Unnamed: 0,eth_a_dai_ceiling
21750,0.0
21747,0.0
21745,0.0
21743,0.0
21742,50000000.0


In [203]:
wbtc = wbtc.set_index('day')

In [204]:
comprehensive_vault_stats.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status',
       'market_collateral_ratio', 'collateral_usd',
       'hypothetical_dai_ceiling'],
      dtype='object')

In [205]:
comprehensive_vault_stats['ilk'].unique()

array(['BAT-A', 'ETH-A', 'USDC-A', 'WBTC-A', 'USDC-B', 'TUSD-A', 'ZRX-A',
       'KNC-A', 'MANA-A', 'USDT-A', 'PAXUSD-A', 'LRC-A', 'COMP-A',
       'LINK-A', 'ETH-B', 'BAL-A', 'YFI-A', 'GUSD-A', 'UNI-A', 'RENBTC-A',
       'AAVE-A', 'UNIV2DAIETH-A', 'PSM-USDC-A', 'UNIV2USDCETH-A',
       'UNIV2WBTCETH-A', 'UNIV2DAIUSDC-A', 'UNIV2LINKETH-A',
       'UNIV2UNIETH-A', 'UNIV2AAVEETH-A', 'UNIV2WBTCDAI-A',
       'UNIV2DAIUSDT-A', 'ETH-C', 'RWA002-A', 'RWA001-A', 'RWA003-A',
       'RWA005-A', 'RWA006-A', 'RWA004-A', 'MATIC-A', 'PSM-PAX-A',
       'GUNIV3DAIUSDC1-A', 'WSTETH-A', 'WBTC-B', 'WBTC-C', 'PSM-GUSD-A',
       'GUNIV3DAIUSDC2-A', 'CRVV1ETHSTETH-A', 'WSTETH-B', 'RWA009-A',
       'RETH-A', 'RWA008-A', 'GNO-A', 'RWA013-A', 'RWA012-A'],
      dtype=object)

In [206]:
comprehensive_vault_stats['status'].head()

21751    Closed
21750    Closed
21748    Closed
21747    Closed
21746    Closed
Name: status, dtype: object

In [207]:
no_nan_vaults = comprehensive_vault_stats.copy()
no_nan_vaults = no_nan_vaults.fillna(0)

In [409]:
no_nan_vaults.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status',
       'market_collateral_ratio', 'collateral_usd',
       'hypothetical_dai_ceiling'],
      dtype='object')

In [None]:
print(no_nan_vaults[''].describe())

In [208]:
comprehensive_vault_stats.shape[0]

21499

In [415]:
comprehensive_vault_stats['safety_collateral_ratio'].describe()

count                    21,499.00
mean        144,108,545,041,993.62
std       6,659,967,211,665,183.00
min                           0.00
25%                           1.41
50%                           1.94
75%                           2.50
max     308,852,816,242,278,400.00
Name: safety_collateral_ratio, dtype: float64

In [210]:
comprehensive_vault_stats.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status',
       'market_collateral_ratio', 'collateral_usd',
       'hypothetical_dai_ceiling'],
      dtype='object')

In [211]:
comprehensive_vault_stats[comprehensive_vault_stats['ilk']=='ETH-A']

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
21750,2019-11-13 00:00:00+00:00,0.42,ETH-A,0.00,0.00,0.00,20.00,0.00,0.00,,...,0.00,0.04,,,,,Closed,,,
21747,2019-11-15 00:00:00+00:00,0.42,ETH-A,119.74,50.29,0.00,20.00,0.00,119.74,,...,0.00,0.04,,,,0.00,Closed,,,
21745,2019-11-16 00:00:00+00:00,0.42,ETH-A,121.69,51.11,0.00,20.00,0.00,121.69,,...,0.00,0.04,,,,0.00,Closed,,,
21743,2019-11-17 00:00:00+00:00,0.43,ETH-A,123.31,53.02,0.00,20.00,0.00,123.31,,...,0.00,0.04,,,,0.00,Closed,,,
21742,2019-11-18 00:00:00+00:00,26313.39,ETH-A,118.39,3115153.99,50000000.00,20.00,2.00,177.58,1558617.22,...,37.68,0.04,62331.01,1558275.09,145487.20,0.00,Open,3.00,4672730.98,3504548.23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37,2024-03-17 00:00:00+00:00,444063.07,ETH-A,2504.00,1111933925.89,428458674.37,7500.00,4.51,3630.80,246751192.26,...,135601.75,0.15,37629556.50,246751192.26,209313743.80,428458674.37,Open,6.53,1612304192.54,1168920539.59
28,2024-03-18 00:00:00+00:00,443694.77,ETH-A,2439.83,1082537747.09,428458674.37,7500.00,4.38,3537.75,247201188.46,...,77882.14,0.15,37698180.92,247201188.46,206241667.90,428458674.37,Open,6.35,1569679733.27,1138017806.62
18,2024-03-19 00:00:00+00:00,442822.50,ETH-A,2262.59,1001924077.88,428458674.37,7500.00,4.08,3280.75,245449240.77,...,171181.44,0.15,37431008.90,245449240.77,199379356.93,428458674.37,Open,5.92,1452789912.92,1053272686.87
16,2024-03-20 00:00:00+00:00,444855.53,ETH-A,2402.78,1068888133.11,428458674.37,7500.00,4.35,3484.03,245482433.74,...,136276.39,0.15,37436070.83,245482433.74,203058946.36,428458674.37,Open,6.31,1549887793.01,1123668649.93


In [212]:
no_nan_vaults[no_nan_vaults['ilk']=='WSTETH-A'].head()

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
9079,2021-10-28 00:00:00+00:00,0.0,WSTETH-A,2697.62,0.0,3000000.0,10000.0,0.0,0.0,0.0,...,0.0,0.04,0.0,0.0,76729520.44,0.0,Open,0.0,0.0,0.0
9075,2021-10-29 00:00:00+00:00,305.63,WSTETH-A,2857.24,873269.68,3000000.0,10000.0,1.38,0.0,632462.53,...,40.06,0.04,25298.5,632462.53,80139053.59,3000000.0,Open,0.0,0.0,0.0
9062,2021-10-30 00:00:00+00:00,674.45,WSTETH-A,2811.58,1896273.33,4078604.29,10000.0,1.7,0.0,1118390.53,...,128.36,0.04,44735.62,1118390.53,81886606.49,3000000.0,Open,0.0,0.0,0.0
9056,2021-10-31 00:00:00+00:00,962.89,WSTETH-A,2730.51,2629187.21,4353719.21,10000.0,1.92,0.0,1368324.39,...,99.96,0.04,54732.98,1368324.39,85368386.53,4078604.29,Open,0.0,0.0,0.0
9043,2021-11-01 00:00:00+00:00,3417.46,WSTETH-A,2831.66,9677082.31,4443895.33,10000.0,2.18,0.0,4442038.79,...,264.64,0.04,177681.56,4442038.79,84031640.03,4353719.21,Open,0.0,0.0,0.0


In [213]:
top_10_ilks = top_10_vaults.index.tolist()

In [214]:
top_10_ilks

['ETH-A',
 'WBTC-A',
 'WSTETH-A',
 'ETH-C',
 'ETH-B',
 'WSTETH-B',
 'USDC-A',
 'RWA013-A',
 'WBTC-C',
 'RWA012-A']

In [215]:
topvaults = no_nan_vaults[no_nan_vaults['ilk'].isin(top_10_ilks)]

In [216]:
wbtc_a_vault = no_nan_vaults[no_nan_vaults['ilk']=='WBTC-A']
eth_a_vault = no_nan_vaults[no_nan_vaults['ilk']=='ETH-A']
wsteth_a_vault = no_nan_vaults[no_nan_vaults['ilk']=='WSTETH-A']
eth_c_vault = no_nan_vaults[no_nan_vaults['ilk']=='ETH-C']
eth_b_vault = no_nan_vaults[no_nan_vaults['ilk']=='ETH-B']

In [217]:
#We start with eth-a vault, which has long history and has generated most revenues 
eth_a_vault.head()

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
21750,2019-11-13 00:00:00+00:00,0.42,ETH-A,0.0,0.0,0.0,20.0,0.0,0.0,0.0,...,0.0,0.04,0.0,0.0,0.0,0.0,Closed,0.0,0.0,0.0
21747,2019-11-15 00:00:00+00:00,0.42,ETH-A,119.74,50.29,0.0,20.0,0.0,119.74,0.0,...,0.0,0.04,0.0,0.0,0.0,0.0,Closed,0.0,0.0,0.0
21745,2019-11-16 00:00:00+00:00,0.42,ETH-A,121.69,51.11,0.0,20.0,0.0,121.69,0.0,...,0.0,0.04,0.0,0.0,0.0,0.0,Closed,0.0,0.0,0.0
21743,2019-11-17 00:00:00+00:00,0.43,ETH-A,123.31,53.02,0.0,20.0,0.0,123.31,0.0,...,0.0,0.04,0.0,0.0,0.0,0.0,Closed,0.0,0.0,0.0
21742,2019-11-18 00:00:00+00:00,26313.39,ETH-A,118.39,3115153.99,50000000.0,20.0,2.0,177.58,1558617.22,...,37.68,0.04,62331.01,1558275.09,145487.2,0.0,Open,3.0,4672730.98,3504548.23


In [218]:
eth_a_vault.describe()

Unnamed: 0,cumulative_collateral,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,liquidation_penalty,liquidation_ratio,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
count,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0
mean,1458734.7,1180.31,1674815093.22,854497895.35,7344.54,2.66,1724.99,693477623.79,0.07,1.47,70427.29,0.03,21123475.17,693604670.68,51565245.82,854223944.79,3.91,2461159737.81,1808848479.57
std,867527.31,785.15,1760759531.51,836076226.2,5719.6,0.59,1141.89,788907642.18,0.16,0.08,97692.23,0.02,27873569.54,789103240.0,53855258.0,836286080.97,0.84,2584183106.69,1897827173.0
min,0.42,0.0,0.0,0.0,20.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,517750.16,396.93,522930296.41,350558569.73,500.0,2.23,595.39,179038619.21,0.0,1.45,10818.52,0.01,3031349.4,179038619.21,12664699.17,350558569.73,3.3,758614754.09,550071332.69
50%,1733410.48,1171.43,673651523.57,421276896.85,7500.0,2.56,1708.09,216232858.05,0.13,1.45,26452.76,0.02,7708878.63,216270428.76,35828999.87,421276896.85,3.75,992240131.57,726560471.73
75%,2075231.7,1624.46,3121970537.42,1494643974.46,15000.0,3.03,2372.86,1340082473.97,0.13,1.5,108883.03,0.04,33206894.43,1340082473.97,73813092.27,1494643974.46,4.42,4612763346.09,3407265929.2
max,2966468.34,3298.34,7428423394.6,3395556201.44,15000.0,5.27,4782.6,3077919054.99,0.13,1.5,1475897.04,0.15,142209506.83,3077919054.99,351700458.61,3395556201.44,7.63,11142635091.9,8356976318.93


In [219]:
eth_a_vault.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status',
       'market_collateral_ratio', 'collateral_usd',
       'hypothetical_dai_ceiling'],
      dtype='object')

In [220]:
#7 day MA
eth_a_vault['market_price_7d_ma'] = eth_a_vault['market_price'].rolling(window=7).mean()
eth_a_vault['collateral_usd_7d_ma'] = eth_a_vault['collateral_usd'].rolling(window=7).mean()
eth_a_vault['debt_balance_7d_ma'] = eth_a_vault['debt_balance'].rolling(window=7).mean()
eth_a_vault['safety_collateral_ratio_7d_ma'] = eth_a_vault['safety_collateral_ratio'].rolling(window=7).mean()
eth_a_vault['market_collateral_ratio_7d_ma'] = eth_a_vault['market_collateral_ratio'].rolling(window=7).mean()
eth_a_vault['daily_revenues_7d_ma'] = eth_a_vault['daily_revenues'].rolling(window=7).mean()

# Calculate 30-day moving averages
eth_a_vault['market_price_30d_ma'] = eth_a_vault['market_price'].rolling(window=30).mean()
eth_a_vault['collateral_usd_30d_ma'] = eth_a_vault['collateral_usd'].rolling(window=30).mean()
eth_a_vault['debt_balance_30d_ma'] = eth_a_vault['debt_balance'].rolling(window=30).mean()
eth_a_vault['cumulative_collateral_30d_ma'] = eth_a_vault['cumulative_collateral'].rolling(window=30).mean()
eth_a_vault['safety_collateral_ratio_30d_ma'] = eth_a_vault['safety_collateral_ratio'].rolling(window=30).mean()
eth_a_vault['market_collateral_ratio_30d_ma'] = eth_a_vault['market_collateral_ratio'].rolling(window=30).mean()
eth_a_vault['daily_revenues_30d_ma'] = eth_a_vault['daily_revenues'].rolling(window=30).mean()

#calculate 90 day ma stability fee
eth_a_vault['annualized stability fee_90d_ma'] = eth_a_vault['annualized stability fee'].rolling(window=90).mean()

# Display the head of the DataFrame to verify the new columns
print(eth_a_vault[['market_price', 'market_price_7d_ma', 'market_price_30d_ma', 
                   'collateral_usd', 'collateral_usd_7d_ma', 'collateral_usd_30d_ma',
                   'debt_balance', 'debt_balance_7d_ma', 'debt_balance_30d_ma']].tail())

    market_price  market_price_7d_ma  market_price_30d_ma   collateral_usd  \
37      3,630.80            3,808.92             3,441.50 1,612,304,192.54   
28      3,537.75            3,736.15             3,466.79 1,569,679,733.27   
18      3,280.75            3,642.18             3,481.45 1,452,789,912.92   
16      3,484.03            3,565.54             3,498.75 1,549,887,793.01   
7       3,420.85            3,511.24             3,513.14 1,450,111,134.54   

    collateral_usd_7d_ma  collateral_usd_30d_ma   debt_balance  \
37      1,697,607,736.84       1,510,593,119.23 246,751,192.26   
28      1,661,773,903.88       1,522,773,914.13 247,201,188.46   
18      1,618,456,795.85       1,530,134,155.59 245,449,240.77   
16      1,583,861,447.21       1,538,972,026.37 245,482,433.74   
7       1,549,619,887.15       1,544,225,805.81 216,024,445.89   

    debt_balance_7d_ma  debt_balance_30d_ma  
37      247,744,408.07       226,233,711.18  
28      247,374,304.56       227,487,785.2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault['market_price_7d_ma'] = eth_a_vault['market_price'].rolling(window=7).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault['collateral_usd_7d_ma'] = eth_a_vault['collateral_usd'].rolling(window=7).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault['debt_balan

In [221]:
# Calculate percent change for selected columns
for column in ['debt_balance', 'cumulative_collateral', 
               'safety_price', 'safety_collateral_ratio', 
               'market_collateral_ratio','annualized stability fee','daily_revenues']:
    eth_a_vault[f'{column}_pct_change'] = eth_a_vault[column].pct_change()

# Calculate volatility (standard deviation) of the percent changes over a 7-day rolling window
for column in ['debt_balance_pct_change', 'cumulative_collateral_pct_change', 
               'safety_price_pct_change', 'safety_collateral_ratio_pct_change', 
               'market_collateral_ratio_pct_change','annualized stability fee_pct_change','daily_revenues_pct_change']:
    eth_a_vault[f'{column}_volatility_7d'] = eth_a_vault[column].rolling(window=7).std()

# Calculate lag for selected columns (1-day lag as an example)
for column in ['debt_balance', 'cumulative_collateral', 
               'safety_price', 'safety_collateral_ratio', 
               'market_collateral_ratio','annualized stability fee','daily_revenues']:
    eth_a_vault[f'{column}_lag30'] = eth_a_vault[column].shift(30)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault[f'{column}_pct_change'] = eth_a_vault[column].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault[f'{column}_pct_change'] = eth_a_vault[column].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault[f'{column}_pct_change'] = eth_a_vault[column].pct_c

In [222]:
eth_a_vault.columns = [f'eth_a_vault_{col}' if col != 'period' and not col.startswith('day') else col for col in eth_a_vault.columns]

In [223]:
eth_a_vault.set_index('day', inplace=True)
eth_a_vault = eth_a_vault.drop(columns=['eth_a_vault_ilk'])

In [None]:
eth_a_vault['']

In [224]:
eth_a_vault.columns

Index(['eth_a_vault_cumulative_collateral', 'eth_a_vault_safety_price',
       'eth_a_vault_usd_safety_value', 'eth_a_vault_dai_ceiling',
       'eth_a_vault_dai_floor', 'eth_a_vault_safety_collateral_ratio',
       'eth_a_vault_market_price', 'eth_a_vault_debt_balance',
       'eth_a_vault_liquidation_penalty', 'eth_a_vault_liquidation_ratio',
       'eth_a_vault_daily_revenues', 'eth_a_vault_annualized stability fee',
       'eth_a_vault_annualized_revenues', 'eth_a_vault_dart',
       'eth_a_vault_total_ann_revenues', 'eth_a_vault_prev_dai_ceiling',
       'eth_a_vault_status', 'eth_a_vault_market_collateral_ratio',
       'eth_a_vault_collateral_usd', 'eth_a_vault_hypothetical_dai_ceiling',
       'eth_a_vault_market_price_7d_ma', 'eth_a_vault_collateral_usd_7d_ma',
       'eth_a_vault_debt_balance_7d_ma',
       'eth_a_vault_safety_collateral_ratio_7d_ma',
       'eth_a_vault_market_collateral_ratio_7d_ma',
       'eth_a_vault_daily_revenues_7d_ma', 'eth_a_vault_market_price_30d_m

In [225]:
dai_maturity_df_reindexed.head()

Unnamed: 0,day,dai_maturity_outflow_1-block,dai_maturity_outflow_1-day,dai_maturity_outflow_1-month,dai_maturity_outflow_1-week,dai_maturity_outflow_1-year,dai_maturity_outflow_3-months,dai_maturity_outflow_dai_only_1-block,dai_maturity_outflow_dai_only_1-day,dai_maturity_outflow_dai_only_1-month,dai_maturity_outflow_dai_only_1-week,dai_maturity_outflow_dai_only_1-year,dai_maturity_outflow_dai_only_3-months,dai_maturity_outflow_surplus_buffer_1-block,dai_maturity_outflow_surplus_buffer_1-day,dai_maturity_outflow_surplus_buffer_1-month,dai_maturity_outflow_surplus_buffer_1-week,dai_maturity_outflow_surplus_buffer_1-year,dai_maturity_outflow_surplus_buffer_3-months
0,2019-11-13 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-11-14 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-11-15 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-11-16 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-11-17 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [226]:
eth_a_vault = eth_a_vault.merge(dai_maturity_df_reindexed, on=['day'], how='inner')

In [227]:
stable_coin_ratios.head()

Unnamed: 0_level_0,stablecoins_ratio,usdc_ratio
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-03-21 00:00:00+00:00,0.11,0.11
2024-03-20 00:00:00+00:00,0.1,0.1
2024-03-19 00:00:00+00:00,0.09,0.09
2024-03-18 00:00:00+00:00,0.08,0.08
2024-03-17 00:00:00+00:00,0.13,0.13


In [228]:
#eth_a_vault = eth_a_vault.merge(stable_coin_ratios, on=['day'], how='inner')

In [229]:
eth_a_vault.head()

Unnamed: 0,day,eth_a_vault_cumulative_collateral,eth_a_vault_safety_price,eth_a_vault_usd_safety_value,eth_a_vault_dai_ceiling,eth_a_vault_dai_floor,eth_a_vault_safety_collateral_ratio,eth_a_vault_market_price,eth_a_vault_debt_balance,eth_a_vault_liquidation_penalty,...,dai_maturity_outflow_dai_only_1-month,dai_maturity_outflow_dai_only_1-week,dai_maturity_outflow_dai_only_1-year,dai_maturity_outflow_dai_only_3-months,dai_maturity_outflow_surplus_buffer_1-block,dai_maturity_outflow_surplus_buffer_1-day,dai_maturity_outflow_surplus_buffer_1-month,dai_maturity_outflow_surplus_buffer_1-week,dai_maturity_outflow_surplus_buffer_1-year,dai_maturity_outflow_surplus_buffer_3-months
0,2019-11-13 00:00:00+00:00,0.42,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-11-15 00:00:00+00:00,0.42,119.74,50.29,0.0,20.0,0.0,119.74,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-11-16 00:00:00+00:00,0.42,121.69,51.11,0.0,20.0,0.0,121.69,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-11-17 00:00:00+00:00,0.43,123.31,53.02,0.0,20.0,0.0,123.31,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-11-18 00:00:00+00:00,26313.39,118.39,3115153.99,50000000.0,20.0,2.0,177.58,1558617.22,0.0,...,44042.45,201006.77,1173199.67,64980.67,0.0,0.0,0.0,0.0,90.79,0.0


In [230]:
start_date = eth_a_vault['day'].min()
end_date = eth_a_vault['day'].max()
date_range = pd.date_range(start=start_date, end=end_date)

psm_full_range_df = pd.DataFrame(index=date_range)



In [231]:
psm_full_range_df.head()

2019-11-13 00:00:00+00:00
2019-11-14 00:00:00+00:00
2019-11-15 00:00:00+00:00
2019-11-16 00:00:00+00:00
2019-11-17 00:00:00+00:00


In [232]:


psm_columns = ['psm_change', 'psm_change_excl_rwa', 'psm_change_excl_rwa_30d_avg', 'psm_change_excl_rwa_7d_avg', 'psm_fees', 'psm_inflow', 'psm_inflow_exl_rwa', 'psm_lifetime_fees', 'psm_lifetime_turnover', 'psm_outflow', 'psm_balance', 'psm_turnover']
for column in psm_columns:
    psm_full_range_df[column] = psm_stats_csv[column]

# Fill missing values with zeros
psm_full_range_df.fillna(0, inplace=True)

In [233]:
eth_a_vault.describe()

Unnamed: 0,eth_a_vault_cumulative_collateral,eth_a_vault_safety_price,eth_a_vault_usd_safety_value,eth_a_vault_dai_ceiling,eth_a_vault_dai_floor,eth_a_vault_safety_collateral_ratio,eth_a_vault_market_price,eth_a_vault_debt_balance,eth_a_vault_liquidation_penalty,eth_a_vault_liquidation_ratio,...,dai_maturity_outflow_dai_only_1-month,dai_maturity_outflow_dai_only_1-week,dai_maturity_outflow_dai_only_1-year,dai_maturity_outflow_dai_only_3-months,dai_maturity_outflow_surplus_buffer_1-block,dai_maturity_outflow_surplus_buffer_1-day,dai_maturity_outflow_surplus_buffer_1-month,dai_maturity_outflow_surplus_buffer_1-week,dai_maturity_outflow_surplus_buffer_1-year,dai_maturity_outflow_surplus_buffer_3-months
count,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,...,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0
mean,1459396.78,1179.55,1675246787.68,854770473.22,7344.44,2.66,1723.91,693783096.07,0.07,1.47,...,121829547.87,534720838.07,2643717876.3,179748513.26,0.0,0.0,0.0,0.0,43165723.38,0.0
std,867409.61,784.84,1761240268.84,836274295.12,5721.43,0.59,1141.45,789067601.73,0.16,0.08,...,86900656.56,372490872.45,1759401866.73,128214083.44,0.0,0.0,0.0,0.0,30383977.1,0.0
min,0.42,0.0,0.0,0.0,20.0,0.0,0.0,0.0,-1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5674219.65,0.0
25%,517894.21,396.45,522790141.46,350558569.73,500.0,2.23,594.67,179036518.3,0.0,1.45,...,42015130.91,170882210.19,570286315.21,61989537.4,0.0,0.0,0.0,0.0,4001819.24,0.0
50%,1734175.26,1171.1,673540872.03,421276896.85,7500.0,2.56,1707.98,216249923.32,0.13,1.45,...,118349364.98,539666807.68,3212265572.5,174613817.18,0.0,0.0,0.0,0.0,53218504.18,0.0
75%,2075622.02,1622.45,3122870291.31,1496694216.25,15000.0,3.03,2370.73,1340283261.88,0.13,1.5,...,168159893.31,742722843.39,3705032007.03,248104760.62,0.0,0.0,0.0,0.0,69846462.07,0.0
max,2966468.34,3298.34,7428423394.6,3395556201.44,15000.0,5.27,4782.6,3077919054.99,0.13,1.5,...,314069539.85,1342922289.47,5952992110.3,463381288.3,0.0,0.0,0.0,0.0,83550327.25,0.0


In [362]:
eth_a_vault['status']

KeyError: 'status'

In [235]:
psm_full_range_df = psm_full_range_df.rename_axis('day')

In [236]:
psm_full_range_df.duplicated().sum()

424

In [237]:
eth_a_vault= eth_a_vault.merge(psm_full_range_df, on=['day'], how='inner')

In [238]:
eth_a_vault[['day','psm_change']].tail()

Unnamed: 0,day,psm_change
1558,2024-03-16 00:00:00+00:00,6819615.5
1559,2024-03-17 00:00:00+00:00,64591276.21
1560,2024-03-18 00:00:00+00:00,-232459681.52
1561,2024-03-19 00:00:00+00:00,4430852.16
1562,2024-03-20 00:00:00+00:00,47004882.05


In [239]:
where_is_dai_csv_table_full = pd.DataFrame(index=date_range)


where_is_dai_csv_table_columns = where_is_dai_csv_table.columns
for column in where_is_dai_csv_table_columns:
    where_is_dai_csv_table_full[column] = where_is_dai_csv_table[column]

# Fill missing values with zeros
where_is_dai_csv_table_full.fillna(0, inplace=True)

In [240]:
where_is_dai_csv_table_full = where_is_dai_csv_table_full.rename_axis('day')

In [241]:
where_is_dai_csv_table_full.head()

Unnamed: 0_level_0,where_is_dai_Bridge,where_is_dai_CeFi,where_is_dai_Dai Savings,where_is_dai_Dex,where_is_dai_EOA,where_is_dai_Lending,where_is_dai_Other,where_is_dai_Treasury
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-11-13 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-14 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-15 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-16 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-17 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [242]:
eth_a_vault= eth_a_vault.merge(where_is_dai_csv_table_full, on=['day'], how='inner')

In [243]:
eth_a_vault.head()

Unnamed: 0,day,eth_a_vault_cumulative_collateral,eth_a_vault_safety_price,eth_a_vault_usd_safety_value,eth_a_vault_dai_ceiling,eth_a_vault_dai_floor,eth_a_vault_safety_collateral_ratio,eth_a_vault_market_price,eth_a_vault_debt_balance,eth_a_vault_liquidation_penalty,...,psm_balance,psm_turnover,where_is_dai_Bridge,where_is_dai_CeFi,where_is_dai_Dai Savings,where_is_dai_Dex,where_is_dai_EOA,where_is_dai_Lending,where_is_dai_Other,where_is_dai_Treasury
0,2019-11-13 00:00:00+00:00,0.42,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-11-15 00:00:00+00:00,0.42,119.74,50.29,0.0,20.0,0.0,119.74,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-11-16 00:00:00+00:00,0.42,121.69,51.11,0.0,20.0,0.0,121.69,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-11-17 00:00:00+00:00,0.43,123.31,53.02,0.0,20.0,0.0,123.31,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-11-18 00:00:00+00:00,26313.39,118.39,3115153.99,50000000.0,20.0,2.0,177.58,1558617.22,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [244]:
daily_surplus_buffer_csv = daily_surplus_buffer_csv.fillna(0)

In [245]:
eth_a_vault= eth_a_vault.merge(daily_surplus_buffer_csv, on=['day'], how='inner')

In [363]:
eth_a_vault['eth_a_vault_status']

0       Closed
1       Closed
2       Closed
3       Closed
4         Open
         ...  
1558      Open
1559      Open
1560      Open
1561      Open
1562      Open
Name: eth_a_vault_status, Length: 1563, dtype: object

In [247]:
dsr_df.fillna(0, inplace=True)

In [248]:
dsr_df.head()

Unnamed: 0_level_0,dsr_inflow,dsr_interest,dsr_outflow,dsr_balance,dsr_rate,dai_circulating,dai_total_balance,dai_percent_in_dsr,dai_percent_out_dsr
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-03-27,5288164.43,146319.97,0.0,1545849069.62,0.15,3198754360.76,4744603430.38,0.33,0.67
2024-03-26,24182507.4,570070.21,0.0,1510137336.77,0.15,3243870446.94,4754007783.71,0.32,0.68
2024-03-25,0.0,591988.67,79508005.52,1485384759.15,0.15,3238774686.95,4724159446.11,0.31,0.69
2024-03-24,32601790.65,589190.34,0.0,1564300776.0,0.15,3258196717.18,4822497493.18,0.32,0.68
2024-03-23,42441802.19,577016.24,0.0,1531109795.0,0.15,3245174169.16,4776283964.16,0.32,0.68


In [249]:
dsr_df.reset_index(inplace=True)
eth_a_vault.reset_index(inplace=True)
# Remove timezone information from both 'day' columns
eth_a_vault['day'] = eth_a_vault['day'].dt.tz_localize(None)
dsr_df['day'] = dsr_df['day'].dt.tz_localize(None)

In [250]:
dsr_df_full = pd.DataFrame(index=date_range)
dsr_df_full = dsr_df_full.rename_axis('day')

dsr_df_full.reset_index(inplace=True)
dsr_df_full['day'] = dsr_df_full['day'].dt.tz_localize(None)


In [251]:
dsr_df_full = dsr_df_full.merge(dsr_df, on=['day'], how='outer')

In [252]:
dsr_df_full.fillna(0, inplace=True)

In [253]:
dsr_df_full

Unnamed: 0,day,dsr_inflow,dsr_interest,dsr_outflow,dsr_balance,dsr_rate,dai_circulating,dai_total_balance,dai_percent_in_dsr,dai_percent_out_dsr
0,2019-11-13,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
1,2019-11-14,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2,2019-11-15,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
3,2019-11-16,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
4,2019-11-17,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...
1592,2024-03-25,0.00,591988.67,79508005.52,1485384759.15,0.15,3238774686.95,4724159446.11,0.31,0.69
1593,2024-03-24,32601790.65,589190.34,0.00,1564300776.00,0.15,3258196717.18,4822497493.18,0.32,0.68
1594,2024-03-23,42441802.19,577016.24,0.00,1531109795.00,0.15,3245174169.16,4776283964.16,0.32,0.68
1595,2024-03-22,80719471.60,542875.90,0.00,1488090976.57,0.15,3217359661.11,4705450637.68,0.32,0.68


In [254]:
dsr_df_full = dsr_df_full.set_index('day')

In [255]:
#dsr_df_full = dsr_df_full.drop(columns=['level_0','index'])

In [256]:
dsr_df_full.head()

Unnamed: 0_level_0,dsr_inflow,dsr_interest,dsr_outflow,dsr_balance,dsr_rate,dai_circulating,dai_total_balance,dai_percent_in_dsr,dai_percent_out_dsr
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-11-13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [257]:
eth_a_vault= eth_a_vault.merge(dsr_df_full, on=['day'], how='inner')

In [258]:
eth_a_vault = eth_a_vault.drop(columns=['index'])

In [259]:
eth_a_vault.head()

Unnamed: 0,day,eth_a_vault_cumulative_collateral,eth_a_vault_safety_price,eth_a_vault_usd_safety_value,eth_a_vault_dai_ceiling,eth_a_vault_dai_floor,eth_a_vault_safety_collateral_ratio,eth_a_vault_market_price,eth_a_vault_debt_balance,eth_a_vault_liquidation_penalty,...,daily_surplus_buffer,dsr_inflow,dsr_interest,dsr_outflow,dsr_balance,dsr_rate,dai_circulating,dai_total_balance,dai_percent_in_dsr,dai_percent_out_dsr
0,2019-11-13,0.42,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-11-15,0.42,119.74,50.29,0.0,20.0,0.0,119.74,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-11-16,0.42,121.69,51.11,0.0,20.0,0.0,121.69,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-11-17,0.43,123.31,53.02,0.0,20.0,0.0,123.31,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-11-18,26313.39,118.39,3115153.99,50000000.0,20.0,2.0,177.58,1558617.22,0.0,...,90.78,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [260]:
pivoted_balance_sheet = pivoted_balance_sheet.rename_axis('day')

In [261]:
pivoted_income_statement.rename(columns={'period':'day'}, inplace=True)

In [262]:
pivoted_balance_sheet.reset_index(inplace=True)
pivoted_balance_sheet['day'] = pivoted_balance_sheet['day'].dt.tz_localize(None)

In [263]:
pivoted_balance_sheet = pivoted_balance_sheet.set_index('day')

In [264]:
pivoted_balance_sheet = pivoted_balance_sheet.fillna(0)

In [382]:
print(pivoted_balance_sheet.describe())

       b_s_Crypto-Loans           b_s_DAI           b_s_DSR     b_s_Equity  \
count          1,360.00          1,360.00          1,360.00       1,360.00   
mean   2,290,165,150.68 -4,741,579,818.97   -317,918,673.72 -50,907,047.75   
std    1,259,469,727.42  2,522,050,832.59    522,223,094.31  26,391,917.31   
min      144,805,383.35 -9,951,190,219.65 -1,750,756,886.52 -83,598,090.64   
25%    1,334,072,649.50 -6,326,898,796.77   -258,531,477.03 -70,867,009.35   
50%    2,176,102,429.51 -4,846,370,252.24    -92,004,368.45 -58,130,985.74   
75%    2,843,872,505.76 -3,656,086,113.00    -12,338,379.66 -40,493,992.85   
max    6,135,551,191.72   -108,067,101.21     -1,167,096.23    -200,158.70   

       b_s_Others assets  b_s_Real-World Assets  b_s_Stablecoins  \
count           1,360.00               1,360.00         1,360.00   
mean           30,392.83         580,607,673.57 2,239,602,323.36   
std            23,045.94         880,997,103.86 1,686,843,867.50   
min                 0.00 

In [266]:
eth_a_vault= eth_a_vault.merge(pivoted_balance_sheet, on=['day'], how='left') 

In [267]:
eth_a_vault = eth_a_vault.fillna(0)

In [268]:
eth_a_vault.shape[0]

1563

In [269]:
print(list(eth_a_vault.columns))

['day', 'eth_a_vault_cumulative_collateral', 'eth_a_vault_safety_price', 'eth_a_vault_usd_safety_value', 'eth_a_vault_dai_ceiling', 'eth_a_vault_dai_floor', 'eth_a_vault_safety_collateral_ratio', 'eth_a_vault_market_price', 'eth_a_vault_debt_balance', 'eth_a_vault_liquidation_penalty', 'eth_a_vault_liquidation_ratio', 'eth_a_vault_daily_revenues', 'eth_a_vault_annualized stability fee', 'eth_a_vault_annualized_revenues', 'eth_a_vault_dart', 'eth_a_vault_total_ann_revenues', 'eth_a_vault_prev_dai_ceiling', 'eth_a_vault_status', 'eth_a_vault_market_collateral_ratio', 'eth_a_vault_collateral_usd', 'eth_a_vault_hypothetical_dai_ceiling', 'eth_a_vault_market_price_7d_ma', 'eth_a_vault_collateral_usd_7d_ma', 'eth_a_vault_debt_balance_7d_ma', 'eth_a_vault_safety_collateral_ratio_7d_ma', 'eth_a_vault_market_collateral_ratio_7d_ma', 'eth_a_vault_daily_revenues_7d_ma', 'eth_a_vault_market_price_30d_ma', 'eth_a_vault_collateral_usd_30d_ma', 'eth_a_vault_debt_balance_30d_ma', 'eth_a_vault_cumulati

In [270]:
eth_a_vault = eth_a_vault.sort_values(by='day')
pivoted_income_statement = pivoted_income_statement.sort_values(by='day')

In [271]:
pivoted_income_statement.tail()

item,day,1 - PnL,1.1 - Lending Revenues,1.2 - Liquidations Revenues,1.3 - Trading Revenues,1.4 - Lending Expenses,1.5 - Liquidations Expenses,1.6 - Workforce Expenses,1.9 - Net Income,2 - Assets,...,debt_to_equity_Lag_11,debt_ratio_Lag_11,Total_Revenues_Lag_12,Total_Expenses_Lag_12,Net_Income_Lag_12,profit_margin_Lag_12,ROA_Lag_12,ROE_Lag_12,debt_to_equity_Lag_12,debt_ratio_Lag_12
48,2023-11-01,0.0,29666040.79,455.03,0.0,-6376789.6,0.0,-2974302.25,20315403.96,0.0,...,67.65,0.99,1169462.77,-1941130.27,-771667.5,-0.0,-0.0,-0.01,68.99,0.99
49,2023-12-01,0.0,14270261.48,0.0,0.0,-6484200.84,0.0,-2658930.56,5127130.08,0.0,...,69.23,0.99,1364508.64,-1849641.34,-485132.7,-0.0,-0.0,-0.01,67.65,0.99
50,2024-01-01,0.0,20751255.62,27376.3,0.0,-5943924.23,0.0,-5669555.33,9165152.35,0.0,...,69.7,0.99,1540089.06,-2003227.02,-463137.96,-0.0,-0.0,-0.01,69.23,0.99
51,2024-02-01,0.0,29691662.12,0.0,0.0,-4567709.04,0.0,-1581115.17,23542837.91,0.0,...,72.85,0.99,1750712.42,-2422673.84,-671961.42,-0.0,-0.0,-0.01,69.7,0.99
52,2024-03-01,0.0,18130771.84,4623.88,0.0,-7076544.94,0.0,-3857879.46,7200971.32,0.0,...,68.99,0.99,2286044.04,-4456169.85,-2170125.82,-0.0,-0.0,-0.03,72.85,0.99


In [272]:
total_vault_data = pd.merge_asof(eth_a_vault, pivoted_income_statement, on='day')

In [273]:
print(list(total_vault_data.columns))

['day', 'eth_a_vault_cumulative_collateral', 'eth_a_vault_safety_price', 'eth_a_vault_usd_safety_value', 'eth_a_vault_dai_ceiling', 'eth_a_vault_dai_floor', 'eth_a_vault_safety_collateral_ratio', 'eth_a_vault_market_price', 'eth_a_vault_debt_balance', 'eth_a_vault_liquidation_penalty', 'eth_a_vault_liquidation_ratio', 'eth_a_vault_daily_revenues', 'eth_a_vault_annualized stability fee', 'eth_a_vault_annualized_revenues', 'eth_a_vault_dart', 'eth_a_vault_total_ann_revenues', 'eth_a_vault_prev_dai_ceiling', 'eth_a_vault_status', 'eth_a_vault_market_collateral_ratio', 'eth_a_vault_collateral_usd', 'eth_a_vault_hypothetical_dai_ceiling', 'eth_a_vault_market_price_7d_ma', 'eth_a_vault_collateral_usd_7d_ma', 'eth_a_vault_debt_balance_7d_ma', 'eth_a_vault_safety_collateral_ratio_7d_ma', 'eth_a_vault_market_collateral_ratio_7d_ma', 'eth_a_vault_daily_revenues_7d_ma', 'eth_a_vault_market_price_30d_ma', 'eth_a_vault_collateral_usd_30d_ma', 'eth_a_vault_debt_balance_30d_ma', 'eth_a_vault_cumulati

### Now for CoinGecko Crypto Market Data

In [274]:
#lets get price feeds for accepted collateral types

ir_csv['ilk'].unique()

array(['ETH-A', 'ETH-B', 'ETH-C', 'WBTC-A', 'WBTC-B', 'WBTC-C',
       'WSTETH-A', 'WSTETH-B', 'RWA002-A', 'RWA013-A', 'DIRECT-SPARK-DAI',
       'RWA014-A', 'RWA005-A', 'RWA012-A', 'RWA015-A', 'RWA007-A',
       'RETH-A', 'RWA003-A', 'GUNIV3DAIUSDC2-A', 'CRVV1ETHSTETH-A',
       'USDC-B', 'LINK-A', 'MATIC-A', 'UNIV2USDCETH-A', 'GNO-A',
       'UNIV2DAIUSDC-A', 'YFI-A', 'RWA004-A', 'GUNIV3DAIUSDC1-A',
       'GUSD-A', 'PAXUSD-A', 'USDC-A', 'DIRECT-AAVEV2-DAI',
       'DIRECT-COMPV2-DAI', 'RWA008-A', 'RENBTC-A', 'MANA-A', 'RWA009-A',
       'RWA001-A', 'UNI-A', 'UNIV2DAIETH-A', 'UNIV2WBTCETH-A',
       'UNIV2WBTCDAI-A', 'RWA-001', 'UNIV2UNIETH-A', 'TUSD-A', 'USDP-A',
       'BAT-A', 'BAL-A', 'ZRX-A', 'COMP-A', 'PSM-GUSD-A', 'AAVE-A',
       'UNIV2LINKETH-A', 'KNC-A', 'LRC-A', 'PSM-USDC-A', 'UNIV2AAVEETH-A',
       'UNIV2DAIUSDT-A', 'UNIV2ETHUSDT-A', 'USDT-A', 'PSM-PAX-A',
       'RWA006-A', 'PAX-A', nan, 'USDC', 'SAI'], dtype=object)

In [275]:
#need to use yfinance instead, coingecko clocked to 1 year historical




In [276]:
sp = yf.Ticker("^GSPC")

In [277]:
sp_from_nov = sp.history(period="41mo")

In [278]:
sp_from_nov = sp_from_nov.drop(columns=['Dividends','Stock Splits','Open','Low','High'])

In [279]:
sp_from_nov.columns = [f's&p_500_market_{col}' if col != 'Date' else col for col in sp_from_nov.columns]

In [280]:
sp_from_nov

Unnamed: 0_level_0,s&p_500_market_Close,s&p_500_market_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-11-04 00:00:00-05:00,3443.44,4790400000
2020-11-05 00:00:00-05:00,3510.45,4858150000
2020-11-06 00:00:00-05:00,3509.44,4842460000
2020-11-09 00:00:00-05:00,3550.50,8570510000
2020-11-10 00:00:00-05:00,3545.53,6037470000
...,...,...
2024-03-27 00:00:00-04:00,5248.49,3850500000
2024-03-28 00:00:00-04:00,5254.35,3998270000
2024-04-01 00:00:00-04:00,5243.77,3325930000
2024-04-02 00:00:00-04:00,5205.81,3886590000


In [281]:
btc = yf.Ticker('BTC-USD')

In [282]:
btc_from_nov = btc.history(period='41mo')
btc_from_nov = btc_from_nov.drop(columns=['Dividends','Stock Splits','Open','Low','High'])

In [283]:
btc_from_nov.columns = [f'btc_market_{col}' if col != 'Date' else col for col in btc_from_nov.columns]

In [448]:
btc_from_nov.describe()

Unnamed: 0,btc_market_Close,btc_market_Volume
count,1248.0,1248.0
mean,35553.3,32292779698.55
std,13662.83,20048116628.58
min,13950.3,5331172801.0
25%,23947.6,19294405680.75
50%,33548.94,28575169162.5
75%,44350.21,39321396767.0
max,73083.5,350967941479.0


In [284]:
eth = yf.Ticker('ETH-USD')

In [285]:
eth_from_nov = eth.history(period='41mo')
eth_from_nov = eth_from_nov.drop(columns=['Dividends','Stock Splits','Open','Low','High'])

In [286]:
eth_from_nov.columns = [f'eth_market_{col}' if col != 'Date' else col for col in eth_from_nov.columns]

In [447]:
eth_from_nov.describe()

Unnamed: 0,eth_market_Close,eth_market_Volume
count,1248.0,1248.0
mean,2166.39,16651333920.49
std,920.07,10978619745.88
min,387.6,2081625742.0
25%,1583.44,8624262973.75
50%,1880.81,14662045135.0
75%,2763.88,21285832540.25
max,4812.09,84482912776.0


In [288]:
mkr = yf.Ticker('MKR-USD')

In [289]:
mkr_from_nov = mkr.history(period='41mo')
mkr_from_nov = mkr_from_nov.drop(columns=['Dividends','Stock Splits','Open','Low','High'])

In [290]:
mkr_from_nov.columns = [f'mkr_market_{col}' if col != 'Date' else col for col in mkr_from_nov.columns]

In [446]:
mkr_from_nov.describe()

Unnamed: 0,mkr_market_Close,mkr_market_Volume
count,1248.0,1248.0
mean,1663.97,142485960.73
std,993.41,535871504.52
min,502.06,10290023.0
25%,775.08,46659678.75
50%,1407.76,80113085.0
75%,2257.46,136346638.5
max,6012.46,13754958457.0


In [292]:
dai = yf.Ticker('DAI-USD')

In [293]:
dai_from_nov = dai.history(period='41mo')
dai_from_nov = dai_from_nov.drop(columns=['Dividends','Stock Splits','Open','Low','High'])

In [294]:
dai_from_nov.columns = [f'dai_market_{col}' if col != 'Date' else col for col in dai_from_nov.columns]

In [445]:
dai_from_nov.describe()

Unnamed: 0,dai_market_Close,dai_market_Volume,deviation,abs_deviation
count,1248.0,1248.0,1248.0,1248.0
mean,1.0,339442488.21,0.0,0.0
std,0.0,287632250.25,0.0,0.0
min,0.97,45847952.0,-0.03,0.0
25%,1.0,160987684.5,-0.0,0.0
50%,1.0,285155008.0,0.0,0.0
75%,1.0,420790948.0,0.0,0.0
max,1.01,4642451631.0,0.01,0.03


In [440]:
dai_from_nov['deviation'] = dai_from_nov['dai_market_Close'] - 1
dai_from_nov['abs_deviation'] = dai_from_nov['deviation'].abs()

average_deviation = dai_from_nov['abs_deviation'].mean()
standard_deviation = dai_from_nov['dai_market_Close'].std()

print(f"Average Deviation from $1: {average_deviation}")
print(f"Standard Deviation of DAI Price: {standard_deviation}")

Average Deviation from $1: 0.0008210278569887846
Standard Deviation of DAI Price: 0.001636980960230382


In [296]:
crypto_market_data = pd.merge(dai_from_nov, eth_from_nov, on=['Date'], how='inner')

In [297]:
crypto_market_data = crypto_market_data.merge(btc_from_nov, on=['Date'], how='inner')

In [298]:
crypto_market_data = crypto_market_data.merge(mkr_from_nov, on=['Date'], how='inner')

In [299]:
sp_from_nov.index = sp_from_nov.index.tz_convert ('UTC')

In [424]:
sp_from_nov.describe()

Unnamed: 0,s&p_500_market_Close,s&p_500_market_Volume
count,857.0,857.0
mean,4250.12,4345673150.53
std,365.86,923082409.97
min,3443.44,1639500000.0
25%,3962.71,3812470000.0
50%,4224.79,4151200000.0
75%,4487.46,4687320000.0
max,5254.35,9976520000.0


In [420]:
print(sp_from_nov.describe())

       s&p_500_market_Close  s&p_500_market_Volume
count                857.00                 857.00
mean               4,250.12       4,345,673,150.53
std                  365.86         923,082,409.97
min                3,443.44       1,639,500,000.00
25%                3,962.71       3,812,470,000.00
50%                4,224.79       4,151,200,000.00
75%                4,487.46       4,687,320,000.00
max                5,254.35       9,976,520,000.00


In [301]:
crypto_market_data.index

DatetimeIndex(['2020-11-03 00:00:00+00:00', '2020-11-04 00:00:00+00:00',
               '2020-11-05 00:00:00+00:00', '2020-11-06 00:00:00+00:00',
               '2020-11-07 00:00:00+00:00', '2020-11-08 00:00:00+00:00',
               '2020-11-09 00:00:00+00:00', '2020-11-10 00:00:00+00:00',
               '2020-11-11 00:00:00+00:00', '2020-11-12 00:00:00+00:00',
               ...
               '2024-03-25 00:00:00+00:00', '2024-03-26 00:00:00+00:00',
               '2024-03-27 00:00:00+00:00', '2024-03-28 00:00:00+00:00',
               '2024-03-29 00:00:00+00:00', '2024-03-30 00:00:00+00:00',
               '2024-03-31 00:00:00+00:00', '2024-04-01 00:00:00+00:00',
               '2024-04-02 00:00:00+00:00', '2024-04-03 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='Date', length=1248, freq=None)

In [303]:
sp_from_nov_normalized = sp_from_nov.index.normalize()
crypto_market_data_normalized = crypto_market_data.index.normalize()

In [304]:
sp_from_nov.index = sp_from_nov_normalized
crypto_market_data.index = crypto_market_data_normalized

In [425]:
sp_from_nov.describe()

Unnamed: 0,s&p_500_market_Close,s&p_500_market_Volume
count,857.0,857.0
mean,4250.12,4345673150.53
std,365.86,923082409.97
min,3443.44,1639500000.0
25%,3962.71,3812470000.0
50%,4224.79,4151200000.0
75%,4487.46,4687320000.0
max,5254.35,9976520000.0


In [305]:
crypto_market_data = crypto_market_data.merge(sp_from_nov, on=['Date'], how='inner')

In [307]:
for col in crypto_market_data:
    crypto_market_data[f'{col}_daily_returns'] = crypto_market_data[col].pct_change()
    crypto_market_data[f'{col}_7d_ma']= crypto_market_data[col].rolling(window=7).mean()
    crypto_market_data[f'{col}_30d_ma'] = crypto_market_data[col].rolling(window=30).mean()



In [308]:
crypto_market_data.rename_axis('day',inplace=True)

In [309]:
crypto_market_data = crypto_market_data.fillna(0)

In [310]:
nan_rows = crypto_market_data[crypto_market_data.isna().any(axis=1)]
print(nan_rows)


Empty DataFrame
Columns: [dai_market_Close, dai_market_Volume, eth_market_Close, eth_market_Volume, btc_market_Close, btc_market_Volume, mkr_market_Close, mkr_market_Volume, s&p_500_market_Close, s&p_500_market_Volume, dai_market_Close_daily_returns, dai_market_Close_7d_ma, dai_market_Close_30d_ma, dai_market_Volume_daily_returns, dai_market_Volume_7d_ma, dai_market_Volume_30d_ma, eth_market_Close_daily_returns, eth_market_Close_7d_ma, eth_market_Close_30d_ma, eth_market_Volume_daily_returns, eth_market_Volume_7d_ma, eth_market_Volume_30d_ma, btc_market_Close_daily_returns, btc_market_Close_7d_ma, btc_market_Close_30d_ma, btc_market_Volume_daily_returns, btc_market_Volume_7d_ma, btc_market_Volume_30d_ma, mkr_market_Close_daily_returns, mkr_market_Close_7d_ma, mkr_market_Close_30d_ma, mkr_market_Volume_daily_returns, mkr_market_Volume_7d_ma, mkr_market_Volume_30d_ma, s&p_500_market_Close_daily_returns, s&p_500_market_Close_7d_ma, s&p_500_market_Close_30d_ma, s&p_500_market_Volume_daily_

In [311]:
nan_rows.columns

Index(['dai_market_Close', 'dai_market_Volume', 'eth_market_Close',
       'eth_market_Volume', 'btc_market_Close', 'btc_market_Volume',
       'mkr_market_Close', 'mkr_market_Volume', 's&p_500_market_Close',
       's&p_500_market_Volume', 'dai_market_Close_daily_returns',
       'dai_market_Close_7d_ma', 'dai_market_Close_30d_ma',
       'dai_market_Volume_daily_returns', 'dai_market_Volume_7d_ma',
       'dai_market_Volume_30d_ma', 'eth_market_Close_daily_returns',
       'eth_market_Close_7d_ma', 'eth_market_Close_30d_ma',
       'eth_market_Volume_daily_returns', 'eth_market_Volume_7d_ma',
       'eth_market_Volume_30d_ma', 'btc_market_Close_daily_returns',
       'btc_market_Close_7d_ma', 'btc_market_Close_30d_ma',
       'btc_market_Volume_daily_returns', 'btc_market_Volume_7d_ma',
       'btc_market_Volume_30d_ma', 'mkr_market_Close_daily_returns',
       'mkr_market_Close_7d_ma', 'mkr_market_Close_30d_ma',
       'mkr_market_Volume_daily_returns', 'mkr_market_Volume_7d_ma',
  

for column in crypto_market_data[f'{col}_daily_returns']:
    crypto_market_data[f'{column}_volatility_7d'] = eth_a_vault[column].rolling(window=7).std()



# Calculate lag for selected columns (1-day lag as an example)
for column in ['debt_balance', 'cumulative_collateral', 
               'safety_price', 'safety_collateral_ratio', 
               'market_collateral_ratio','annualized stability fee','daily_revenues']:
    eth_a_vault[f'{column}_lag30'] = eth_a_vault[column].shift(30)

## Now for Macro Economic Data from FRED Api

In [312]:
def fetch_and_process_tbill_data(api_url, data_key, date_column, value_column, date_format='datetime'):
    # Retrieve the API key from Streamlit secrets
    api_key = st.secrets["FRED_API_KEY"]

    # Append the API key to the URL
    api_url_with_key = f"{api_url}&api_key={api_key}"

    response = requests.get(api_url_with_key)
    if response.status_code == 200:
        data = response.json()
        df = pd.DataFrame(data[data_key])
        
        if date_format == 'datetime':
            df[date_column] = pd.to_datetime(df[date_column])
        
        df.set_index(date_column, inplace=True)
        df[value_column] = df[value_column].astype(float)
        return df
    else:
        print(f"Failed to retrieve data: {response.status_code}")
        return pd.DataFrame()  # Return an empty DataFrame in case of failure

In [313]:
three_month_tbill_historical_api = "https://api.stlouisfed.org/fred/series/observations?series_id=TB3MS&file_type=json"
three_month_tbill = fetch_and_process_tbill_data(three_month_tbill_historical_api, "observations", "date", "value")


In [314]:
three_month_tbill = three_month_tbill[three_month_tbill.index >= '2019-11-01']

In [315]:
three_month_tbill['3m_tbill'] = three_month_tbill['value'] / 100

In [316]:
three_month_tbill

Unnamed: 0_level_0,realtime_start,realtime_end,value,3m_tbill
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-11-01,2024-04-03,2024-04-03,1.54,0.02
2019-12-01,2024-04-03,2024-04-03,1.54,0.02
2020-01-01,2024-04-03,2024-04-03,1.52,0.02
2020-02-01,2024-04-03,2024-04-03,1.52,0.02
2020-03-01,2024-04-03,2024-04-03,0.29,0.0
2020-04-01,2024-04-03,2024-04-03,0.14,0.0
2020-05-01,2024-04-03,2024-04-03,0.13,0.0
2020-06-01,2024-04-03,2024-04-03,0.16,0.0
2020-07-01,2024-04-03,2024-04-03,0.13,0.0
2020-08-01,2024-04-03,2024-04-03,0.1,0.0


In [317]:
tmo_path = '../data/csv/3mo_tbill.csv'
three_month_tbill.to_csv(tmo_path)
three_month_tbill_csv = pd.read_csv(tmo_path, index_col='date', parse_dates=True)

In [318]:
three_month_tbill_csv.describe()

Unnamed: 0_level_0,realtime_start,realtime_end,value,3m_tbill
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-11-01,2024-04-03,2024-04-03,5.27,0.05
2023-12-01,2024-04-03,2024-04-03,5.24,0.05
2024-01-01,2024-04-03,2024-04-03,5.22,0.05
2024-02-01,2024-04-03,2024-04-03,5.24,0.05
2024-03-01,2024-04-03,2024-04-03,5.24,0.05


In [320]:
#sticky_price_consumer_index

spi_path = "../data/csv/sticky_price_consumer_price_index.csv"
sticky_index = pd.read_csv(spi_path)

In [380]:
print(sticky_index.describe())

       sticky_cpi
count       52.00
mean         3.86
std          1.72
min          1.40
25%          2.31
50%          3.71
75%          5.40
max          6.54


In [321]:
eff_rate_path =  "../data/csv/effective_federal_funds_rate.csv"
fed_funds_rate = pd.read_csv(eff_rate_path)

In [378]:
fed_funds_rate.describe()

Unnamed: 0,date,effective_funds_rate
count,1152,1152.0
unique,,34.0
top,,5.33
freq,,171.0
mean,2022-01-15 12:00:00+00:00,
min,2019-11-01 00:00:00+00:00,
25%,2020-12-08 18:00:00+00:00,
50%,2022-01-15 12:00:00+00:00,
75%,2023-02-22 06:00:00+00:00,
max,2024-04-01 00:00:00+00:00,


In [322]:
fed_funds_rate=fed_funds_rate.rename(columns={'DATE':'date'})
fed_funds_rate['date'] = pd.to_datetime(fed_funds_rate['date'])

In [323]:
fed_funds_rate['date'] = fed_funds_rate['date'].dt.tz_localize('UTC')

In [377]:
fed_funds_rate.describe()

Unnamed: 0,date,effective_funds_rate
count,1152,1152.0
unique,,34.0
top,,5.33
freq,,171.0
mean,2022-01-15 12:00:00+00:00,
min,2019-11-01 00:00:00+00:00,
25%,2020-12-08 18:00:00+00:00,
50%,2022-01-15 12:00:00+00:00,
75%,2023-02-22 06:00:00+00:00,
max,2024-04-01 00:00:00+00:00,


In [327]:
three_month_tbill_csv.index = three_month_tbill_csv.index.tz_localize('UTC')

In [329]:
three_month_tbill_csv.tail()

Unnamed: 0_level_0,3_m_tbill_yield,3m_tbill
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-11-01 00:00:00+00:00,5.27,0.05
2023-12-01 00:00:00+00:00,5.24,0.05
2024-01-01 00:00:00+00:00,5.22,0.05
2024-02-01 00:00:00+00:00,5.24,0.05
2024-03-01 00:00:00+00:00,5.24,0.05


In [330]:
three_month_tbill_csv = three_month_tbill_csv.rename(columns={'value':'3_m_tbill_yield'})

In [381]:
print(three_month_tbill_csv.describe())

       3_m_tbill_yield  3m_tbill
count            53.00     53.00
mean              2.05      0.02
std               2.20      0.02
min               0.02      0.00
25%               0.09      0.00
50%               0.98      0.01
75%               4.65      0.05
max               5.34      0.05


In [332]:
fed_funds_rate = fed_funds_rate.rename(columns={'EFFR':'effective_funds_rate'})

In [333]:
macro_data = pd.merge(fed_funds_rate,three_month_tbill_csv, on=['date'],how='inner' )

In [334]:
macro_data['effective_funds_rate'] = macro_data['effective_funds_rate'].replace('.', np.nan)

# Convert the column to a numeric type (float), necessary for ffill to work after replacing '.' with np.nan
macro_data['effective_funds_rate'] = pd.to_numeric(macro_data['effective_funds_rate'], errors='coerce')

# Now apply front filling
macro_data['effective_funds_rate'].ffill(inplace=True)

# Display the results


0    1.57
1    1.57
2    0.06
3    0.05
4    0.05
5    0.08
6    0.09
7    0.09
8    0.09
9    0.09
10   0.08
11   0.07
12   0.07
13   0.06
14   0.10
15   0.08
16   0.08
17   0.08
18   0.08
19   0.08
20   0.08
21   0.33
22   0.83
23   1.58
24   2.33
25   2.33
26   3.08
27   3.83
28   4.33
29   4.58
30   4.83
31   5.08
32   5.33
33   5.33
34   5.33
35   5.33
36   5.33
37   5.33
38   5.33
Name: effective_funds_rate, dtype: float64


In [379]:
print(macro_data['effective_funds_rate'].describe())

count   38.00
mean     1.83
std      2.17
min      0.05
25%      0.08
50%      0.22
75%      4.21
max      5.33
Name: effective_funds_rate, dtype: float64


In [335]:
sticky_index.rename(columns={'DATE': 'date', 'CORESTICKM159SFRBATL': 'sticky_cpi'}, inplace=True)

In [336]:
sticky_index.tail()

Unnamed: 0,date,sticky_cpi
47,2023-10-01,4.88
48,2023-11-01,4.69
49,2023-12-01,4.55
50,2024-01-01,4.6
51,2024-02-01,4.4


In [337]:
sticky_index['date']= pd.to_datetime(sticky_index['date'])

In [338]:
sticky_index['date'] = sticky_index['date'].dt.tz_localize('UTC')

In [339]:
macro_data = macro_data.merge(sticky_index, on=['date'],how='inner')

In [340]:
macro_data = macro_data.drop(columns=['3m_tbill'])

In [341]:
macro_data.set_index('date',inplace=True)

In [342]:
macro_data.rename_axis('day',inplace=True)

In [343]:
macro_data

Unnamed: 0_level_0,effective_funds_rate,3_m_tbill_yield,sticky_cpi
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-11-01 00:00:00+00:00,1.57,1.54,2.77
2020-01-01 00:00:00+00:00,1.57,1.52,2.79
2020-04-01 00:00:00+00:00,0.06,0.14,2.18
2020-05-01 00:00:00+00:00,0.05,0.13,2.01
2020-06-01 00:00:00+00:00,0.05,0.16,1.99
2020-07-01 00:00:00+00:00,0.08,0.13,2.28
2020-09-01 00:00:00+00:00,0.09,0.11,1.98
2020-10-01 00:00:00+00:00,0.09,0.1,1.75
2020-12-01 00:00:00+00:00,0.09,0.09,1.6
2021-01-01 00:00:00+00:00,0.09,0.08,1.4


In [344]:
crypto_market_data = crypto_market_data.sort_values(by='day')
macro_data = macro_data.sort_values(by='day')



In [345]:
macro_data.tail()

Unnamed: 0_level_0,effective_funds_rate,3_m_tbill_yield,sticky_cpi
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-09-01 00:00:00+00:00,5.33,5.32,4.96
2023-11-01 00:00:00+00:00,5.33,5.27,4.69
2023-12-01 00:00:00+00:00,5.33,5.24,4.55
2024-01-01 00:00:00+00:00,5.33,5.22,4.6
2024-02-01 00:00:00+00:00,5.33,5.24,4.4


In [346]:
macro_and_crypto = pd.merge_asof(crypto_market_data, macro_data, on='day')


In [347]:
macro_and_crypto.shape[0]

857

In [348]:
macro_and_crypto['day'] = macro_and_crypto['day'].dt.tz_convert('UTC')

In [350]:
total_vault_data['day'] = total_vault_data['day'].dt.tz_localize('UTC')

In [351]:
dataset = total_vault_data.merge(macro_and_crypto, on=['day'],how='inner')

In [352]:
nan_rows = dataset[dataset.isna().any(axis=1)]
print(nan_rows)


                          day  eth_a_vault_cumulative_collateral  \
0   2020-11-04 00:00:00+00:00                       2,558,593.17   
1   2020-11-05 00:00:00+00:00                       2,557,079.27   
2   2020-11-06 00:00:00+00:00                       2,557,674.49   
3   2020-11-09 00:00:00+00:00                       2,448,558.85   
4   2020-11-10 00:00:00+00:00                       2,434,900.11   
..                        ...                                ...   
349 2022-03-25 00:00:00+00:00                       1,764,503.58   
350 2022-03-28 00:00:00+00:00                       1,763,916.77   
351 2022-03-29 00:00:00+00:00                       1,765,298.96   
352 2022-03-30 00:00:00+00:00                       1,762,742.40   
353 2022-03-31 00:00:00+00:00                       1,412,615.00   

     eth_a_vault_safety_price  eth_a_vault_usd_safety_value  \
0                      268.33                686,538,775.41   
1                      275.63                704,816,281.

In [353]:
# Checking for columns with NaN values and their count of NaNs
nan_columns = dataset.isna().sum()
print(nan_columns[nan_columns > 0])


Total Revenues_rolling_avg_pct_chg_lag_10       18
Total Revenues_volatility_pct_chg_lag_10        18
Total Revenues_rolling_avg_pct_chg_lag_11       40
Total Revenues_volatility_pct_chg_lag_11        40
Total Revenues_rolling_avg_pct_chg_lag_12       59
Total Revenues_volatility_pct_chg_lag_12        59
Total Expenses_pct_chg                          40
Total Expenses_rolling_avg_pct_chg             101
Total Expenses_volatility_pct_chg              101
Total Expenses_rolling_avg_pct_chg_lag_1       122
Total Expenses_volatility_pct_chg_lag_1        122
Total Expenses_rolling_avg_pct_chg_lag_2       142
Total Expenses_volatility_pct_chg_lag_2        142
Total Expenses_rolling_avg_pct_chg_lag_3       164
Total Expenses_volatility_pct_chg_lag_3        164
Total Expenses_rolling_avg_pct_chg_lag_4       185
Total Expenses_volatility_pct_chg_lag_4        185
Total Expenses_rolling_avg_pct_chg_lag_5       207
Total Expenses_volatility_pct_chg_lag_5        207
Total Expenses_rolling_avg_pct_

In [354]:
dataset_no_nan = dataset.fillna(0)

In [355]:
dataset_no_nan.isna().any().sum()

0

In [356]:
print(list(dataset_no_nan['dai_maturity_outflow_surplus_buffer_1-block']))

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [357]:
print(list(dataset_no_nan.columns))

['day', 'eth_a_vault_cumulative_collateral', 'eth_a_vault_safety_price', 'eth_a_vault_usd_safety_value', 'eth_a_vault_dai_ceiling', 'eth_a_vault_dai_floor', 'eth_a_vault_safety_collateral_ratio', 'eth_a_vault_market_price', 'eth_a_vault_debt_balance', 'eth_a_vault_liquidation_penalty', 'eth_a_vault_liquidation_ratio', 'eth_a_vault_daily_revenues', 'eth_a_vault_annualized stability fee', 'eth_a_vault_annualized_revenues', 'eth_a_vault_dart', 'eth_a_vault_total_ann_revenues', 'eth_a_vault_prev_dai_ceiling', 'eth_a_vault_status', 'eth_a_vault_market_collateral_ratio', 'eth_a_vault_collateral_usd', 'eth_a_vault_hypothetical_dai_ceiling', 'eth_a_vault_market_price_7d_ma', 'eth_a_vault_collateral_usd_7d_ma', 'eth_a_vault_debt_balance_7d_ma', 'eth_a_vault_safety_collateral_ratio_7d_ma', 'eth_a_vault_market_collateral_ratio_7d_ma', 'eth_a_vault_daily_revenues_7d_ma', 'eth_a_vault_market_price_30d_ma', 'eth_a_vault_collateral_usd_30d_ma', 'eth_a_vault_debt_balance_30d_ma', 'eth_a_vault_cumulati

In [364]:
dataset_no_nan['eth_a_vault_status']

0      Open
1      Open
2      Open
3      Open
4      Open
       ... 
831    Open
832    Open
833    Open
834    Open
835    Open
Name: eth_a_vault_status, Length: 836, dtype: object

## Running Correlations for Feature Engineering

In [365]:
numeric_dataset = dataset_no_nan.select_dtypes(include=[np.number])
correlations = numeric_dataset.corr()


In [367]:
# Assuming 'df' is your DataFrame
non_numeric_columns = dataset_no_nan.select_dtypes(exclude=[np.number])

# This will show you the columns that do not contain numeric data
print(non_numeric_columns.columns)


Index(['day', 'eth_a_vault_status'], dtype='object')


In [369]:
correlations = numeric_dataset.corr()

collateral_target_correlations = correlations['eth_a_vault_cumulative_collateral'].drop('eth_a_vault_cumulative_collateral')
collateral_sorted_correlations = collateral_target_correlations.sort_values(ascending=False)

# Display the sorted correlations
print(collateral_sorted_correlations)

eth_a_vault_cumulative_collateral_30d_ma   0.99
eth_a_vault_cumulative_collateral_lag30    0.98
debt_ratio_Lag_2                           0.87
debt_ratio_Lag_1                           0.87
debt_ratio_Lag_3                           0.86
                                           ... 
1 - PnL                                     NaN
2 - Assets                                  NaN
2.8 - Operating Reserves                    NaN
3 - Liabilities & Equity                    NaN
3.8 - Equity (Operating Reserves)           NaN
Name: eth_a_vault_cumulative_collateral, Length: 393, dtype: float64


In [370]:
for index, value in collateral_sorted_correlations.items():
    print(f"{index:50} {value}")

eth_a_vault_cumulative_collateral_30d_ma           0.9943398210444682
eth_a_vault_cumulative_collateral_lag30            0.9805547497547766
debt_ratio_Lag_2                                   0.8736438996467315
debt_ratio_Lag_1                                   0.8707493279881601
debt_ratio_Lag_3                                   0.8639384287062543
cumulative_expenses                                0.8509625923156909
debt_ratio_Lag_4                                   0.8473905860144817
debt_ratio_Lag_12                                  0.8427451211411413
debt_ratio                                         0.8422797707986605
debt_ratio_Lag_7                                   0.8396211743519353
debt_ratio_Lag_8                                   0.8394948356457375
debt_ratio_Lag_11                                  0.8386780413470225
debt_ratio_Lag_9                                   0.8364084137718227
debt_ratio_Lag_10                                  0.8356662609559448
debt_ratio_Lag_6    

In [432]:

# Select correlations of all features with the target variable, excluding the target variable itself
etha_target_correlations = correlations['eth_a_vault_liquidation_ratio'].drop('eth_a_vault_liquidation_ratio')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
etha_sorted_correlations = etha_target_correlations.sort_values(ascending=False)

for index, value in etha_sorted_correlations.items():
    print(f"{index:50} {value}")

ROE_Lag_2                                          0.8564102814077655
ROE_Lag_3                                          0.8421159335778359
b_s_Equity_rolling_avg                             0.8300568243606087
b_s_Equity                                         0.8271204896706157
ROE_Lag_5                                          0.7953387104636003
debt_ratio_Lag_6                                   0.7942735290480126
debt_ratio_Lag_7                                   0.7884222499276735
ROE_Lag_4                                          0.7796483453861516
ROE_Lag_1                                          0.7774006733018913
debt_ratio_Lag_5                                   0.7773651106695469
ROE_Lag_6                                          0.7767961913616538
debt_ratio_Lag_8                                   0.7725618759404949
eth_a_vault_cumulative_collateral_30d_ma           0.7615768361141249
eth_a_vault_cumulative_collateral_lag30            0.7596304842848594
eth_a_vault_cumulati

In [371]:
# Select correlations of all features with the target variable, excluding the target variable itself
dsr_target_correlations = correlations['dsr_rate'].drop('dsr_rate')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
dsr_sorted_correlations = dsr_target_correlations.sort_values(ascending=False)

for index, value in dsr_sorted_correlations.items():
    print(f"{index:50} {value}")

dsr_interest                                       0.957173341735613
dai_percent_in_dsr                                 0.8861496852741225
dsr_balance                                        0.8675209608131664
where_is_dai_Dai Savings                           0.8547481095819917
b_s_Real-World_Assets_rolling_avg                  0.806432549779197
b_s_Real-World Assets                              0.8001473169042703
psm_lifetime_turnover                              0.7607235766295427
eth_a_vault_annualized stability fee               0.7106474267890815
effective_funds_rate                               0.7010858581105622
cumulative_revenues                                0.6925449520987872
1.1 - Lending Revenues                             0.6774585340586271
3_m_tbill_yield                                    0.6496205442713194
cumulative_net_income                              0.5571337307824817
s&p_500_market_Close                               0.5546886211827123
dai_total_balance     

In [372]:
# Select correlations of all features with the target variable, excluding the target variable itself
stability_fee_target_correlations = correlations['eth_a_vault_annualized stability fee'].drop('eth_a_vault_annualized stability fee')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
stability_fee_target_correlations = stability_fee_target_correlations.sort_values(ascending=False)

for index, value in stability_fee_target_correlations.items():
    print(f"{index:50} {value}")

eth_a_vault_annualized stability fee_lag30         0.8263602331280309
eth_a_vault_annualized stability fee_90d_ma        0.8064729805305909
dsr_interest                                       0.7126069357733581
dsr_rate                                           0.7106474267890815
1.1 - Lending Revenues                             0.6927947297890712
ROA_Lag_1                                          0.661023675355134
profit_margin_Lag_1                                0.661023675355134
Total_Revenues_Lag_1                               0.6363416597811982
Total Revenues                                     0.6353741235591359
Net_Income_Lag_1                                   0.6281906347395354
ROA                                                0.6249489719613421
profit_margin                                      0.6249489719613421
1.9 - Net Income                                   0.594461311734676
dai_percent_in_dsr                                 0.5934799655401125
eth_a_vault_total_ann_r

In [373]:
# Select correlations of all features with the target variable, excluding the target variable itself
safety_ratio_target_correlations = correlations['eth_a_vault_safety_collateral_ratio'].drop('eth_a_vault_safety_collateral_ratio')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
safety_ratio_target_correlations = safety_ratio_target_correlations.sort_values(ascending=False)

for index, value in safety_ratio_target_correlations.items():
    print(f"{index:50} {value}")

eth_a_vault_market_collateral_ratio                0.9971050651432883
eth_a_vault_safety_collateral_ratio_7d_ma          0.962860916302392
eth_a_vault_market_collateral_ratio_7d_ma          0.9606967723774925
eth_a_vault_safety_collateral_ratio_30d_ma         0.8729676723358981
eth_a_vault_market_collateral_ratio_30d_ma         0.8708217361979157
eth_a_vault_safety_collateral_ratio_lag30          0.6249188397208192
eth_a_vault_market_collateral_ratio_lag30          0.614666336421359
psm_lifetime_turnover                              0.6049245642873874
3_m_tbill_yield                                    0.5839417913763818
effective_funds_rate                               0.5765157479244135
cumulative_revenues                                0.5715081710021795
cumulative_net_income                              0.5080156413328435
dai_total_balance                                  0.48388217558818014
dai_percent_out_dsr                                0.4705578350142762
dsr_rate             

In [374]:
# Select correlations of all features with the target variable, excluding the target variable itself
dai_target_correlations = correlations['dai_total_balance'].drop('dai_total_balance')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
dai_sorted_correlations = dai_target_correlations.sort_values(ascending=False)

for index, value in dai_sorted_correlations.items():
    print(f"{index:50} {value}")

dai_circulating                                    0.9803441280360398
dai_percent_out_dsr                                0.9759780719719016
3_m_tbill_yield                                    0.9212538850263818
effective_funds_rate                               0.9001627619326987
psm_lifetime_turnover                              0.8622720662998148
cumulative_revenues                                0.7351517263582567
sticky_cpi                                         0.7311772030422192
b_s_Real-World Assets                              0.7044493365181824
b_s_Real-World_Assets_rolling_avg                  0.7011917979014107
cumulative_net_income                              0.6439462252846612
eth_a_vault_safety_collateral_ratio_lag30          0.6331392918478008
b_s_Others_assets_rolling_avg                      0.6300646197827272
b_s_Others assets                                  0.6236625662218369
eth_a_vault_market_collateral_ratio_lag30          0.6023763633666922
eth_a_vault_safety_c

In [427]:
# Select correlations of all features with the target variable, excluding the target variable itself
sb_target_correlations = correlations['daily_surplus_buffer'].drop('daily_surplus_buffer')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
sb_sorted_correlations = sb_target_correlations.sort_values(ascending=False)

for index, value in sb_sorted_correlations.items():
    print(f"{index:50} {value}")

dai_maturity_outflow_surplus_buffer_1-year         0.9901743483451282
3.7 - Equity (Surplus Buffer)                      0.9746395333866137
where_is_dai_EOA                                   0.93990438227667
psm_lifetime_fees                                  0.865636125322519
eth_a_vault_dai_floor                              0.8470973288356538
sticky_cpi                                         0.8466611655413736
dai_maturity_outflow_1-year                        0.7876869760070996
dai_maturity_outflow_dai_only_1-year               0.781035815729949
b_s_Stablecoins_rolling_avg                        0.7539390533409592
b_s_Stablecoins                                    0.7467853841164048
b_s_Others assets                                  0.7457914770305075
b_s_Others_assets_rolling_avg                      0.7406027961038805
2.2 - Trading Assets                               0.7181724704348985
psm_balance                                        0.7141839636969732
cumulative_net_income   

In [431]:

# Select correlations of all features with the target variable, excluding the target variable itself
ni_target_correlations = correlations['1.9 - Net Income'].drop('1.9 - Net Income')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
ni_sorted_correlations = ni_target_correlations.sort_values(ascending=False)

for index, value in ni_sorted_correlations.items():
    print(f"{index:50} {value}")

Total Revenues                                     0.9124313875089496
ROA                                                0.8894602971029851
profit_margin                                      0.8894602971029851
1.1 - Lending Revenues                             0.8083327971717559
eth_a_vault_total_ann_revenues                     0.6900794385437758
eth_a_vault_annualized stability fee_lag30         0.632369301082226
eth_a_vault_annualized stability fee_90d_ma        0.6315090027654996
eth_a_vault_annualized stability fee               0.594461311734676
btc_market_Close_30d_ma                            0.5855675241300378
ROE                                                0.5826457835955418
eth_a_vault_annualized_revenues                    0.566910764875092
mkr_market_Close_7d_ma                             0.5645940375248284
eth_a_vault_daily_revenues                         0.5620243419178836
Total_Revenues_Lag_1                               0.5599858612439028
eth_a_vault_daily_reven

In [429]:
# Select correlations of all features with the target variable, excluding the target variable itself
dai_market_Volume_target_correlations = correlations['dai_market_Volume'].drop('dai_market_Volume')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
dai_market_Volume_sorted_correlations = dai_market_Volume_target_correlations.sort_values(ascending=False)

for index, value in dai_market_Volume_sorted_correlations.items():
    print(f"{index:50} {value}")

dai_market_Volume_7d_ma                            0.7694704316998318
eth_market_Volume                                  0.6064953242078771
dai_market_Volume_30d_ma                           0.6034313873288162
b_s_Crypto-Loans_rolling_avg                       0.5548466313272022
b_s_Crypto-Loans                                   0.5463016005652909
ROA_Lag_7                                          0.5246271522912113
profit_margin_Lag_7                                0.5246271522912113
eth_market_Volume_7d_ma                            0.520872484773236
eth_a_vault_collateral_usd_7d_ma                   0.507558247294655
eth_a_vault_debt_balance_7d_ma                     0.4900718803385421
profit_margin_Lag_6                                0.4892433932561513
ROA_Lag_6                                          0.4892433932561513
eth_a_vault_prev_dai_ceiling                       0.48857051141937086
eth_a_vault_collateral_usd_30d_ma                  0.4870455458711481
eth_a_vault_dai_ceili

In [428]:
# Select correlations of all features with the target variable, excluding the target variable itself
dai_treasury_target_correlations = correlations['where_is_dai_Treasury'].drop('where_is_dai_Treasury')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
dai_t_sorted_correlations = dai_treasury_target_correlations.sort_values(ascending=False)

for index, value in dai_t_sorted_correlations.items():
    print(f"{index:50} {value}")

Total_Revenues_Lag_8                               0.5519982089582511
daily_surplus_buffer                               0.538787666054152
dai_maturity_outflow_surplus_buffer_1-year         0.5053544635624468
3.7 - Equity (Surplus Buffer)                      0.4962084291367374
eth_a_vault_dai_floor                              0.4769258813378755
sticky_cpi                                         0.4713602581725595
b_s_Stablecoins_rolling_avg                        0.46373511686009977
b_s_Stablecoins                                    0.45653473238809955
psm_balance                                        0.448449977413799
where_is_dai_EOA                                   0.4416666045418965
2.2 - Trading Assets                               0.42553995011002044
1.9 - Net Income_rolling_avg_pct_chg_lag_12        0.4175115156840678
Net_Income_Lag_9                                   0.4152861613104318
dai_circulating                                    0.40430640532545753
Total_Revenues_Lag

In [426]:
# Select correlations of all features with the target variable, excluding the target variable itself
dai_p_target_correlations = correlations['dai_market_Close'].drop('dai_market_Close')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
dai_p_sorted_correlations = dai_p_target_correlations.sort_values(ascending=False)

for index, value in dai_p_sorted_correlations.items():
    print(f"{index:50} {value}")

debt_to_equity_Lag_7                               0.6120168988268051
debt_to_equity_Lag_2                               0.5857667436953803
debt_to_equity_Lag_3                               0.5789025544376855
ROE_Lag_1                                          0.5578778179979771
debt_to_equity_Lag_4                               0.5533278086345387
ROE_Lag_2                                          0.5385222248935791
b_s_Equity_rolling_avg                             0.5301078337533965
b_s_Equity                                         0.5293969791935087
debt_to_equity_Lag_6                               0.5258437003635129
debt_to_equity_Lag_1                               0.5173824695324983
eth_a_vault_liquidation_ratio                      0.5089295456879575
dai_market_Close_daily_returns                     0.5044002738393341
Total Revenues_volatility_pct_chg_lag_1            0.5033753203889272
Total Revenues_rolling_avg_pct_chg_lag_1           0.5025205109840889
debt_ratio_Lag_3    

In [375]:
# Select correlations of all features with the target variable, excluding the target variable itself
mkr_target_correlations = correlations['mkr_market_Close'].drop('mkr_market_Close')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
mkr_target_correlations = mkr_target_correlations.sort_values(ascending=False)

for index, value in mkr_target_correlations.items():
    print(f"{index:50} {value}")

mkr_market_Close_7d_ma                             0.9832239084323248
mkr_market_Close_30d_ma                            0.8996578704202103
eth_a_vault_hypothetical_dai_ceiling               0.837207589354274
eth_a_vault_annualized_revenues                    0.8365616806125646
eth_a_vault_collateral_usd                         0.8303355791576112
eth_a_vault_daily_revenues                         0.8267471989457971
eth_a_vault_usd_safety_value                       0.8229993652733495
eth_a_vault_collateral_usd_7d_ma                   0.8191820582094417
btc_market_Close_30d_ma                            0.8128904346440006
eth_a_vault_daily_revenues_7d_ma                   0.8043427714790372
btc_market_Close_7d_ma                             0.791415055963069
eth_a_vault_collateral_usd_30d_ma                  0.7825159607810909
btc_market_Close                                   0.7808343654982153
eth_a_vault_daily_revenues_30d_ma                  0.7777093293412046
eth_a_vault_dart      

In [None]:
# Select correlations of all features with the target variable, excluding the target variable itself
mkr_target_correlations = correlations['mkr_market_Close'].drop('mkr_market_Close')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
mkr_target_correlations = mkr_target_correlations.sort_values(ascending=False)

for index, value in mkr_target_correlations.items():
    print(f"{index:50} {value}")

Next regression analysis (single, multiple) for target variable and strong correlating features

In your model, which aims to simulate collateral balance for a vault and use Reinforcement Learning (RL) for Maker Protocol financial management with the Dai Savings Rate (DSR) and Stability Fee as part of the action space, the variables can be categorized as follows:

Independent Variables
These are the input features that your model will use to make predictions or decisions. Based on the extensive list of features you've provided and the objectives of your model, independent variables may include:

Market data (e.g., eth_market_Close, btc_market_Close, dai_market_Close)
Operational metrics from the vault (e.g., eth_a_vault_market_price, eth_a_vault_collateral_usd)
Financial metrics (e.g., cumulative_revenues, cumulative_expenses, profit_margin)
DAI metrics (e.g., dai_circulating, dai_percent_in_dsr, dsr_balance)
Macro-economic indicators (e.g., effective_funds_rate, 3_m_tbill_yield, sticky_cpi)
These variables are considered independent because they are assumed not to be influenced by the model's outputs but rather to affect them.

Dependent Variables
These are the outcomes or targets your model aims to predict or optimize. In the context of your model's objectives, the dependent variables could be:

For the simulation segment:

eth_a_vault_cumulative_collateral: This represents the total collateral balance in an ETH vault, which you aim to simulate as a response to changes in policy variables.
For the RL segment:

dsr_rate: The Dai Savings Rate, which is a policy tool MakerDAO uses to influence various aspects of the ecosystem.
stability_fee: The fee applied to vaults, which acts as another policy lever to maintain the peg of DAI to its underlying asset and control supply dynamics.
The choice of dependent and independent variables will depend on the specific objectives of each segment of your model (simulation and RL). The model aims to learn how changes in independent variables (market conditions, operational metrics, financial metrics, macroeconomic indicators, etc.) influence the dependent variables (collateral balance for the simulation part and the optimal DSR and Stability Fee rates for the RL part). This setup allows the model to simulate the impact of policy decisions on the vault's collateral balance and to optimize these decisions to achieve desired outcomes in the MakerDAO ecosystem.

When the Debt Ceiling parameter for a vault type is set lower than the current amount of DAI outstanding (i.e., the total amount of DAI minted from that vault type), it does not directly affect the existing DAI or vaults. Here's what happens:

No Immediate Effect on Existing DAI: Lowering the Debt Ceiling below the current amount of DAI outstanding does not trigger automatic liquidations or directly impact the existing DAI in circulation. The existing DAI remains valid, and holders of DAI are not directly affected by this change in the Debt Ceiling.

Prevention of New DAI Minting: The primary consequence of lowering the Debt Ceiling below the current DAI outstanding is the prevention of new DAI being minted from that specific vault type. This means that users cannot increase their debt (i.e., mint more DAI) from vaults of that type until the total debt is reduced below the new Debt Ceiling level or until the Debt Ceiling is raised again.

Encourages DAI Repayment: By setting a lower Debt Ceiling, the Maker Governance effectively limits the potential for further exposure to the collateral type associated with that vault. This may encourage users to repay some of their DAI debt to free up space under the Debt Ceiling, allowing for new minting activities, or to shift their operations to other vault types with available capacity.

Governance and Risk Management Tool: Lowering the Debt Ceiling can be a proactive measure by Maker Governance to manage risk, especially if there are concerns about overexposure to a particular asset type or if there's a desire to mitigate potential impacts of the 'OSM Timing Attack.' It's a governance tool used to control the risk profile of the MakerDAO system and ensure the stability and security of the DAI peg to the USD.

In summary, setting a Debt Ceiling lower than the current amount of DAI minted does not affect the existing DAI but prevents further minting of DAI from the affected vault type. This measure serves as a risk management and governance tool, allowing Maker Governance to control exposure to specific assets and maintain the system's stability.