In [1]:
import streamlit as st
import pandas as pd
import requests
import numpy as np
import yfinance as yf
from dune_client.client import DuneClient

In [2]:
@st.cache_data()
def fetch_data_from_api(api_url, params=None):
    response = requests.get(api_url, params=params)
    if response.status_code == 200:
        data = response.json()
        if 'rows' in data['result']:
            return pd.DataFrame(data['result']['rows'])
        return data
    else:
        print(f"Failed to retrieve data: {response.status_code}")
        return pd.DataFrame()  # or an empty dict



In [3]:
def fetch_historical_data(api_url, api_key):
    # Use the API key either as a query parameter or in the headers
    params = {'vs_currency': 'usd', 'days': 'max', 'interval': 'daily', 'x_cg_demo_api_key': api_key}
    headers = {'x-cg-demo-api-key': api_key}  # Alternatively, use this header

    response = requests.get(api_url, params=params, headers=headers)

    if response.status_code == 200:
        # Parse the JSON response
        historical_pricedata = response.json()
        # Extract the 'prices' and 'market_caps' data
        historical_price = historical_pricedata['prices']
        market_cap = pd.DataFrame(historical_pricedata['market_caps'], columns=['date', 'marketcap'])

        # Convert the 'timestamp' column from UNIX timestamps in milliseconds to datetime objects
        history = pd.DataFrame(historical_price, columns=['timestamp', 'price'])
        history['date'] = pd.to_datetime(history['timestamp'], unit='ms')
        history.set_index('date', inplace=True)
        history.drop(columns='timestamp', inplace=True)

        vol = pd.DataFrame(historical_pricedata['total_volumes'], columns=['date', 'volume'])
        vol['date'] = pd.to_datetime(vol['date'], unit='ms')
        vol.set_index('date', inplace=True)
        
        return history, market_cap, vol
    else:
        print(f"Failed to retrieve data: {response.status_code}")
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

In [4]:
api_key_dune = st.secrets["api_key"]
api_key_cg = st.secrets["api_key_cg"]
api_key_FRED = st.secrets["FRED_API_KEY"]

In [5]:
dune = DuneClient(api_key_dune)

In [6]:
def fetch_dune_data(num):
    result = dune.get_latest_result(num)
    return pd.DataFrame(result.result.rows)

In [7]:
pd.options.display.float_format = '{:,.5f}'.format

### First, lets get MakerDAO Financial Statements from https://dune.com/steakhouse/makerdao

Balance Sheet

In [8]:
# Balance Sheet
#bs_raw = dune.get_latest_result(2840463)

In [9]:
#bs_df = pd.DataFrame(bs_raw.result.rows)
#bs_df['period'] = pd.to_datetime(bs_df['period'])
#bs_df.set_index('period', inplace=True)
#bs_df.index = bs_df.index.normalize()
#bs_df = bs_df.sort_index()

In [10]:
#bs_df

In [11]:
bs_path = '../data/csv/bs.csv'

In [12]:
#bs_df.to_csv(bs_path)

In [13]:
bs_csv = pd.read_csv(bs_path, index_col='period', parse_dates=True)

In [14]:
#categorizing items as asset, liability, or equity
def categorize_item(item):
    if item in ['Crypto-Loans', 'Real-World Assets', 'Others assets', 'Stablecoins']:
        return 'Assets'
    elif item in ['DAI','DSR']:  # Assuming DAI represents a liability here; adjust according to your accounting rules
        return 'Liabilities'
    elif item == 'Equity':
        return 'Equity'
    else:
        return 'Other'  # For any item not explicitly categorized

# Assuming 'df' is your DataFrame
bs_csv['category'] = bs_csv['item'].apply(categorize_item)



In [15]:
bs_csv = bs_csv.iloc[::-1]

In [16]:
bs_csv.tail()

Unnamed: 0_level_0,balance,item,normalized,category
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-07-01 00:00:00+00:00,,Stablecoins,,Assets
2020-07-01 00:00:00+00:00,-111909502.58181,DAI,-0.77283,Liabilities
2020-07-01 00:00:00+00:00,144805383.35301,Crypto-Loans,1.0,Assets
2020-07-01 00:00:00+00:00,-200158.69712,Equity,-0.00138,Equity
2020-07-01 00:00:00+00:00,,Others assets,,Assets


In [17]:
bs_csv.describe()

Unnamed: 0,balance,normalized
count,8549.0,8549.0
mean,0.0,0.0
std,2676252421.26448,0.47437
min,-9951190219.64641,-0.9943
25%,-81183521.56298,-0.01444
50%,46794.81485,1e-05
75%,1333547688.39506,0.30441
max,6135551191.71599,1.0


In [18]:
pivoted_balance_sheet = bs_csv.pivot(columns='item', values='balance')

In [19]:
# Percent Changes in account balances 
pivoted_balance_sheet['Crypto-Loans_pct_chg'] = pivoted_balance_sheet['Crypto-Loans'].pct_change()
pivoted_balance_sheet['DAI_pct_chg'] = pivoted_balance_sheet['DAI'].pct_change()
pivoted_balance_sheet['DSR_pct_chg'] = pivoted_balance_sheet['DSR'].pct_change()
pivoted_balance_sheet['Equity_pct_chg'] = pivoted_balance_sheet['Equity'].pct_change()
pivoted_balance_sheet['Others_assets_pct_chg'] = pivoted_balance_sheet['Others assets'].pct_change()  # Assuming this is the correct column name
pivoted_balance_sheet['Real-World_Assets_pct_chg'] = pivoted_balance_sheet['Real-World Assets'].pct_change()
pivoted_balance_sheet['Stablecoins_pct_chg'] = pivoted_balance_sheet['Stablecoins'].pct_change()


In [20]:
# Rolling Averages, Standard Deviation
# Define the window size for rolling calculation
window_size = 7  # for example, a 7-day rolling window

# Calculate rolling averages
pivoted_balance_sheet['Crypto-Loans_7d_rolling_avg'] = pivoted_balance_sheet['Crypto-Loans'].rolling(window=window_size).mean()
pivoted_balance_sheet['DAI_7d_rolling_avg'] = pivoted_balance_sheet['DAI'].rolling(window=window_size).mean()
pivoted_balance_sheet['DSR_7d_rolling_avg'] = pivoted_balance_sheet['DSR'].rolling(window=window_size).mean()
pivoted_balance_sheet['Equity_7d_rolling_avg'] = pivoted_balance_sheet['Equity'].rolling(window=window_size).mean()
pivoted_balance_sheet['Others_assets_7d_rolling_avg'] = pivoted_balance_sheet['Others assets'].rolling(window=window_size).mean()
pivoted_balance_sheet['Real-World_Assets_7d_rolling_avg'] = pivoted_balance_sheet['Real-World Assets'].rolling(window=window_size).mean()
pivoted_balance_sheet['Stablecoins_7d_rolling_avg'] = pivoted_balance_sheet['Stablecoins'].rolling(window=window_size).mean()

# Calculate volatility (standard deviation)
pivoted_balance_sheet['Crypto-Loans_7d_rolling_volatility'] = pivoted_balance_sheet['Crypto-Loans_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['DAI_7d_rolling_volatility'] = pivoted_balance_sheet['DAI_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['DSR_7d_rolling_volatility'] = pivoted_balance_sheet['DSR_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['Equity_7d_rolling_volatility'] = pivoted_balance_sheet['Equity_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['Others_assets_7d_rolling_volatility'] = pivoted_balance_sheet['Others_assets_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['Real-World_Assets_7d_rolling_volatility'] = pivoted_balance_sheet['Real-World_Assets_pct_chg'].rolling(window=window_size).std()
pivoted_balance_sheet['Stablecoins_7d_rolling_volatility'] = pivoted_balance_sheet['Stablecoins_pct_chg'].rolling(window=window_size).std()



In [21]:
# Assuming you've already calculated percent changes (_pct_chg)
window_size = 30  # Adjust based on your analysis needs

# Calculate rolling averages and volatilities based on percent changes
for col in ['Crypto-Loans', 'DAI', 'DSR', 'Equity', 'Others_assets', 'Real-World_Assets', 'Stablecoins']:
    pct_chg_col = f'{col}_pct_chg'  # The column names for percent changes you've calculated
    pivoted_balance_sheet[f'{col}_30d_rolling_avg_pct_chg'] = pivoted_balance_sheet[pct_chg_col].rolling(window=window_size).mean()
    pivoted_balance_sheet[f'{col}_30d_volatility_pct_chg'] = pivoted_balance_sheet[pct_chg_col].rolling(window=window_size).std()


In [22]:
pivoted_balance_sheet.columns = [f'b_s_{col}' if col != 'period' else col for col in pivoted_balance_sheet.columns]

In [23]:
pivoted_balance_sheet.tail()

Unnamed: 0_level_0,b_s_Crypto-Loans,b_s_DAI,b_s_DSR,b_s_Equity,b_s_Others assets,b_s_Real-World Assets,b_s_Stablecoins,b_s_Crypto-Loans_pct_chg,b_s_DAI_pct_chg,b_s_DSR_pct_chg,...,b_s_DSR_30d_rolling_avg_pct_chg,b_s_DSR_30d_volatility_pct_chg,b_s_Equity_30d_rolling_avg_pct_chg,b_s_Equity_30d_volatility_pct_chg,b_s_Others_assets_30d_rolling_avg_pct_chg,b_s_Others_assets_30d_volatility_pct_chg,b_s_Real-World_Assets_30d_rolling_avg_pct_chg,b_s_Real-World_Assets_30d_volatility_pct_chg,b_s_Stablecoins_30d_rolling_avg_pct_chg,b_s_Stablecoins_30d_volatility_pct_chg
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-03-17 00:00:00+00:00,2567732340.42011,-3225717987.93427,-1405341920.90512,-68936169.7521,48533.08553,1082824203.89962,1049391001.18623,-0.02106,-0.01047,0.03167,...,0.00766,0.04836,0.00931,0.03991,0.0,0.0,-0.01815,0.0325,0.02895,0.17235
2024-03-18 00:00:00+00:00,2487739660.28346,-3236587097.37393,-1440743920.84188,-67073216.58339,48533.08553,1084684721.76837,1171931319.66186,-0.03115,0.00337,0.02519,...,0.0082,0.04847,0.00852,0.0404,0.0,0.0,-0.01809,0.03253,0.03273,0.17301
2024-03-19 00:00:00+00:00,2442828415.48219,-3237504446.80348,-1399485314.96027,-66776806.15773,48533.08553,1084527447.5354,1176362171.81837,-0.01805,0.00028,-0.02864,...,0.00727,0.04891,0.00854,0.0404,0.0,0.0,-0.01809,0.03253,0.03285,0.17299
2024-03-20 00:00:00+00:00,2358443170.28319,-3225358948.44251,-1373045029.84252,-67160889.56552,48533.08553,1083706110.61496,1223367053.86688,-0.03454,-0.00375,-0.01889,...,0.00601,0.04909,0.00779,0.04023,0.0,0.0,-0.01644,0.03209,0.0276,0.17019
2024-03-21 00:00:00+00:00,2358355394.96955,-3223114058.6577,-1384700394.25705,-67156270.07927,48533.08553,1083706110.61496,1232860684.32398,-4e-05,-0.0007,0.00849,...,0.00684,0.04891,0.00766,0.04025,0.0,0.0,-0.01643,0.03209,0.0302,0.16923


In [24]:
pivoted_balance_sheet.shape[0]

1360

MONTHLY Income Statement/PnL (also includes more detailed balance sheet)

In [25]:
#is_df = fetch_dune_data(2641549) 

In [26]:
#is_df.head()

In [27]:
#is_df_wide = is_df.pivot_table(index='period', columns='item', values='value', aggfunc='sum').reset_index()
#is_df_wide = is_df_wide.iloc[::-1]

In [28]:
#is_df_wide.head()

In [29]:
is_path = '../data/csv/is.csv'

In [30]:
#is_df.to_csv(is_path)

In [31]:
is_csv = pd.read_csv(is_path, index_col='period', parse_dates=True)

In [32]:
cleaned_is = is_csv.drop(columns=['Unnamed: 0','year'])

In [33]:
cleaned_is.describe()

Unnamed: 0,expenses,lending_income,liquidation_income,month,net_income,trading_income,value
count,106.0,106.0,106.0,954.0,106.0,106.0,689.0
mean,-2138542.90391,4914952.32038,851312.42005,6.43396,3835126.96118,207405.12467,1346134004.5149
std,2317856.05771,5635119.84691,3083911.34385,3.59701,6165521.06181,387438.99815,2368584436.45978
min,-13632119.60085,2828.77295,-5787399.49279,1.0,-5601718.87432,0.0,-13632119.60085
25%,-2990860.0,993839.65491,4623.87785,3.0,36867.282,0.0,0.0
50%,-1820097.70969,2965648.32045,42700.72846,6.0,2136663.15839,9359.23882,3044390.27061
75%,0.0,7786060.64006,205622.80539,10.0,6547116.46689,229736.2243,2038968283.82544
max,0.0,25123953.0812,18025406.6245,12.0,23542837.91237,1556244.79182,9429355721.6668


In [34]:
pivoted_income_statement = is_csv.pivot_table(index='period', 
                            columns='item', 
                            values='value', 
                            aggfunc='sum').reset_index()

In [35]:
pivoted_income_statement.drop(columns=['1 - PnL','2 - Assets','2.8 - Operating Reserves','3 - Liabilities & Equity','3.8 - Equity (Operating Reserves)'], inplace=True)

In [36]:
pivoted_income_statement['Total Revenues']= pivoted_income_statement[['1.1 - Lending Revenues', '1.2 - Liquidations Revenues', '1.3 - Trading Revenues']].sum(axis=1)
pivoted_income_statement['Total Expenses'] = pivoted_income_statement[['1.4 - Lending Expenses', '1.5 - Liquidations Expenses', '1.6 - Workforce Expenses']].sum(axis=1)
pivoted_income_statement['profit_margin'] = pivoted_income_statement['1.9 - Net Income'] / pivoted_income_statement['2.9 - Total Assets']
pivoted_income_statement['ROA'] = pivoted_income_statement['1.9 - Net Income'] / pivoted_income_statement['2.9 - Total Assets']
pivoted_income_statement['ROE'] = pivoted_income_statement['1.9 - Net Income'] / pivoted_income_statement['3.7 - Equity (Surplus Buffer)']
pivoted_income_statement['debt_to_equity'] = pivoted_income_statement['3.1 - Liabilities (DAI)'] / pivoted_income_statement['3.7 - Equity (Surplus Buffer)']
pivoted_income_statement['debt_ratio'] = pivoted_income_statement['3.1 - Liabilities (DAI)'] / pivoted_income_statement['2.9 - Total Assets'] 
pivoted_income_statement['cumulative_revenues'] = pivoted_income_statement['Total Revenues'].cumsum()
pivoted_income_statement['cumulative_expenses'] = pivoted_income_statement['Total Expenses'].cumsum()
pivoted_income_statement['cumulative_net_income'] = pivoted_income_statement['1.9 - Net Income'].cumsum()


In [37]:
pivoted_income_statement[['1.9 - Net Income','2.9 - Total Assets']]

item,1.9 - Net Income,2.9 - Total Assets
0,46551.83753,23732990.97703
1,196973.36834,71962891.76456
2,253432.04703,107709249.00283
3,365364.77968,119275347.31669
4,-5601718.87432,83316231.70835
5,50782.20835,99737116.85066
6,36867.282,129988074.3464
7,120628.50811,127330715.57274
8,128524.3843,362999338.47932
9,39460.56869,442396656.59973


In [38]:
pivoted_income_statement.tail()

item,period,1.1 - Lending Revenues,1.2 - Liquidations Revenues,1.3 - Trading Revenues,1.4 - Lending Expenses,1.5 - Liquidations Expenses,1.6 - Workforce Expenses,1.9 - Net Income,2.1 - Crypto Loans,2.2 - Trading Assets,...,Total Revenues,Total Expenses,profit_margin,ROA,ROE,debt_to_equity,debt_ratio,cumulative_revenues,cumulative_expenses,cumulative_net_income
48,2023-11-01,29666040.78753,455.02627,0.0,-6376789.60409,0.0,-2974302.24767,20315403.96204,4636257421.47005,707478489.39263,...,29666495.8138,-9351091.85176,0.0038,0.0038,0.34933,90.88666,0.98912,291977550.61831,-133751913.33975,158225637.27856
49,2023-12-01,14270261.48143,0.0,0.0,-6484200.84138,0.0,-2658930.55662,5127130.08344,4820477732.34982,400493643.70898,...,14270261.48143,-9143131.39799,0.00098,0.00098,0.09786,98.64843,0.98996,306247812.09975,-142895044.73775,163352767.362
50,2024-01-01,20751255.62092,27376.29516,0.0,-5943924.2348,0.0,-5669555.33271,9165152.34857,4321343136.28758,563679392.71587,...,20778631.91608,-11613479.56751,0.00188,0.00188,0.17441,91.9622,0.98924,327026444.01583,-154508524.30526,172517919.71057
51,2024-02-01,29691662.11849,0.0,0.0,-4567709.0373,0.0,-1581115.16882,23542837.91237,4083073663.20968,913091407.04291,...,29691662.11849,-6148824.20612,0.00471,0.00471,0.35349,74.01712,0.98667,356718106.13432,-160657348.51138,196060757.62294
52,2024-03-01,18130771.84133,4623.87785,0.0,-7076544.93907,0.0,-3857879.46039,7200971.31971,3442061505.5845,1232860684.32398,...,18135395.71918,-10934424.39946,0.00154,0.00154,0.1073,68.66294,0.98565,374853501.8535,-171591772.91084,203261728.94266


In [39]:
# Percent Changes in account balances
window_size = 3  # Three months
# Calculate rolling averages and volatilities based on percent changes
for col in ['Total Revenues', 'Total Expenses', '1.9 - Net Income']:
    pct_chg_col = f'{col}_pct_chg'  # Define the percent change column name
    pivoted_income_statement[pct_chg_col] = pivoted_income_statement[col].pct_change()
    # Use the pct_chg_col variable correctly now
    pivoted_income_statement[f'{col}_3m_rolling_avg_pct_chg'] = pivoted_income_statement[pct_chg_col].rolling(window=window_size).mean()
    pivoted_income_statement[f'{col}_3m_volatility_pct_chg'] = pivoted_income_statement[pct_chg_col].rolling(window=window_size).std()
    for lag in range(1,13):
        pivoted_income_statement[f'{col}_3m_rolling_avg_pct_chg_lag_{lag}'] = pivoted_income_statement[f'{col}_3m_rolling_avg_pct_chg'].shift(lag)
        pivoted_income_statement[f'{col}_3m_volatility_pct_chg_lag_{lag}'] = pivoted_income_statement[f'{col}_3m_volatility_pct_chg'].shift(lag)


In [40]:

# Generate lagged features
for lag in range(1, 13):  # From 1 to 12 months
    pivoted_income_statement[f'Total_Revenues_Lag_{lag}m'] = pivoted_income_statement['Total Revenues'].shift(lag)
    pivoted_income_statement[f'Total_Expenses_Lag_{lag}m'] = pivoted_income_statement['Total Expenses'].shift(lag)
    pivoted_income_statement[f'Net_Income_Lag_{lag}m'] = pivoted_income_statement['1.9 - Net Income'].shift(lag)
    pivoted_income_statement[f'profit_margin_Lag_{lag}m'] = pivoted_income_statement['profit_margin'].shift(lag)
    pivoted_income_statement[f'ROA_Lag_{lag}m'] = pivoted_income_statement['ROA'].shift(lag)
    pivoted_income_statement[f'ROE_Lag_{lag}m'] = pivoted_income_statement['ROE'].shift(lag)
    pivoted_income_statement[f'debt_to_equity_Lag_{lag}m'] = pivoted_income_statement['debt_to_equity'].shift(lag)
    pivoted_income_statement[f'debt_ratio_Lag_{lag}m'] = pivoted_income_statement['debt_ratio'].shift(lag)


  pivoted_income_statement[f'debt_ratio_Lag_{lag}m'] = pivoted_income_statement['debt_ratio'].shift(lag)
  pivoted_income_statement[f'Total_Revenues_Lag_{lag}m'] = pivoted_income_statement['Total Revenues'].shift(lag)
  pivoted_income_statement[f'Total_Expenses_Lag_{lag}m'] = pivoted_income_statement['Total Expenses'].shift(lag)
  pivoted_income_statement[f'Net_Income_Lag_{lag}m'] = pivoted_income_statement['1.9 - Net Income'].shift(lag)
  pivoted_income_statement[f'profit_margin_Lag_{lag}m'] = pivoted_income_statement['profit_margin'].shift(lag)
  pivoted_income_statement[f'ROA_Lag_{lag}m'] = pivoted_income_statement['ROA'].shift(lag)
  pivoted_income_statement[f'ROE_Lag_{lag}m'] = pivoted_income_statement['ROE'].shift(lag)
  pivoted_income_statement[f'debt_to_equity_Lag_{lag}m'] = pivoted_income_statement['debt_to_equity'].shift(lag)
  pivoted_income_statement[f'debt_ratio_Lag_{lag}m'] = pivoted_income_statement['debt_ratio'].shift(lag)
  pivoted_income_statement[f'Total_Revenues_Lag

In [41]:
pivoted_income_statement.shape[0]

53

Assets/Revenue Per Type
Coinbase asset type: http://forum.makerdao.com/t/mip81-coinbase-usdc-institutional-rewards/17703/254?u=sebventures


In [42]:
#assets_raw = dune.get_latest_result(58495)

In [43]:
#assets_p_t_ts = pd.DataFrame(assets_raw.result.rows)
#assets_p_t_ts['dt'] = pd.to_datetime(assets_p_t_ts['dt'])
#assets_p_t_ts.set_index('dt', inplace=True)

In [44]:
#assets_p_t_ts.head()

In [45]:
#assets_p_t_ts.to_csv(as_path)

In [46]:
as_path = '../data/csv/as.csv'

In [47]:
as_csv = pd.read_csv(as_path, index_col='dt', parse_dates=True)

In [48]:
as_csv = as_csv.drop(columns=['total_asset'])

Daily Interest Revenues By Vault

In [49]:
#ir_v = fetch_dune_data(3567939) 

In [50]:
#ir_v['period'] = pd.to_datetime(ir_v['period'])
#ir_v.set_index('period', inplace=True)

In [51]:
#ir_v.head()

In [52]:
daily_int_path = '../data/csv/d_int.csv'

In [53]:
#ir_v.to_csv(daily_int_path)

In [54]:
ir_csv = pd.read_csv(daily_int_path, index_col='period', parse_dates=True)

In [55]:
ir_csv.shape[0]

22031

In [56]:

ir_csv = ir_csv.rename_axis('day')

 

In [57]:
ir_csv = ir_csv.rename(columns={'collateral':'ilk'})

In [58]:
ir_csv['ilk'].unique()

array(['ETH-A', 'ETH-B', 'ETH-C', 'WBTC-A', 'WBTC-B', 'WBTC-C',
       'WSTETH-A', 'WSTETH-B', 'RWA002-A', 'RWA013-A', 'DIRECT-SPARK-DAI',
       'RWA014-A', 'RWA005-A', 'RWA012-A', 'RWA015-A', 'RWA007-A',
       'RETH-A', 'RWA003-A', 'GUNIV3DAIUSDC2-A', 'CRVV1ETHSTETH-A',
       'USDC-B', 'LINK-A', 'MATIC-A', 'UNIV2USDCETH-A', 'GNO-A',
       'UNIV2DAIUSDC-A', 'YFI-A', 'RWA004-A', 'GUNIV3DAIUSDC1-A',
       'GUSD-A', 'PAXUSD-A', 'USDC-A', 'DIRECT-AAVEV2-DAI',
       'DIRECT-COMPV2-DAI', 'RWA008-A', 'RENBTC-A', 'MANA-A', 'RWA009-A',
       'RWA001-A', 'UNI-A', 'UNIV2DAIETH-A', 'UNIV2WBTCETH-A',
       'UNIV2WBTCDAI-A', 'RWA-001', 'UNIV2UNIETH-A', 'TUSD-A', 'USDP-A',
       'BAT-A', 'BAL-A', 'ZRX-A', 'COMP-A', 'PSM-GUSD-A', 'AAVE-A',
       'UNIV2LINKETH-A', 'KNC-A', 'LRC-A', 'PSM-USDC-A', 'UNIV2AAVEETH-A',
       'UNIV2DAIUSDT-A', 'UNIV2ETHUSDT-A', 'USDT-A', 'PSM-PAX-A',
       'RWA006-A', 'PAX-A', nan, 'USDC', 'SAI'], dtype=object)

In [59]:
top_vaults = ir_csv.groupby('ilk').sum().sort_values('daily_revenues', ascending=False)

In [60]:
#10 Most Revenue Generating Vaults
top_10_vaults = top_vaults.head(10)

In [61]:
top_10_vaults

Unnamed: 0_level_0,daily_revenues
ilk,Unnamed: 1_level_1
ETH-A,110769338.08915
WBTC-A,41088135.60381
WSTETH-A,22414721.27108
ETH-C,17297706.32896
ETH-B,15374386.82625
WSTETH-B,11554717.4949
USDC-A,7351192.95707
RWA013-A,3241981.73715
WBTC-C,2697915.3242
RWA012-A,2120499.1615


Dai Maturity Profile

Step-by-Step Process:
Tracking DAI Movements:

First, all transactions involving DAI are tracked to understand how DAI moves in and out of wallets. This includes both inflows (adding DAI to a wallet) and outflows (removing DAI from a wallet).
Defining Maturity Buckets:

Maturity buckets are predefined categories based on time durations, such as "1-day", "1-week", "1-month", "1-year", etc. Each bucket represents a hypothesis about how long DAI tends to stay put before being moved again.
Assigning Weights to Buckets:

Weights are assigned to each maturity bucket to reflect assumptions or historical observations about the distribution of DAI across these buckets. For example, if historically 30% of DAI is moved or used within a day, then the "1-day" bucket might get a weight of 0.30 (or 30%).
Applying Weights Based on Wallet Types:

DAI can be held in different types of wallets or contracts, each with its own expected behavior. For example, DAI in a savings contract (like DSR) might be considered more long-term ("1-year"), while DAI in a regular wallet might be more liquid ("1-day" or "1-week"). The weights applied to the DAI in these wallets reflect these expectations.
Calculating DAI Amounts per Bucket:

For each wallet (or DAI holding), the total amount of DAI is distributed across the maturity buckets based on the assigned weights. This means if a wallet has 100 DAI and the "1-day" bucket weight is 30%, then 30 DAI is considered to have a 1-day maturity.
The process is repeated for each wallet and each maturity bucket, based on the specific weights for that wallet type and the total DAI it holds.
Aggregating Across the Ecosystem:

Finally, to get the ecosystem-wide view, the amounts of DAI in each maturity bucket from all wallets are aggregated. This provides a snapshot of how much DAI is considered to be in each maturity bucket across the entire MakerDAO system at any given time.

In [62]:
#d_m = fetch_dune_data(907852)

In [63]:
#d_m['dt'] = pd.to_datetime(d_m['dt'])

In [64]:
#d_m

In [65]:

#d_m.to_csv(d_m_path)


In [66]:
d_m_path = '../data/csv/d_m.csv'

In [67]:
d_m_csv = pd.read_csv(d_m_path, index_col='dt', parse_dates=True)

In [68]:
print(d_m_csv.describe())

       Unnamed: 0             outflow    outflow_dai_only  \
count 9,528.00000         9,528.00000         9,528.00000   
mean  4,763.50000   730,298,227.76490   723,044,786.34445   
std   2,750.64102 1,172,359,348.26991 1,153,238,025.25812   
min       0.00000             0.00000             0.00000   
25%   2,381.75000   107,656,317.15838   107,636,256.58728   
50%   4,763.50000   323,980,629.04069   323,980,629.04069   
75%   7,145.25000   628,916,174.70223   628,916,174.70223   
max   9,527.00000 6,014,959,678.79310 5,952,992,110.29562   

       outflow_surplus_buffer         total_period  
count             9,456.00000          9,528.00000  
mean          7,308,670.66985  4,381,789,366.58941  
std          20,455,276.74713  2,882,217,947.64180  
min          -5,674,219.65270              0.00000  
25%                   0.00000  1,093,258,152.23960  
50%                   0.00000  5,189,669,321.25249  
75%                   0.00000  6,203,283,988.25880  
max          83,550,327.25

In [69]:
clean_dm = d_m_csv.drop(columns=['Unnamed: 0'])

In [70]:
clean_dm.describe()

Unnamed: 0,outflow,outflow_dai_only,outflow_surplus_buffer,total_period
count,9528.0,9528.0,9456.0,9528.0
mean,730298227.7649,723044786.34445,7308670.66985,4381789366.58941
std,1172359348.26991,1153238025.25812,20455276.74713,2882217947.6418
min,0.0,0.0,-5674219.6527,0.0
25%,107656317.15838,107636256.58728,0.0,1093258152.2396
50%,323980629.04069,323980629.04069,0.0,5189669321.25249
75%,628916174.70223,628916174.70223,0.0,6203283988.2588
max,6014959678.7931,5952992110.29562,83550327.25113,10118832636.48147


In [71]:
dai_maturity_df = d_m_csv.pivot_table(
    index='dt',  # or 'period' if your DataFrame's time column is named 'period'
    columns='maturity',
    values=['outflow', 'outflow_dai_only', 'outflow_surplus_buffer'],
    aggfunc='sum'  # or any other aggregation function that fits your needs
)

# Flatten the MultiIndex columns (optional, for cleaner column names)
dai_maturity_df.columns = ['_'.join(col).strip() for col in dai_maturity_df.columns.values]

# Reset the index if you want 'dt' back as a regular column
dai_maturity_df.reset_index(inplace=True)



In [72]:
dai_maturity_df.tail()

Unnamed: 0,dt,outflow_1-block,outflow_1-day,outflow_1-month,outflow_1-week,outflow_1-year,outflow_3-months,outflow_dai_only_1-block,outflow_dai_only_1-day,outflow_dai_only_1-month,outflow_dai_only_1-week,outflow_dai_only_1-year,outflow_dai_only_3-months,outflow_surplus_buffer_1-block,outflow_surplus_buffer_1-day,outflow_surplus_buffer_1-month,outflow_surplus_buffer_1-week,outflow_surplus_buffer_1-year,outflow_surplus_buffer_3-months
1583,2024-03-16 00:00:00+00:00,453757132.16554,453757132.16554,136277964.29642,591913771.82219,2850066310.39846,201065848.96193,453757132.16554,453757132.16554,136277964.29642,591913771.82219,2781519501.85289,201065848.96193,0.0,0.0,0.0,0.0,68546808.54557,0.0
1584,2024-03-17 00:00:00+00:00,455307007.69644,455307007.69644,137425908.656,596032698.73347,2849349624.77721,202759537.36131,455307007.69644,455307007.69644,137425908.656,596032698.73347,2780461988.11065,202759537.36131,0.0,0.0,0.0,0.0,68887636.66657,0.0
1585,2024-03-18 00:00:00+00:00,461095932.80881,461095932.80881,140439203.62959,607501207.94779,2863252281.52928,207205382.40431,461095932.80881,461095932.80881,140439203.62959,607501207.94779,2796227598.03142,207205382.40431,0.0,0.0,0.0,0.0,67024683.49786,0.0
1586,2024-03-19 00:00:00+00:00,455524838.31873,455524838.31873,137117144.95605,595167015.96846,2854314452.32749,202303984.36139,455524838.31873,455524838.31873,137117144.95605,595167015.96846,2787586179.25529,202303984.36139,0.0,0.0,0.0,0.0,66728273.0722,0.0
1587,2024-03-20 00:00:00+00:00,451479837.89191,451479837.89191,135641898.56847,589090233.01061,2833931375.48651,200127391.33053,451479837.89191,451479837.89191,135641898.56847,589090233.01061,2766819019.00652,200127391.33053,0.0,0.0,0.0,0.0,67112356.47999,0.0


In [73]:
dai_maturity_df['dt'] = pd.to_datetime(dai_maturity_df['dt'])
dai_maturity_df.set_index('dt', inplace=True)


start_date = dai_maturity_df.index.min()
end_date = dai_maturity_df.index.max()
date_range = pd.date_range(start=start_date, end=end_date, freq='D')  # 'D' for daily frequency




In [74]:
dai_maturity_df_reindexed = dai_maturity_df.reindex(date_range)

# Forward-fill missing values
dai_maturity_df_reindexed.ffill(inplace=True)

# Optionally, reset the index if you want 'dt' back as a column
dai_maturity_df_reindexed.reset_index(inplace=True)
dai_maturity_df_reindexed.rename(columns={'index': 'day'}, inplace=True)

In [75]:
dai_maturity_df_reindexed.columns = [f'dai_maturity_{col}' if col != 'day' and not col.startswith('dai_maturity_') else col for col in dai_maturity_df_reindexed.columns]

In [76]:
dai_maturity_df_reindexed.tail()

Unnamed: 0,day,dai_maturity_outflow_1-block,dai_maturity_outflow_1-day,dai_maturity_outflow_1-month,dai_maturity_outflow_1-week,dai_maturity_outflow_1-year,dai_maturity_outflow_3-months,dai_maturity_outflow_dai_only_1-block,dai_maturity_outflow_dai_only_1-day,dai_maturity_outflow_dai_only_1-month,dai_maturity_outflow_dai_only_1-week,dai_maturity_outflow_dai_only_1-year,dai_maturity_outflow_dai_only_3-months,dai_maturity_outflow_surplus_buffer_1-block,dai_maturity_outflow_surplus_buffer_1-day,dai_maturity_outflow_surplus_buffer_1-month,dai_maturity_outflow_surplus_buffer_1-week,dai_maturity_outflow_surplus_buffer_1-year,dai_maturity_outflow_surplus_buffer_3-months
1585,2024-03-16 00:00:00+00:00,453757132.16554,453757132.16554,136277964.29642,591913771.82219,2850066310.39846,201065848.96193,453757132.16554,453757132.16554,136277964.29642,591913771.82219,2781519501.85289,201065848.96193,0.0,0.0,0.0,0.0,68546808.54557,0.0
1586,2024-03-17 00:00:00+00:00,455307007.69644,455307007.69644,137425908.656,596032698.73347,2849349624.77721,202759537.36131,455307007.69644,455307007.69644,137425908.656,596032698.73347,2780461988.11065,202759537.36131,0.0,0.0,0.0,0.0,68887636.66657,0.0
1587,2024-03-18 00:00:00+00:00,461095932.80881,461095932.80881,140439203.62959,607501207.94779,2863252281.52928,207205382.40431,461095932.80881,461095932.80881,140439203.62959,607501207.94779,2796227598.03142,207205382.40431,0.0,0.0,0.0,0.0,67024683.49786,0.0
1588,2024-03-19 00:00:00+00:00,455524838.31873,455524838.31873,137117144.95605,595167015.96846,2854314452.32749,202303984.36139,455524838.31873,455524838.31873,137117144.95605,595167015.96846,2787586179.25529,202303984.36139,0.0,0.0,0.0,0.0,66728273.0722,0.0
1589,2024-03-20 00:00:00+00:00,451479837.89191,451479837.89191,135641898.56847,589090233.01061,2833931375.48651,200127391.33053,451479837.89191,451479837.89191,135641898.56847,589090233.01061,2766819019.00652,200127391.33053,0.0,0.0,0.0,0.0,67112356.47999,0.0


MakerDAO Stablecoin Ratio
This can give insights into the proportion of assets held in stablecoins (including DAI) relative to other assets. A higher stablecoin ratio might suggest a preference for stability within the MakerDAO system, which can have implications for DAI's stability.

In [77]:
#stablecoin_ratio_df = fetch_dune_data(58136)

In [78]:
#stablecoin_ratio_df['dt'] = pd.to_datetime(stablecoin_ratio_df['dt'])
#stablecoin_ratio_df.set_index('dt', inplace=True)

In [79]:
#stablecoin_ratio_df.to_csv(stablecoin_ratio_path)

In [80]:
stablecoin_ratio_path = '../data/csv/srp.csv'

In [81]:
stablecoin_ratio_csv = pd.read_csv(stablecoin_ratio_path, index_col='dt', parse_dates=True)

In [82]:
stablecoin_ratio_csv.head()

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 2_level_0,assets_size,assets_stablecoins,assets_usdc,stablecoins_ratio,usdc_ratio
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-03-21 00:00:00+00:00,0,1.0,4692077547.48832,510492745.87887,509792133.9771,0.1088,0.10865
2024-03-20 00:00:00+00:00,1,1.0,4665516334.76502,451367053.86688,450666441.96511,0.09675,0.0966
2024-03-19 00:00:00+00:00,2,1.0,4703718034.83595,404362171.81837,403655435.55115,0.08597,0.08582
2024-03-18 00:00:00+00:00,3,1.0,4744355701.71368,399931319.66186,399224583.39463,0.0843,0.08415
2024-03-17 00:00:00+00:00,4,1.0,4699947545.50595,632391001.18623,631684264.919,0.13455,0.1344


In [83]:
stable_coin_ratios = stablecoin_ratio_csv[['stablecoins_ratio','usdc_ratio']]

In [84]:
stable_coin_ratios = stable_coin_ratios.rename_axis('day')

In [85]:
stable_coin_ratios.index

DatetimeIndex(['2024-03-21 00:00:00+00:00', '2024-03-20 00:00:00+00:00',
               '2024-03-19 00:00:00+00:00', '2024-03-18 00:00:00+00:00',
               '2024-03-17 00:00:00+00:00', '2024-03-16 00:00:00+00:00',
               '2024-03-15 00:00:00+00:00', '2024-03-14 00:00:00+00:00',
               '2024-03-13 00:00:00+00:00', '2024-03-12 00:00:00+00:00',
               ...
               '2019-11-27 00:00:00+00:00', '2019-11-26 00:00:00+00:00',
               '2019-11-25 00:00:00+00:00', '2019-11-24 00:00:00+00:00',
               '2019-11-23 00:00:00+00:00', '2019-11-22 00:00:00+00:00',
               '2019-11-21 00:00:00+00:00', '2019-11-20 00:00:00+00:00',
               '2019-11-19 00:00:00+00:00', '2019-11-18 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='day', length=1586, freq=None)

Maker Peg Stability Module Stats
Data on the Peg Stability Module, which helps maintain DAI's peg to the USD, can be vital. Insights into the inflows, outflows, and balances within the PSM can directly indicate efforts to stabilize DAI.

In [86]:
#psm_stats_df = fetch_dune_data(17216)

In [87]:
#psm_stats_df['date'] = pd.to_datetime(psm_stats_df['date'])

In [88]:
psm_stats_path = '../data/csv/psm.csv'

In [89]:
#psm_stats_df.to_csv(psm_stats_path)

In [90]:
psm_stats_csv = pd.read_csv(psm_stats_path, index_col='date', parse_dates=True)

In [91]:
psm_stats_csv.columns = [f'psm_{col}' if col != 'dt' and not col.startswith('psm_') else col for col in psm_stats_csv.columns]


In [92]:
psm_stats_csv = psm_stats_csv.drop(columns=['psm_Unnamed: 0'])

In [93]:
psm_stats_csv = psm_stats_csv.rename_axis('day')

In [94]:
psm_stats_csv.columns

Index(['psm_change', 'psm_change_excl_rwa', 'psm_change_excl_rwa_30d_avg',
       'psm_change_excl_rwa_7d_avg', 'psm_fees', 'psm_inflow',
       'psm_inflow_exl_rwa', 'psm_lifetime_fees', 'psm_lifetime_turnover',
       'psm_outflow', 'psm_balance', 'psm_turnover'],
      dtype='object')

In [95]:
psm_stats_csv[['psm_balance','psm_fees','psm_inflow','psm_outflow','psm_turnover']].describe()

Unnamed: 0,psm_balance,psm_fees,psm_inflow,psm_outflow,psm_turnover
count,1179.0,1102.0,1102.0,1102.0,1102.0
mean,2495919311.98562,4037.43702,45752972.37799,45289730.32183,91042702.69982
std,1628328770.12384,17575.39274,71460076.00738,62717895.80422,116889284.79574
min,1.0,0.0,0.0,0.0,1.0
25%,673188300.20901,0.0,12117870.97887,9329900.9738,28952809.73599
50%,3041213026.5004,0.0,26787127.56153,29106242.25453,59685795.04156
75%,3652875373.77239,0.32211,54102189.61512,60721834.75167,114257782.06707
max,5965535187.97622,329095.82767,1399943587.76463,828699035.76155,2005261976.79404


Where is dai lets us know how dai is being used; dai in lending could be considered to add to stability 

In [96]:
#where_is_dai_df = fetch_dune_data(54599)

In [97]:
#where_is_dai_df['dt'] = pd.to_datetime(where_is_dai_df['dt'])

In [98]:
wid_path = '../data/csv/wid.csv'

In [99]:
#where_is_dai_df.to_csv(wid_path)

In [100]:
where_is_dai_csv = pd.read_csv(wid_path, index_col='dt', parse_dates=True)

In [101]:
where_is_dai_csv = where_is_dai_csv.rename_axis('day')

In [102]:
print(where_is_dai_csv.describe())

        Unnamed: 0             balance        total_balance
count 11,112.00000        11,112.00000         11,112.00000
mean   5,555.50000   623,630,370.48427  4,989,042,963.87414
std    3,207.90243   885,053,602.90604  2,513,963,972.77145
min        0.00000             0.00000    113,631,008.03312
25%    2,777.75000    37,929,286.63720  4,426,586,805.75519
50%    5,555.50000   252,756,050.88655  5,195,896,461.42494
75%    8,333.25000   686,266,921.16145  6,378,627,017.74390
max   11,111.00000 4,078,699,922.32103 10,080,785,059.40782


In [103]:
where_is_dai_csv.columns = [f'where_is_dai_{col}' if col != 'day' and not col.startswith('where_is_dai_') else col for col in where_is_dai_csv.columns]

In [104]:
where_is_dai_csv = where_is_dai_csv.drop(columns=['where_is_dai_Unnamed: 0'])

In [105]:
where_is_dai_csv.shape[0]

11112

In [106]:
where_is_dai_csv_table = where_is_dai_csv.pivot_table(values='where_is_dai_balance', index='day', columns='where_is_dai_wallet', aggfunc='sum')

In [107]:
where_is_dai_csv_table.describe()

where_is_dai_wallet,Bridge,CeFi,Dai Savings,Dex,EOA,Lending,Other,Treasury
count,1389.0,1389.0,1389.0,1389.0,1389.0,1389.0,1389.0,1389.0
mean,519571584.3381,97793318.98787,311612990.82754,759685160.26328,2313033791.12395,583815304.12475,384965889.60998,18564924.59866
std,479488038.35103,78415911.28473,518564427.53373,660249624.37196,1133804050.65508,565104617.61903,199111750.49807,22963663.96745
min,91.54365,6032365.69533,1167096.23327,5969545.92583,70539903.52468,2382557.63369,7624088.19928,0.0
25%,254354292.48149,22517650.67099,12215114.08115,242906003.63346,1797171880.5804,86534442.3262,256075295.19992,0.0
50%,452459560.65111,83466578.33732,90963507.52642,563571678.16712,2869541236.00474,379804255.54487,384559660.5049,19322535.41463
75%,611193769.10517,161210994.77376,253531898.07589,1035302796.12233,3126200483.95878,1051192009.32218,539753467.20927,27500346.82052
max,2131833654.66169,311873211.3831,1750756886.51841,2603174883.48654,4078699922.32103,1854438972.60497,829237365.55131,155646703.5267


In [108]:
where_is_dai_csv_table.columns = [f'where_is_dai_{col}' if col != 'day' and not col.startswith('where_is_dai_') else col for col in where_is_dai_csv_table.columns]

In [109]:
where_is_dai_csv_table.shape[0]

1389

Daily surplus buffer
Provides information on the surplus buffer in MakerDAO, which is a key financial metric. The surplus buffer acts as a reserve to cover potential system shortfalls and ensures the stability and solvency of the system. This data could be valuable for understanding the financial health and risk management strategies of MakerDAO over time

In [110]:
#daily_surplus_buffer = fetch_dune_data(3567837)

In [111]:
#daily_surplus_buffer['period'] = pd.to_datetime(daily_surplus_buffer['period'])

In [112]:
dsb_path = '../data/csv/dsb.csv'
#daily_surplus_buffer.to_csv(dsb_path)
daily_surplus_buffer_csv = pd.read_csv(dsb_path, index_col='period', parse_dates=True)

In [113]:
daily_surplus_buffer_csv = daily_surplus_buffer_csv.drop(columns=['Unnamed: 0'])

In [114]:
daily_surplus_buffer_csv.describe()

Unnamed: 0,delta_30d,delta_90d,surplus_buffer
count,1579.0,1519.0,1609.0
mean,11341558.39501,11070966.35479,42392130.19224
std,53417133.65263,39917755.40227,30073000.80117
min,-210344528.51881,-93067840.03924,-5674219.91199
25%,-10729783.97915,-10868500.09631,4001728.35768
50%,2241898.68637,3942382.84675,50260545.40284
75%,42026961.48864,32613114.2774,69818697.28523
max,154842096.11319,122533492.57577,83553170.97114


In [115]:
daily_surplus_buffer_csv.columns = [f'daily_surplus_buffer_{col}' if col != 'period' and not col.startswith('surplus_buffer') else col for col in daily_surplus_buffer_csv.columns]

In [116]:
daily_surplus_buffer_csv = daily_surplus_buffer_csv.rename(columns={'surplus_buffer':'daily_surplus_buffer'})


In [117]:
daily_surplus_buffer_csv = daily_surplus_buffer_csv.rename_axis('day')

In [118]:
daily_surplus_buffer_csv.head()

Unnamed: 0_level_0,daily_surplus_buffer_delta_30d,daily_surplus_buffer_delta_90d,daily_surplus_buffer
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-03-27 00:00:00+00:00,-118286288.52743,-23073880.39679,42679254.97383
2024-03-26 00:00:00+00:00,-117968788.92971,-22860798.29552,42972541.23601
2024-03-25 00:00:00+00:00,-114559431.39742,-21963371.45807,43449261.65617
2024-03-24 00:00:00+00:00,-110361924.94238,-20375056.07408,44097497.34994
2024-03-23 00:00:00+00:00,-105856419.39971,-19278637.61949,44733525.24447


Stability Fee history and Vault History - Rates Set by DAO
dart = debt balance

In [119]:
#sf_df = fetch_dune_data(3551110)

In [120]:
#sf_df['period'] = pd.to_datetime(sf_df['period'])

In [121]:
sf_path = '../data/csv/sf.csv'
#sf_df.to_csv(sf_path)
sf_history_csv = pd.read_csv(sf_path, index_col='period', parse_dates=True)

In [122]:
sf_history_csv

Unnamed: 0_level_0,Unnamed: 0,annualized,annualized_revenues,dart,ilk,total_ann_revenues
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-03-21 00:00:00+00:00,0,0.04900,0.00000,0.00000,GNO-A,176643660.32374
2024-03-21 00:00:00+00:00,1,0.00060,42545.92718,70910068.43688,GUNIV3DAIUSDC2-A,176643660.32374
2024-03-21 00:00:00+00:00,2,0.02500,-0.00000,-0.00000,LINK-A,176643660.32374
2024-03-21 00:00:00+00:00,3,0.01500,-0.00000,-0.00000,YFI-A,176643660.32374
2024-03-21 00:00:00+00:00,4,0.01000,0.00000,0.00000,COMP-A,176643660.32374
...,...,...,...,...,...,...
2019-11-01 00:00:00+00:00,95411,,,,WBTC-A,
2019-11-01 00:00:00+00:00,95412,,,,UNIV2UNIETH-A,
2019-11-01 00:00:00+00:00,95413,,,,RWA005-A,
2019-11-01 00:00:00+00:00,95414,,,,WSTETH-B,


In [123]:
sf_history_csv_reset = sf_history_csv.reset_index()

In [124]:
sf_history_csv_clean = sf_history_csv_reset.drop_duplicates(subset=['period', 'ilk'], keep='last')

In [125]:
sf_history_csv_clean = sf_history_csv_clean.drop(columns='Unnamed: 0')

In [126]:
sf_history_csv_clean['period'] = sf_history_csv_clean['period'].dt.date

In [127]:
sf_history_csv_clean = sf_history_csv_clean.rename(columns={'period':'day'})

DAI Savings Rate Historical - Set by DAO

In [128]:
#dsr_rate = fetch_dune_data(3581248)

In [129]:
dsr_rate_path = '../data/csv/dsr.csv'
#dsr_rate.to_csv(dsr_rate_path)
dsr_rate_csv = pd.read_csv(dsr_rate_path, index_col='dt', parse_dates=True)

In [130]:
dsr_rate_csv['dsr_rate'].describe()

count   1,552.00000
mean        0.01459
std         0.02789
min         0.00000
25%         0.00010
50%         0.00010
75%         0.01000
max         0.15000
Name: dsr_rate, dtype: float64

In [131]:
dsr_rate_csv = dsr_rate_csv.drop(columns=['Unnamed: 0'])

In [132]:
dsr_rate_csv['dai_percent_in_dsr'] = dsr_rate_csv['dsr_balance'] / dsr_rate_csv['total_balance']
dsr_rate_csv['dai_percent_out_dsr'] = dsr_rate_csv['non_dsr_balance'] / dsr_rate_csv['total_balance']

In [133]:
dsr_rate_csv = dsr_rate_csv.rename_axis('day')

In [134]:
print(dsr_rate_csv.describe())

              dsr_balance    dsr_rate     non_dsr_balance  \
count         1,552.00000 1,552.00000         1,552.00000   
mean    293,268,678.28110     0.01459 4,181,101,606.37370   
std     508,781,220.44869     0.02789 2,795,421,701.76920   
min       1,167,096.23327     0.00000    42,257,176.08489   
25%      13,831,047.69404     0.00010 1,494,884,857.88371   
50%      78,372,158.20967     0.00010 4,361,860,196.87080   
75%     208,537,359.70928     0.01000 6,149,889,671.03570   
max   1,750,756,886.51841     0.15000 9,944,328,573.42844   

             total_balance  dai_percent_in_dsr  dai_percent_out_dsr  
count          1,552.00000         1,552.00000          1,552.00000  
mean   4,474,370,284.65481             0.12679              0.87321  
std    2,786,126,412.57020             0.16384              0.16384  
min       72,614,957.57550             0.00015              0.37988  
25%    1,501,681,100.28274             0.00701              0.71799  
50%    5,130,678,028.50355    

dsr flows

In [135]:
#dsr_flows = fetch_dune_data(1753750)

In [136]:
dsr_flows_path='../data/csv/dsr_flows.csv'
#dsr_flows.to_csv(dsr_flows_path)
dsr_flows_csv = pd.read_csv(dsr_flows_path, index_col='period', parse_dates=True)

In [137]:
dsr_flows_csv.shape[0]

581

In [138]:
dsr_flows_csv = dsr_flows_csv.drop(columns=['Unnamed: 0','balance'])
dsr_flows_csv = dsr_flows_csv.rename_axis('day')

In [139]:
dsr_flows_csv.columns = [f'dsr_{col}' if col != 'day' and not col.startswith('surplus_buffer') else col for col in dsr_flows_csv.columns]

In [140]:
dsr_df = dsr_flows_csv.merge(dsr_rate_csv, on=['day'], how='inner')

In [141]:
dsr_df = dsr_df.rename(columns={'total_balance':'dai_total_balance'})
dsr_df = dsr_df.rename(columns={'non_dsr_balance':'dai_circulating'})

In [142]:
dsr_df.describe()

Unnamed: 0,dsr_inflow,dsr_interest,dsr_outflow,dsr_balance,dsr_rate,dai_circulating,dai_total_balance,dai_percent_in_dsr,dai_percent_out_dsr
count,199.0,420.0,126.0,581.0,581.0,581.0,581.0,581.0,581.0
mean,18211028.03099,128169.53472,17498729.01588,632212219.96053,0.02877,4584444424.95372,5216656644.91426,0.12307,0.87693
std,34118088.2212,126667.15813,43686010.0729,689782183.06167,0.03181,895024557.86261,495795966.5934,0.13229,0.13229
min,1e-05,0.11757,0.0075,1167096.23327,0.0001,3182053005.07452,4336565083.61191,0.00018,0.67419
25%,1995736.74851,3895.16291,625555.75381,35775627.7727,0.0001,3756055150.82835,4854191351.0048,0.0069,0.72043
50%,8698729.53951,156761.36393,6453848.9622,109586897.55483,0.01,4542041297.78562,5174925449.48353,0.02178,0.97822
75%,19085676.63882,210194.811,14916349.43764,1434744378.0496,0.05,5136076330.00988,5391627556.52055,0.27957,0.9931
max,298378513.72837,591988.67214,337868152.03203,1750756886.51841,0.15,6549164445.71382,6550365936.14021,0.32581,0.99982


In [143]:
#cum_bal_and_safetyprice_and_safetyvalue = vault_stats_6_20_through_6_21 

In [144]:
cumbal_stats_path ='../data/csv/cumbal.csv'
#cum_bal_and_safetyprice_and_safetyvalue.to_csv(cumbal_stats_path)
cumbal_csv = pd.read_csv(cumbal_stats_path, index_col = 'day', parse_dates=True)

In [145]:
#debtbal_lpenalty_lratio = fetch_dune_data(3568425)

In [146]:
debtbal_lpenalty_lratio_path = '../data/csv/debtbal_lpenalty_lratio.csv'
#debtbal_lpenalty_lratio.to_csv(debtbal_lpenalty_lratio_path)
debtbal_lpenalty_lratio_path_csv = pd.read_csv(debtbal_lpenalty_lratio_path, index_col = 'day', parse_dates=True)

In [147]:
debtbal_lpenalty_lratio_path_csv.head()

Unnamed: 0_level_0,Unnamed: 0,debt_balance,ilk,liquidation_penalty,liquidation_ratio
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-03-27,0,0.0,BAL-A,0.0,7.48256
2024-03-27,1,-0.0,KNC-A,0.0,12.28093
2024-03-27,2,300484322.01086,ETH-C,0.13,1.7
2024-03-27,3,0.0,GUSD-A,0.13,15.0
2024-03-27,4,0.0,BAT-A,0.0,47.50017


In [148]:
debtbal_lpenalty_lratio_path_csv_reset = debtbal_lpenalty_lratio_path_csv.reset_index()
debtbal_lpenalty_lratio_path_csv_clean = debtbal_lpenalty_lratio_path_csv_reset.drop_duplicates(subset=['day', 'ilk'], keep='last')

In [149]:
debtbal_lpenalty_lratio_path_csv_clean = debtbal_lpenalty_lratio_path_csv_clean.drop(columns='Unnamed: 0')

In [150]:
#dceiling_dfloor_scratio = fetch_dune_data(3568438)

In [151]:
dceiling_dfloor_scratio_path = '../data/csv/dceiling_dfloor_scratio.csv'
#dceiling_dfloor_scratio.to_csv(dceiling_dfloor_scratio_path)
dceiling_dfloor_scratio_csv = pd.read_csv(dceiling_dfloor_scratio_path, index_col = 'day', parse_dates=True)

In [152]:
#vault_market_price = fetch_dune_data(3568453)

In [153]:
vault_market_price_path = '../data/csv/vault_market_price.csv'
#vault_market_price.to_csv(vault_market_price_path)
vault_market_price_csv = pd.read_csv(vault_market_price_path, index_col = 'day', parse_dates=True)

In [154]:
vault_market_price_csv.head()

Unnamed: 0_level_0,Unnamed: 0,ilk,market_price
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-03-27,0,GUNIV3DAIUSDC2-A,203.38646
2024-03-27,1,BAL-A,21.63759
2024-03-27,2,LINK-A,6.27903
2024-03-27,3,UNIV2WBTCDAI-A,31062885.00401
2024-03-27,4,WBTC-A,69767.855


In [155]:
vault_market_price_csv_reset = vault_market_price_csv.reset_index()
vault_market_price_csv_clean = vault_market_price_csv_reset.drop_duplicates(subset=['day', 'ilk'], keep='last')

In [156]:
vault_market_price_csv_clean = vault_market_price_csv_clean.drop(columns=['Unnamed: 0'])

In [157]:
cumbal_csv_reset = cumbal_csv.reset_index()
dceiling_dfloor_scratio_csv_reset = dceiling_dfloor_scratio_csv.reset_index()

In [158]:
cumbal_csv_clean = cumbal_csv_reset.drop_duplicates(subset=['day', 'ilk'], keep='last')
dceiling_dfloor_scratio_csv_clean = dceiling_dfloor_scratio_csv_reset.drop_duplicates(subset=['day', 'ilk'], keep='last')

In [159]:
cumbal_csv_clean = cumbal_csv_clean.drop(columns=['Unnamed: 0'])

In [160]:
dceiling_dfloor_scratio_csv_clean = dceiling_dfloor_scratio_csv_clean.drop(columns=['Unnamed: 0'])

In [161]:
comprehensive_vault_stats = pd.merge(cumbal_csv_clean, dceiling_dfloor_scratio_csv_clean, on=['day', 'ilk'], how='inner')

In [162]:
comprehensive_vault_stats = comprehensive_vault_stats.merge(vault_market_price_csv_clean, on=['day', 'ilk'], how='inner' )

In [163]:
comprehensive_vault_stats = comprehensive_vault_stats.merge(debtbal_lpenalty_lratio_path_csv_clean, on=['day', 'ilk'], how='inner')

In [164]:
comprehensive_vault_stats['day'] = pd.to_datetime(comprehensive_vault_stats['day'])
sf_history_csv_clean['day'] = pd.to_datetime(sf_history_csv_clean['day'])

In [165]:
ir_csv.columns

Index(['ilk', 'daily_revenues'], dtype='object')

In [166]:
comprehensive_vault_stats.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio'],
      dtype='object')

In [167]:
comprehensive_vault_stats = comprehensive_vault_stats.merge(ir_csv, on=['day', 'ilk'], how='inner')

In [168]:
comprehensive_vault_stats = comprehensive_vault_stats.merge(sf_history_csv_clean, on=['day', 'ilk'], how='inner')

In [169]:
def localize_or_convert(df, column_name, timezone='UTC'):
    # Check if the first datetime object in the column is naive or aware
    if df[column_name].dt.tz is None:
        # If naive, use tz_localize
        df[column_name] = df[column_name].dt.tz_localize(timezone)
    else:
        # If aware, use tz_convert
        df[column_name] = df[column_name].dt.tz_convert(timezone)
    return df

# Apply the function to your DataFrames



In [170]:
comprehensive_vault_stats = localize_or_convert(comprehensive_vault_stats, 'day')
dai_maturity_df_reindexed = localize_or_convert(dai_maturity_df_reindexed, 'day')

In [171]:
stable_coin_ratios.index

DatetimeIndex(['2024-03-21 00:00:00+00:00', '2024-03-20 00:00:00+00:00',
               '2024-03-19 00:00:00+00:00', '2024-03-18 00:00:00+00:00',
               '2024-03-17 00:00:00+00:00', '2024-03-16 00:00:00+00:00',
               '2024-03-15 00:00:00+00:00', '2024-03-14 00:00:00+00:00',
               '2024-03-13 00:00:00+00:00', '2024-03-12 00:00:00+00:00',
               ...
               '2019-11-27 00:00:00+00:00', '2019-11-26 00:00:00+00:00',
               '2019-11-25 00:00:00+00:00', '2019-11-24 00:00:00+00:00',
               '2019-11-23 00:00:00+00:00', '2019-11-22 00:00:00+00:00',
               '2019-11-21 00:00:00+00:00', '2019-11-20 00:00:00+00:00',
               '2019-11-19 00:00:00+00:00', '2019-11-18 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='day', length=1586, freq=None)

In [172]:
comprehensive_vault_stats.tail()

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,liquidation_penalty,liquidation_ratio,daily_revenues,annualized,annualized_revenues,dart,total_ann_revenues
21747,2019-11-15 00:00:00+00:00,0.42,ETH-A,119.74059,50.29105,0.0,20.0,0.0,,,,,0.0,0.04,,,
21748,2019-11-15 00:00:00+00:00,10.0,BAT-A,0.16605,1.66048,0.0,20.0,0.0,,,,,0.0,0.04,,,
21749,2019-11-13 00:00:00+00:00,,SAI,0.0,,0.0,0.0,0.0,,,,,,0.0,,,
21750,2019-11-13 00:00:00+00:00,0.42,ETH-A,0.0,0.0,0.0,20.0,0.0,,,,,0.0,0.04,,,
21751,2019-11-13 00:00:00+00:00,10.0,BAT-A,0.0,0.0,0.0,20.0,0.0,,,,,0.0,0.04,,,


In [173]:
comprehensive_vault_stats.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized', 'annualized_revenues', 'dart', 'total_ann_revenues'],
      dtype='object')

In [174]:
comprehensive_vault_stats = comprehensive_vault_stats.rename(columns={'annualized':'annualized stability fee'})

In [175]:
def determine_status(row):
    # The vault is considered closed if 'dai_ceiling' is 0
    # This includes the first day if 'dai_ceiling' starts at 0 or if it drops to 0 from a nonzero value
    if pd.isnull(row['prev_dai_ceiling']) and row['dai_ceiling'] == 0:
        return 'Closed'
    elif row['prev_dai_ceiling'] >= 0 and row['dai_ceiling'] == 0:
        return 'Closed'
    # Check if 'safety_price' is 0
    elif row['safety_price'] == 0:
        return 'Closed'
    else:
        return 'Open'

In [176]:
comprehensive_vault_stats['market_price'] = np.where(comprehensive_vault_stats['liquidation_ratio'].isnull(), comprehensive_vault_stats['safety_price'], comprehensive_vault_stats['market_price'])

In [177]:
comprehensive_vault_stats = comprehensive_vault_stats.sort_values('day')
comprehensive_vault_stats['prev_dai_ceiling'] = comprehensive_vault_stats.groupby('ilk')['dai_ceiling'].shift(1)

In [178]:
comprehensive_vault_stats['status'] = comprehensive_vault_stats.apply(determine_status, axis=1)


In [179]:
comprehensive_vault_stats.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status'],
      dtype='object')

In [180]:
comprehensive_vault_stats['market_collateral_ratio'] = np.where(comprehensive_vault_stats['status']=='Open',((comprehensive_vault_stats['usd_safety_value'] *comprehensive_vault_stats['liquidation_ratio']) / comprehensive_vault_stats['debt_balance']), np.nan)
comprehensive_vault_stats['market_collateral_ratio'] = np.where(comprehensive_vault_stats['debt_balance']==0,0, comprehensive_vault_stats['market_collateral_ratio'])

comprehensive_vault_stats['safety_collateral_ratio'] = np.where(comprehensive_vault_stats['status']=='Open',comprehensive_vault_stats['safety_collateral_ratio'], np.nan)
comprehensive_vault_stats['safety_collateral_ratio'] = np.where(comprehensive_vault_stats['status'] == 'Closed', 0, comprehensive_vault_stats['safety_collateral_ratio'])
comprehensive_vault_stats['safety_collateral_ratio'] = np.where(comprehensive_vault_stats['debt_balance'] <= 0, 0, comprehensive_vault_stats['safety_collateral_ratio'])
comprehensive_vault_stats[['annualized_revenues', 'dart']] = np.where(comprehensive_vault_stats[['annualized_revenues', 'dart']] <= 0, 0, comprehensive_vault_stats[['annualized_revenues', 'dart']])

comprehensive_vault_stats['collateral_usd'] = comprehensive_vault_stats['usd_safety_value'] * comprehensive_vault_stats['liquidation_ratio']
comprehensive_vault_stats['hypothetical_dai_ceiling'] = comprehensive_vault_stats['collateral_usd']* (comprehensive_vault_stats['liquidation_ratio'] / 2)

In [181]:
#replace values when vault is closed to 0?

In [182]:
def clean_small_values(value, threshold=1e-8):
    """
    Set small negative and positive values within a specified threshold to zero.

    Parameters:
    - value: The value to be cleaned.
    - threshold: Values within +/- this threshold will be set to zero.

    Returns:
    - The cleaned value.
    """
    if -threshold < value < threshold:
        return 0.0
    else:
        return value

# Apply this function to the entire DataFrame
comprehensive_vault_stats[['cumulative_collateral','usd_safety_value','collateral_usd','hypothetical_dai_ceiling','debt_balance']] = comprehensive_vault_stats[['cumulative_collateral','usd_safety_value','collateral_usd','hypothetical_dai_ceiling','debt_balance']].applymap(lambda x: clean_small_values(x))


  comprehensive_vault_stats[['cumulative_collateral','usd_safety_value','collateral_usd','hypothetical_dai_ceiling','debt_balance']] = comprehensive_vault_stats[['cumulative_collateral','usd_safety_value','collateral_usd','hypothetical_dai_ceiling','debt_balance']].applymap(lambda x: clean_small_values(x))


In [183]:
comprehensive_vault_stats = comprehensive_vault_stats[comprehensive_vault_stats['ilk'] != 'SAI']

UNIV2ETHUSDT-A is an outlier and will be removed from dataset.  

In [184]:
comprehensive_vault_stats[comprehensive_vault_stats['safety_collateral_ratio'] < 0 ]

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
12071,2021-08-08 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,137635170.14814,0.0,2000000.0,10000.0,-12.94918,192689238.20739,0.0,...,4.86583,0.02,0.0,0.0,38032976.74097,2006530.97126,Open,-18.12886,0.0,0.0
12038,2021-08-09 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,139168140.35708,0.0,2000000.0,10000.0,-13.09341,194835396.49991,0.0,...,4.74731,0.02,0.0,0.0,38121293.97276,2000000.0,Open,-18.33078,0.0,0.0
11980,2021-08-10 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,140063017.89353,0.0,2000000.0,10000.0,-13.1776,196088225.05094,0.0,...,4.75025,0.02,0.0,0.0,38691927.82685,2000000.0,Open,-18.44865,0.0,0.0
11963,2021-08-11 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,141384297.66638,0.0,2000000.0,10000.0,-13.30191,197938016.73293,0.0,...,4.72625,0.02,0.0,0.0,38773987.6761,2000000.0,Open,-18.62268,0.0,0.0
11918,2021-08-12 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,137867965.99974,0.0,2000000.0,10000.0,-12.97109,193015152.39964,0.0,...,4.74149,0.02,0.0,0.0,40333600.8747,2000000.0,Open,-18.15952,0.0,0.0
11864,2021-08-13 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,141650852.35091,0.0,2000000.0,10000.0,-13.32699,198311193.29127,0.0,...,4.74455,0.02,0.0,0.0,39855556.0794,2000000.0,Open,-18.65779,0.0,0.0
11843,2021-08-14 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,142284082.27564,0.0,2000000.0,10000.0,-13.38657,199197715.1859,0.0,...,4.74283,0.02,0.0,0.0,40109465.52254,2000000.0,Open,-18.7412,0.0,0.0
11807,2021-08-15 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,139997139.81457,0.0,2000000.0,10000.0,-13.17141,195995995.7404,0.0,...,4.74583,0.02,0.0,0.0,40161597.87318,2000000.0,Open,-18.43997,0.0,0.0
11765,2021-08-16 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,142001139.88388,0.0,2000000.0,10000.0,-13.35995,198801595.83743,0.0,...,4.74373,0.02,0.0,0.0,40275499.77709,2000000.0,Open,-18.70393,0.0,0.0
11701,2021-08-17 00:00:00+00:00,0.0,UNIV2ETHUSDT-A,140896250.10819,0.0,2000000.0,10000.0,-13.256,197254750.15146,0.0,...,4.77617,0.02,0.0,0.0,40012479.04813,2000000.0,Open,-18.5584,0.0,0.0


In [185]:
comprehensive_vault_stats = comprehensive_vault_stats[comprehensive_vault_stats['ilk'] != 'UNIV2ETHUSDT-A']

In [186]:
comprehensive_vault_stats['debt_balance'].describe()

count          21,138.00000
mean      138,522,602.01186
std       387,265,830.48956
min            -2,528.51095
25%         1,403,187.57520
50%        15,818,062.07805
75%        88,412,143.19861
max     3,361,006,014.83486
Name: debt_balance, dtype: float64

In [187]:
#pd.set_option('display.max_columns', None)  # Show all columns
#pd.set_option('display.max_rows', None)  # Show all rows
#pd.set_option('display.max_colwidth', None)  # Show full content of each column

In [188]:
comprehensive_vault_stats['market_collateral_ratio'].describe()

count                    19,378.00000
mean        160,876,285,659,620.75000
std       7,377,037,911,158,396.00000
min     -32,429,545,705,439,232.00000
25%                           2.10807
50%                           3.13607
75%                           4.01436
max     324,295,457,054,392,320.00000
Name: market_collateral_ratio, dtype: float64

In [189]:
comprehensive_vault_stats['safety_collateral_ratio'].describe()

count                    21,501.00000
mean        144,095,140,219,423.31250
std       6,659,657,583,570,546.00000
min                           0.00000
25%                           1.40884
50%                           1.93921
75%                           2.49605
max     308,852,816,242,278,400.00000
Name: safety_collateral_ratio, dtype: float64

In [190]:
comprehensive_vault_stats[['day','ilk','market_collateral_ratio']][comprehensive_vault_stats['market_collateral_ratio'] > 3.31 ].sort_values('market_collateral_ratio')

Unnamed: 0,day,ilk,market_collateral_ratio
6980,2022-02-10 00:00:00+00:00,RENBTC-A,3.31031
13243,2021-07-06 00:00:00+00:00,RENBTC-A,3.31038
20676,2020-08-24 00:00:00+00:00,KNC-A,3.31044
845,2023-12-07 00:00:00+00:00,ETH-B,3.31056
2408,2023-05-19 00:00:00+00:00,RETH-A,3.31065
...,...,...,...
7720,2022-01-05 00:00:00+00:00,RWA003-A,324295457054392320.00000
8489,2021-11-30 00:00:00+00:00,RWA003-A,324295457054392320.00000
8141,2021-12-18 00:00:00+00:00,RWA003-A,324295457054392320.00000
7463,2022-01-15 00:00:00+00:00,RWA003-A,324295457054392320.00000


In [191]:
comprehensive_vault_stats[['day','ilk','safety_collateral_ratio']][comprehensive_vault_stats['safety_collateral_ratio'] > 100 ]

Unnamed: 0,day,ilk,safety_collateral_ratio
15803,2021-04-21 00:00:00+00:00,RWA002-A,149.50235
15727,2021-04-23 00:00:00+00:00,RWA001-A,32616554.13835
15494,2021-04-30 00:00:00+00:00,RWA001-A,32616554.13835
15464,2021-05-01 00:00:00+00:00,RWA001-A,32616554.13835
15421,2021-05-02 00:00:00+00:00,RWA001-A,32616554.13835
...,...,...,...
6015,2022-05-12 00:00:00+00:00,YFI-A,114.42917
5992,2022-05-13 00:00:00+00:00,YFI-A,111.95205
5637,2022-06-13 00:00:00+00:00,RWA003-A,113.27693
4516,2022-09-30 00:00:00+00:00,RWA003-A,9651650507571200.00000


In [192]:
comprehensive_vault_stats[['day','ilk','debt_balance']][comprehensive_vault_stats['debt_balance'] < 0 ]

Unnamed: 0,day,ilk,debt_balance
3051,2023-03-11 00:00:00+00:00,DIRECT-AAVEV2-DAI,-2528.51095


In [193]:
comprehensive_vault_stats = comprehensive_vault_stats[comprehensive_vault_stats['ilk'] != 'DIRECT-AAVEV2-DAI']

In [194]:
comprehensive_vault_stats[['day','ilk','debt_balance']][comprehensive_vault_stats['debt_balance'] < 0 ]

Unnamed: 0,day,ilk,debt_balance


In [195]:
comprehensive_vault_stats['status'].describe()

count     21499
unique        2
top        Open
freq      19896
Name: status, dtype: object

In [196]:
comprehensive_vault_stats[['liquidation_ratio','liquidation_penalty','annualized stability fee',
       'annualized_revenues', 'dart', 'total_ann_revenues',
       'prev_dai_ceiling']].describe()

Unnamed: 0,liquidation_ratio,liquidation_penalty,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling
count,21004.0,15665.0,21493.0,21136.0,21136.0,21491.0,21445.0
mean,1.48663,-0.15861,0.04059,3167088.51745,138541671.9347,72830970.89317,173188464.60203
std,2.19538,0.4928,0.07107,10267730.84294,387325542.98784,62927201.18464,470887247.03185
min,0.0,-1.0,0.0,0.0,0.0,767.83174,0.0
25%,1.25,-1.0,0.01,4979.85267,1402720.70489,29094709.61787,3306266.02121
50%,1.5,0.13,0.03,172814.73494,15890305.94887,52913177.34149,20000000.0
75%,1.75,0.13,0.045,1678781.64567,88457397.175,99900414.71195,120000000.0
max,100.0,0.13,0.5025,142209506.83116,3361006014.83486,351700458.60804,4361006014.83486


In [197]:
closed_vaults = comprehensive_vault_stats[comprehensive_vault_stats['status'] == 'Closed']

In [198]:
zero_balances = comprehensive_vault_stats[comprehensive_vault_stats['cumulative_collateral'] == 0]

In [199]:
usdt_a = comprehensive_vault_stats[comprehensive_vault_stats['ilk']=='USDT-A']

In [200]:
usdt_a.head()

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
20599,2020-09-08 00:00:00+00:00,368.9,USDT-A,0.66688,246.01112,10000000.0,100.0,1.00009,1.00032,245.98824,...,0.01654,0.06,14.75929,245.98824,1855497.42474,,Open,1.50014,369.01668,276.76251
20588,2020-09-09 00:00:00+00:00,0.0,USDT-A,0.66677,0.0,10000000.0,100.0,0.0,1.00015,0.0,...,0.0,0.06,0.0,0.0,1763209.75651,10000000.0,Open,0.0,0.0,0.0
20567,2020-09-12 00:00:00+00:00,200.0,USDT-A,0.6669,133.38047,10000000.0,100.0,0.0,1.00035,0.0,...,0.11837,0.06,0.0,0.0,2227239.39218,10000000.0,Open,0.0,200.07071,150.05303
20561,2020-09-13 00:00:00+00:00,426826.21851,USDT-A,0.66697,284680.54059,10000000.0,100.0,1.49943,1.00046,189858.91329,...,20.62811,0.06,11391.53434,189858.91329,2223945.551,10000000.0,Open,2.24915,427020.81089,320265.60817
20558,2020-09-14 00:00:00+00:00,531480.10951,USDT-A,0.66749,354757.69091,10000000.0,100.0,1.41949,1.00124,249919.99878,...,44.86158,0.06,14995.19933,249919.99878,3317027.83408,10000000.0,Open,2.12923,532136.53637,399102.40228


In [201]:
zero_balances['ilk'].unique()

array(['USDT-A', 'UNIV2WBTCDAI-A', 'UNIV2DAIUSDT-A', 'RWA008-A'],
      dtype=object)

In [202]:
closed_vaults['ilk'].unique()

array(['BAT-A', 'ETH-A', 'LRC-A', 'COMP-A', 'LINK-A', 'ETH-B', 'BAL-A',
       'GUSD-A', 'TUSD-A', 'UNIV2DAIUSDC-A', 'UNIV2AAVEETH-A',
       'UNIV2WBTCDAI-A', 'UNIV2DAIUSDT-A', 'USDC-A', 'USDT-A', 'PAXUSD-A',
       'USDC-B', 'KNC-A', 'RWA006-A', 'GUNIV3DAIUSDC1-A',
       'UNIV2LINKETH-A', 'ZRX-A', 'AAVE-A', 'CRVV1ETHSTETH-A',
       'UNIV2DAIETH-A', 'UNIV2WBTCETH-A', 'UNI-A', 'RWA009-A', 'RETH-A',
       'RENBTC-A', 'GNO-A', 'GUNIV3DAIUSDC2-A', 'UNIV2USDCETH-A',
       'MATIC-A', 'RWA003-A', 'RWA005-A'], dtype=object)

In [203]:
wbtc = comprehensive_vault_stats[comprehensive_vault_stats['ilk']=='WBTC-A']
eth_a = comprehensive_vault_stats[comprehensive_vault_stats['ilk']=='ETH-A']

In [204]:
eth_a['status']

21750    Closed
21747    Closed
21745    Closed
21743    Closed
21742      Open
          ...  
37         Open
28         Open
18         Open
16         Open
7          Open
Name: status, Length: 1564, dtype: object

In [205]:
eth_a_df = eth_a['dai_ceiling'].to_frame('eth_a_dai_ceiling')

In [206]:
eth_a_zero_dai_ceiling = eth_a[eth_a['dai_ceiling'] == 0]

In [207]:
eth_a_zero_dai_ceiling.head()

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
21750,2019-11-13 00:00:00+00:00,0.42,ETH-A,0.0,0.0,0.0,20.0,0.0,0.0,,...,0.0,0.04,,,,,Closed,,,
21747,2019-11-15 00:00:00+00:00,0.42,ETH-A,119.74059,50.29105,0.0,20.0,0.0,119.74059,,...,0.0,0.04,,,,0.0,Closed,,,
21745,2019-11-16 00:00:00+00:00,0.42,ETH-A,121.69333,51.1112,0.0,20.0,0.0,121.69333,,...,0.0,0.04,,,,0.0,Closed,,,
21743,2019-11-17 00:00:00+00:00,0.43,ETH-A,123.31394,53.02499,0.0,20.0,0.0,123.31394,,...,0.0,0.04,,,,0.0,Closed,,,


In [208]:
eth_a_df.head()

Unnamed: 0,eth_a_dai_ceiling
21750,0.0
21747,0.0
21745,0.0
21743,0.0
21742,50000000.0


In [209]:
wbtc = wbtc.set_index('day')

In [210]:
comprehensive_vault_stats.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status',
       'market_collateral_ratio', 'collateral_usd',
       'hypothetical_dai_ceiling'],
      dtype='object')

In [211]:
comprehensive_vault_stats['ilk'].unique()

array(['BAT-A', 'ETH-A', 'USDC-A', 'WBTC-A', 'USDC-B', 'TUSD-A', 'ZRX-A',
       'KNC-A', 'MANA-A', 'USDT-A', 'PAXUSD-A', 'LRC-A', 'COMP-A',
       'LINK-A', 'ETH-B', 'BAL-A', 'YFI-A', 'GUSD-A', 'UNI-A', 'RENBTC-A',
       'AAVE-A', 'UNIV2DAIETH-A', 'PSM-USDC-A', 'UNIV2USDCETH-A',
       'UNIV2WBTCETH-A', 'UNIV2DAIUSDC-A', 'UNIV2LINKETH-A',
       'UNIV2UNIETH-A', 'UNIV2AAVEETH-A', 'UNIV2WBTCDAI-A',
       'UNIV2DAIUSDT-A', 'ETH-C', 'RWA002-A', 'RWA001-A', 'RWA003-A',
       'RWA005-A', 'RWA006-A', 'RWA004-A', 'MATIC-A', 'PSM-PAX-A',
       'GUNIV3DAIUSDC1-A', 'WSTETH-A', 'WBTC-B', 'WBTC-C', 'PSM-GUSD-A',
       'GUNIV3DAIUSDC2-A', 'CRVV1ETHSTETH-A', 'WSTETH-B', 'RWA009-A',
       'RETH-A', 'RWA008-A', 'GNO-A', 'RWA013-A', 'RWA012-A'],
      dtype=object)

In [212]:
comprehensive_vault_stats['status'].head()

21751    Closed
21750    Closed
21748    Closed
21747    Closed
21746    Closed
Name: status, dtype: object

In [213]:
no_nan_vaults = comprehensive_vault_stats.copy()
no_nan_vaults = no_nan_vaults.fillna(0)

In [214]:
no_nan_vaults.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status',
       'market_collateral_ratio', 'collateral_usd',
       'hypothetical_dai_ceiling'],
      dtype='object')

In [215]:
comprehensive_vault_stats.shape[0]

21499

In [216]:
comprehensive_vault_stats['safety_collateral_ratio'].describe()

count                    21,499.00000
mean        144,108,545,041,993.62500
std       6,659,967,211,665,183.00000
min                           0.00000
25%                           1.40896
50%                           1.93928
75%                           2.49618
max     308,852,816,242,278,400.00000
Name: safety_collateral_ratio, dtype: float64

In [217]:
comprehensive_vault_stats.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status',
       'market_collateral_ratio', 'collateral_usd',
       'hypothetical_dai_ceiling'],
      dtype='object')

In [218]:
comprehensive_vault_stats[comprehensive_vault_stats['ilk']=='ETH-A']

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
21750,2019-11-13 00:00:00+00:00,0.42000,ETH-A,0.00000,0.00000,0.00000,20.00000,0.00000,0.00000,,...,0.00000,0.04000,,,,,Closed,,,
21747,2019-11-15 00:00:00+00:00,0.42000,ETH-A,119.74059,50.29105,0.00000,20.00000,0.00000,119.74059,,...,0.00000,0.04000,,,,0.00000,Closed,,,
21745,2019-11-16 00:00:00+00:00,0.42000,ETH-A,121.69333,51.11120,0.00000,20.00000,0.00000,121.69333,,...,0.00000,0.04000,,,,0.00000,Closed,,,
21743,2019-11-17 00:00:00+00:00,0.43000,ETH-A,123.31394,53.02499,0.00000,20.00000,0.00000,123.31394,,...,0.00000,0.04000,,,,0.00000,Closed,,,
21742,2019-11-18 00:00:00+00:00,26313.38540,ETH-A,118.38667,3115153.98582,50000000.00000,20.00000,1.99867,177.58000,1558617.21545,...,37.68005,0.04000,62331.00644,1558275.08736,145487.20482,0.00000,Open,2.99800,4672730.97873,3504548.23405
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37,2024-03-17 00:00:00+00:00,444063.06945,ETH-A,2504.00000,1111933925.89146,428458674.37487,7500.00000,4.50630,3630.80000,246751192.26092,...,135601.75320,0.15250,37629556.50495,246751192.26092,209313743.79574,428458674.37487,Open,6.53413,1612304192.54261,1168920539.59339
28,2024-03-18 00:00:00+00:00,443694.77113,ETH-A,2439.82534,1082537747.08583,428458674.37487,7500.00000,4.37918,3537.74675,247201188.45861,...,77882.14239,0.15250,37698180.92452,247201188.45861,206241667.89842,428458674.37487,Open,6.34981,1569679733.27445,1138017806.62398
18,2024-03-19 00:00:00+00:00,442822.49879,ETH-A,2262.58621,1001924077.87601,428458674.37487,7500.00000,4.08200,3280.75000,245449240.76560,...,171181.44442,0.15250,37431008.90357,245449240.76560,199379356.92988,428458674.37487,Open,5.91890,1452789912.92021,1053272686.86716
16,2024-03-20 00:00:00+00:00,444855.53146,ETH-A,2402.77586,1068888133.10945,428458674.37487,7500.00000,4.35423,3484.02500,245482433.73907,...,136276.38547,0.15250,37436070.83198,245482433.73907,203058946.36180,428458674.37487,Open,6.31364,1549887793.00870,1123668649.93131


In [219]:
no_nan_vaults[no_nan_vaults['ilk']=='WSTETH-A'].head()

Unnamed: 0,day,cumulative_collateral,ilk,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,...,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,status,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
9079,2021-10-28 00:00:00+00:00,0.0,WSTETH-A,2697.62059,0.0,3000000.0,10000.0,0.0,0.0,0.0,...,0.0,0.04,0.0,0.0,76729520.44093,0.0,Open,0.0,0.0,0.0
9075,2021-10-29 00:00:00+00:00,305.63442,WSTETH-A,2857.23605,873269.67507,3000000.0,10000.0,1.38075,0.0,632462.52782,...,40.06399,0.04,25298.50231,632462.52782,80139053.58503,3000000.0,Open,0.0,0.0,0.0
9062,2021-10-30 00:00:00+00:00,674.45054,WSTETH-A,2811.58248,1896273.32956,4078604.2861,10000.0,1.69554,0.0,1118390.5302,...,128.35596,0.04,44735.62332,1118390.5302,81886606.49163,3000000.0,Open,0.0,0.0,0.0
9056,2021-10-31 00:00:00+00:00,962.89234,WSTETH-A,2730.51006,2629187.20833,4353719.21295,10000.0,1.92146,0.0,1368324.38884,...,99.9623,0.04,54732.97814,1368324.38884,85368386.52635,4078604.2861,Open,0.0,0.0,0.0
9043,2021-11-01 00:00:00+00:00,3417.46354,WSTETH-A,2831.65634,9677082.30557,4443895.32928,10000.0,2.17852,0.0,4442038.79317,...,264.63903,0.04,177681.56013,4442038.79317,84031640.03101,4353719.21295,Open,0.0,0.0,0.0


In [220]:
top_10_ilks = top_10_vaults.index.tolist()

In [221]:
top_10_ilks

['ETH-A',
 'WBTC-A',
 'WSTETH-A',
 'ETH-C',
 'ETH-B',
 'WSTETH-B',
 'USDC-A',
 'RWA013-A',
 'WBTC-C',
 'RWA012-A']

In [222]:
no_nan_vaults.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status',
       'market_collateral_ratio', 'collateral_usd',
       'hypothetical_dai_ceiling'],
      dtype='object')

In [223]:
topvaults = no_nan_vaults[no_nan_vaults['ilk'].isin(top_10_ilks)]

In [224]:
wbtc_a_vault = no_nan_vaults[no_nan_vaults['ilk']=='WBTC-A']
eth_a_vault = no_nan_vaults[no_nan_vaults['ilk']=='ETH-A']
wsteth_a_vault = no_nan_vaults[no_nan_vaults['ilk']=='WSTETH-A']
eth_c_vault = no_nan_vaults[no_nan_vaults['ilk']=='ETH-C']
eth_b_vault = no_nan_vaults[no_nan_vaults['ilk']=='ETH-B']

In [225]:
#We start with eth-a vault, which has long history and has generated most revenues 
eth_a_vault.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status',
       'market_collateral_ratio', 'collateral_usd',
       'hypothetical_dai_ceiling'],
      dtype='object')

In [226]:
eth_a_vault.describe()

Unnamed: 0,cumulative_collateral,safety_price,usd_safety_value,dai_ceiling,dai_floor,safety_collateral_ratio,market_price,debt_balance,liquidation_penalty,liquidation_ratio,daily_revenues,annualized stability fee,annualized_revenues,dart,total_ann_revenues,prev_dai_ceiling,market_collateral_ratio,collateral_usd,hypothetical_dai_ceiling
count,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0,1564.0
mean,1458734.7046,1180.30691,1674815093.21662,854497895.34614,7344.53964,2.65932,1724.99305,693477623.79074,0.06671,1.46703,70427.29059,0.02932,21123475.17488,693604670.67998,51565245.82292,854223944.78707,3.90523,2461159737.81305,1808848479.57215
std,867527.30799,785.15114,1760759531.51142,836076226.19788,5719.59639,0.59356,1141.89464,788907642.18062,0.15642,0.07828,97692.23181,0.02216,27873569.53567,789103240.00125,53855257.9998,836286080.96628,0.84163,2584183106.69035,1897827173.00449
min,0.42,0.0,0.0,0.0,20.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,517750.16129,396.92667,522930296.40556,350558569.73245,500.0,2.23024,595.39,179038619.21015,0.0,1.45,10818.51531,0.015,3031349.39672,179038619.21015,12664699.16828,350558569.73245,3.30004,758614754.08654,550071332.69252
50%,1733410.48161,1171.4312,673651523.57475,421276896.85485,7500.0,2.56129,1708.0925,216232858.04921,0.13,1.45,26452.76292,0.0225,7708878.62686,216270428.75855,35828999.86683,421276896.85485,3.75495,992240131.56515,726560471.73326
75%,2075231.70004,1624.45862,3121970537.42257,1494643974.46489,15000.0,3.03216,2372.85878,1340082473.97318,0.13,1.5,108883.02506,0.04,33206894.4272,1340082473.97318,73813092.26507,1494643974.46489,4.4187,4612763346.08734,3407265929.19947
max,2966468.3351,3298.34483,7428423394.603,3395556201.44285,15000.0,5.26541,4782.6,3077919054.98573,0.13,1.5,1475897.04401,0.1525,142209506.83116,3077919054.98573,351700458.60804,3395556201.44285,7.63485,11142635091.9045,8356976318.92837


In [227]:
eth_a_vault.columns

Index(['day', 'cumulative_collateral', 'ilk', 'safety_price',
       'usd_safety_value', 'dai_ceiling', 'dai_floor',
       'safety_collateral_ratio', 'market_price', 'debt_balance',
       'liquidation_penalty', 'liquidation_ratio', 'daily_revenues',
       'annualized stability fee', 'annualized_revenues', 'dart',
       'total_ann_revenues', 'prev_dai_ceiling', 'status',
       'market_collateral_ratio', 'collateral_usd',
       'hypothetical_dai_ceiling'],
      dtype='object')

In [228]:
#7 day MA
eth_a_vault['market_price_7d_ma'] = eth_a_vault['market_price'].rolling(window=7).mean()
eth_a_vault['collateral_usd_7d_ma'] = eth_a_vault['collateral_usd'].rolling(window=7).mean()
eth_a_vault['debt_balance_7d_ma'] = eth_a_vault['debt_balance'].rolling(window=7).mean()
eth_a_vault['safety_collateral_ratio_7d_ma'] = eth_a_vault['safety_collateral_ratio'].rolling(window=7).mean()
eth_a_vault['market_collateral_ratio_7d_ma'] = eth_a_vault['market_collateral_ratio'].rolling(window=7).mean()
eth_a_vault['daily_revenues_7d_ma'] = eth_a_vault['daily_revenues'].rolling(window=7).mean()

# Calculate 30-day moving averages
eth_a_vault['market_price_30d_ma'] = eth_a_vault['market_price'].rolling(window=30).mean()
eth_a_vault['collateral_usd_30d_ma'] = eth_a_vault['collateral_usd'].rolling(window=30).mean()
eth_a_vault['debt_balance_30d_ma'] = eth_a_vault['debt_balance'].rolling(window=30).mean()
eth_a_vault['cumulative_collateral_30d_ma'] = eth_a_vault['cumulative_collateral'].rolling(window=30).mean()
eth_a_vault['safety_collateral_ratio_30d_ma'] = eth_a_vault['safety_collateral_ratio'].rolling(window=30).mean()
eth_a_vault['market_collateral_ratio_30d_ma'] = eth_a_vault['market_collateral_ratio'].rolling(window=30).mean()
eth_a_vault['daily_revenues_30d_ma'] = eth_a_vault['daily_revenues'].rolling(window=30).mean()

#calculate 90 day ma stability fee
eth_a_vault['annualized stability fee_90d_ma'] = eth_a_vault['annualized stability fee'].rolling(window=90).mean()

# Display the head of the DataFrame to verify the new columns
print(eth_a_vault[['market_price', 'market_price_7d_ma', 'market_price_30d_ma', 
                   'collateral_usd', 'collateral_usd_7d_ma', 'collateral_usd_30d_ma',
                   'debt_balance', 'debt_balance_7d_ma', 'debt_balance_30d_ma']].tail())

    market_price  market_price_7d_ma  market_price_30d_ma      collateral_usd  \
37   3,630.80000         3,808.91852          3,441.49574 1,612,304,192.54261   
28   3,537.74675         3,736.14663          3,466.79463 1,569,679,733.27445   
18   3,280.75000         3,642.18091          3,481.44896 1,452,789,912.92021   
16   3,484.02500         3,565.54163          3,498.75413 1,549,887,793.00870   
7    3,420.85406         3,511.23940          3,513.14360 1,450,111,134.53540   

    collateral_usd_7d_ma  collateral_usd_30d_ma      debt_balance  \
37   1,697,607,736.83609    1,510,593,119.22876 246,751,192.26092   
28   1,661,773,903.88487    1,522,773,914.12774 247,201,188.45861   
18   1,618,456,795.84714    1,530,134,155.59352 245,449,240.76560   
16   1,583,861,447.21174    1,538,972,026.36643 245,482,433.73907   
7    1,549,619,887.15028    1,544,225,805.80555 216,024,445.88726   

    debt_balance_7d_ma  debt_balance_30d_ma  
37   247,744,408.07142    226,233,711.18418  
28   2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault['market_price_7d_ma'] = eth_a_vault['market_price'].rolling(window=7).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault['collateral_usd_7d_ma'] = eth_a_vault['collateral_usd'].rolling(window=7).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault['debt_balan

In [229]:
eth_a_vault['status']

21750    Closed
21747    Closed
21745    Closed
21743    Closed
21742      Open
          ...  
37         Open
28         Open
18         Open
16         Open
7          Open
Name: status, Length: 1564, dtype: object

In [230]:
# Calculate percent change for selected columns
for column in ['debt_balance', 'cumulative_collateral', 
               'safety_price', 'safety_collateral_ratio', 
               'market_collateral_ratio','annualized stability fee','daily_revenues']:
    eth_a_vault[f'{column}_pct_change'] = eth_a_vault[column].pct_change()

# Calculate volatility (standard deviation) of the percent changes over a 7-day rolling window
for column in ['debt_balance_pct_change', 'cumulative_collateral_pct_change', 
               'safety_price_pct_change', 'safety_collateral_ratio_pct_change', 
               'market_collateral_ratio_pct_change','annualized stability fee_pct_change','daily_revenues_pct_change']:
    eth_a_vault[f'{column}_volatility_7d'] = eth_a_vault[column].rolling(window=7).std()

# Calculate lag for selected columns (1-day lag as an example)
for column in ['debt_balance', 'cumulative_collateral', 
               'safety_price', 'safety_collateral_ratio', 
               'market_collateral_ratio','annualized stability fee','daily_revenues']:
    eth_a_vault[f'{column}_lag30'] = eth_a_vault[column].shift(30)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault[f'{column}_pct_change'] = eth_a_vault[column].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault[f'{column}_pct_change'] = eth_a_vault[column].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth_a_vault[f'{column}_pct_change'] = eth_a_vault[column].pct_c

In [231]:
eth_a_vault['status']

21750    Closed
21747    Closed
21745    Closed
21743    Closed
21742      Open
          ...  
37         Open
28         Open
18         Open
16         Open
7          Open
Name: status, Length: 1564, dtype: object

In [232]:
eth_a_vault.columns = [f'eth_a_vault_{col}' if col != 'period' and not col.startswith('day') else col for col in eth_a_vault.columns]

In [233]:
eth_a_vault['eth_a_vault_status']

21750    Closed
21747    Closed
21745    Closed
21743    Closed
21742      Open
          ...  
37         Open
28         Open
18         Open
16         Open
7          Open
Name: eth_a_vault_status, Length: 1564, dtype: object

In [234]:
eth_a_vault.set_index('day', inplace=True)
eth_a_vault = eth_a_vault.drop(columns=['eth_a_vault_ilk'])

In [235]:
eth_a_vault.columns

Index(['eth_a_vault_cumulative_collateral', 'eth_a_vault_safety_price',
       'eth_a_vault_usd_safety_value', 'eth_a_vault_dai_ceiling',
       'eth_a_vault_dai_floor', 'eth_a_vault_safety_collateral_ratio',
       'eth_a_vault_market_price', 'eth_a_vault_debt_balance',
       'eth_a_vault_liquidation_penalty', 'eth_a_vault_liquidation_ratio',
       'eth_a_vault_daily_revenues', 'eth_a_vault_annualized stability fee',
       'eth_a_vault_annualized_revenues', 'eth_a_vault_dart',
       'eth_a_vault_total_ann_revenues', 'eth_a_vault_prev_dai_ceiling',
       'eth_a_vault_status', 'eth_a_vault_market_collateral_ratio',
       'eth_a_vault_collateral_usd', 'eth_a_vault_hypothetical_dai_ceiling',
       'eth_a_vault_market_price_7d_ma', 'eth_a_vault_collateral_usd_7d_ma',
       'eth_a_vault_debt_balance_7d_ma',
       'eth_a_vault_safety_collateral_ratio_7d_ma',
       'eth_a_vault_market_collateral_ratio_7d_ma',
       'eth_a_vault_daily_revenues_7d_ma', 'eth_a_vault_market_price_30d_m

In [236]:
print(list(dai_maturity_df_reindexed['dai_maturity_outflow_surplus_buffer_1-block']))

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [237]:
print(list(dai_maturity_df_reindexed['dai_maturity_outflow_surplus_buffer_1-day']))

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [238]:
print(list(dai_maturity_df_reindexed['dai_maturity_outflow_surplus_buffer_1-month']))

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

dai_maturity_outflow_surplus_buffer_1-block        nan
dai_maturity_outflow_surplus_buffer_1-day          nan
dai_maturity_outflow_surplus_buffer_1-month        nan
dai_maturity_outflow_surplus_buffer_1-week         nan
dai_maturity_outflow_surplus_buffer_3-months       nan
1 - PnL                                            nan
2 - Assets                                         nan
2.8 - Operating Reserves                           nan
3 - Liabilities & Equity                           nan
3.8 - Equity (Operating Reserves)                  nan

In [239]:
eth_a_vault = eth_a_vault.merge(dai_maturity_df_reindexed, on=['day'], how='inner')

In [240]:
stable_coin_ratios.head()

Unnamed: 0_level_0,stablecoins_ratio,usdc_ratio
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-03-21 00:00:00+00:00,0.1088,0.10865
2024-03-20 00:00:00+00:00,0.09675,0.0966
2024-03-19 00:00:00+00:00,0.08597,0.08582
2024-03-18 00:00:00+00:00,0.0843,0.08415
2024-03-17 00:00:00+00:00,0.13455,0.1344


In [241]:
#eth_a_vault = eth_a_vault.merge(stable_coin_ratios, on=['day'], how='inner')

In [242]:
eth_a_vault.head()

Unnamed: 0,day,eth_a_vault_cumulative_collateral,eth_a_vault_safety_price,eth_a_vault_usd_safety_value,eth_a_vault_dai_ceiling,eth_a_vault_dai_floor,eth_a_vault_safety_collateral_ratio,eth_a_vault_market_price,eth_a_vault_debt_balance,eth_a_vault_liquidation_penalty,...,dai_maturity_outflow_dai_only_1-month,dai_maturity_outflow_dai_only_1-week,dai_maturity_outflow_dai_only_1-year,dai_maturity_outflow_dai_only_3-months,dai_maturity_outflow_surplus_buffer_1-block,dai_maturity_outflow_surplus_buffer_1-day,dai_maturity_outflow_surplus_buffer_1-month,dai_maturity_outflow_surplus_buffer_1-week,dai_maturity_outflow_surplus_buffer_1-year,dai_maturity_outflow_surplus_buffer_3-months
0,2019-11-13 00:00:00+00:00,0.42,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-11-15 00:00:00+00:00,0.42,119.74059,50.29105,0.0,20.0,0.0,119.74059,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-11-16 00:00:00+00:00,0.42,121.69333,51.1112,0.0,20.0,0.0,121.69333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-11-17 00:00:00+00:00,0.43,123.31394,53.02499,0.0,20.0,0.0,123.31394,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-11-18 00:00:00+00:00,26313.3854,118.38667,3115153.98582,50000000.0,20.0,1.99867,177.58,1558617.21545,0.0,...,44042.45197,201006.77016,1173199.67285,64980.66684,0.0,0.0,0.0,0.0,90.78599,0.0


In [243]:
start_date = eth_a_vault['day'].min()
end_date = eth_a_vault['day'].max()
date_range = pd.date_range(start=start_date, end=end_date)

psm_full_range_df = pd.DataFrame(index=date_range)



In [244]:
psm_full_range_df.head()

2019-11-13 00:00:00+00:00
2019-11-14 00:00:00+00:00
2019-11-15 00:00:00+00:00
2019-11-16 00:00:00+00:00
2019-11-17 00:00:00+00:00


In [245]:


psm_columns = ['psm_change', 'psm_change_excl_rwa', 'psm_change_excl_rwa_30d_avg', 'psm_change_excl_rwa_7d_avg', 'psm_fees', 'psm_inflow', 'psm_inflow_exl_rwa', 'psm_lifetime_fees', 'psm_lifetime_turnover', 'psm_outflow', 'psm_balance', 'psm_turnover']
for column in psm_columns:
    psm_full_range_df[column] = psm_stats_csv[column]

# Fill missing values with zeros
psm_full_range_df.fillna(0, inplace=True)

In [246]:
eth_a_vault.describe()

Unnamed: 0,eth_a_vault_cumulative_collateral,eth_a_vault_safety_price,eth_a_vault_usd_safety_value,eth_a_vault_dai_ceiling,eth_a_vault_dai_floor,eth_a_vault_safety_collateral_ratio,eth_a_vault_market_price,eth_a_vault_debt_balance,eth_a_vault_liquidation_penalty,eth_a_vault_liquidation_ratio,...,dai_maturity_outflow_dai_only_1-month,dai_maturity_outflow_dai_only_1-week,dai_maturity_outflow_dai_only_1-year,dai_maturity_outflow_dai_only_3-months,dai_maturity_outflow_surplus_buffer_1-block,dai_maturity_outflow_surplus_buffer_1-day,dai_maturity_outflow_surplus_buffer_1-month,dai_maturity_outflow_surplus_buffer_1-week,dai_maturity_outflow_surplus_buffer_1-year,dai_maturity_outflow_surplus_buffer_3-months
count,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,...,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0,1563.0
mean,1459396.78487,1179.55265,1675246787.68156,854770473.22264,7344.44018,2.65806,1723.90804,693783096.07346,0.06667,1.46704,...,121829547.87291,534720838.07232,2643717876.29658,179748513.25511,0.0,0.0,0.0,0.0,43165723.38222,0.0
std,867409.61019,784.83542,1761240268.83814,836274295.11631,5721.4256,0.59166,1141.45336,789067601.73475,0.15646,0.0783,...,86900656.55588,372490872.44869,1759401866.72602,128214083.4431,0.0,0.0,0.0,0.0,30383977.10412,0.0
min,0.42,0.0,0.0,0.0,20.0,0.0,0.0,0.0,-1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5674219.6527,0.0
25%,517894.21274,396.44667,522790141.45999,350558569.73245,500.0,2.23013,594.67,179036518.29517,0.0,1.45,...,42015130.90771,170882210.18753,570286315.21323,61989537.40482,0.0,0.0,0.0,0.0,4001819.24375,0.0
50%,1734175.26095,1171.09511,673540872.02605,421276896.85485,7500.0,2.56118,1707.98,216249923.31576,0.13,1.45,...,118349364.9756,539666807.68135,3212265572.49737,174613817.17711,0.0,0.0,0.0,0.0,53218504.1793,0.0
75%,2075622.02272,1622.45345,3122870291.30778,1496694216.24505,15000.0,3.03136,2370.73243,1340283261.87801,0.13,1.5,...,168159893.31207,742722843.39037,3705032007.03328,248104760.62436,0.0,0.0,0.0,0.0,69846462.0653,0.0
max,2966468.3351,3298.34483,7428423394.603,3395556201.44285,15000.0,5.26541,4782.6,3077919054.98573,0.13,1.5,...,314069539.84859,1342922289.47086,5952992110.29562,463381288.3012,0.0,0.0,0.0,0.0,83550327.25113,0.0


In [247]:
eth_a_vault.columns

Index(['day', 'eth_a_vault_cumulative_collateral', 'eth_a_vault_safety_price',
       'eth_a_vault_usd_safety_value', 'eth_a_vault_dai_ceiling',
       'eth_a_vault_dai_floor', 'eth_a_vault_safety_collateral_ratio',
       'eth_a_vault_market_price', 'eth_a_vault_debt_balance',
       'eth_a_vault_liquidation_penalty', 'eth_a_vault_liquidation_ratio',
       'eth_a_vault_daily_revenues', 'eth_a_vault_annualized stability fee',
       'eth_a_vault_annualized_revenues', 'eth_a_vault_dart',
       'eth_a_vault_total_ann_revenues', 'eth_a_vault_prev_dai_ceiling',
       'eth_a_vault_status', 'eth_a_vault_market_collateral_ratio',
       'eth_a_vault_collateral_usd', 'eth_a_vault_hypothetical_dai_ceiling',
       'eth_a_vault_market_price_7d_ma', 'eth_a_vault_collateral_usd_7d_ma',
       'eth_a_vault_debt_balance_7d_ma',
       'eth_a_vault_safety_collateral_ratio_7d_ma',
       'eth_a_vault_market_collateral_ratio_7d_ma',
       'eth_a_vault_daily_revenues_7d_ma', 'eth_a_vault_market_pric

In [248]:
psm_full_range_df = psm_full_range_df.rename_axis('day')

In [249]:
psm_full_range_df.duplicated().sum()

424

In [250]:
eth_a_vault= eth_a_vault.merge(psm_full_range_df, on=['day'], how='inner')

In [251]:
eth_a_vault[['day','psm_change']].tail()

Unnamed: 0,day,psm_change
1558,2024-03-16 00:00:00+00:00,6819615.50378
1559,2024-03-17 00:00:00+00:00,64591276.21421
1560,2024-03-18 00:00:00+00:00,-232459681.52437
1561,2024-03-19 00:00:00+00:00,4430852.15651
1562,2024-03-20 00:00:00+00:00,47004882.0485


In [252]:
where_is_dai_csv_table_full = pd.DataFrame(index=date_range)


where_is_dai_csv_table_columns = where_is_dai_csv_table.columns
for column in where_is_dai_csv_table_columns:
    where_is_dai_csv_table_full[column] = where_is_dai_csv_table[column]

# Fill missing values with zeros
where_is_dai_csv_table_full.fillna(0, inplace=True)

In [253]:
where_is_dai_csv_table_full = where_is_dai_csv_table_full.rename_axis('day')

In [254]:
where_is_dai_csv_table_full.head()

Unnamed: 0_level_0,where_is_dai_Bridge,where_is_dai_CeFi,where_is_dai_Dai Savings,where_is_dai_Dex,where_is_dai_EOA,where_is_dai_Lending,where_is_dai_Other,where_is_dai_Treasury
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-11-13 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-14 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-15 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-16 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-17 00:00:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [255]:
eth_a_vault= eth_a_vault.merge(where_is_dai_csv_table_full, on=['day'], how='inner')

In [256]:
eth_a_vault.head()

Unnamed: 0,day,eth_a_vault_cumulative_collateral,eth_a_vault_safety_price,eth_a_vault_usd_safety_value,eth_a_vault_dai_ceiling,eth_a_vault_dai_floor,eth_a_vault_safety_collateral_ratio,eth_a_vault_market_price,eth_a_vault_debt_balance,eth_a_vault_liquidation_penalty,...,psm_balance,psm_turnover,where_is_dai_Bridge,where_is_dai_CeFi,where_is_dai_Dai Savings,where_is_dai_Dex,where_is_dai_EOA,where_is_dai_Lending,where_is_dai_Other,where_is_dai_Treasury
0,2019-11-13 00:00:00+00:00,0.42,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-11-15 00:00:00+00:00,0.42,119.74059,50.29105,0.0,20.0,0.0,119.74059,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-11-16 00:00:00+00:00,0.42,121.69333,51.1112,0.0,20.0,0.0,121.69333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-11-17 00:00:00+00:00,0.43,123.31394,53.02499,0.0,20.0,0.0,123.31394,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-11-18 00:00:00+00:00,26313.3854,118.38667,3115153.98582,50000000.0,20.0,1.99867,177.58,1558617.21545,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [257]:
daily_surplus_buffer_csv = daily_surplus_buffer_csv.fillna(0)

In [258]:
eth_a_vault= eth_a_vault.merge(daily_surplus_buffer_csv, on=['day'], how='inner')

In [259]:
eth_a_vault['eth_a_vault_status']

0       Closed
1       Closed
2       Closed
3       Closed
4         Open
         ...  
1558      Open
1559      Open
1560      Open
1561      Open
1562      Open
Name: eth_a_vault_status, Length: 1563, dtype: object

In [260]:
dsr_df.fillna(0, inplace=True)

In [261]:
dsr_df.head()

Unnamed: 0_level_0,dsr_inflow,dsr_interest,dsr_outflow,dsr_balance,dsr_rate,dai_circulating,dai_total_balance,dai_percent_in_dsr,dai_percent_out_dsr
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-03-27,5288164.42811,146319.9721,0.0,1545849069.62107,0.15,3198754360.76307,4744603430.38415,0.32581,0.67419
2024-03-26,24182507.40107,570070.21101,0.0,1510137336.76611,0.15,3243870446.94073,4754007783.70685,0.31766,0.68234
2024-03-25,0.0,591988.67214,79508005.51819,1485384759.15402,0.15,3238774686.95415,4724159446.10818,0.31442,0.68558
2024-03-24,32601790.65334,589190.34424,0.0,1564300776.00007,0.15,3258196717.18328,4822497493.18336,0.32438,0.67562
2024-03-23,42441802.1884,577016.24223,0.0,1531109795.00249,0.15,3245174169.16159,4776283964.1641,0.32057,0.67943


In [262]:
dsr_df.reset_index(inplace=True)
eth_a_vault.reset_index(inplace=True)
# Remove timezone information from both 'day' columns
eth_a_vault['day'] = eth_a_vault['day'].dt.tz_localize(None)
dsr_df['day'] = dsr_df['day'].dt.tz_localize(None)

In [263]:
dsr_df_full = pd.DataFrame(index=date_range)
dsr_df_full = dsr_df_full.rename_axis('day')

dsr_df_full.reset_index(inplace=True)
dsr_df_full['day'] = dsr_df_full['day'].dt.tz_localize(None)


In [264]:
dsr_df_full = dsr_df_full.merge(dsr_df, on=['day'], how='outer')

In [265]:
dsr_df_full.fillna(0, inplace=True)

In [266]:
dsr_df_full

Unnamed: 0,day,dsr_inflow,dsr_interest,dsr_outflow,dsr_balance,dsr_rate,dai_circulating,dai_total_balance,dai_percent_in_dsr,dai_percent_out_dsr
0,2019-11-13,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
1,2019-11-14,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
2,2019-11-15,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
3,2019-11-16,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
4,2019-11-17,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
...,...,...,...,...,...,...,...,...,...,...
1592,2024-03-25,0.00000,591988.67214,79508005.51819,1485384759.15402,0.15000,3238774686.95415,4724159446.10818,0.31442,0.68558
1593,2024-03-24,32601790.65334,589190.34424,0.00000,1564300776.00007,0.15000,3258196717.18328,4822497493.18336,0.32438,0.67562
1594,2024-03-23,42441802.18840,577016.24223,0.00000,1531109795.00249,0.15000,3245174169.16159,4776283964.16410,0.32057,0.67943
1595,2024-03-22,80719471.60047,542875.90171,0.00000,1488090976.57187,0.15000,3217359661.10720,4705450637.67908,0.31625,0.68375


In [267]:
dsr_df_full = dsr_df_full.set_index('day')

In [268]:
#dsr_df_full = dsr_df_full.drop(columns=['level_0','index'])

In [269]:
dsr_df_full.head()

Unnamed: 0_level_0,dsr_inflow,dsr_interest,dsr_outflow,dsr_balance,dsr_rate,dai_circulating,dai_total_balance,dai_percent_in_dsr,dai_percent_out_dsr
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-11-13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [270]:
eth_a_vault= eth_a_vault.merge(dsr_df_full, on=['day'], how='inner')

In [271]:
eth_a_vault = eth_a_vault.drop(columns=['index'])

In [272]:
eth_a_vault.head()

Unnamed: 0,day,eth_a_vault_cumulative_collateral,eth_a_vault_safety_price,eth_a_vault_usd_safety_value,eth_a_vault_dai_ceiling,eth_a_vault_dai_floor,eth_a_vault_safety_collateral_ratio,eth_a_vault_market_price,eth_a_vault_debt_balance,eth_a_vault_liquidation_penalty,...,daily_surplus_buffer,dsr_inflow,dsr_interest,dsr_outflow,dsr_balance,dsr_rate,dai_circulating,dai_total_balance,dai_percent_in_dsr,dai_percent_out_dsr
0,2019-11-13,0.42,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-11-15,0.42,119.74059,50.29105,0.0,20.0,0.0,119.74059,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-11-16,0.42,121.69333,51.1112,0.0,20.0,0.0,121.69333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-11-17,0.43,123.31394,53.02499,0.0,20.0,0.0,123.31394,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-11-18,26313.3854,118.38667,3115153.98582,50000000.0,20.0,1.99867,177.58,1558617.21545,0.0,...,90.77555,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [273]:
pivoted_balance_sheet = pivoted_balance_sheet.rename_axis('day')

In [274]:
pivoted_income_statement.rename(columns={'period':'day'}, inplace=True)

In [275]:
pivoted_balance_sheet.reset_index(inplace=True)
pivoted_balance_sheet['day'] = pivoted_balance_sheet['day'].dt.tz_localize(None)

In [276]:
pivoted_balance_sheet = pivoted_balance_sheet.set_index('day')

In [277]:
pivoted_balance_sheet = pivoted_balance_sheet.fillna(0)

In [278]:
print(pivoted_balance_sheet.describe())

         b_s_Crypto-Loans              b_s_DAI              b_s_DSR  \
count         1,360.00000          1,360.00000          1,360.00000   
mean  2,290,165,150.68213 -4,741,579,818.96597   -317,918,673.72473   
std   1,259,469,727.42181  2,522,050,832.58684    522,223,094.30581   
min     144,805,383.35301 -9,951,190,219.64641 -1,750,756,886.51841   
25%   1,334,072,649.50086 -6,326,898,796.77476   -258,531,477.03208   
50%   2,176,102,429.50594 -4,846,370,252.24157    -92,004,368.45499   
75%   2,843,872,505.75691 -3,656,086,112.99824    -12,338,379.65691   
max   6,135,551,191.71599   -108,067,101.21321     -1,167,096.23327   

             b_s_Equity  b_s_Others assets  b_s_Real-World Assets  \
count       1,360.00000        1,360.00000            1,360.00000   
mean  -50,907,047.74592       30,392.82750      580,607,673.56722   
std    26,391,917.30845       23,045.94153      880,997,103.85663   
min   -83,598,090.63895            0.00000                0.00000   
25%   -70,867,0

In [279]:
eth_a_vault= eth_a_vault.merge(pivoted_balance_sheet, on=['day'], how='left') 

In [280]:
eth_a_vault = eth_a_vault.fillna(0)

In [281]:
eth_a_vault.shape[0]

1563

In [282]:
print(list(eth_a_vault.columns))

['day', 'eth_a_vault_cumulative_collateral', 'eth_a_vault_safety_price', 'eth_a_vault_usd_safety_value', 'eth_a_vault_dai_ceiling', 'eth_a_vault_dai_floor', 'eth_a_vault_safety_collateral_ratio', 'eth_a_vault_market_price', 'eth_a_vault_debt_balance', 'eth_a_vault_liquidation_penalty', 'eth_a_vault_liquidation_ratio', 'eth_a_vault_daily_revenues', 'eth_a_vault_annualized stability fee', 'eth_a_vault_annualized_revenues', 'eth_a_vault_dart', 'eth_a_vault_total_ann_revenues', 'eth_a_vault_prev_dai_ceiling', 'eth_a_vault_status', 'eth_a_vault_market_collateral_ratio', 'eth_a_vault_collateral_usd', 'eth_a_vault_hypothetical_dai_ceiling', 'eth_a_vault_market_price_7d_ma', 'eth_a_vault_collateral_usd_7d_ma', 'eth_a_vault_debt_balance_7d_ma', 'eth_a_vault_safety_collateral_ratio_7d_ma', 'eth_a_vault_market_collateral_ratio_7d_ma', 'eth_a_vault_daily_revenues_7d_ma', 'eth_a_vault_market_price_30d_ma', 'eth_a_vault_collateral_usd_30d_ma', 'eth_a_vault_debt_balance_30d_ma', 'eth_a_vault_cumulati

In [283]:
eth_a_vault = eth_a_vault.sort_values(by='day')
pivoted_income_statement = pivoted_income_statement.sort_values(by='day')

In [284]:
pivoted_income_statement.tail()

item,day,1.1 - Lending Revenues,1.2 - Liquidations Revenues,1.3 - Trading Revenues,1.4 - Lending Expenses,1.5 - Liquidations Expenses,1.6 - Workforce Expenses,1.9 - Net Income,2.1 - Crypto Loans,2.2 - Trading Assets,...,debt_to_equity_Lag_11m,debt_ratio_Lag_11m,Total_Revenues_Lag_12m,Total_Expenses_Lag_12m,Net_Income_Lag_12m,profit_margin_Lag_12m,ROA_Lag_12m,ROE_Lag_12m,debt_to_equity_Lag_12m,debt_ratio_Lag_12m
48,2023-11-01,29666040.78753,455.02627,0.0,-6376789.60409,0.0,-2974302.24767,20315403.96204,4636257421.47005,707478489.39263,...,67.6484,0.98543,1169462.76725,-1941130.26503,-771667.49779,-0.00015,-0.00015,-0.01024,68.98648,0.98571
49,2023-12-01,14270261.48143,0.0,0.0,-6484200.84138,0.0,-2658930.55662,5127130.08344,4820477732.34982,400493643.70898,...,69.23278,0.98576,1364508.6449,-1849641.34466,-485132.69976,-9e-05,-9e-05,-0.00648,67.6484,0.98543
50,2024-01-01,20751255.62092,27376.29516,0.0,-5943924.2348,0.0,-5669555.33271,9165152.34857,4321343136.28758,563679392.71587,...,69.70243,0.98586,1540089.05977,-2003227.01648,-463137.95672,-9e-05,-9e-05,-0.00623,69.23278,0.98576
51,2024-02-01,29691662.11849,0.0,0.0,-4567709.0373,0.0,-1581115.16882,23542837.91237,4083073663.20968,913091407.04291,...,72.85136,0.98646,1750712.41881,-2422673.84315,-671961.42434,-0.00013,-0.00013,-0.00911,69.70243,0.98586
52,2024-03-01,18130771.84133,4623.87785,0.0,-7076544.93907,0.0,-3857879.46039,7200971.31971,3442061505.5845,1232860684.32398,...,68.98981,0.98571,2286044.03646,-4456169.85467,-2170125.81821,-0.00041,-0.00041,-0.03033,72.85136,0.98646


In [285]:
total_vault_data = pd.merge_asof(eth_a_vault, pivoted_income_statement, on='day')

In [286]:
print(list(total_vault_data.columns))

['day', 'eth_a_vault_cumulative_collateral', 'eth_a_vault_safety_price', 'eth_a_vault_usd_safety_value', 'eth_a_vault_dai_ceiling', 'eth_a_vault_dai_floor', 'eth_a_vault_safety_collateral_ratio', 'eth_a_vault_market_price', 'eth_a_vault_debt_balance', 'eth_a_vault_liquidation_penalty', 'eth_a_vault_liquidation_ratio', 'eth_a_vault_daily_revenues', 'eth_a_vault_annualized stability fee', 'eth_a_vault_annualized_revenues', 'eth_a_vault_dart', 'eth_a_vault_total_ann_revenues', 'eth_a_vault_prev_dai_ceiling', 'eth_a_vault_status', 'eth_a_vault_market_collateral_ratio', 'eth_a_vault_collateral_usd', 'eth_a_vault_hypothetical_dai_ceiling', 'eth_a_vault_market_price_7d_ma', 'eth_a_vault_collateral_usd_7d_ma', 'eth_a_vault_debt_balance_7d_ma', 'eth_a_vault_safety_collateral_ratio_7d_ma', 'eth_a_vault_market_collateral_ratio_7d_ma', 'eth_a_vault_daily_revenues_7d_ma', 'eth_a_vault_market_price_30d_ma', 'eth_a_vault_collateral_usd_30d_ma', 'eth_a_vault_debt_balance_30d_ma', 'eth_a_vault_cumulati

### Now for CoinGecko Crypto Market Data

In [287]:
#lets get price feeds for accepted collateral types

ir_csv['ilk'].unique()

array(['ETH-A', 'ETH-B', 'ETH-C', 'WBTC-A', 'WBTC-B', 'WBTC-C',
       'WSTETH-A', 'WSTETH-B', 'RWA002-A', 'RWA013-A', 'DIRECT-SPARK-DAI',
       'RWA014-A', 'RWA005-A', 'RWA012-A', 'RWA015-A', 'RWA007-A',
       'RETH-A', 'RWA003-A', 'GUNIV3DAIUSDC2-A', 'CRVV1ETHSTETH-A',
       'USDC-B', 'LINK-A', 'MATIC-A', 'UNIV2USDCETH-A', 'GNO-A',
       'UNIV2DAIUSDC-A', 'YFI-A', 'RWA004-A', 'GUNIV3DAIUSDC1-A',
       'GUSD-A', 'PAXUSD-A', 'USDC-A', 'DIRECT-AAVEV2-DAI',
       'DIRECT-COMPV2-DAI', 'RWA008-A', 'RENBTC-A', 'MANA-A', 'RWA009-A',
       'RWA001-A', 'UNI-A', 'UNIV2DAIETH-A', 'UNIV2WBTCETH-A',
       'UNIV2WBTCDAI-A', 'RWA-001', 'UNIV2UNIETH-A', 'TUSD-A', 'USDP-A',
       'BAT-A', 'BAL-A', 'ZRX-A', 'COMP-A', 'PSM-GUSD-A', 'AAVE-A',
       'UNIV2LINKETH-A', 'KNC-A', 'LRC-A', 'PSM-USDC-A', 'UNIV2AAVEETH-A',
       'UNIV2DAIUSDT-A', 'UNIV2ETHUSDT-A', 'USDT-A', 'PSM-PAX-A',
       'RWA006-A', 'PAX-A', nan, 'USDC', 'SAI'], dtype=object)

In [288]:
#need to use yfinance instead, coingecko clocked to 1 year historical

In [289]:
#sp = yf.Ticker("^GSPC")

In [290]:
#sp_from_nov_raw = sp.history(period="54mo")

In [291]:
sp_path = '../data/csv/sp500.csv'
#sp_from_nov_raw.to_csv(sp_path)
sp_from_nov = pd.read_csv(sp_path)

In [292]:
sp_from_nov.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2019-10-11 00:00:00-04:00,2963.07007,2993.28003,2963.07007,2970.27002,3581320000,0.0,0.0
1,2019-10-14 00:00:00-04:00,2965.81006,2972.84009,2962.93994,2966.1499,2559270000,0.0,0.0
2,2019-10-15 00:00:00-04:00,2973.61011,3003.28003,2973.61011,2995.67993,3345090000,0.0,0.0
3,2019-10-16 00:00:00-04:00,2989.67993,2997.54004,2985.19995,2989.68994,3230320000,0.0,0.0
4,2019-10-17 00:00:00-04:00,3000.77002,3008.29004,2991.79004,2997.94995,3103470000,0.0,0.0


In [293]:
sp_from_nov = sp_from_nov.drop(columns=['Dividends','Stock Splits','Open','Low','High'])

In [294]:
sp_from_nov.columns = [f's&p_500_market_{col}' if col != 'Date' else col for col in sp_from_nov.columns]

In [295]:
sp_from_nov

Unnamed: 0,Date,s&p_500_market_Close,s&p_500_market_Volume
0,2019-10-11 00:00:00-04:00,2970.27002,3581320000
1,2019-10-14 00:00:00-04:00,2966.14990,2559270000
2,2019-10-15 00:00:00-04:00,2995.67993,3345090000
3,2019-10-16 00:00:00-04:00,2989.68994,3230320000
4,2019-10-17 00:00:00-04:00,2997.94995,3103470000
...,...,...,...
1127,2024-04-05 00:00:00-04:00,5204.33984,3386780000
1128,2024-04-08 00:00:00-04:00,5202.39014,3278180000
1129,2024-04-09 00:00:00-04:00,5209.91016,3400680000
1130,2024-04-10 00:00:00-04:00,5160.64014,3845930000


In [296]:
#btc = yf.Ticker('BTC-USD')

In [297]:
#btc_from_nov = btc.history(period='54mo')
btc_path = '../data/csv/btc.csv'
#btc_from_nov.to_csv(btc_path)
btc_from_nov = pd.read_csv(btc_path)

In [298]:
btc_from_nov.head()

Unnamed: 0,Date,btc_market_Close,btc_market_Volume
0,2019-10-11 00:00:00+00:00,8321.75684,19604381101
1,2019-10-12 00:00:00+00:00,8336.55566,14532641605
2,2019-10-13 00:00:00+00:00,8321.00586,13808286059
3,2019-10-14 00:00:00+00:00,8374.68652,15151387859
4,2019-10-15 00:00:00+00:00,8205.36914,15220412632


In [299]:
#btc_from_nov = btc_from_nov.drop(columns=['Dividends','Stock Splits','Open','Low','High'])

In [300]:
#btc_from_nov.columns = [f'btc_market_{col}' if col != 'Date' else col for col in btc_from_nov.columns]

In [301]:
btc_from_nov.head()

Unnamed: 0,Date,btc_market_Close,btc_market_Volume
0,2019-10-11 00:00:00+00:00,8321.75684,19604381101
1,2019-10-12 00:00:00+00:00,8336.55566,14532641605
2,2019-10-13 00:00:00+00:00,8321.00586,13808286059
3,2019-10-14 00:00:00+00:00,8374.68652,15151387859
4,2019-10-15 00:00:00+00:00,8205.36914,15220412632


eth = yf.Ticker('ETH-USD')
eth_from_nov = eth.history(period='54mo')

In [302]:
eth_from_nov_path = '../data/csv/eth.csv'
#eth_from_nov.to_csv(eth_from_nov_path)
eth_from_nov = pd.read_csv(eth_from_nov_path)

In [303]:
eth_from_nov.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2019-10-11 00:00:00+00:00,191.80107,195.3188,181.66183,182.56969,9128522970,0.0,0.0
1,2019-10-12 00:00:00+00:00,182.53403,186.30489,179.98483,180.82664,7494328840,0.0,0.0
2,2019-10-13 00:00:00+00:00,180.86131,185.07591,180.31715,182.07515,6733182273,0.0,0.0
3,2019-10-14 00:00:00+00:00,182.02847,187.30362,181.66228,186.96091,7276520699,0.0,0.0
4,2019-10-15 00:00:00+00:00,186.97705,187.75987,179.46278,181.40607,7731456579,0.0,0.0


In [304]:
eth_from_nov = eth_from_nov.drop(columns=['Dividends','Stock Splits','Open','Low','High'])

In [305]:
eth_from_nov.columns = [f'eth_market_{col}' if col != 'Date' else col for col in eth_from_nov.columns]

In [306]:
eth_from_nov.head()

Unnamed: 0,Date,eth_market_Close,eth_market_Volume
0,2019-10-11 00:00:00+00:00,182.56969,9128522970
1,2019-10-12 00:00:00+00:00,180.82664,7494328840
2,2019-10-13 00:00:00+00:00,182.07515,6733182273
3,2019-10-14 00:00:00+00:00,186.96091,7276520699
4,2019-10-15 00:00:00+00:00,181.40607,7731456579


mkr = yf.Ticker('MKR-USD')
mkr_from_nov = mkr.history(period='54mo')

In [307]:
mkr_from_nov_path = '../data/csv/mkr.csv'
#mkr_from_nov.to_csv(mkr_from_nov_path)
mkr_from_nov = pd.read_csv(mkr_from_nov_path)
mkr_from_nov.head()

Unnamed: 0.1,Unnamed: 0,Date,mkr_market_Close,mkr_market_Volume
0,0,2019-10-11 00:00:00+00:00,507.65472,13260531
1,1,2019-10-12 00:00:00+00:00,507.39453,16557071
2,2,2019-10-13 00:00:00+00:00,501.76334,16830548
3,3,2019-10-14 00:00:00+00:00,511.23972,14246216
4,4,2019-10-15 00:00:00+00:00,495.81003,11560592


In [308]:
mkr_from_nov = mkr_from_nov.drop(columns=['Unnamed: 0'])

In [309]:
#mkr_from_nov.columns = [f'mkr_market_{col}' if col != 'Date' else col for col in mkr_from_nov.columns]

In [310]:
mkr_from_nov.head()

Unnamed: 0,Date,mkr_market_Close,mkr_market_Volume
0,2019-10-11 00:00:00+00:00,507.65472,13260531
1,2019-10-12 00:00:00+00:00,507.39453,16557071
2,2019-10-13 00:00:00+00:00,501.76334,16830548
3,2019-10-14 00:00:00+00:00,511.23972,14246216
4,2019-10-15 00:00:00+00:00,495.81003,11560592


vix = yf.Ticker('^VIX')
vix_from_nov = vix.history(period='54mo')

In [311]:
vix_from_nov_path = '../data/csv/vix.csv'
#vix_from_nov.to_csv(vix_from_nov_path)
vix_from_nov = pd.read_csv(vix_from_nov_path)
vix_from_nov = vix_from_nov.drop(columns=['Dividends','Stock Splits','Open','Low','High'])
vix_from_nov.head()

Unnamed: 0,Date,Close,Volume
0,2019-10-11 00:00:00-05:00,15.58,0
1,2019-10-14 00:00:00-05:00,14.57,0
2,2019-10-15 00:00:00-05:00,13.54,0
3,2019-10-16 00:00:00-05:00,13.68,0
4,2019-10-17 00:00:00-05:00,13.79,0


In [312]:
vix_from_nov.columns = [f'vix_market_{col}' if col != 'Date' else col for col in vix_from_nov.columns]

dai = yf.Ticker('DAI-USD')
dai_from_nov = dai.history(period='54mo')

In [313]:
dai_from_nov_path = '../data/csv/dai.csv'
#dai_from_nov.to_csv(dai_from_nov_path)
dai_from_nov = pd.read_csv(dai_from_nov_path)
dai_from_nov.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2019-11-22 00:00:00+00:00,0.99736,1.02655,0.97216,0.9926,2104541,0.0,0.0
1,2019-11-23 00:00:00+00:00,0.99218,1.02875,0.98834,1.01628,311283,0.0,0.0
2,2019-11-24 00:00:00+00:00,1.01629,1.03514,0.99561,1.00227,255060,0.0,0.0
3,2019-11-25 00:00:00+00:00,1.00129,1.03471,0.9848,1.00702,958777,0.0,0.0
4,2019-11-26 00:00:00+00:00,1.00721,1.03143,0.99177,1.0061,217385,0.0,0.0


In [314]:
dai_from_nov = dai_from_nov.drop(columns=['Dividends','Stock Splits','Open','Low','High'])

In [315]:
dai_from_nov.columns = [f'dai_market_{col}' if col != 'Date' else col for col in dai_from_nov.columns]

In [316]:
dai_from_nov.head()

Unnamed: 0,Date,dai_market_Close,dai_market_Volume
0,2019-11-22 00:00:00+00:00,0.9926,2104541
1,2019-11-23 00:00:00+00:00,1.01628,311283
2,2019-11-24 00:00:00+00:00,1.00227,255060
3,2019-11-25 00:00:00+00:00,1.00702,958777
4,2019-11-26 00:00:00+00:00,1.0061,217385


In [317]:
dai_from_nov['dai_deviation'] = dai_from_nov['dai_market_Close'] - 1
dai_from_nov['dai_abs_deviation'] = dai_from_nov['dai_deviation'].abs()

average_deviation = dai_from_nov['dai_abs_deviation'].mean()
standard_deviation = dai_from_nov['dai_market_Close'].std()

print(f"DAI Average Deviation from $1: {average_deviation}")
print(f"Standard Deviation of DAI Price: {standard_deviation}")

DAI Average Deviation from $1: 0.002885558533802374
Standard Deviation of DAI Price: 0.006500104351783464


In [318]:
crypto_market_data = pd.merge(dai_from_nov, eth_from_nov, on=['Date'], how='inner')

In [319]:
crypto_market_data.head(30)

Unnamed: 0,Date,dai_market_Close,dai_market_Volume,dai_deviation,dai_abs_deviation,eth_market_Close,eth_market_Volume
0,2019-11-22 00:00:00+00:00,0.9926,2104541,-0.0074,0.0074,150.26817,12020749863
1,2019-11-23 00:00:00+00:00,1.01628,311283,0.01628,0.01628,153.41779,8289198330
2,2019-11-24 00:00:00+00:00,1.00227,255060,0.00227,0.00227,142.8347,7782769098
3,2019-11-25 00:00:00+00:00,1.00702,958777,0.00702,0.00702,146.47653,10962753356
4,2019-11-26 00:00:00+00:00,1.0061,217385,0.0061,0.0061,148.96507,7648516297
5,2019-11-27 00:00:00+00:00,0.99703,478836,-0.00297,0.00297,153.01056,8778095308
6,2019-11-28 00:00:00+00:00,1.00474,1295174,0.00474,0.00474,151.71758,7346531960
7,2019-11-29 00:00:00+00:00,1.00476,1517444,0.00476,0.00476,155.30415,7503898278
8,2019-11-30 00:00:00+00:00,1.00523,1099787,0.00523,0.00523,152.53969,6565950868
9,2019-12-01 00:00:00+00:00,1.00104,1383114,0.00104,0.00104,151.18573,7102780298


In [320]:
crypto_market_data = crypto_market_data.merge(btc_from_nov, on=['Date'], how='inner')

In [321]:
crypto_market_data = crypto_market_data.merge(mkr_from_nov, on=['Date'], how='inner')

In [322]:
crypto_market_data

Unnamed: 0,Date,dai_market_Close,dai_market_Volume,dai_deviation,dai_abs_deviation,eth_market_Close,eth_market_Volume,btc_market_Close,btc_market_Volume,mkr_market_Close,mkr_market_Volume
0,2019-11-22 00:00:00+00:00,0.99260,2104541,-0.00740,0.00740,150.26817,12020749863,7296.57764,34242315785,519.70923,5702366
1,2019-11-23 00:00:00+00:00,1.01628,311283,0.01628,0.01628,153.41779,8289198330,7397.79688,21008924418,551.96210,5249182
2,2019-11-24 00:00:00+00:00,1.00227,255060,0.00227,0.00227,142.83470,7782769098,7047.91699,30433517289,480.05551,4311479
3,2019-11-25 00:00:00+00:00,1.00702,958777,0.00702,0.00702,146.47653,10962753356,7146.13379,42685231262,528.87903,6497521
4,2019-11-26 00:00:00+00:00,1.00610,217385,0.00610,0.00610,148.96507,7648516297,7218.37109,21129505542,527.03693,5182243
...,...,...,...,...,...,...,...,...,...,...,...
1598,2024-04-07 00:00:00+00:00,0.99985,611101443,-0.00015,0.00015,3453.49463,9931108526,69362.55469,21204930369,3637.38794,69982639
1599,2024-04-08 00:00:00+00:00,0.99998,528959914,-0.00002,0.00002,3695.29272,19055143129,71631.35938,37261432669,3737.75903,104148315
1600,2024-04-09 00:00:00+00:00,0.99990,427618489,-0.00010,0.00010,3505.16333,18279773833,69139.01562,36426900409,3394.29321,100862751
1601,2024-04-10 00:00:00+00:00,1.00011,350570100,0.00011,0.00011,3543.73706,16872482726,70587.88281,38318601774,3338.57300,100824129


In [323]:
sp_from_nov['Date'] = pd.to_datetime(sp_from_nov['Date'],utc=True)
vix_from_nov['Date']= pd.to_datetime(vix_from_nov['Date'],utc=True)

In [324]:
sp_from_nov = localize_or_convert(sp_from_nov, 'Date')
vix_from_nov= localize_or_convert(vix_from_nov, 'Date')

In [325]:
sp_from_nov.describe()

Unnamed: 0,s&p_500_market_Close,s&p_500_market_Volume
count,1132.0,1132.0
mean,3989.32151,4414514713.78092
std,592.22602,1069487517.39506
min,2237.3999,1082286000.0
25%,3585.50256,3777510000.0
50%,4090.60498,4152040000.0
75%,4418.76758,4774665000.0
max,5254.3501,9976520000.0


In [326]:
crypto_market_data.set_index('Date', inplace=True)

In [327]:
crypto_market_data.index = pd.to_datetime(crypto_market_data.index)

In [328]:
sp_from_nov.set_index('Date', inplace=True)
vix_from_nov.set_index('Date', inplace=True)

In [329]:
#sp_from_nov.index = pd.to_datetime(sp_from_nov.index)

In [330]:
sp_from_nov_normalized = sp_from_nov.index.normalize()
crypto_market_data_normalized = crypto_market_data.index.normalize()
vix_from_nov_normalized = vix_from_nov.index.normalize()

In [331]:
sp_from_nov.index = sp_from_nov_normalized
crypto_market_data.index = crypto_market_data_normalized
vix_from_nov.index = vix_from_nov_normalized

In [332]:
sp_from_nov.head()

Unnamed: 0_level_0,s&p_500_market_Close,s&p_500_market_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-10-11 00:00:00+00:00,2970.27002,3581320000
2019-10-14 00:00:00+00:00,2966.1499,2559270000
2019-10-15 00:00:00+00:00,2995.67993,3345090000
2019-10-16 00:00:00+00:00,2989.68994,3230320000
2019-10-17 00:00:00+00:00,2997.94995,3103470000


In [333]:
vix_from_nov.head()

Unnamed: 0_level_0,vix_market_Close,vix_market_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-10-11 00:00:00+00:00,15.58,0
2019-10-14 00:00:00+00:00,14.57,0
2019-10-15 00:00:00+00:00,13.54,0
2019-10-16 00:00:00+00:00,13.68,0
2019-10-17 00:00:00+00:00,13.79,0


In [334]:
crypto_market_data.head()

Unnamed: 0_level_0,dai_market_Close,dai_market_Volume,dai_deviation,dai_abs_deviation,eth_market_Close,eth_market_Volume,btc_market_Close,btc_market_Volume,mkr_market_Close,mkr_market_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-11-22 00:00:00+00:00,0.9926,2104541,-0.0074,0.0074,150.26817,12020749863,7296.57764,34242315785,519.70923,5702366
2019-11-23 00:00:00+00:00,1.01628,311283,0.01628,0.01628,153.41779,8289198330,7397.79688,21008924418,551.9621,5249182
2019-11-24 00:00:00+00:00,1.00227,255060,0.00227,0.00227,142.8347,7782769098,7047.91699,30433517289,480.05551,4311479
2019-11-25 00:00:00+00:00,1.00702,958777,0.00702,0.00702,146.47653,10962753356,7146.13379,42685231262,528.87903,6497521
2019-11-26 00:00:00+00:00,1.0061,217385,0.0061,0.0061,148.96507,7648516297,7218.37109,21129505542,527.03693,5182243


In [335]:
crypto_market_data = crypto_market_data.merge(sp_from_nov, on=['Date'], how='inner')

In [336]:
crypto_market_data = crypto_market_data.merge(vix_from_nov, on=['Date'], how='inner')

In [337]:
crypto_market_data

Unnamed: 0_level_0,dai_market_Close,dai_market_Volume,dai_deviation,dai_abs_deviation,eth_market_Close,eth_market_Volume,btc_market_Close,btc_market_Volume,mkr_market_Close,mkr_market_Volume,s&p_500_market_Close,s&p_500_market_Volume,vix_market_Close,vix_market_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2019-11-22 00:00:00+00:00,0.99260,2104541,-0.00740,0.00740,150.26817,12020749863,7296.57764,34242315785,519.70923,5702366,3110.29004,3235270000,12.34000,0
2019-11-25 00:00:00+00:00,1.00702,958777,0.00702,0.00702,146.47653,10962753356,7146.13379,42685231262,528.87903,6497521,3133.63989,3514310000,11.87000,0
2019-11-26 00:00:00+00:00,1.00610,217385,0.00610,0.00610,148.96507,7648516297,7218.37109,21129505542,527.03693,5182243,3140.52002,4600450000,11.54000,0
2019-11-27 00:00:00+00:00,0.99703,478836,-0.00297,0.00297,153.01056,8778095308,7531.66357,23991412764,549.74243,4793026,3153.62988,3035470000,11.75000,0
2019-11-29 00:00:00+00:00,1.00476,1517444,0.00476,0.00476,155.30415,7503898278,7761.24365,19709695456,549.65155,4784047,3140.97998,1743420000,12.62000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-05 00:00:00+00:00,1.00004,776652924,0.00004,0.00004,3318.88525,15214447092,67837.64062,33748230056,3682.49951,113082706,5204.33984,3386780000,16.03000,0
2024-04-08 00:00:00+00:00,0.99998,528959914,-0.00002,0.00002,3695.29272,19055143129,71631.35938,37261432669,3737.75903,104148315,5202.39014,3278180000,15.19000,0
2024-04-09 00:00:00+00:00,0.99990,427618489,-0.00010,0.00010,3505.16333,18279773833,69139.01562,36426900409,3394.29321,100862751,5209.91016,3400680000,14.98000,0
2024-04-10 00:00:00+00:00,1.00011,350570100,0.00011,0.00011,3543.73706,16872482726,70587.88281,38318601774,3338.57300,100824129,5160.64014,3845930000,15.80000,0


In [338]:
for col in crypto_market_data:
    crypto_market_data[f'{col}_daily_returns'] = crypto_market_data[col].pct_change()
    crypto_market_data[f'{col}_7d_ma']= crypto_market_data[col].rolling(window=7).mean()
    crypto_market_data[f'{col}_30d_ma'] = crypto_market_data[col].rolling(window=30).mean()



In [339]:
crypto_market_data.rename_axis('day',inplace=True)

In [340]:
crypto_market_data = crypto_market_data.fillna(0)

In [341]:
crypto_market_data.head(20)

Unnamed: 0_level_0,dai_market_Close,dai_market_Volume,dai_deviation,dai_abs_deviation,eth_market_Close,eth_market_Volume,btc_market_Close,btc_market_Volume,mkr_market_Close,mkr_market_Volume,...,s&p_500_market_Close_30d_ma,s&p_500_market_Volume_daily_returns,s&p_500_market_Volume_7d_ma,s&p_500_market_Volume_30d_ma,vix_market_Close_daily_returns,vix_market_Close_7d_ma,vix_market_Close_30d_ma,vix_market_Volume_daily_returns,vix_market_Volume_7d_ma,vix_market_Volume_30d_ma
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-11-22 00:00:00+00:00,0.9926,2104541,-0.0074,0.0074,150.26817,12020749863,7296.57764,34242315785,519.70923,5702366,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-25 00:00:00+00:00,1.00702,958777,0.00702,0.00702,146.47653,10962753356,7146.13379,42685231262,528.87903,6497521,...,0.0,0.08625,0.0,0.0,-0.03809,0.0,0.0,0.0,0.0,0.0
2019-11-26 00:00:00+00:00,1.0061,217385,0.0061,0.0061,148.96507,7648516297,7218.37109,21129505542,527.03693,5182243,...,0.0,0.30906,0.0,0.0,-0.0278,0.0,0.0,0.0,0.0,0.0
2019-11-27 00:00:00+00:00,0.99703,478836,-0.00297,0.00297,153.01056,8778095308,7531.66357,23991412764,549.74243,4793026,...,0.0,-0.34018,0.0,0.0,0.0182,0.0,0.0,0.0,0.0,0.0
2019-11-29 00:00:00+00:00,1.00476,1517444,0.00476,0.00476,155.30415,7503898278,7761.24365,19709695456,549.65155,4784047,...,0.0,-0.42565,0.0,0.0,0.07404,0.0,0.0,0.0,0.0,0.0
2019-12-02 00:00:00+00:00,0.99792,1652542,-0.00208,0.00208,149.05916,6670561362,7321.98828,17082040706,564.04742,4745961,...,0.0,0.88466,0.0,0.0,0.18146,0.0,0.0,0.0,0.0,0.0
2019-12-03 00:00:00+00:00,1.00392,1436267,0.00392,0.00392,147.95642,6204379850,7320.14551,14797485769,545.92542,4876703,...,0.0,0.11743,3298035714.28571,0.0,0.07042,12.99857,0.0,0.0,0.0,0.0
2019-12-04 00:00:00+00:00,1.00445,2308280,0.00445,0.00445,146.74774,7865937094,7252.03467,21664240918,495.40494,5053047,...,0.0,0.00855,3364851428.57143,0.0,-0.07268,13.35,0.0,0.0,0.0,0.0
2019-12-05 00:00:00+00:00,1.00821,1399150,0.00821,0.00821,149.24899,6745910228,7448.30762,18816085231,509.80917,5175476,...,0.0,-0.09249,3342875714.28571,0.0,-0.01892,13.72857,0.0,0.0,0.0,0.0
2019-12-06 00:00:00+00:00,1.0019,2479272,0.0019,0.0019,149.19444,6458766441,7546.99658,18104466307,505.81747,4642910,...,0.0,0.03655,3183284285.71429,0.0,-0.06198,14.02571,0.0,0.0,0.0,0.0


In [342]:
nan_rows = crypto_market_data[crypto_market_data.isna().any(axis=1)]
print(nan_rows)


Empty DataFrame
Columns: [dai_market_Close, dai_market_Volume, dai_deviation, dai_abs_deviation, eth_market_Close, eth_market_Volume, btc_market_Close, btc_market_Volume, mkr_market_Close, mkr_market_Volume, s&p_500_market_Close, s&p_500_market_Volume, vix_market_Close, vix_market_Volume, dai_market_Close_daily_returns, dai_market_Close_7d_ma, dai_market_Close_30d_ma, dai_market_Volume_daily_returns, dai_market_Volume_7d_ma, dai_market_Volume_30d_ma, dai_deviation_daily_returns, dai_deviation_7d_ma, dai_deviation_30d_ma, dai_abs_deviation_daily_returns, dai_abs_deviation_7d_ma, dai_abs_deviation_30d_ma, eth_market_Close_daily_returns, eth_market_Close_7d_ma, eth_market_Close_30d_ma, eth_market_Volume_daily_returns, eth_market_Volume_7d_ma, eth_market_Volume_30d_ma, btc_market_Close_daily_returns, btc_market_Close_7d_ma, btc_market_Close_30d_ma, btc_market_Volume_daily_returns, btc_market_Volume_7d_ma, btc_market_Volume_30d_ma, mkr_market_Close_daily_returns, mkr_market_Close_7d_ma, mkr

In [343]:
nan_rows.columns

Index(['dai_market_Close', 'dai_market_Volume', 'dai_deviation',
       'dai_abs_deviation', 'eth_market_Close', 'eth_market_Volume',
       'btc_market_Close', 'btc_market_Volume', 'mkr_market_Close',
       'mkr_market_Volume', 's&p_500_market_Close', 's&p_500_market_Volume',
       'vix_market_Close', 'vix_market_Volume',
       'dai_market_Close_daily_returns', 'dai_market_Close_7d_ma',
       'dai_market_Close_30d_ma', 'dai_market_Volume_daily_returns',
       'dai_market_Volume_7d_ma', 'dai_market_Volume_30d_ma',
       'dai_deviation_daily_returns', 'dai_deviation_7d_ma',
       'dai_deviation_30d_ma', 'dai_abs_deviation_daily_returns',
       'dai_abs_deviation_7d_ma', 'dai_abs_deviation_30d_ma',
       'eth_market_Close_daily_returns', 'eth_market_Close_7d_ma',
       'eth_market_Close_30d_ma', 'eth_market_Volume_daily_returns',
       'eth_market_Volume_7d_ma', 'eth_market_Volume_30d_ma',
       'btc_market_Close_daily_returns', 'btc_market_Close_7d_ma',
       'btc_market_Cl

for column in crypto_market_data[f'{col}_daily_returns']:
    crypto_market_data[f'{column}_volatility_7d'] = eth_a_vault[column].rolling(window=7).std()



# Calculate lag for selected columns (1-day lag as an example)
for column in ['debt_balance', 'cumulative_collateral', 
               'safety_price', 'safety_collateral_ratio', 
               'market_collateral_ratio','annualized stability fee','daily_revenues']:
    eth_a_vault[f'{column}_lag30'] = eth_a_vault[column].shift(30)

## Now for Macro Economic Data from FRED Api

In [344]:
def fetch_and_process_tbill_data(api_url, data_key, date_column, value_column, date_format='datetime'):
    # Retrieve the API key from Streamlit secrets
    api_key = st.secrets["FRED_API_KEY"]

    # Append the API key to the URL
    api_url_with_key = f"{api_url}&api_key={api_key}"

    response = requests.get(api_url_with_key)
    if response.status_code == 200:
        data = response.json()
        df = pd.DataFrame(data[data_key])
        
        if date_format == 'datetime':
            df[date_column] = pd.to_datetime(df[date_column])
        
        df.set_index(date_column, inplace=True)
        df[value_column] = df[value_column].astype(float)
        return df
    else:
        print(f"Failed to retrieve data: {response.status_code}")
        return pd.DataFrame()  # Return an empty DataFrame in case of failure

In [345]:
three_month_tbill_historical_api = "https://api.stlouisfed.org/fred/series/observations?series_id=TB3MS&file_type=json"
three_month_tbill = fetch_and_process_tbill_data(three_month_tbill_historical_api, "observations", "date", "value")


In [346]:
three_month_tbill = three_month_tbill[three_month_tbill.index >= '2019-11-01']

In [347]:
three_month_tbill['3m_tbill'] = three_month_tbill['value'] / 100

In [348]:
three_month_tbill

Unnamed: 0_level_0,realtime_start,realtime_end,value,3m_tbill
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-11-01,2024-04-11,2024-04-11,1.54,0.0154
2019-12-01,2024-04-11,2024-04-11,1.54,0.0154
2020-01-01,2024-04-11,2024-04-11,1.52,0.0152
2020-02-01,2024-04-11,2024-04-11,1.52,0.0152
2020-03-01,2024-04-11,2024-04-11,0.29,0.0029
2020-04-01,2024-04-11,2024-04-11,0.14,0.0014
2020-05-01,2024-04-11,2024-04-11,0.13,0.0013
2020-06-01,2024-04-11,2024-04-11,0.16,0.0016
2020-07-01,2024-04-11,2024-04-11,0.13,0.0013
2020-08-01,2024-04-11,2024-04-11,0.1,0.001


In [349]:
tmo_path = '../data/csv/3mo_tbill.csv'
three_month_tbill.to_csv(tmo_path)
three_month_tbill_csv = pd.read_csv(tmo_path, index_col='date', parse_dates=True)

In [350]:
three_month_tbill_csv.describe()

Unnamed: 0,value,3m_tbill
count,53.0,53.0
mean,2.05226,0.02052
std,2.20409,0.02204
min,0.02,0.0002
25%,0.09,0.0009
50%,0.98,0.0098
75%,4.65,0.0465
max,5.34,0.0534


https://fred.stlouisfed.org/series/FEDTARMDLR

In [351]:
forecast_ffr_path = "../data/csv/FEDTARMDLR.csv"
forecast_ffr = pd.read_csv(forecast_ffr_path)
forecast_ffr.head()

Unnamed: 0,DATE,FEDTARMDLR
0,2019-09-18,2.5
1,2019-12-11,2.5
2,2020-06-10,2.5
3,2020-09-16,2.5
4,2020-12-16,2.5


In [352]:
forecast_ffr=forecast_ffr.rename(columns={'DATE':'date'})
forecast_ffr=forecast_ffr.rename(columns={'FEDTARMDLR':'forecast_fed_funds'})
forecast_ffr['date'] = pd.to_datetime(forecast_ffr['date'])
forecast_ffr['date'] = forecast_ffr['date'].dt.tz_localize('UTC')

In [353]:
forecast_ffr.set_index('date',inplace=True)

In [354]:
forecast_ffr = forecast_ffr.resample('M').last().ffill()
forecast_ffr.reset_index(inplace=True)
forecast_ffr.head()

Unnamed: 0,date,forecast_fed_funds
0,2019-09-30 00:00:00+00:00,2.5
1,2019-10-31 00:00:00+00:00,2.5
2,2019-11-30 00:00:00+00:00,2.5
3,2019-12-31 00:00:00+00:00,2.5
4,2020-01-31 00:00:00+00:00,2.5


In [355]:
#sticky_price_consumer_index

spi_path = "../data/csv/sticky_price_consumer_price_index.csv"
sticky_index = pd.read_csv(spi_path)

In [356]:
print(sticky_index.describe())

       CORESTICKM159SFRBATL
count              52.00000
mean                3.86476
std                 1.71566
min                 1.40057
25%                 2.31302
50%                 3.71098
75%                 5.39869
max                 6.53817


https://fred.stlouisfed.org/series/PCECTPIMDLR

In [357]:
pce_path = "../data/csv/PCECTPIMDLR.csv"
forecast_pce_inflation = pd.read_csv(pce_path)
forecast_pce_inflation.head()

Unnamed: 0,DATE,PCECTPIMDLR
0,2019-09-18,2.0
1,2019-12-11,2.0
2,2020-06-10,2.0
3,2020-09-16,2.0
4,2020-12-16,2.0


In [358]:
forecast_pce_inflation=forecast_pce_inflation.rename(columns={'DATE':'date'})
forecast_pce_inflation=forecast_pce_inflation.rename(columns={'PCECTPIMDLR':'forecast_pce'})
forecast_pce_inflation['date'] = pd.to_datetime(forecast_pce_inflation['date'])
forecast_pce_inflation['date'] = forecast_pce_inflation['date'].dt.tz_localize('UTC')

In [359]:
forecast_pce_inflation.set_index('date',inplace=True)

In [360]:
forecast_pce_inflation = forecast_pce_inflation.resample('M').last().ffill()
forecast_pce_inflation.reset_index(inplace=True)
forecast_pce_inflation.head()

Unnamed: 0,date,forecast_pce
0,2019-09-30 00:00:00+00:00,2.0
1,2019-10-31 00:00:00+00:00,2.0
2,2019-11-30 00:00:00+00:00,2.0
3,2019-12-31 00:00:00+00:00,2.0
4,2020-01-31 00:00:00+00:00,2.0


https://fred.stlouisfed.org/series/M1SL

In [361]:
m1_path =  "../data/csv/M1SL.csv"
m1 = pd.read_csv(m1_path)

In [362]:
m1.tail()

Unnamed: 0,DATE,M1SL
47,2023-10-01,18071.5
48,2023-11-01,18014.7
49,2023-12-01,18022.0
50,2024-01-01,17987.0
51,2024-02-01,17944.1


In [363]:
m1=m1.rename(columns={'DATE':'date'})
m1['date'] = pd.to_datetime(m1['date'])
m1['date'] = m1['date'].dt.tz_localize('UTC')

https://fred.stlouisfed.org/series/M1V

In [364]:
m1_v_path =  "../data/csv/M1V.csv"
m1_v = pd.read_csv(m1_v_path)

In [365]:
m1_v.tail()

Unnamed: 0,DATE,M1V
13,2022-10-01,1.324
14,2023-01-01,1.39
15,2023-04-01,1.457
16,2023-07-01,1.511
17,2023-10-01,1.55


In [366]:
m1_v=m1_v.rename(columns={'DATE':'date'})
m1_v['date'] = pd.to_datetime(m1_v['date'])
m1_v['date'] = m1_v['date'].dt.tz_localize('UTC')

In [367]:
m1_v.set_index('date',inplace=True)
m1_v = m1_v.resample('M').last().ffill()
m1_v.reset_index(inplace=True)
m1_v.tail()

Unnamed: 0,date,M1V
47,2023-06-30 00:00:00+00:00,1.457
48,2023-07-31 00:00:00+00:00,1.511
49,2023-08-31 00:00:00+00:00,1.511
50,2023-09-30 00:00:00+00:00,1.511
51,2023-10-31 00:00:00+00:00,1.55


https://fred.stlouisfed.org/series/WM2NS

In [368]:
m2_path =  "../data/csv/WM2NS.csv"
m2 = pd.read_csv(m2_path)

In [369]:
m2.tail()

Unnamed: 0,DATE,WM2NS
222,2024-02-05,20877.2
223,2024-02-12,20746.2
224,2024-02-19,20756.5
225,2024-02-26,20744.3
226,2024-03-04,20908.9


In [370]:
m2=m2.rename(columns={'DATE':'date'})
m2['date'] = pd.to_datetime(m2['date'])
m2['date'] = m2['date'].dt.tz_localize('UTC')

In [371]:
m2.set_index('date',inplace=True)
m2 = m2.resample('M').mean()
m2.reset_index(inplace=True)
m2.tail()

Unnamed: 0,date,WM2NS
48,2023-11-30 00:00:00+00:00,20709.375
49,2023-12-31 00:00:00+00:00,20877.4
50,2024-01-31 00:00:00+00:00,20871.8
51,2024-02-29 00:00:00+00:00,20781.05
52,2024-03-31 00:00:00+00:00,20908.9


https://fred.stlouisfed.org/series/M2V

In [372]:
m2_v_path =  "../data/csv/M2V.csv"
m2_v = pd.read_csv(m2_v_path)

In [373]:
m2_v.tail()

Unnamed: 0,DATE,M2V
13,2022-10-01,1.236
14,2023-01-01,1.272
15,2023-04-01,1.302
16,2023-07-01,1.33
17,2023-10-01,1.348


In [374]:
m2_v=m2_v.rename(columns={'DATE':'date'})
m2_v['date'] = pd.to_datetime(m2_v['date'])
m2_v['date'] = m2_v['date'].dt.tz_localize('UTC')

In [375]:
m2_v.set_index('date',inplace=True)
m2_v = m2_v.resample('M').last().ffill()
m2_v.reset_index(inplace=True)
m2_v.tail()

Unnamed: 0,date,M2V
47,2023-06-30 00:00:00+00:00,1.302
48,2023-07-31 00:00:00+00:00,1.33
49,2023-08-31 00:00:00+00:00,1.33
50,2023-09-30 00:00:00+00:00,1.33
51,2023-10-31 00:00:00+00:00,1.348


In [376]:
rgdp_path = "../data/csv/GDPC1.csv"
rgdp = pd.read_csv(rgdp_path)
rgdp.tail()

Unnamed: 0,DATE,GDPC1
13,2022-10-01,21989.981
14,2023-01-01,22112.329
15,2023-04-01,22225.35
16,2023-07-01,22490.692
17,2023-10-01,22679.255


In [377]:
rgdp=rgdp.rename(columns={'DATE':'date'})
rgdp=rgdp.rename(columns={'GDPC1':'real_gdp'})
rgdp['date'] = pd.to_datetime(rgdp['date'])
rgdp['date'] = rgdp['date'].dt.tz_localize('UTC')

In [378]:
rgdp.set_index('date',inplace=True)
rgdp = rgdp.resample('M').last().ffill()
rgdp.reset_index(inplace=True)
rgdp.tail()

Unnamed: 0,date,real_gdp
47,2023-06-30 00:00:00+00:00,22225.35
48,2023-07-31 00:00:00+00:00,22490.692
49,2023-08-31 00:00:00+00:00,22490.692
50,2023-09-30 00:00:00+00:00,22490.692
51,2023-10-31 00:00:00+00:00,22679.255


https://fred.stlouisfed.org/series/GDPC1MDLR

In [379]:
forecast_median_real_gdp_path = "../data/csv/GDPC1MDLR.csv"
forecast_median_real_gdp = pd.read_csv(forecast_median_real_gdp_path)
forecast_median_real_gdp.head()

Unnamed: 0,DATE,GDPC1MDLR
0,2019-09-18,1.9
1,2019-12-11,1.9
2,2020-06-10,1.8
3,2020-09-16,1.9
4,2020-12-16,1.8


In [380]:
forecast_median_real_gdp=forecast_median_real_gdp.rename(columns={'DATE':'date'})
forecast_median_real_gdp=forecast_median_real_gdp.rename(columns={'GDPC1MDLR':'forecast_real_gdp'})
forecast_median_real_gdp['date'] = pd.to_datetime(forecast_median_real_gdp['date'])
forecast_median_real_gdp['date'] = forecast_median_real_gdp['date'].dt.tz_localize('UTC')

In [381]:
forecast_median_real_gdp.set_index('date',inplace=True)
forecast_median_real_gdp = forecast_median_real_gdp.resample('M').last().ffill()
forecast_median_real_gdp.reset_index(inplace=True)
forecast_median_real_gdp.tail()

Unnamed: 0,date,forecast_real_gdp
50,2023-11-30 00:00:00+00:00,1.8
51,2023-12-31 00:00:00+00:00,1.8
52,2024-01-31 00:00:00+00:00,1.8
53,2024-02-29 00:00:00+00:00,1.8
54,2024-03-31 00:00:00+00:00,1.8


https://fred.stlouisfed.org/series/GDP

In [382]:
gdp_path = "../data/csv/GDP.csv"
gdp = pd.read_csv(gdp_path)
gdp.tail()

Unnamed: 0,DATE,GDP
13,2022-10-01,26408.405
14,2023-01-01,26813.601
15,2023-04-01,27063.012
16,2023-07-01,27610.128
17,2023-10-01,27956.998


In [383]:
gdp=gdp.rename(columns={'DATE':'date'})
gdp['date'] = pd.to_datetime(gdp['date'])
gdp['date'] = gdp['date'].dt.tz_localize('UTC')

In [384]:
gdp.set_index('date',inplace=True)
gdp = gdp.resample('M').last().ffill()
gdp.reset_index(inplace=True)
gdp.tail()

Unnamed: 0,date,GDP
47,2023-06-30 00:00:00+00:00,27063.012
48,2023-07-31 00:00:00+00:00,27610.128
49,2023-08-31 00:00:00+00:00,27610.128
50,2023-09-30 00:00:00+00:00,27610.128
51,2023-10-31 00:00:00+00:00,27956.998


https://fred.stlouisfed.org/series/QBPQYTNIYVENTKREV

In [385]:
#in millions

vcr_path = "../data/csv/QBPQYTNIYVENTKREV.csv"
vc_revenue = pd.read_csv(vcr_path)
vc_revenue.tail()

Unnamed: 0,DATE,QBPQYTNIYVENTKREV
13,2022-10-01,32.242
14,2023-01-01,4.647
15,2023-04-01,22.256
16,2023-07-01,14.066
17,2023-10-01,-10.109


In [386]:
vc_revenue=vc_revenue.rename(columns={'DATE':'date'})
vc_revenue=vc_revenue.rename(columns={'QBPQYTNIYVENTKREV':'vc_revenue'})
vc_revenue['date'] = pd.to_datetime(vc_revenue['date'])
vc_revenue['date'] = vc_revenue['date'].dt.tz_localize('UTC')

In [387]:
vc_revenue.set_index('date',inplace=True)
vc_revenue = vc_revenue.resample('M').last().ffill()
vc_revenue.reset_index(inplace=True)
vc_revenue.tail()

Unnamed: 0,date,vc_revenue
47,2023-06-30 00:00:00+00:00,22.256
48,2023-07-31 00:00:00+00:00,14.066
49,2023-08-31 00:00:00+00:00,14.066
50,2023-09-30 00:00:00+00:00,14.066
51,2023-10-31 00:00:00+00:00,-10.109


https://fred.stlouisfed.org/series/RRPTTLD

In [388]:
#in billions

reverse_repo_path = "../data/csv/RRPTTLD.csv"
reverse_repo = pd.read_csv(reverse_repo_path)
reverse_repo.tail(20)

Unnamed: 0,DATE,RRPTTLD
1137,2024-03-12,476.862
1138,2024-03-13,521.738
1139,2024-03-14,483.573
1140,2024-03-15,413.877
1141,2024-03-18,440.596
1142,2024-03-19,446.978
1143,2024-03-20,496.245
1144,2024-03-21,460.631
1145,2024-03-22,478.531
1146,2024-03-25,473.787


In [389]:
reverse_repo['RRPTTLD'] = reverse_repo['RRPTTLD'].replace('.', np.nan).astype(float)

In [390]:
reverse_repo=reverse_repo.rename(columns={'DATE':'date'})
reverse_repo=reverse_repo.rename(columns={'RRPTTLD':'fed_reverse_repo'})
reverse_repo['date'] = pd.to_datetime(reverse_repo['date'])
reverse_repo['date'] = reverse_repo['date'].dt.tz_localize('UTC')

In [391]:
reverse_repo.set_index('date',inplace=True)
reverse_repo = reverse_repo.resample('M').mean()
reverse_repo.reset_index(inplace=True)
reverse_repo.tail(20)

Unnamed: 0,date,fed_reverse_repo
34,2022-09-30 00:00:00+00:00,2257.80429
35,2022-10-31 00:00:00+00:00,2224.45085
36,2022-11-30 00:00:00+00:00,2146.8247
37,2022-12-31 00:00:00+00:00,2184.70019
38,2023-01-31 00:00:00+00:00,2131.08385
39,2023-02-28 00:00:00+00:00,2079.45163
40,2023-03-31 00:00:00+00:00,2185.684
41,2023-04-30 00:00:00+00:00,2268.10384
42,2023-05-31 00:00:00+00:00,2233.55714
43,2023-06-30 00:00:00+00:00,2053.83


https://fred.stlouisfed.org/series/RPTTLD

In [392]:
#multiply by 1 billion for real figure

repo_path = "../data/csv/RPTTLD.csv"
repo = pd.read_csv(repo_path)
repo.head()

Unnamed: 0,DATE,RPTTLD
0,2019-10-31,72.533
1,2019-11-01,104.583
2,2019-11-04,77.583
3,2019-11-05,102.143
4,2019-11-06,62.61


In [393]:
repo['RPTTLD'] = repo['RPTTLD'].replace('.', np.nan).astype(float)

In [394]:
repo=repo.rename(columns={'DATE':'date'})
repo=repo.rename(columns={'RPTTLD':'fed_repo'})
repo['date'] = pd.to_datetime(repo['date'])
repo['date'] = repo['date'].dt.tz_localize('UTC')

In [395]:
repo.head(20)

Unnamed: 0,date,fed_repo
0,2019-10-31 00:00:00+00:00,72.533
1,2019-11-01 00:00:00+00:00,104.583
2,2019-11-04 00:00:00+00:00,77.583
3,2019-11-05 00:00:00+00:00,102.143
4,2019-11-06 00:00:00+00:00,62.61
5,2019-11-07 00:00:00+00:00,115.142
6,2019-11-08 00:00:00+00:00,70.643
7,2019-11-11 00:00:00+00:00,
8,2019-11-12 00:00:00+00:00,111.909
9,2019-11-13 00:00:00+00:00,77.093


In [396]:
repo.set_index('date',inplace=True)
repo = repo.resample('M').mean()
repo.reset_index(inplace=True)
repo.head(20)

Unnamed: 0,date,fed_repo
0,2019-10-31 00:00:00+00:00,72.533
1,2019-11-30 00:00:00+00:00,89.47516
2,2019-12-31 00:00:00+00:00,57.85181
3,2020-01-31 00:00:00+00:00,58.18619
4,2020-02-29 00:00:00+00:00,48.06816
5,2020-03-31 00:00:00+00:00,67.41073
6,2020-04-30 00:00:00+00:00,9.15952
7,2020-05-31 00:00:00+00:00,19.4975
8,2020-06-30 00:00:00+00:00,35.46364
9,2020-07-31 00:00:00+00:00,0.56818


https://fred.stlouisfed.org/series/QBPBSTAS

In [397]:
#in millions, multiply by 1 milion for real figure

fdic_assets_path = "../data/csv/QBPBSTAS.csv"
fdic_assets = pd.read_csv(fdic_assets_path)
fdic_assets.tail()

Unnamed: 0,DATE,QBPBSTAS
13,2022-10-01,23598510.634
14,2023-01-01,23719560.258
15,2023-04-01,23465122.585
16,2023-07-01,23408915.142
17,2023-10-01,23668802.145


In [398]:
fdic_assets=fdic_assets.rename(columns={'DATE':'date'})
fdic_assets=fdic_assets.rename(columns={'QBPBSTAS':'FDIC_Assets'})
fdic_assets['date'] = pd.to_datetime(fdic_assets['date'])
fdic_assets['date'] = fdic_assets['date'].dt.tz_localize('UTC')

In [399]:
fdic_assets.set_index('date',inplace=True)
fdic_assets = fdic_assets.resample('M').last().ffill()
fdic_assets.reset_index(inplace=True)
fdic_assets.head(20)

Unnamed: 0,date,FDIC_Assets
0,2019-07-31 00:00:00+00:00,18481914.694
1,2019-08-31 00:00:00+00:00,18481914.694
2,2019-09-30 00:00:00+00:00,18481914.694
3,2019-10-31 00:00:00+00:00,18645728.253
4,2019-11-30 00:00:00+00:00,18645728.253
5,2019-12-31 00:00:00+00:00,18645728.253
6,2020-01-31 00:00:00+00:00,20255187.954
7,2020-02-29 00:00:00+00:00,20255187.954
8,2020-03-31 00:00:00+00:00,20255187.954
9,2020-04-30 00:00:00+00:00,21139330.995


https://fred.stlouisfed.org/series/QBPBSTLKTL

In [400]:
#in millions, multiply by 1 milion for real figure

fdic_liabilities_path = "../data/csv/QBPBSTLKTL.csv"
fdic_liabilities = pd.read_csv(fdic_liabilities_path)
fdic_liabilities.tail()

Unnamed: 0,DATE,QBPBSTLKTL
13,2022-10-01,21391791.783
14,2023-01-01,21457216.757
15,2023-04-01,21212159.872
16,2023-07-01,21163960.487
17,2023-10-01,21371587.102


In [401]:
fdic_liabilities=fdic_liabilities.rename(columns={'DATE':'date'})
fdic_liabilities=fdic_liabilities.rename(columns={'QBPBSTLKTL':'FDIC_Liabilities'})
fdic_liabilities['date'] = pd.to_datetime(fdic_liabilities['date'])
fdic_liabilities['date'] = fdic_liabilities['date'].dt.tz_localize('UTC')

In [402]:
fdic_liabilities.set_index('date',inplace=True)
fdic_liabilities = fdic_liabilities.resample('M').last().ffill()
fdic_liabilities.reset_index(inplace=True)
fdic_liabilities.head(20)

Unnamed: 0,date,FDIC_Liabilities
0,2019-07-31 00:00:00+00:00,16380533.269
1,2019-08-31 00:00:00+00:00,16380533.269
2,2019-09-30 00:00:00+00:00,16380533.269
3,2019-10-31 00:00:00+00:00,16532063.893
4,2019-11-30 00:00:00+00:00,16532063.893
5,2019-12-31 00:00:00+00:00,16532063.893
6,2020-01-31 00:00:00+00:00,18137301.401
7,2020-02-29 00:00:00+00:00,18137301.401
8,2020-03-31 00:00:00+00:00,18137301.401
9,2020-04-30 00:00:00+00:00,18989844.279


https://fred.stlouisfed.org/series/QBPBSTLKTEQK

In [403]:
#in millions, multiply by 1 milion for real figure

fdic_equity_path = "../data/csv/QBPBSTLKTEQK.csv"
fdic_equity = pd.read_csv(fdic_equity_path)
fdic_equity.tail()

Unnamed: 0,DATE,QBPBSTLKTEQK
13,2022-10-01,2206718.851
14,2023-01-01,2262343.501
15,2023-04-01,2252962.713
16,2023-07-01,2244954.655
17,2023-10-01,2297215.043


In [404]:
fdic_equity=fdic_equity.rename(columns={'DATE':'date'})
fdic_equity=fdic_equity.rename(columns={'QBPBSTLKTEQK':'FDIC_Equity'})
fdic_equity['date'] = pd.to_datetime(fdic_equity['date'])
fdic_equity['date'] = fdic_equity['date'].dt.tz_localize('UTC')

In [405]:
fdic_equity.set_index('date',inplace=True)
fdic_equity = fdic_equity.resample('M').last().ffill()
fdic_equity.reset_index(inplace=True)
fdic_equity.head(20)

Unnamed: 0,date,FDIC_Equity
0,2019-07-31 00:00:00+00:00,2101381.395
1,2019-08-31 00:00:00+00:00,2101381.395
2,2019-09-30 00:00:00+00:00,2101381.395
3,2019-10-31 00:00:00+00:00,2113664.332
4,2019-11-30 00:00:00+00:00,2113664.332
5,2019-12-31 00:00:00+00:00,2113664.332
6,2020-01-31 00:00:00+00:00,2117886.524
7,2020-02-29 00:00:00+00:00,2117886.524
8,2020-03-31 00:00:00+00:00,2117886.524
9,2020-04-30 00:00:00+00:00,2149486.675


In [406]:
eff_rate_path =  "../data/csv/effective_federal_funds_rate.csv"
fed_funds_rate = pd.read_csv(eff_rate_path)

In [407]:
fed_funds_rate.describe()

Unnamed: 0,DATE,EFFR
count,1152,1152.0
unique,1152,34.0
top,2019-11-01,5.33
freq,1,171.0


In [408]:
fed_funds_rate=fed_funds_rate.rename(columns={'DATE':'date'})
fed_funds_rate['date'] = pd.to_datetime(fed_funds_rate['date'])

In [409]:
fed_funds_rate['date'] = fed_funds_rate['date'].dt.tz_localize('UTC')

In [410]:
fed_funds_rate.describe()

Unnamed: 0,date,EFFR
count,1152,1152.0
unique,,34.0
top,,5.33
freq,,171.0
mean,2022-01-15 12:00:00+00:00,
min,2019-11-01 00:00:00+00:00,
25%,2020-12-08 18:00:00+00:00,
50%,2022-01-15 12:00:00+00:00,
75%,2023-02-22 06:00:00+00:00,
max,2024-04-01 00:00:00+00:00,


In [411]:
three_month_tbill_csv.index = three_month_tbill_csv.index.tz_localize('UTC')

In [412]:
three_month_tbill_csv.tail()

Unnamed: 0_level_0,realtime_start,realtime_end,value,3m_tbill
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-11-01 00:00:00+00:00,2024-04-11,2024-04-11,5.27,0.0527
2023-12-01 00:00:00+00:00,2024-04-11,2024-04-11,5.24,0.0524
2024-01-01 00:00:00+00:00,2024-04-11,2024-04-11,5.22,0.0522
2024-02-01 00:00:00+00:00,2024-04-11,2024-04-11,5.24,0.0524
2024-03-01 00:00:00+00:00,2024-04-11,2024-04-11,5.24,0.0524


In [413]:
three_month_tbill_csv = three_month_tbill_csv.rename(columns={'value':'3_m_tbill_yield'})

In [414]:
three_month_tbill_csv.head()

Unnamed: 0_level_0,realtime_start,realtime_end,3_m_tbill_yield,3m_tbill
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-11-01 00:00:00+00:00,2024-04-11,2024-04-11,1.54,0.0154
2019-12-01 00:00:00+00:00,2024-04-11,2024-04-11,1.54,0.0154
2020-01-01 00:00:00+00:00,2024-04-11,2024-04-11,1.52,0.0152
2020-02-01 00:00:00+00:00,2024-04-11,2024-04-11,1.52,0.0152
2020-03-01 00:00:00+00:00,2024-04-11,2024-04-11,0.29,0.0029


In [415]:
print(three_month_tbill_csv.describe())

       3_m_tbill_yield  3m_tbill
count         53.00000  53.00000
mean           2.05226   0.02052
std            2.20409   0.02204
min            0.02000   0.00020
25%            0.09000   0.00090
50%            0.98000   0.00980
75%            4.65000   0.04650
max            5.34000   0.05340


In [416]:
fed_funds_rate['EFFR'] = fed_funds_rate['EFFR'].replace('.', np.nan).astype(float)

In [417]:
fed_funds_rate = fed_funds_rate.rename(columns={'EFFR':'effective_funds_rate'})
fed_funds_rate.set_index('date',inplace=True)
fed_funds_rate = fed_funds_rate.resample('M').mean().ffill()
fed_funds_rate.reset_index(inplace=True)
fed_funds_rate.head(20)

Unnamed: 0,date,effective_funds_rate
0,2019-11-30 00:00:00+00:00,1.55263
1,2019-12-31 00:00:00+00:00,1.55095
2,2020-01-31 00:00:00+00:00,1.5519
3,2020-02-29 00:00:00+00:00,1.58368
4,2020-03-31 00:00:00+00:00,0.62545
5,2020-04-30 00:00:00+00:00,0.04864
6,2020-05-31 00:00:00+00:00,0.05
7,2020-06-30 00:00:00+00:00,0.07682
8,2020-07-31 00:00:00+00:00,0.09348
9,2020-08-31 00:00:00+00:00,0.09476


In [418]:
macro_data = pd.merge_asof(fed_funds_rate.sort_values('date'), three_month_tbill_csv.sort_values('date'), on='date', direction='nearest')


In [419]:
macro_data.head()

Unnamed: 0,date,effective_funds_rate,realtime_start,realtime_end,3_m_tbill_yield,3m_tbill
0,2019-11-30 00:00:00+00:00,1.55263,2024-04-11,2024-04-11,1.54,0.0154
1,2019-12-31 00:00:00+00:00,1.55095,2024-04-11,2024-04-11,1.52,0.0152
2,2020-01-31 00:00:00+00:00,1.5519,2024-04-11,2024-04-11,1.52,0.0152
3,2020-02-29 00:00:00+00:00,1.58368,2024-04-11,2024-04-11,0.29,0.0029
4,2020-03-31 00:00:00+00:00,0.62545,2024-04-11,2024-04-11,0.14,0.0014


macro_data['effective_funds_rate'] = macro_data['effective_funds_rate'].replace('.', np.nan)

# Convert the column to a numeric type (float), necessary for ffill to work after replacing '.' with np.nan
macro_data['effective_funds_rate'] = pd.to_numeric(macro_data['effective_funds_rate'], errors='coerce')

# Now apply front filling
macro_data['effective_funds_rate'].ffill(inplace=True)

# Display the results


In [420]:
print(macro_data['effective_funds_rate'].describe())

count   54.00000
mean     2.04156
std      2.21550
min      0.04864
25%      0.08132
50%      0.97286
75%      4.63110
max      5.33000
Name: effective_funds_rate, dtype: float64


In [421]:
sticky_index.rename(columns={'DATE': 'date', 'CORESTICKM159SFRBATL': 'sticky_cpi'}, inplace=True)

In [422]:
sticky_index.tail()

Unnamed: 0,date,sticky_cpi
47,2023-10-01,4.88293
48,2023-11-01,4.68829
49,2023-12-01,4.5544
50,2024-01-01,4.60392
51,2024-02-01,4.4031


In [423]:
sticky_index['date']= pd.to_datetime(sticky_index['date'])

In [424]:
sticky_index = localize_or_convert(sticky_index, 'date')
sticky_index.set_index('date',inplace=True)
sticky_index = sticky_index.resample('M').last().ffill()
sticky_index.reset_index(inplace=True)
sticky_index.head(20)

Unnamed: 0,date,sticky_cpi
0,2019-11-30 00:00:00+00:00,2.77202
1,2019-12-31 00:00:00+00:00,2.75261
2,2020-01-31 00:00:00+00:00,2.79048
3,2020-02-29 00:00:00+00:00,2.8001
4,2020-03-31 00:00:00+00:00,2.60655
5,2020-04-30 00:00:00+00:00,2.17932
6,2020-05-31 00:00:00+00:00,2.01414
7,2020-06-30 00:00:00+00:00,1.99354
8,2020-07-31 00:00:00+00:00,2.28128
9,2020-08-31 00:00:00+00:00,2.21315


In [425]:
macro_data = macro_data.merge(sticky_index, on=['date'],how='inner')

In [426]:
macro_data

Unnamed: 0,date,effective_funds_rate,realtime_start,realtime_end,3_m_tbill_yield,3m_tbill,sticky_cpi
0,2019-11-30 00:00:00+00:00,1.55263,2024-04-11,2024-04-11,1.54,0.0154,2.77202
1,2019-12-31 00:00:00+00:00,1.55095,2024-04-11,2024-04-11,1.52,0.0152,2.75261
2,2020-01-31 00:00:00+00:00,1.5519,2024-04-11,2024-04-11,1.52,0.0152,2.79048
3,2020-02-29 00:00:00+00:00,1.58368,2024-04-11,2024-04-11,0.29,0.0029,2.8001
4,2020-03-31 00:00:00+00:00,0.62545,2024-04-11,2024-04-11,0.14,0.0014,2.60655
5,2020-04-30 00:00:00+00:00,0.04864,2024-04-11,2024-04-11,0.13,0.0013,2.17932
6,2020-05-31 00:00:00+00:00,0.05,2024-04-11,2024-04-11,0.16,0.0016,2.01414
7,2020-06-30 00:00:00+00:00,0.07682,2024-04-11,2024-04-11,0.13,0.0013,1.99354
8,2020-07-31 00:00:00+00:00,0.09348,2024-04-11,2024-04-11,0.1,0.001,2.28128
9,2020-08-31 00:00:00+00:00,0.09476,2024-04-11,2024-04-11,0.11,0.0011,2.21315


In [427]:
forecast_ffr

Unnamed: 0,date,forecast_fed_funds
0,2019-09-30 00:00:00+00:00,2.5
1,2019-10-31 00:00:00+00:00,2.5
2,2019-11-30 00:00:00+00:00,2.5
3,2019-12-31 00:00:00+00:00,2.5
4,2020-01-31 00:00:00+00:00,2.5
5,2020-02-29 00:00:00+00:00,2.5
6,2020-03-31 00:00:00+00:00,2.5
7,2020-04-30 00:00:00+00:00,2.5
8,2020-05-31 00:00:00+00:00,2.5
9,2020-06-30 00:00:00+00:00,2.5


In [428]:
print(macro_data['date'].dtype)
print(forecast_ffr['date'].dtype)

datetime64[ns, UTC]
datetime64[ns, UTC]


In [429]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), forecast_ffr.sort_values('date'), on='date', direction='nearest')

In [430]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), forecast_pce_inflation.sort_values('date'), on='date', direction='nearest')

In [431]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), m1.sort_values('date'), on='date', direction='nearest')

In [432]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), m1_v.sort_values('date'), on='date', direction='nearest')

In [433]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), m2.sort_values('date'), on='date', direction='nearest')

In [434]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), m2_v.sort_values('date'), on='date', direction='nearest')

In [435]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), rgdp.sort_values('date'), on='date', direction='nearest')

In [436]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), forecast_median_real_gdp.sort_values('date'), on='date', direction='nearest')

In [437]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), gdp.sort_values('date'), on='date', direction='nearest')

In [438]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), vc_revenue.sort_values('date'), on='date', direction='nearest')

In [439]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), reverse_repo.sort_values('date'), on='date', direction='nearest')

In [440]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), repo.sort_values('date'), on='date', direction='nearest')

In [441]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), fdic_assets.sort_values('date'), on='date', direction='nearest')

In [442]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), fdic_liabilities.sort_values('date'), on='date', direction='nearest')

In [443]:
macro_data = pd.merge_asof(macro_data.sort_values('date'), fdic_equity.sort_values('date'), on='date', direction='nearest')

In [444]:
macro_data

Unnamed: 0,date,effective_funds_rate,realtime_start,realtime_end,3_m_tbill_yield,3m_tbill,sticky_cpi,forecast_fed_funds,forecast_pce,M1SL,...,M2V,real_gdp,forecast_real_gdp,GDP,vc_revenue,fed_reverse_repo,fed_repo,FDIC_Assets,FDIC_Liabilities,FDIC_Equity
0,2019-11-30 00:00:00+00:00,1.55263,2024-04-11,2024-04-11,1.54,0.0154,2.77202,2.5,2.0,4008.4,...,1.437,20951.088,1.9,21902.39,42.445,6.57347,89.47516,18645728.253,16532063.893,2113664.332
1,2019-12-31 00:00:00+00:00,1.55095,2024-04-11,2024-04-11,1.52,0.0152,2.75261,2.5,2.0,3977.6,...,1.437,20951.088,1.9,21902.39,42.445,6.0331,57.85181,18645728.253,16532063.893,2113664.332
2,2020-01-31 00:00:00+00:00,1.5519,2024-04-11,2024-04-11,1.52,0.0152,2.79048,2.5,2.0,3979.6,...,1.392,20665.553,1.9,21706.513,-24.894,4.0141,58.18619,20255187.954,18137301.401,2117886.524
3,2020-02-29 00:00:00+00:00,1.58368,2024-04-11,2024-04-11,0.29,0.0029,2.8001,2.5,2.0,4260.9,...,1.392,20665.553,1.9,21706.513,-24.894,2.04474,48.06816,20255187.954,18137301.401,2117886.524
4,2020-03-31 00:00:00+00:00,0.62545,2024-04-11,2024-04-11,0.14,0.0014,2.60655,2.5,2.0,4788.8,...,1.392,20665.553,1.9,21706.513,-24.894,50.4835,67.41073,20255187.954,18137301.401,2117886.524
5,2020-04-30 00:00:00+00:00,0.04864,2024-04-11,2024-04-11,0.13,0.0013,2.17932,2.5,2.0,16245.5,...,1.128,19034.83,1.9,19913.143,45.203,44.6081,9.15952,21139330.995,18989844.279,2149486.675
6,2020-05-31 00:00:00+00:00,0.05,2024-04-11,2024-04-11,0.16,0.0016,2.01414,2.5,2.0,16574.1,...,1.128,19034.83,1.9,19913.143,45.203,2.09845,19.4975,21139330.995,18989844.279,2149486.675
7,2020-06-30 00:00:00+00:00,0.07682,2024-04-11,2024-04-11,0.13,0.0013,1.99354,2.5,2.0,16774.5,...,1.128,19034.83,1.8,19913.143,45.203,0.41336,35.46364,21139330.995,18989844.279,2149486.675
8,2020-07-31 00:00:00+00:00,0.09348,2024-04-11,2024-04-11,0.1,0.001,2.28128,2.5,2.0,16898.8,...,1.175,20511.785,1.8,21647.64,68.878,0.02141,0.56818,21209356.274,19023629.056,2185727.184
9,2020-08-31 00:00:00+00:00,0.09476,2024-04-11,2024-04-11,0.11,0.0011,2.21315,2.5,2.0,17171.2,...,1.175,20511.785,1.8,21647.64,68.878,0.02833,0.0,21209356.274,19023629.056,2185727.184


In [445]:
macro_data = macro_data.drop(columns=['3m_tbill','realtime_start','realtime_end'])

In [446]:
macro_data.set_index('date',inplace=True)

In [447]:
macro_data.rename_axis('day',inplace=True)

In [448]:
macro_data

Unnamed: 0_level_0,effective_funds_rate,3_m_tbill_yield,sticky_cpi,forecast_fed_funds,forecast_pce,M1SL,M1V,WM2NS,M2V,real_gdp,forecast_real_gdp,GDP,vc_revenue,fed_reverse_repo,fed_repo,FDIC_Assets,FDIC_Liabilities,FDIC_Equity
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2019-11-30 00:00:00+00:00,1.55263,1.54,2.77202,2.5,2.0,4008.4,5.525,15238.725,1.437,20951.088,1.9,21902.39,42.445,6.57347,89.47516,18645728.253,16532063.893,2113664.332
2019-12-31 00:00:00+00:00,1.55095,1.52,2.75261,2.5,2.0,3977.6,5.525,15396.9,1.437,20951.088,1.9,21902.39,42.445,6.0331,57.85181,18645728.253,16532063.893,2113664.332
2020-01-31 00:00:00+00:00,1.5519,1.52,2.79048,2.5,2.0,3979.6,5.33,15409.6,1.392,20665.553,1.9,21706.513,-24.894,4.0141,58.18619,20255187.954,18137301.401,2117886.524
2020-02-29 00:00:00+00:00,1.58368,0.29,2.8001,2.5,2.0,4260.9,5.33,15360.25,1.392,20665.553,1.9,21706.513,-24.894,2.04474,48.06816,20255187.954,18137301.401,2117886.524
2020-03-31 00:00:00+00:00,0.62545,0.14,2.60655,2.5,2.0,4788.8,5.33,15947.68,1.392,20665.553,1.9,21706.513,-24.894,50.4835,67.41073,20255187.954,18137301.401,2117886.524
2020-04-30 00:00:00+00:00,0.04864,0.13,2.17932,2.5,2.0,16245.5,1.588,17047.925,1.128,19034.83,1.9,19913.143,45.203,44.6081,9.15952,21139330.995,18989844.279,2149486.675
2020-05-31 00:00:00+00:00,0.05,0.16,2.01414,2.5,2.0,16574.1,1.588,17698.9,1.128,19034.83,1.9,19913.143,45.203,2.09845,19.4975,21139330.995,18989844.279,2149486.675
2020-06-30 00:00:00+00:00,0.07682,0.13,1.99354,2.5,2.0,16774.5,1.588,18060.2,1.128,19034.83,1.8,19913.143,45.203,0.41336,35.46364,21139330.995,18989844.279,2149486.675
2020-07-31 00:00:00+00:00,0.09348,0.1,2.28128,2.5,2.0,16898.8,1.277,18257.25,1.175,20511.785,1.8,21647.64,68.878,0.02141,0.56818,21209356.274,19023629.056,2185727.184
2020-08-31 00:00:00+00:00,0.09476,0.11,2.21315,2.5,2.0,17171.2,1.277,18299.62,1.175,20511.785,1.8,21647.64,68.878,0.02833,0.0,21209356.274,19023629.056,2185727.184


In [449]:
crypto_market_data = crypto_market_data.sort_values(by='day')
macro_data = macro_data.sort_values(by='day')



In [450]:
crypto_market_data.head()

Unnamed: 0_level_0,dai_market_Close,dai_market_Volume,dai_deviation,dai_abs_deviation,eth_market_Close,eth_market_Volume,btc_market_Close,btc_market_Volume,mkr_market_Close,mkr_market_Volume,...,s&p_500_market_Close_30d_ma,s&p_500_market_Volume_daily_returns,s&p_500_market_Volume_7d_ma,s&p_500_market_Volume_30d_ma,vix_market_Close_daily_returns,vix_market_Close_7d_ma,vix_market_Close_30d_ma,vix_market_Volume_daily_returns,vix_market_Volume_7d_ma,vix_market_Volume_30d_ma
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-11-22 00:00:00+00:00,0.9926,2104541,-0.0074,0.0074,150.26817,12020749863,7296.57764,34242315785,519.70923,5702366,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-25 00:00:00+00:00,1.00702,958777,0.00702,0.00702,146.47653,10962753356,7146.13379,42685231262,528.87903,6497521,...,0.0,0.08625,0.0,0.0,-0.03809,0.0,0.0,0.0,0.0,0.0
2019-11-26 00:00:00+00:00,1.0061,217385,0.0061,0.0061,148.96507,7648516297,7218.37109,21129505542,527.03693,5182243,...,0.0,0.30906,0.0,0.0,-0.0278,0.0,0.0,0.0,0.0,0.0
2019-11-27 00:00:00+00:00,0.99703,478836,-0.00297,0.00297,153.01056,8778095308,7531.66357,23991412764,549.74243,4793026,...,0.0,-0.34018,0.0,0.0,0.0182,0.0,0.0,0.0,0.0,0.0
2019-11-29 00:00:00+00:00,1.00476,1517444,0.00476,0.00476,155.30415,7503898278,7761.24365,19709695456,549.65155,4784047,...,0.0,-0.42565,0.0,0.0,0.07404,0.0,0.0,0.0,0.0,0.0


In [451]:
macro_data.head()

Unnamed: 0_level_0,effective_funds_rate,3_m_tbill_yield,sticky_cpi,forecast_fed_funds,forecast_pce,M1SL,M1V,WM2NS,M2V,real_gdp,forecast_real_gdp,GDP,vc_revenue,fed_reverse_repo,fed_repo,FDIC_Assets,FDIC_Liabilities,FDIC_Equity
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2019-11-30 00:00:00+00:00,1.55263,1.54,2.77202,2.5,2.0,4008.4,5.525,15238.725,1.437,20951.088,1.9,21902.39,42.445,6.57347,89.47516,18645728.253,16532063.893,2113664.332
2019-12-31 00:00:00+00:00,1.55095,1.52,2.75261,2.5,2.0,3977.6,5.525,15396.9,1.437,20951.088,1.9,21902.39,42.445,6.0331,57.85181,18645728.253,16532063.893,2113664.332
2020-01-31 00:00:00+00:00,1.5519,1.52,2.79048,2.5,2.0,3979.6,5.33,15409.6,1.392,20665.553,1.9,21706.513,-24.894,4.0141,58.18619,20255187.954,18137301.401,2117886.524
2020-02-29 00:00:00+00:00,1.58368,0.29,2.8001,2.5,2.0,4260.9,5.33,15360.25,1.392,20665.553,1.9,21706.513,-24.894,2.04474,48.06816,20255187.954,18137301.401,2117886.524
2020-03-31 00:00:00+00:00,0.62545,0.14,2.60655,2.5,2.0,4788.8,5.33,15947.68,1.392,20665.553,1.9,21706.513,-24.894,50.4835,67.41073,20255187.954,18137301.401,2117886.524


In [452]:
macro_and_crypto = pd.merge_asof(crypto_market_data, macro_data, on='day')
macro_and_crypto

Unnamed: 0,day,dai_market_Close,dai_market_Volume,dai_deviation,dai_abs_deviation,eth_market_Close,eth_market_Volume,btc_market_Close,btc_market_Volume,mkr_market_Close,...,M2V,real_gdp,forecast_real_gdp,GDP,vc_revenue,fed_reverse_repo,fed_repo,FDIC_Assets,FDIC_Liabilities,FDIC_Equity
0,2019-11-22 00:00:00+00:00,0.99260,2104541,-0.00740,0.00740,150.26817,12020749863,7296.57764,34242315785,519.70923,...,,,,,,,,,,
1,2019-11-25 00:00:00+00:00,1.00702,958777,0.00702,0.00702,146.47653,10962753356,7146.13379,42685231262,528.87903,...,,,,,,,,,,
2,2019-11-26 00:00:00+00:00,1.00610,217385,0.00610,0.00610,148.96507,7648516297,7218.37109,21129505542,527.03693,...,,,,,,,,,,
3,2019-11-27 00:00:00+00:00,0.99703,478836,-0.00297,0.00297,153.01056,8778095308,7531.66357,23991412764,549.74243,...,,,,,,,,,,
4,2019-11-29 00:00:00+00:00,1.00476,1517444,0.00476,0.00476,155.30415,7503898278,7761.24365,19709695456,549.65155,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1097,2024-04-05 00:00:00+00:00,1.00004,776652924,0.00004,0.00004,3318.88525,15214447092,67837.64062,33748230056,3682.49951,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300
1098,2024-04-08 00:00:00+00:00,0.99998,528959914,-0.00002,0.00002,3695.29272,19055143129,71631.35938,37261432669,3737.75903,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300
1099,2024-04-09 00:00:00+00:00,0.99990,427618489,-0.00010,0.00010,3505.16333,18279773833,69139.01562,36426900409,3394.29321,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300
1100,2024-04-10 00:00:00+00:00,1.00011,350570100,0.00011,0.00011,3543.73706,16872482726,70587.88281,38318601774,3338.57300,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300


In [453]:

columns_to_backfill = ['effective_funds_rate', '3_m_tbill_yield', 'sticky_cpi', 'forecast_fed_funds', 'forecast_pce', 'M1SL', 'M1V', 'WM2NS', 'M2V', 'real_gdp', 'forecast_real_gdp', 'GDP', 'vc_revenue', 'fed_reverse_repo', 'fed_repo', 'FDIC_Assets', 'FDIC_Liabilities', 'FDIC_Equity']
macro_and_crypto[columns_to_backfill] = macro_and_crypto[columns_to_backfill].fillna(method='bfill')


  macro_and_crypto[columns_to_backfill] = macro_and_crypto[columns_to_backfill].fillna(method='bfill')


In [454]:
macro_and_crypto

Unnamed: 0,day,dai_market_Close,dai_market_Volume,dai_deviation,dai_abs_deviation,eth_market_Close,eth_market_Volume,btc_market_Close,btc_market_Volume,mkr_market_Close,...,M2V,real_gdp,forecast_real_gdp,GDP,vc_revenue,fed_reverse_repo,fed_repo,FDIC_Assets,FDIC_Liabilities,FDIC_Equity
0,2019-11-22 00:00:00+00:00,0.99260,2104541,-0.00740,0.00740,150.26817,12020749863,7296.57764,34242315785,519.70923,...,1.43700,20951.08800,1.90000,21902.39000,42.44500,6.57347,89.47516,18645728.25300,16532063.89300,2113664.33200
1,2019-11-25 00:00:00+00:00,1.00702,958777,0.00702,0.00702,146.47653,10962753356,7146.13379,42685231262,528.87903,...,1.43700,20951.08800,1.90000,21902.39000,42.44500,6.57347,89.47516,18645728.25300,16532063.89300,2113664.33200
2,2019-11-26 00:00:00+00:00,1.00610,217385,0.00610,0.00610,148.96507,7648516297,7218.37109,21129505542,527.03693,...,1.43700,20951.08800,1.90000,21902.39000,42.44500,6.57347,89.47516,18645728.25300,16532063.89300,2113664.33200
3,2019-11-27 00:00:00+00:00,0.99703,478836,-0.00297,0.00297,153.01056,8778095308,7531.66357,23991412764,549.74243,...,1.43700,20951.08800,1.90000,21902.39000,42.44500,6.57347,89.47516,18645728.25300,16532063.89300,2113664.33200
4,2019-11-29 00:00:00+00:00,1.00476,1517444,0.00476,0.00476,155.30415,7503898278,7761.24365,19709695456,549.65155,...,1.43700,20951.08800,1.90000,21902.39000,42.44500,6.57347,89.47516,18645728.25300,16532063.89300,2113664.33200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1097,2024-04-05 00:00:00+00:00,1.00004,776652924,0.00004,0.00004,3318.88525,15214447092,67837.64062,33748230056,3682.49951,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300
1098,2024-04-08 00:00:00+00:00,0.99998,528959914,-0.00002,0.00002,3695.29272,19055143129,71631.35938,37261432669,3737.75903,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300
1099,2024-04-09 00:00:00+00:00,0.99990,427618489,-0.00010,0.00010,3505.16333,18279773833,69139.01562,36426900409,3394.29321,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300
1100,2024-04-10 00:00:00+00:00,1.00011,350570100,0.00011,0.00011,3543.73706,16872482726,70587.88281,38318601774,3338.57300,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300


In [455]:
macro_and_crypto.shape[0]

1102

In [456]:
macro_and_crypto = localize_or_convert(macro_and_crypto, 'day')

In [457]:
total_vault_data = localize_or_convert(total_vault_data, 'day')

In [458]:
total_vault_data[['dai_maturity_outflow_surplus_buffer_1-block','dai_maturity_outflow_surplus_buffer_1-day','dai_maturity_outflow_surplus_buffer_1-month','dai_maturity_outflow_surplus_buffer_1-week','dai_maturity_outflow_surplus_buffer_3-months']]








Unnamed: 0,dai_maturity_outflow_surplus_buffer_1-block,dai_maturity_outflow_surplus_buffer_1-day,dai_maturity_outflow_surplus_buffer_1-month,dai_maturity_outflow_surplus_buffer_1-week,dai_maturity_outflow_surplus_buffer_3-months
0,0.00000,0.00000,0.00000,0.00000,0.00000
1,0.00000,0.00000,0.00000,0.00000,0.00000
2,0.00000,0.00000,0.00000,0.00000,0.00000
3,0.00000,0.00000,0.00000,0.00000,0.00000
4,0.00000,0.00000,0.00000,0.00000,0.00000
...,...,...,...,...,...
1558,0.00000,0.00000,0.00000,0.00000,0.00000
1559,0.00000,0.00000,0.00000,0.00000,0.00000
1560,0.00000,0.00000,0.00000,0.00000,0.00000
1561,0.00000,0.00000,0.00000,0.00000,0.00000


1 - PnL                                            nan
2 - Assets                                         nan
2.8 - Operating Reserves                           nan
3 - Liabilities & Equity                           nan
3.8 - Equity (Operating Reserves)                  nan

In [459]:
total_vault_data[['day','eth_a_vault_status']]

Unnamed: 0,day,eth_a_vault_status
0,2019-11-13 00:00:00+00:00,Closed
1,2019-11-15 00:00:00+00:00,Closed
2,2019-11-16 00:00:00+00:00,Closed
3,2019-11-17 00:00:00+00:00,Closed
4,2019-11-18 00:00:00+00:00,Open
...,...,...
1558,2024-03-16 00:00:00+00:00,Open
1559,2024-03-17 00:00:00+00:00,Open
1560,2024-03-18 00:00:00+00:00,Open
1561,2024-03-19 00:00:00+00:00,Open


In [460]:
dataset = total_vault_data.merge(macro_and_crypto, on=['day'],how='inner')

In [461]:
dataset

Unnamed: 0,day,eth_a_vault_cumulative_collateral,eth_a_vault_safety_price,eth_a_vault_usd_safety_value,eth_a_vault_dai_ceiling,eth_a_vault_dai_floor,eth_a_vault_safety_collateral_ratio,eth_a_vault_market_price,eth_a_vault_debt_balance,eth_a_vault_liquidation_penalty,...,M2V,real_gdp,forecast_real_gdp,GDP,vc_revenue,fed_reverse_repo,fed_repo,FDIC_Assets,FDIC_Liabilities,FDIC_Equity
0,2019-11-22 00:00:00+00:00,304390.45197,101.77017,30977868.42917,50000000.00000,20.00000,2.02427,152.65526,15303203.37321,0.00000,...,1.43700,20951.08800,1.90000,21902.39000,42.44500,6.57347,89.47516,18645728.25300,16532063.89300,2113664.33200
1,2019-11-25 00:00:00+00:00,381322.62944,98.11667,37414105.32491,50000000.00000,20.00000,1.92066,147.17500,19603299.56136,0.00000,...,1.43700,20951.08800,1.90000,21902.39000,42.44500,6.57347,89.47516,18645728.25300,16532063.89300,2113664.33200
2,2019-11-26 00:00:00+00:00,386677.72112,98.42000,38056821.31305,50000000.00000,20.00000,1.93341,147.63000,19684172.31099,0.00000,...,1.43700,20951.08800,1.90000,21902.39000,42.44500,6.57347,89.47516,18645728.25300,16532063.89300,2113664.33200
3,2019-11-27 00:00:00+00:00,389319.70935,102.50667,39907865.67277,50000000.00000,20.00000,2.02380,153.76000,19719540.20200,0.00000,...,1.43700,20951.08800,1.90000,21902.39000,42.44500,6.57347,89.47516,18645728.25300,16532063.89300,2113664.33200
4,2019-11-29 00:00:00+00:00,429288.08398,101.76333,43685786.38624,50000000.00000,20.00000,1.96933,152.64500,22183067.78010,0.00000,...,1.43700,20951.08800,1.90000,21902.39000,42.44500,6.57347,89.47516,18645728.25300,16532063.89300,2113664.33200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1070,2024-03-14 00:00:00+00:00,444571.30869,2621.35838,1165380727.56247,428458674.37487,7500.00000,4.71096,3800.96966,247376629.26371,0.13000,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300
1071,2024-03-15 00:00:00+00:00,444754.39776,2530.00000,1125228626.34450,428458674.37487,7500.00000,4.54124,3668.50000,247780290.30156,0.13000,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300
1072,2024-03-18 00:00:00+00:00,443694.77113,2439.82534,1082537747.08583,428458674.37487,7500.00000,4.37918,3537.74675,247201188.45861,0.13000,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300
1073,2024-03-19 00:00:00+00:00,442822.49879,2262.58621,1001924077.87601,428458674.37487,7500.00000,4.08200,3280.75000,245449240.76560,0.13000,...,1.34800,22679.25500,1.80000,27956.99800,-10.10900,539.55665,0.00625,23668802.14500,21371587.10200,2297215.04300


In [462]:
macro_and_crypto['day']

0      2019-11-22 00:00:00+00:00
1      2019-11-25 00:00:00+00:00
2      2019-11-26 00:00:00+00:00
3      2019-11-27 00:00:00+00:00
4      2019-11-29 00:00:00+00:00
                  ...           
1097   2024-04-05 00:00:00+00:00
1098   2024-04-08 00:00:00+00:00
1099   2024-04-09 00:00:00+00:00
1100   2024-04-10 00:00:00+00:00
1101   2024-04-11 00:00:00+00:00
Name: day, Length: 1102, dtype: datetime64[ns, UTC]

In [463]:
dataset[['day','eth_a_vault_status']]

Unnamed: 0,day,eth_a_vault_status
0,2019-11-22 00:00:00+00:00,Open
1,2019-11-25 00:00:00+00:00,Open
2,2019-11-26 00:00:00+00:00,Open
3,2019-11-27 00:00:00+00:00,Open
4,2019-11-29 00:00:00+00:00,Open
...,...,...
1070,2024-03-14 00:00:00+00:00,Open
1071,2024-03-15 00:00:00+00:00,Open
1072,2024-03-18 00:00:00+00:00,Open
1073,2024-03-19 00:00:00+00:00,Open


In [464]:
dataset['real_gdp'].isna().sum()

0

In [465]:
nan_rows = dataset[dataset.isna().any(axis=1)]
print(nan_rows)


                          day  eth_a_vault_cumulative_collateral  \
0   2019-11-22 00:00:00+00:00                      304,390.45197   
1   2019-11-25 00:00:00+00:00                      381,322.62944   
2   2019-11-26 00:00:00+00:00                      386,677.72112   
3   2019-11-27 00:00:00+00:00                      389,319.70935   
4   2019-11-29 00:00:00+00:00                      429,288.08398   
..                        ...                                ...   
588 2022-03-25 00:00:00+00:00                    1,764,503.57562   
589 2022-03-28 00:00:00+00:00                    1,763,916.77464   
590 2022-03-29 00:00:00+00:00                    1,765,298.96415   
591 2022-03-30 00:00:00+00:00                    1,762,742.40476   
592 2022-03-31 00:00:00+00:00                    1,412,614.99779   

     eth_a_vault_safety_price  eth_a_vault_usd_safety_value  \
0                   101.77017              30,977,868.42917   
1                    98.11667              37,414,105.324

In [466]:
# Checking for columns with NaN values and their count of NaNs
nan_columns = dataset.isna().sum()
print(nan_columns[nan_columns > 0])


Total Revenues_pct_chg                           5
Total Revenues_3m_rolling_avg_pct_chg           47
Total Revenues_3m_volatility_pct_chg            47
Total Revenues_3m_rolling_avg_pct_chg_lag_1     66
Total Revenues_3m_volatility_pct_chg_lag_1      66
                                              ... 
profit_margin_Lag_12m                          237
ROA_Lag_12m                                    237
ROE_Lag_12m                                    237
debt_to_equity_Lag_12m                         237
debt_ratio_Lag_12m                             237
Length: 177, dtype: int64


In [467]:
dataset['vix_market_Close']

0      12.34000
1      11.87000
2      11.54000
3      11.75000
4      12.62000
         ...   
1070   14.40000
1071   14.41000
1072   14.33000
1073   13.82000
1074   13.04000
Name: vix_market_Close, Length: 1075, dtype: float64

In [468]:
# Temporarily adjust the display settings to show more rows
with pd.option_context('display.max_rows', None):  # None means show all rows
    # Checking for columns with NaN values and their count of NaNs
    nan_columns = dataset.isna().sum()
    print(nan_columns[nan_columns > 0])


Total Revenues_pct_chg                              5
Total Revenues_3m_rolling_avg_pct_chg              47
Total Revenues_3m_volatility_pct_chg               47
Total Revenues_3m_rolling_avg_pct_chg_lag_1        66
Total Revenues_3m_volatility_pct_chg_lag_1         66
Total Revenues_3m_rolling_avg_pct_chg_lag_2        88
Total Revenues_3m_volatility_pct_chg_lag_2         88
Total Revenues_3m_rolling_avg_pct_chg_lag_3       109
Total Revenues_3m_volatility_pct_chg_lag_3        109
Total Revenues_3m_rolling_avg_pct_chg_lag_4       129
Total Revenues_3m_volatility_pct_chg_lag_4        129
Total Revenues_3m_rolling_avg_pct_chg_lag_5       151
Total Revenues_3m_volatility_pct_chg_lag_5        151
Total Revenues_3m_rolling_avg_pct_chg_lag_6       173
Total Revenues_3m_volatility_pct_chg_lag_6        173
Total Revenues_3m_rolling_avg_pct_chg_lag_7       194
Total Revenues_3m_volatility_pct_chg_lag_7        194
Total Revenues_3m_rolling_avg_pct_chg_lag_8       215
Total Revenues_3m_volatility

In [469]:
dataset_no_nan = dataset.fillna(0)

In [470]:
dataset_no_nan.isna().any().sum()

0

In [471]:
print(list(dataset_no_nan.columns))

['day', 'eth_a_vault_cumulative_collateral', 'eth_a_vault_safety_price', 'eth_a_vault_usd_safety_value', 'eth_a_vault_dai_ceiling', 'eth_a_vault_dai_floor', 'eth_a_vault_safety_collateral_ratio', 'eth_a_vault_market_price', 'eth_a_vault_debt_balance', 'eth_a_vault_liquidation_penalty', 'eth_a_vault_liquidation_ratio', 'eth_a_vault_daily_revenues', 'eth_a_vault_annualized stability fee', 'eth_a_vault_annualized_revenues', 'eth_a_vault_dart', 'eth_a_vault_total_ann_revenues', 'eth_a_vault_prev_dai_ceiling', 'eth_a_vault_status', 'eth_a_vault_market_collateral_ratio', 'eth_a_vault_collateral_usd', 'eth_a_vault_hypothetical_dai_ceiling', 'eth_a_vault_market_price_7d_ma', 'eth_a_vault_collateral_usd_7d_ma', 'eth_a_vault_debt_balance_7d_ma', 'eth_a_vault_safety_collateral_ratio_7d_ma', 'eth_a_vault_market_collateral_ratio_7d_ma', 'eth_a_vault_daily_revenues_7d_ma', 'eth_a_vault_market_price_30d_ma', 'eth_a_vault_collateral_usd_30d_ma', 'eth_a_vault_debt_balance_30d_ma', 'eth_a_vault_cumulati

In [472]:
dataset_no_nan[['vix_market_Close','forecast_pce']]

Unnamed: 0,vix_market_Close,forecast_pce
0,12.34000,2.00000
1,11.87000,2.00000
2,11.54000,2.00000
3,11.75000,2.00000
4,12.62000,2.00000
...,...,...
1070,14.40000,2.00000
1071,14.41000,2.00000
1072,14.33000,2.00000
1073,13.82000,2.00000


## Running Correlations for Feature Engineering

In [473]:
numeric_dataset = dataset_no_nan.select_dtypes(include=[np.number])
correlations = numeric_dataset.corr()


In [474]:
# Assuming 'df' is your DataFrame
non_numeric_columns = dataset_no_nan.select_dtypes(exclude=[np.number])

# This will show you the columns that do not contain numeric data
print(non_numeric_columns.columns)


Index(['day', 'eth_a_vault_status'], dtype='object')


In [475]:
correlations = numeric_dataset.corr()

collateral_target_correlations = correlations['eth_a_vault_cumulative_collateral'].drop('eth_a_vault_cumulative_collateral')
collateral_sorted_correlations = collateral_target_correlations.sort_values(ascending=False)

# Display the sorted correlations
print(collateral_sorted_correlations)

eth_a_vault_cumulative_collateral_30d_ma   0.98334
eth_a_vault_cumulative_collateral_lag30    0.95279
cumulative_expenses                        0.82554
debt_ratio                                 0.75063
eth_a_vault_liquidation_ratio              0.73977
                                             ...  
vix_market_Volume                              NaN
vix_market_Volume_daily_returns                NaN
vix_market_Volume_7d_ma                        NaN
vix_market_Volume_30d_ma                       NaN
forecast_pce                                   NaN
Name: eth_a_vault_cumulative_collateral, Length: 419, dtype: float64


In [476]:
for index, value in collateral_sorted_correlations.items():
    print(f"{index:50} {value}")

eth_a_vault_cumulative_collateral_30d_ma           0.9833440157967134
eth_a_vault_cumulative_collateral_lag30            0.9527888166785834
cumulative_expenses                                0.8255383774250583
debt_ratio                                         0.7506274206800344
eth_a_vault_liquidation_ratio                      0.7397748992760704
b_s_Equity_7d_rolling_avg                          0.6859650223559908
b_s_Equity                                         0.6817006085867908
b_s_DAI_30d_rolling_avg_pct_chg                    0.6242305573457703
b_s_Equity_30d_rolling_avg_pct_chg                 0.5849728163851631
vc_revenue                                         0.5661777165180555
Total_Expenses_Lag_10m                             0.5584125805214935
btc_market_Volume_30d_ma                           0.5519526889602419
Total_Expenses_Lag_11m                             0.5504209328565566
eth_market_Volume_30d_ma                           0.5440358195667231
Total_Expenses_Lag_3

In [477]:
# Select correlations of all features with the target variable, excluding the target variable itself
etha_target_correlations = correlations['eth_a_vault_liquidation_ratio'].drop('eth_a_vault_liquidation_ratio')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
etha_sorted_correlations = etha_target_correlations.sort_values(ascending=False)

for index, value in etha_sorted_correlations.items():
    print(f"{index:50} {value}")

b_s_Equity_7d_rolling_avg                          0.8999225749616835
b_s_Equity                                         0.8981168723059837
debt_ratio                                         0.7746475568463014
cumulative_expenses                                0.7535293864352546
b_s_DAI_7d_rolling_avg                             0.752093813442488
b_s_DAI                                            0.7468395870760071
eth_a_vault_cumulative_collateral                  0.7397748992760704
eth_a_vault_cumulative_collateral_30d_ma           0.6959764454618451
eth_a_vault_cumulative_collateral_lag30            0.6554975259497567
dai_abs_deviation_30d_ma                           0.6039966189335647
1.6 - Workforce Expenses                           0.6018518622945747
forecast_real_gdp                                  0.5949394801522802
dai_deviation_30d_ma                               0.5912369967405803
Total_Expenses_Lag_2m                              0.5859644430804001
Total_Expenses_Lag_1m

In [478]:
# Select correlations of all features with the target variable, excluding the target variable itself
dsr_target_correlations = correlations['dsr_rate'].drop('dsr_rate')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
dsr_sorted_correlations = dsr_target_correlations.sort_values(ascending=False)

for index, value in dsr_sorted_correlations.items():
    print(f"{index:50} {value}")

dsr_interest                                       0.9588603699256447
dai_percent_in_dsr                                 0.8924820310766616
dsr_balance                                        0.8748076026498743
where_is_dai_Dai Savings                           0.8569946165657507
b_s_Real-World_Assets_7d_rolling_avg               0.8183091693644104
b_s_Real-World Assets                              0.8124818218866572
psm_lifetime_turnover                              0.7641068458489418
effective_funds_rate                               0.7096588077394401
1.1 - Lending Revenues                             0.696717144425878
cumulative_revenues                                0.6665760701825769
3_m_tbill_yield                                    0.6583879894993513
GDP                                                0.5917471250728279
eth_a_vault_annualized stability fee               0.565273791624572
dai_total_balance                                  0.5605379126342891
Total Revenues        

In [479]:
# Select correlations of all features with the target variable, excluding the target variable itself
stability_fee_target_correlations = correlations['eth_a_vault_annualized stability fee'].drop('eth_a_vault_annualized stability fee')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
stability_fee_target_correlations = stability_fee_target_correlations.sort_values(ascending=False)

for index, value in stability_fee_target_correlations.items():
    print(f"{index:50} {value}")

eth_a_vault_annualized stability fee_lag30         0.7156749601828628
dsr_interest                                       0.5656038578353675
dsr_rate                                           0.565273791624572
1.1 - Lending Revenues                             0.5550957637337163
eth_a_vault_annualized stability fee_90d_ma        0.5476215893216465
Net_Income_Lag_1m                                  0.5200907894255853
Total_Revenues_Lag_1m                              0.5111305163740645
Total Revenues                                     0.510662238896403
eth_a_vault_total_ann_revenues                     0.4938933253637955
dai_percent_in_dsr                                 0.47832371522126577
1.9 - Net Income                                   0.4626985749379944
dsr_balance                                        0.4587185578668981
where_is_dai_Dai Savings                           0.4352359047507275
M2V                                                0.4325371881602999
btc_market_Close     

In [480]:
# Select correlations of all features with the target variable, excluding the target variable itself
safety_ratio_target_correlations = correlations['eth_a_vault_safety_collateral_ratio'].drop('eth_a_vault_safety_collateral_ratio')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
safety_ratio_target_correlations = safety_ratio_target_correlations.sort_values(ascending=False)

for index, value in safety_ratio_target_correlations.items():
    print(f"{index:50} {value}")

eth_a_vault_market_collateral_ratio                0.996855656878879
eth_a_vault_safety_collateral_ratio_7d_ma          0.9664757562443748
eth_a_vault_market_collateral_ratio_7d_ma          0.9637440307954425
eth_a_vault_safety_collateral_ratio_30d_ma         0.8232025741815423
eth_a_vault_market_collateral_ratio_30d_ma         0.8137269353375636
psm_lifetime_turnover                              0.6504949210007218
eth_a_vault_safety_collateral_ratio_lag30          0.6281250897667459
cumulative_revenues                                0.6280972982700302
eth_a_vault_market_collateral_ratio_lag30          0.6113503147812341
3_m_tbill_yield                                    0.6096700372069486
cumulative_net_income                              0.5791173818110467
effective_funds_rate                               0.5770878611922756
GDP                                                0.5597046871096858
dai_total_balance                                  0.5429694793551214
dai_percent_out_dsr  

In [481]:
# Select correlations of all features with the target variable, excluding the target variable itself
dai_target_correlations = correlations['dai_total_balance'].drop('dai_total_balance')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
dai_sorted_correlations = dai_target_correlations.sort_values(ascending=False)

for index, value in dai_sorted_correlations.items():
    print(f"{index:50} {value}")

dai_circulating                                    0.9832947497855676
dai_percent_out_dsr                                0.9796657732097308
3_m_tbill_yield                                    0.9288559605357046
effective_funds_rate                               0.9065294302806478
psm_lifetime_turnover                              0.8848128123613063
sticky_cpi                                         0.8054162304213114
GDP                                                0.7933718685984178
cumulative_revenues                                0.770341850079898
b_s_Real-World Assets                              0.7380559621284906
b_s_Real-World_Assets_7d_rolling_avg               0.7349731497366582
b_s_Others_assets_7d_rolling_avg                   0.7004108161417921
b_s_Others assets                                  0.6955499690070556
cumulative_net_income                              0.6939731861762163
real_gdp                                           0.6678077426875352
psm_lifetime_fees    

In [482]:
# Select correlations of all features with the target variable, excluding the target variable itself
sb_target_correlations = correlations['daily_surplus_buffer'].drop('daily_surplus_buffer')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
sb_sorted_correlations = sb_target_correlations.sort_values(ascending=False)

for index, value in sb_sorted_correlations.items():
    print(f"{index:50} {value}")

dai_maturity_outflow_surplus_buffer_1-year         0.9958293578505897
3.7 - Equity (Surplus Buffer)                      0.9890519807802862
where_is_dai_EOA                                   0.9737801210380893
psm_lifetime_fees                                  0.9414697413547919
fed_reverse_repo                                   0.9402820722852194
eth_a_vault_dai_floor                              0.9231068171829071
dai_maturity_outflow_1-year                        0.9120407302117461
dai_maturity_outflow_dai_only_1-year               0.9091503707615295
WM2NS                                              0.8829867314509023
FDIC_Liabilities                                   0.872749005977216
cumulative_net_income                              0.8683827181787314
FDIC_Assets                                        0.8666424663105532
3.9 - Total Liabilities & Equity                   0.8612170393118931
2.9 - Total Assets                                 0.8612170393118928
3.1 - Liabilities (DA

In [483]:

# Select correlations of all features with the target variable, excluding the target variable itself
ni_target_correlations = correlations['1.9 - Net Income'].drop('1.9 - Net Income')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
ni_sorted_correlations = ni_target_correlations.sort_values(ascending=False)

for index, value in ni_sorted_correlations.items():
    print(f"{index:50} {value}")

Total Revenues                                     0.9133062344059434
1.1 - Lending Revenues                             0.8251599110222695
eth_a_vault_total_ann_revenues                     0.7269838621349781
btc_market_Close_30d_ma                            0.646814048280665
mkr_market_Close_7d_ma                             0.6272555840738838
mkr_market_Close                                   0.6174461073018475
Total_Revenues_Lag_1m                              0.6164514922108612
btc_market_Close_7d_ma                             0.6138022369088303
eth_a_vault_annualized_revenues                    0.613176294836482
eth_a_vault_daily_revenues                         0.6115944259494119
mkr_market_Close_30d_ma                            0.6066899138897365
btc_market_Close                                   0.6028894954894735
eth_a_vault_daily_revenues_7d_ma                   0.6024287327474427
eth_a_vault_daily_revenues_30d_ma                  0.5909303063853806
Net_Income_Lag_1m     

In [484]:
# Select correlations of all features with the target variable, excluding the target variable itself
dai_market_Volume_target_correlations = correlations['dai_market_Volume'].drop('dai_market_Volume')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
dai_market_Volume_sorted_correlations = dai_market_Volume_target_correlations.sort_values(ascending=False)

for index, value in dai_market_Volume_sorted_correlations.items():
    print(f"{index:50} {value}")

dai_market_Volume_7d_ma                            0.8205143861775882
dai_market_Volume_30d_ma                           0.7066783347315149
b_s_Crypto-Loans_7d_rolling_avg                    0.6764155164301144
b_s_Crypto-Loans                                   0.6708873115157393
eth_market_Close_7d_ma                             0.6218025947911744
eth_a_vault_market_price_7d_ma                     0.6203641310119258
eth_a_vault_market_price_30d_ma                    0.6055150472456813
eth_a_vault_market_price                           0.5978872991969473
eth_market_Close                                   0.5975522986471895
eth_a_vault_safety_price                           0.5948535990894656
eth_a_vault_collateral_usd_7d_ma                   0.5938633494399981
eth_market_Close_30d_ma                            0.5897639801615067
btc_market_Close_30d_ma                            0.5895738668775813
eth_market_Volume                                  0.5880084287135307
eth_a_vault_prev_dai

In [485]:
# Select correlations of all features with the target variable, excluding the target variable itself
dai_treasury_target_correlations = correlations['where_is_dai_Treasury'].drop('where_is_dai_Treasury')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
dai_t_sorted_correlations = dai_treasury_target_correlations.sort_values(ascending=False)

for index, value in dai_t_sorted_correlations.items():
    print(f"{index:50} {value}")

fed_reverse_repo                                   0.6522088099940943
daily_surplus_buffer                               0.6173243241294564
Total_Revenues_Lag_8m                              0.6161957931990697
dai_maturity_outflow_surplus_buffer_1-year         0.5960937566259698
3.7 - Equity (Surplus Buffer)                      0.5883814118099544
eth_a_vault_dai_floor                              0.5854995904959459
b_s_Stablecoins_7d_rolling_avg                     0.5735479957026933
b_s_Stablecoins                                    0.5681864475073243
psm_balance                                        0.5593574708036745
where_is_dai_EOA                                   0.5530365975409924
2.2 - Trading Assets                               0.5462983110456322
dai_maturity_outflow_1-year                        0.5307698340387196
psm_lifetime_fees                                  0.5299929785988882
dai_maturity_outflow_dai_only_1-year               0.5288013269097265
sticky_cpi          

In [486]:
# Select correlations of all features with the target variable, excluding the target variable itself
dai_p_target_correlations = correlations['dai_market_Close'].drop('dai_market_Close')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
dai_p_sorted_correlations = dai_p_target_correlations.sort_values(ascending=False)

for index, value in dai_p_sorted_correlations.items():
    print(f"{index:50} {value}")

dai_deviation                                      1.0000000000000004
dai_abs_deviation                                  0.8597739970773555
dai_deviation_7d_ma                                0.7350923921433116
dai_abs_deviation_7d_ma                            0.6639594426851969
dai_deviation_30d_ma                               0.5961467229592493
dai_abs_deviation_30d_ma                           0.5889543418431888
dai_market_Close_daily_returns                     0.4675638375688646
debt_ratio                                         0.4654548184009946
b_s_Equity                                         0.44636144117782134
b_s_Equity_7d_rolling_avg                          0.4454071129854132
b_s_DAI_7d_rolling_volatility                      0.43222944215852177
1.9 - Net Income_3m_volatility_pct_chg             0.4271861729793015
b_s_DAI_7d_rolling_avg                             0.4089722018240181
b_s_DAI                                            0.40834947487856654
eth_a_vault_liqui

In [487]:
# Select correlations of all features with the target variable, excluding the target variable itself
mkr_target_correlations = correlations['mkr_market_Close'].drop('mkr_market_Close')

# Sort the correlations to see the most positive and negative correlations at the top and bottom, respectively
mkr_target_correlations = mkr_target_correlations.sort_values(ascending=False)

for index, value in mkr_target_correlations.items():
    print(f"{index:50} {value}")

mkr_market_Close_7d_ma                             0.9869998693793518
mkr_market_Close_30d_ma                            0.9229258868966742
eth_a_vault_hypothetical_dai_ceiling               0.8637659988714612
eth_a_vault_collateral_usd                         0.858658252120344
btc_market_Close_30d_ma                            0.8568912513297117
eth_a_vault_annualized_revenues                    0.8545562136481638
eth_a_vault_usd_safety_value                       0.8531474760077917
eth_a_vault_daily_revenues                         0.850308910667979
eth_a_vault_collateral_usd_7d_ma                   0.8501129391500546
btc_market_Close_7d_ma                             0.839365007857942
btc_market_Close                                   0.8317313215728468
eth_a_vault_daily_revenues_7d_ma                   0.8305206072131643
eth_a_vault_collateral_usd_30d_ma                  0.8221686631787506
eth_a_vault_daily_revenues_30d_ma                  0.8107212159681845
eth_market_Close       

Next regression analysis (single, multiple) for target variable and strong correlating features

In your model, which aims to simulate collateral balance for a vault and use Reinforcement Learning (RL) for Maker Protocol financial management with the Dai Savings Rate (DSR) and Stability Fee as part of the action space, the variables can be categorized as follows:

Independent Variables
These are the input features that your model will use to make predictions or decisions. Based on the extensive list of features you've provided and the objectives of your model, independent variables may include:

Market data (e.g., eth_market_Close, btc_market_Close, dai_market_Close)
Operational metrics from the vault (e.g., eth_a_vault_market_price, eth_a_vault_collateral_usd)
Financial metrics (e.g., cumulative_revenues, cumulative_expenses, profit_margin)
DAI metrics (e.g., dai_circulating, dai_percent_in_dsr, dsr_balance)
Macro-economic indicators (e.g., effective_funds_rate, 3_m_tbill_yield, sticky_cpi)
These variables are considered independent because they are assumed not to be influenced by the model's outputs but rather to affect them.

Dependent Variables
These are the outcomes or targets your model aims to predict or optimize. In the context of your model's objectives, the dependent variables could be:

For the simulation segment:

eth_a_vault_cumulative_collateral: This represents the total collateral balance in an ETH vault, which you aim to simulate as a response to changes in policy variables.
For the RL segment:

dsr_rate: The Dai Savings Rate, which is a policy tool MakerDAO uses to influence various aspects of the ecosystem.
stability_fee: The fee applied to vaults, which acts as another policy lever to maintain the peg of DAI to its underlying asset and control supply dynamics.
The choice of dependent and independent variables will depend on the specific objectives of each segment of your model (simulation and RL). The model aims to learn how changes in independent variables (market conditions, operational metrics, financial metrics, macroeconomic indicators, etc.) influence the dependent variables (collateral balance for the simulation part and the optimal DSR and Stability Fee rates for the RL part). This setup allows the model to simulate the impact of policy decisions on the vault's collateral balance and to optimize these decisions to achieve desired outcomes in the MakerDAO ecosystem.

When the Debt Ceiling parameter for a vault type is set lower than the current amount of DAI outstanding (i.e., the total amount of DAI minted from that vault type), it does not directly affect the existing DAI or vaults. Here's what happens:

No Immediate Effect on Existing DAI: Lowering the Debt Ceiling below the current amount of DAI outstanding does not trigger automatic liquidations or directly impact the existing DAI in circulation. The existing DAI remains valid, and holders of DAI are not directly affected by this change in the Debt Ceiling.

Prevention of New DAI Minting: The primary consequence of lowering the Debt Ceiling below the current DAI outstanding is the prevention of new DAI being minted from that specific vault type. This means that users cannot increase their debt (i.e., mint more DAI) from vaults of that type until the total debt is reduced below the new Debt Ceiling level or until the Debt Ceiling is raised again.

Encourages DAI Repayment: By setting a lower Debt Ceiling, the Maker Governance effectively limits the potential for further exposure to the collateral type associated with that vault. This may encourage users to repay some of their DAI debt to free up space under the Debt Ceiling, allowing for new minting activities, or to shift their operations to other vault types with available capacity.

Governance and Risk Management Tool: Lowering the Debt Ceiling can be a proactive measure by Maker Governance to manage risk, especially if there are concerns about overexposure to a particular asset type or if there's a desire to mitigate potential impacts of the 'OSM Timing Attack.' It's a governance tool used to control the risk profile of the MakerDAO system and ensure the stability and security of the DAI peg to the USD.

In summary, setting a Debt Ceiling lower than the current amount of DAI minted does not affect the existing DAI but prevents further minting of DAI from the affected vault type. This measure serves as a risk management and governance tool, allowing Maker Governance to control exposure to specific assets and maintain the system's stability.

# Forecasting/Simulating Balances 

Now, we need to forecast the balances.  Having accurate forecasting on backtesting will infer that it will accurately simulate vault balance changes as result of key parameter changes, namely Liquidation Ratio and Dai Ceiling.

Correlation analysis with collateral balance shows that parameters with most correlation are Liquidation Ratio, Dai Ceiling
Therefore, we will see which data science methods will work best for the forecasting portion

## Linear Regression Methods

## Non-Linear Regression Methods 

## ARIMA

## LSTM