In [2]:
import altair as alt
alt.data_transformers.disable_max_rows()
import pandas as pd
import numpy as np
import statsmodels.api as sm
from datetime import datetime

In [3]:
startDate = pd.to_datetime('2021-08-19')
#startDate = pd.to_datetime('2022-09-12')
endDate = pd.to_datetime('2023-07-31')

In [4]:
liability_matrix = pd.read_csv('../data/balance_sheets/daily_liability_matrix.csv')
liability_matrix = liability_matrix.rename(columns={'date': 'timestamp'})
liability_matrix['timestamp'] = pd.to_datetime(liability_matrix['timestamp'])
liability_matrix.drop(['account', 'effectiveUSD'], axis=1, inplace=True)
liability_matrix = liability_matrix[liability_matrix['timestamp'].between(startDate, endDate)]

In [5]:
selfBorrow = liability_matrix[liability_matrix['symbol_bor'] == liability_matrix['symbol_col']]
selfBorrow = selfBorrow.groupby('timestamp').sum().drop(['symbol_col', 'symbol_bor'], axis=1).reset_index()
selfBorrow = selfBorrow.rename(columns={'matchBorrowUSD': 'selfBorrowUSD', 'matchCollateralUSD': 'selfCollateralUSD'})
selfBorrow

Unnamed: 0,timestamp,selfBorrowUSD,selfCollateralUSD
0,2021-08-19,5.416844e+09,7.598608e+09
1,2021-08-20,5.411134e+09,7.590484e+09
2,2021-08-21,5.430208e+09,7.604733e+09
3,2021-08-22,5.400847e+09,7.575981e+09
4,2021-08-23,5.333646e+09,7.491634e+09
...,...,...,...
707,2023-07-27,1.336131e+08,2.551809e+08
708,2023-07-28,1.352411e+08,2.577466e+08
709,2023-07-29,1.354130e+08,2.575257e+08
710,2023-07-30,1.344180e+08,2.554818e+08


In [6]:
totalBorrow = liability_matrix.groupby('timestamp').sum().drop(['symbol_bor', 'symbol_col'], axis=1).reset_index()
totalBorrow = totalBorrow.rename(columns={'matchBorrowUSD': 'totalBorrowUSD', 'matchCollateralUSD': 'totalCollateralUSD'})
totalBorrow

Unnamed: 0,timestamp,totalBorrowUSD,totalCollateralUSD
0,2021-08-19,7.914928e+09,1.444673e+10
1,2021-08-20,8.025064e+09,1.487075e+10
2,2021-08-21,8.088935e+09,1.506277e+10
3,2021-08-22,8.091316e+09,1.503731e+10
4,2021-08-23,8.139425e+09,1.519342e+10
...,...,...,...
707,2023-07-27,5.696505e+08,1.259088e+09
708,2023-07-28,5.718996e+08,1.268614e+09
709,2023-07-29,5.725436e+08,1.268509e+09
710,2023-07-30,5.577766e+08,1.237535e+09


In [7]:
matrix = selfBorrow.merge(totalBorrow, on='timestamp')
matrix['selfBorrowShare'] = matrix['selfBorrowUSD'] / matrix['totalBorrowUSD']
matrix['selfCollateralShare'] = matrix['selfCollateralUSD'] / matrix['totalCollateralUSD']
matrix.drop(['selfBorrowUSD', 'totalBorrowUSD', 'totalCollateralUSD', 'selfCollateralUSD'], axis=1, inplace=True)
matrix

Unnamed: 0,timestamp,selfBorrowShare,selfCollateralShare
0,2021-08-19,0.684383,0.525974
1,2021-08-20,0.674279,0.510431
2,2021-08-21,0.671313,0.504869
3,2021-08-22,0.667487,0.503812
4,2021-08-23,0.655285,0.493084
...,...,...,...
707,2023-07-27,0.234553,0.202671
708,2023-07-28,0.236477,0.203172
709,2023-07-29,0.236511,0.203014
710,2023-07-30,0.240989,0.206444


In [8]:
alt.Chart(matrix).mark_line().encode(
    x='timestamp:T',
    y='selfBorrowShare:Q'
).interactive(bind_y = False)

In [9]:
price = pd.read_csv('../data/COMP_USD.csv', sep=';')
price = price[['timestamp', 'close']]
price['timestamp'] = price['timestamp'].apply(lambda date: datetime.strptime(date, '%Y-%m-%dT%H:%M:%S.%fZ'))
price['timestamp'] = price['timestamp'].dt.strftime('%Y-%m-%d')
price['timestamp'] = pd.to_datetime(price['timestamp'])
price = price.rename(columns={'close': 'priceUSD'})
price = price[price['timestamp'].between(startDate, endDate)]
price

Unnamed: 0,timestamp,priceUSD
192,2023-07-31,66.138300
193,2023-07-30,71.036632
194,2023-07-29,72.280781
195,2023-07-28,72.094845
196,2023-07-27,70.095282
...,...,...
899,2021-08-23,465.558598
900,2021-08-22,460.365323
901,2021-08-21,462.325055
902,2021-08-20,480.044869


In [10]:
matrix = matrix.merge(price, on='timestamp')
matrix

Unnamed: 0,timestamp,selfBorrowShare,selfCollateralShare,priceUSD
0,2021-08-19,0.684383,0.525974,457.666976
1,2021-08-20,0.674279,0.510431,480.044869
2,2021-08-21,0.671313,0.504869,462.325055
3,2021-08-22,0.667487,0.503812,460.365323
4,2021-08-23,0.655285,0.493084,465.558598
...,...,...,...,...
707,2023-07-27,0.234553,0.202671,70.095282
708,2023-07-28,0.236477,0.203172,72.094845
709,2023-07-29,0.236511,0.203014,72.280781
710,2023-07-30,0.240989,0.206444,71.036632


In [11]:
alt.Chart(matrix).mark_line().encode(
    x='timestamp:T',
    y='priceUSD:Q'
).interactive(bind_y = False)

In [12]:
import json

json_file = '../data/daily_TVL.json'

with open(json_file, 'r') as f:
    data = json.load(f)

tvl = []
for entry in data['tvl']:
    tvl.append({'TVL': entry['totalLiquidityUSD']})

tvl = pd.DataFrame(tvl)
end_date = pd.to_datetime('2024-02-28')
start_date = end_date - pd.Timedelta(days=len(tvl) - 1)
tvl['timestamp'] = pd.date_range(start=start_date, end=end_date, freq='D')

tvl = tvl[tvl['timestamp'].between(startDate, endDate)].reset_index()

tvl

Unnamed: 0,index,TVL,timestamp
0,1046,1.037066e+10,2021-08-19
1,1047,1.024546e+10,2021-08-20
2,1048,1.025178e+10,2021-08-21
3,1049,1.029383e+10,2021-08-22
4,1050,1.014308e+10,2021-08-23
...,...,...,...
707,1753,2.266831e+09,2023-07-27
708,1754,2.263040e+09,2023-07-28
709,1755,2.245773e+09,2023-07-29
710,1756,2.229968e+09,2023-07-30


In [13]:
matrix = matrix.merge(tvl, on='timestamp')
matrix

Unnamed: 0,timestamp,selfBorrowShare,selfCollateralShare,priceUSD,index,TVL
0,2021-08-19,0.684383,0.525974,457.666976,1046,1.037066e+10
1,2021-08-20,0.674279,0.510431,480.044869,1047,1.024546e+10
2,2021-08-21,0.671313,0.504869,462.325055,1048,1.025178e+10
3,2021-08-22,0.667487,0.503812,460.365323,1049,1.029383e+10
4,2021-08-23,0.655285,0.493084,465.558598,1050,1.014308e+10
...,...,...,...,...,...,...
707,2023-07-27,0.234553,0.202671,70.095282,1753,2.266831e+09
708,2023-07-28,0.236477,0.203172,72.094845,1754,2.263040e+09
709,2023-07-29,0.236511,0.203014,72.280781,1755,2.245773e+09
710,2023-07-30,0.240989,0.206444,71.036632,1756,2.229968e+09


In [14]:
alt.Chart(matrix).mark_line().encode(
    x='timestamp:T',
    y='TVL:Q'
).interactive(bind_y = False)

## Token distribution

In [15]:
import requests
import csv

# Etherscan API endpoint
url = "https://api.etherscan.io/api"

# Parameters
params = {
    "address": "0x1B0e765F6224C21223AeA2af16c1C46E38885a40",  # COMP token contract address
    "apikey": "TS725IGM1MHRMKQQE2BAJ71K7B5J1DDDF3",
    "module": "account", 
    "action": "tokentx",
}

# Make the API request
response = requests.get(url, params=params)

# Check if the request was successful
if response.status_code == 200:
    # Parse the response JSON
    data = response.json()
    
    # Check if the response contains data
    if data["status"] == "1":
        # Extract token transfers
        transfers = data["result"]
        print(len(transfers))
        
        # Define CSV file path
        csv_file_path = "../data/comp_transfers.csv"
        
        # Write transfers to CSV file
        with open(csv_file_path, "w", newline="") as csvfile:
            fieldnames = [
                    "timeStamp",
                    "from",
                    "contractAddress",
                    "value",
                    "to",
                    "tokenSymbol",
                    "tokenDecimal",
                    ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            
            writer.writeheader()
            
            for transfer in transfers:
                writer.writerow({
                    "timeStamp": transfer["timeStamp"],
                    "from": transfer["from"],
                    "contractAddress": transfer["contractAddress"],
                    "value": transfer["value"],
                    "to": transfer["to"],
                    "tokenSymbol": transfer["tokenSymbol"],
                    "tokenDecimal": transfer["tokenDecimal"],
                })

4289


In [16]:
transfersCOMP = pd.read_csv('../data/comp_transfers.csv')
transfersCOMP['timestamp'] = pd.to_datetime(transfersCOMP['timeStamp'], unit='s').apply(lambda x: x.strftime('%Y-%m-%d'))
transfersCOMP['timestamp'] = pd.to_datetime(transfersCOMP['timestamp'])

transfersCOMP

Unnamed: 0,timeStamp,from,contractAddress,value,to,tokenSymbol,tokenDecimal,timestamp
0,1662795640,0x3d9819210a31b4961b30ef54be2aed79b9c9cd3b,0xc00e94cb662c3520282e6f5717214004a7f26888,25000000000000000000000,0x1b0e765f6224c21223aea2af16c1c46e38885a40,COMP,18,2022-09-10
1,1662953866,0x1b0e765f6224c21223aea2af16c1c46e38885a40,0xc00e94cb662c3520282e6f5717214004a7f26888,37872670000000000000,0x5dc50055bc813c9910e406b786e45a5ae4f8ad5b,COMP,18,2022-09-12
2,1663012699,0x1b0e765f6224c21223aea2af16c1c46e38885a40,0xc00e94cb662c3520282e6f5717214004a7f26888,9409086000000000000,0x5dc50055bc813c9910e406b786e45a5ae4f8ad5b,COMP,18,2022-09-12
3,1663047368,0x1b0e765f6224c21223aea2af16c1c46e38885a40,0xc00e94cb662c3520282e6f5717214004a7f26888,10923779000000000000,0x227e93bc160fc34f1f31500dbf83619c6147aedb,COMP,18,2022-09-13
4,1663112265,0x1b0e765f6224c21223aea2af16c1c46e38885a40,0xc00e94cb662c3520282e6f5717214004a7f26888,7480537000000000000,0x227e93bc160fc34f1f31500dbf83619c6147aedb,COMP,18,2022-09-13
...,...,...,...,...,...,...,...,...
4284,1712203967,0x1b0e765f6224c21223aea2af16c1c46e38885a40,0xc00e94cb662c3520282e6f5717214004a7f26888,6841170000000000000,0x75097442e2a86cd0b696fcb6c9ffaf849a05c8df,COMP,18,2024-04-04
4285,1712213363,0x1b0e765f6224c21223aea2af16c1c46e38885a40,0xc00e94cb662c3520282e6f5717214004a7f26888,1819902000000000000,0xa6309fe7647a82072b6707ac0f8fe78891ea6fd4,COMP,18,2024-04-04
4286,1712217815,0x1b0e765f6224c21223aea2af16c1c46e38885a40,0xc00e94cb662c3520282e6f5717214004a7f26888,18238965000000000000,0x0f1dfef1a40557d279d0de6e49ab306891a638b8,COMP,18,2024-04-04
4287,1712226323,0x1b0e765f6224c21223aea2af16c1c46e38885a40,0xc00e94cb662c3520282e6f5717214004a7f26888,43798690000000000000,0x4c9f7207be28278b9dca129f2e211acfff48fb01,COMP,18,2024-04-04


In [17]:
distribution = transfersCOMP[transfersCOMP['from'] == '0x1b0e765f6224c21223aea2af16c1c46e38885a40']
distribution['value'] = distribution['value'].astype(float) / (10**18)
distribution['timestamp'] = pd.to_datetime(distribution['timeStamp'], unit='s').apply(lambda x: x.strftime('%Y-%m-%d'))
distribution['timestamp'] = pd.to_datetime(distribution['timestamp'])
distribution = distribution[distribution['timestamp'].between(startDate, endDate)]
distribution = distribution[['value', 'timestamp']].groupby('timestamp').sum().reset_index()

df = pd.DataFrame({'timestamp': pd.date_range(start=startDate, end=endDate, freq='D')})
distribution = distribution.merge(df, on='timestamp', how='right')
distribution = distribution.fillna(0)
distribution = distribution.rename(columns={'value': 'distributionCOMP'})
distribution

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  distribution['value'] = distribution['value'].astype(float) / (10**18)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  distribution['timestamp'] = pd.to_datetime(distribution['timeStamp'], unit='s').apply(lambda x: x.strftime('%Y-%m-%d'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  distribution['

Unnamed: 0,timestamp,distributionCOMP
0,2021-08-19,0.000000
1,2021-08-20,0.000000
2,2021-08-21,0.000000
3,2021-08-22,0.000000
4,2021-08-23,0.000000
...,...,...
707,2023-07-27,3101.800857
708,2023-07-28,19.917410
709,2023-07-29,1167.361256
710,2023-07-30,40.318422


In [18]:
alt.Chart(distribution).mark_bar().encode(
    x='timestamp:T',
    y='distributionCOMP:Q'
).interactive(bind_y = False)

In [19]:
matrix = matrix.merge(distribution, on='timestamp', how='left')
matrix

Unnamed: 0,timestamp,selfBorrowShare,selfCollateralShare,priceUSD,index,TVL,distributionCOMP
0,2021-08-19,0.684383,0.525974,457.666976,1046,1.037066e+10,0.000000
1,2021-08-20,0.674279,0.510431,480.044869,1047,1.024546e+10,0.000000
2,2021-08-21,0.671313,0.504869,462.325055,1048,1.025178e+10,0.000000
3,2021-08-22,0.667487,0.503812,460.365323,1049,1.029383e+10,0.000000
4,2021-08-23,0.655285,0.493084,465.558598,1050,1.014308e+10,0.000000
...,...,...,...,...,...,...,...
707,2023-07-27,0.234553,0.202671,70.095282,1753,2.266831e+09,3101.800857
708,2023-07-28,0.236477,0.203172,72.094845,1754,2.263040e+09,19.917410
709,2023-07-29,0.236511,0.203014,72.280781,1755,2.245773e+09,1167.361256
710,2023-07-30,0.240989,0.206444,71.036632,1756,2.229968e+09,40.318422


In [20]:
from sklearn.preprocessing import StandardScaler

Y  = matrix['selfBorrowShare']
X1 = matrix['priceUSD']
X2 = matrix['TVL']
X3 = matrix['distributionCOMP']

scaler = StandardScaler()
X2= scaler.fit_transform(X2.to_frame())
X3= scaler.fit_transform(X3.to_frame())

# Single regressions

In [21]:
single_model = sm.OLS(Y, sm.add_constant(X1)).fit()
print(single_model.summary())

                            OLS Regression Results                            
Dep. Variable:        selfBorrowShare   R-squared:                       0.566
Model:                            OLS   Adj. R-squared:                  0.565
Method:                 Least Squares   F-statistic:                     925.0
Date:                Thu, 04 Apr 2024   Prob (F-statistic):          9.90e-131
Time:                        14:46:42   Log-Likelihood:                 622.38
No. Observations:                 712   AIC:                            -1241.
Df Residuals:                     710   BIC:                            -1232.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2484      0.005     45.584      0.0

In [22]:
single_model = sm.OLS(Y, sm.add_constant(X2)).fit()
print(single_model.summary())

                            OLS Regression Results                            
Dep. Variable:        selfBorrowShare   R-squared:                       0.558
Model:                            OLS   Adj. R-squared:                  0.557
Method:                 Least Squares   F-statistic:                     895.6
Date:                Thu, 04 Apr 2024   Prob (F-statistic):          6.22e-128
Time:                        14:46:42   Log-Likelihood:                 615.92
No. Observations:                 712   AIC:                            -1228.
Df Residuals:                     710   BIC:                            -1219.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.3675      0.004     96.133      0.0

In [23]:
single_model = sm.OLS(Y, sm.add_constant(X3)).fit()
print(single_model.summary())

                            OLS Regression Results                            
Dep. Variable:        selfBorrowShare   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     15.89
Date:                Thu, 04 Apr 2024   Prob (F-statistic):           7.40e-05
Time:                        14:46:43   Log-Likelihood:                 333.30
No. Observations:                 712   AIC:                            -662.6
Df Residuals:                     710   BIC:                            -653.5
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.3675      0.006     64.637      0.0

# Multi regressions

In [24]:
multi_model = sm.OLS(Y, sm.add_constant(np.column_stack((X1, X2, X3)))).fit()
print(multi_model.summary())

                            OLS Regression Results                            
Dep. Variable:        selfBorrowShare   R-squared:                       0.586
Model:                            OLS   Adj. R-squared:                  0.584
Method:                 Least Squares   F-statistic:                     334.0
Date:                Thu, 04 Apr 2024   Prob (F-statistic):          4.39e-135
Time:                        14:46:43   Log-Likelihood:                 639.35
No. Observations:                 712   AIC:                            -1271.
Df Residuals:                     708   BIC:                            -1252.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2990      0.011     27.887      0.0