# **Coins market features and users behavior on Compound**
- The main purpose of this notebook is to present an exploratory data visualization and to check whether there is an explicit correlation between the market price of crypto assets and user behavior (with `utilization_ratio` as a proxy).
- Use Granger causality to test the lead/lag relationship between two variables. Don't forget to check the model's assumptions (e.g., stationarity).
    - https://www.youtube.com/watch?v=XqsSB_vpHLs (Granger Causality)
    - https://www.youtube.com/watch?v=5jOW6baXYI4 (5 ways to fail with time series)
-	Check if the utilization ratio is correlated with the liquidity mining activities (use cETH to borrow ETH). Maybe we should find a way to remove self-borrowing from calculating the utilization ratio. (cf. daily liability matrix)


### Packages init

In [1]:
import altair as alt
import pandas as pd
import numpy as np

# Load data

# Load the daily market data once and keep the unfiltered frame under its own name;
# later cells add columns to both frames.
full_market_price = pd.read_csv('../data/balance_sheets/daily_markets.csv')
# Keep only the four markets studied in the first part of the notebook (WBTC, ETH, DAI, USDC).
# `.copy()` makes this an independent frame, so adding columns to it later
# (utilization_ratio, yield) does not trigger pandas' SettingWithCopyWarning.
market_price = full_market_price[full_market_price['symbol'].isin(['cWBTC', 'cETH', 'cDAI', 'cUSDC'])].copy()
print(market_price.columns) # Print columns
#alt.data_transformers.enable('csv') # Enable csv data transformer
alt.data_transformers.disable_max_rows()


Index(['borrowRate', 'cash', 'collateralFactor', 'exchangeRate',
       'interestRateModelAddress', 'name', 'reserves', 'supplyRate', 'symbol',
       'cTokenAddress', 'totalBorrows', 'totalSupply', 'underlyingAddress',
       'underlyingName', 'underlyingPrice', 'underlyingSymbol',
       'accrualBlockNumber', 'blockTimestamp', 'borrowIndex', 'reserveFactor',
       'underlyingPriceUSD', 'underlyingDecimals', 'date', 'newPrice'],
      dtype='object')


DataTransformerRegistry.enable('default')

### Plotting Data

### Price

In [2]:
# Plot market prices double y-axis
prices = alt.Chart(market_price).mark_line().encode(
    x='date:T',
    y='underlyingPriceUSD:Q',
    color='symbol:N',
)

prices

## Interest Rate

See https://altair-viz.github.io/gallery/select_detail.html to enhance rendering of plot

In [3]:
# Plot market borrow rates
interest_rate = alt.Chart(market_price).mark_line().encode(
    x='date:T',
    y='borrowRate:Q',
    color='symbol:N',
)


## Utilization Ratio

In [None]:
market_price['utilization_ratio'] = market_price['totalBorrows'] / ( market_price['totalBorrows'] + market_price['cash'] - market_price['reserves'] )
print(market_price.columns)

u_ratio = alt.Chart(market_price).mark_line().encode(
    x='date:T',
    y='utilization_ratio:Q',
    color='symbol:N',
)

u_ratio | interest_rate


In [None]:
u_ratio_borrow_rate = alt.Chart(market_price).mark_circle().encode(
    x='utilization_ratio:Q',
    y='borrowRate:Q',
    color='symbol:N',
)

#u_ratio_borrow_rate.save('../charts/u_ratio_borrow_rate.png')
u_ratio_borrow_rate

In [None]:
# chart giving market price and utilization ratio for each coin

u_ratio_price = alt.Chart(market_price).mark_circle().encode(
    x='utilization_ratio:Q',
    y='underlyingPriceUSD:Q',
    color='symbol:N',
)

## Daily Return (simple percentage change)

In [None]:
import datetime
# Simple daily return (pct_change, i.e. p[t]/p[t-1] - 1 — not a log return) of the underlying USD price,
# computed separately for each underlyingSymbol and stored in the 'yield' column.
# NOTE(review): pct_change follows row order within each group — assumes rows are sorted by date; confirm.

market_price['yield'] = market_price.groupby('underlyingSymbol')['underlyingPriceUSD'].pct_change()


# plot yield

yield_chart = alt.Chart(market_price).mark_line().encode(
    x='date:T',
    y='yield:Q',
    color='symbol:N',
)

print(market_price.columns)
# Display
 
prices | yield_chart | u_ratio



In [21]:
#overlap WBTC yield and WBTC utilization ratio

wbtc = market_price[market_price['underlyingSymbol'] == 'WBTC']
eth = market_price[market_price['underlyingSymbol'] == 'ETH']
dai = market_price[market_price['underlyingSymbol'] == 'DAI']
usdc = market_price[market_price['underlyingSymbol'] == 'USDC']

def chart_feature(df, feature, my_color):
    """Build a single-colour Altair line chart of one field against the date.

    Args:
        df: Data handed to ``alt.Chart``; the encoding reads its ``date`` field
            (temporal) and the field named by ``feature``.
        feature: Name of the field drawn on the y axis, encoded as quantitative.
        my_color: Colour passed to ``mark_line``.

    Returns:
        The chart object, so callers can chain ``.properties(...)`` on it.
    """
    y_shorthand = feature + ':Q'
    line = alt.Chart(df).mark_line(color=my_color)
    return line.encode(x='date:T', y=y_shorthand)
    
wbtc_utilization_ratio = chart_feature(wbtc, 'utilization_ratio', 'green').properties(title='BTC Utilization Ratio')
wbtc_price = chart_feature(wbtc, 'underlyingPriceUSD', 'blue').properties(title='BTC Market Price')

eth_utilization_ratio = chart_feature(eth, 'utilization_ratio', 'green').properties(title='ETH Utilization Ratio')
eth_price = chart_feature(eth, 'underlyingPriceUSD', 'blue').properties(title='ETH Market Price')

dai_utilization_ratio = chart_feature(dai, 'utilization_ratio', 'green').properties(title='DAI Utilization Ratio')
dai_price = chart_feature(dai, 'underlyingPriceUSD', 'blue').properties(title='DAI Market Price')

usdc_utilization_ratio = chart_feature(usdc, 'utilization_ratio', 'green').properties(title='USDC Utilization Ratio')
usdc_price = chart_feature(usdc, 'underlyingPriceUSD', 'blue').properties(title='USDC Market Price')

chart_volatile = ((wbtc_price | wbtc_utilization_ratio )& (eth_price | eth_utilization_ratio)) 
chart_stable = ((dai_price | dai_utilization_ratio) & (usdc_price | usdc_utilization_ratio))
chart_volatile.save('../charts/vc_price_utilization_ratio.png', ppi=300)
chart_stable.save('../charts/sc_price_utilization_ratio.png', ppi=300)

## Max Yield

In [None]:
alt.Chart(wbtc).mark_rect().encode(
    alt.X("date(date):O").axis(labelAngle=0, format="%e").title("Day"),
    alt.Y("month(date):O").title("Month"),
    alt.Color("max(yield):Q").title("Yield"),
)

## Max Utilization Ratio

In [None]:
alt.Chart(wbtc).mark_rect().encode(
    alt.X("date(date):O").axis(labelAngle=0, format="%e").title("Day"),
    alt.Y("month(date):O").title("Month"),
    alt.Color("max(utilization_ratio):Q").title("Utilization Ratio"),
)

## Max Price

In [None]:
alt.Chart(wbtc).mark_rect().encode(
    alt.X("date(date):O").axis(labelAngle=0, format="%e").title("Day"),
    alt.Y("month(date):O").title("Month"),
    alt.Color("max(underlyingPriceUSD):Q").title("Price"),
)

## **Investigating on potential correlation between coins market price and utilization_ratio**
- Time series analysis
    - Granger Causality
    - Time Lagged Cross Correlation
    - Dynamic Time Warping
    - Instantaneous phase synchrony
    - Fourier Transform
    - Power Spectrum Distribution
- Focus on Granger Causality
    - Computing the GC = log(V(e1)/V(e2)) on a **time-window** *(parameter s)* and the **model order** *(parameter k)* of the Autoregressive Model
        - Short **time-window** => worse model estimation but better temporal resolution
        - Long **time-window** => better model estimation but worse temporal resolution
        - Lower **order** => better model estimation (faster computation time) but less sensitive to history
        - Higher **order** => worse model estimation (longer computation time) but more sensitive to history
    - The MVGC multivariate Granger causality toolbox: a new approach to Granger-causal inference
        -The standard method of computing G-causality involves estimation of parameters for both a full and a nested (reduced) VAR model. The MVGC approach, by contrast, avoids explicit estimation of the reduced model, thus eliminating a source of estimation error and improving statistical power, and in addition facilitates fast and accurate estimation of the computationally awkward case of conditional G-causality in the frequency domain.
    - Trying to make GC test on differents time periods (2019-2020 / 2020-2021 / 2021-2022 / 2022-2023)

 - Let's check Granger causality between the coin market price (WBTC or ETH) and the utilization ratio
 - https://youtu.be/4TkNZviNJC0 (Granger causality tests in python)

## Example of a stationary time series, ADF (adfuller) test and Granger causality test

In [None]:
from statsmodels.tsa.stattools import grangercausalitytests, adfuller

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Build the driving series: a simple AR(1) process, x[t] = 0.5 * x[t-1] + noise.
# Seed NumPy's global RNG first so the synthetic example (and the p-values discussed
# further down) are reproducible under Restart & Run All.
np.random.seed(42)
t1 = [0.1*np.random.normal()]
for _ in range(100):
    t1.append(0.5*t1[-1] + 0.1*np.random.normal())

In [None]:
#build the time series that is granger caused by t1
t2 = [item + 0.1*np.random.normal() for item in t1]
# adding a little bit of noise to t1 to make it more realistic

In [None]:
# Shift the two series against each other by 3 steps: after slicing, t2[i] is the noisy
# copy of t1[i-3], i.e. t2 lags t1 by 3 steps (t1 leads).
t1 = t1[3:]
t2 = t2[:-3]

In [None]:
# testing adf for t1
adf1 = adfuller(t1)
adf1

In [None]:
# testing adf for t2
adf2 = adfuller(t2)
adf2

In [None]:
#plotting t1 and t2

plt.figure(figsize=(10,4))
plt.plot(t1, color='b')
plt.plot(t2, color='r')

plt.legend(['t1', 't2'], fontsize=16)

In [None]:

ts_df = pd.DataFrame(columns=['t2', 't1'], data=zip(t2,t1))
ts_df.head()


In [None]:
# Test whether t1 Granger-causes t2 for lags 1..5.
# statsmodels tests whether the SECOND column helps predict the FIRST one; ts_df's columns are ['t2', 't1'].

gc_results = grangercausalitytests(ts_df, 5)

- The p-value drops to ≈ 0 once the test includes 3 lags, which means that a lag order of 3 is what is needed to explain t2 from past values of t1 (consistent with the 3-step shift built into the example).

### Testing GC between the market price of WBTC and ETH and their respective utilization_ratio, first **without data cleaning**

In [None]:

# granger causality test for wbtc underlyingPriceUSD on utilization_ratio

wbtc_ts_df = pd.DataFrame(columns=['utilization_ratio','underlyingPriceUSD'], data=zip(wbtc['utilization_ratio'],wbtc['underlyingPriceUSD']))
wbtc_ts_df.head()

In [None]:
# adfuller test for wbtc underlyingPriceUSD
wbtc_adf1 = adfuller(wbtc_ts_df['underlyingPriceUSD'])
wbtc_adf1

In [None]:
#testing if underlyingPriceUSD granger causes utilization_ratio up to 5 lags
gc_results = grangercausalitytests(wbtc_ts_df, 5, addconst=True)

- Really bad result: the best p-value is 0.69 with 1 lag, and it increases towards 1 as the lag order grows.
- Let's do the same with eth price and its utilization ratio

In [None]:
# filtering for eth only
eth = market_price[market_price['underlyingSymbol'] == 'ETH']
eth.head()

In [None]:
# granger causality test for eth underlyingPriceUSD on utilization_ratio
eth_ts_df = pd.DataFrame(columns=['utilization_ratio','underlyingPriceUSD'], data=zip(eth['utilization_ratio'],eth['underlyingPriceUSD']))
eth_ts_df.columns

In [None]:
# adfuller test for eth underlyingPriceUSD
eth_adf1 = adfuller(eth_ts_df['underlyingPriceUSD'])
eth_adf1

In [None]:
#testing if underlyingPriceUSD granger causes utilization_ratio up to 5 lags
gc_results = grangercausalitytests(eth_ts_df, 5)

- Better results than the WBTC GC test, but the best p-value is still high: 0.15 at 2 lags
### Let's try again  **with data cleaning** this time
- https://youtu.be/7_Js8h709Dw (Cleaning data for time-series)

In [None]:
# select row 54 of eth_ts_df
eth_ts_df.loc[54]

In [None]:
# normalize with pandas the data for eth and wbtc

eth_ts_df = (eth_ts_df - eth_ts_df.mean()) / eth_ts_df.std()
wbtc_ts_df = (wbtc_ts_df - wbtc_ts_df.mean()) / wbtc_ts_df.std()


# removing trend from the data (first property of a stationary time series)

eth_ts_df = eth_ts_df.diff().dropna()
wbtc_ts_df = wbtc_ts_df.diff().dropna()

# normalizing by a periodic, lets say 1 year, volatility (second property of a stationary time series)
"""
for i in range(0, len(eth_ts_df),365):
    eth_ts_df[i:i+365] /= eth_ts_df[i:i+365].std()

for i in range(0, len(wbtc_ts_df),365):
    wbtc_ts_df[i:i+365] /= wbtc_ts_df[i:i+365].std()
"""
#plotting eth_price and wbt_price

plt.figure(figsize=(10,4))
plt.plot(eth_ts_df['underlyingPriceUSD'], color='b')
plt.plot(wbtc_ts_df['underlyingPriceUSD'], color='r')

plt.legend(['eth_price', 'wbtc_price'], fontsize=16)
plt.title('Cleaned ETH and WBTC Price', fontsize=20)

#plotting eth_utilization_ratio and wbtc_utilization_ratio

plt.figure(figsize=(10,4))
plt.plot(eth_ts_df['utilization_ratio'], color='b')
plt.plot(wbtc_ts_df['utilization_ratio'], color='r')

plt.legend(['eth_utilization_ratio', 'wbtc_utilization_ratio'], fontsize=16)
plt.title('Cleaned ETH and WBTC Utilization Ratio', fontsize=20)


In [None]:
#adfuller test for wbtc underlyingPriceUSD
wbtc_adf1 = adfuller(wbtc_ts_df['underlyingPriceUSD'])
wbtc_adf1


In [None]:
gc_results = grangercausalitytests(wbtc_ts_df, 10)

In [None]:
# ADF test on the cleaned ETH price series.
# (This previously ran on wbtc_ts_df by copy-paste, so the "ETH" result was really the WBTC one.)
eth_adf1 = adfuller(eth_ts_df['underlyingPriceUSD'])
eth_adf1

In [None]:
gc_results = grangercausalitytests(eth_ts_df, 5)

In [None]:
#testing stationnarity of utilization_ratio for eth and wbtc

eth_adf2 = adfuller(eth_ts_df['utilization_ratio'])
wbtc_adf2 = adfuller(wbtc_ts_df['utilization_ratio'])

print("ETH URATIO",eth_adf2)
print("WBTC URATIO", wbtc_adf2)

## Trying grangercausality with price changes (daily return) instead of market price

In [None]:
# Frames for testing whether the daily return ('yield') Granger-causes the utilization ratio.
# grangercausalitytests checks whether the SECOND column helps predict the FIRST one, so both
# frames must be ordered ['utilization_ratio', 'yield']. The ETH frame used to be built in the
# opposite order, which silently tested the reverse direction.

eth_ts2_df = pd.DataFrame(columns=['utilization_ratio','yield'], data=zip(eth['utilization_ratio'],eth['yield'])).dropna()
eth_ts2_df.head()

# Same construction for WBTC.

wbtc_ts2_df = pd.DataFrame(columns=['utilization_ratio','yield'], data=zip(wbtc['utilization_ratio'],wbtc['yield'])).dropna()
wbtc_ts2_df.head()

In [None]:
# normalize with pandas the data for eth and wbtc

eth_ts2_df = (eth_ts2_df - eth_ts2_df.mean()) / eth_ts2_df.std()
wbtc_ts2_df = (wbtc_ts2_df - wbtc_ts2_df.mean()) / wbtc_ts2_df.std()


# removing trend from the data (first property of a stationary time series)

eth_ts2_df = eth_ts2_df.diff().dropna()
wbtc_ts2_df = wbtc_ts2_df.diff().dropna()

In [None]:
# adfuller test for eth yield

eth_adf3 = adfuller(eth_ts2_df['yield'])
eth_adf3

In [None]:
#testing if yield granger causes utilization_ratio up to 5 lags
gc_results = grangercausalitytests(eth_ts2_df, 5)

In [None]:
# Plot the cleaned ETH daily return and utilization ratio on the same axes.

plt.figure(figsize=(10,4))
# Label each line where it is drawn so the legend cannot get out of sync with the plot order
# (the previous positional legend list named the two lines the wrong way round).
plt.plot(eth_ts2_df['yield'], color='r', label='eth_yield')
plt.plot(eth_ts2_df['utilization_ratio'], color='b', label='eth_utilization_ratio')


plt.legend(fontsize=16)
plt.title('Cleaned ETH Utilization Ratio and Yield', fontsize=20)

In [None]:
# adfuller test for wbtc yield

wbtc_adf3 = adfuller(wbtc_ts2_df['yield'])
wbtc_adf3

In [None]:
# Granger causality test of the WBTC daily return ('yield', second column) on the
# utilization ratio (first column) for lags 1..10.
wbtc_max_lag = 10
gc_results = grangercausalitytests(wbtc_ts2_df, wbtc_max_lag)

# Plot the ssr_ftest p-value against the actual lag. Passing only the list to plt.plot
# would label the points 0..9 although the first entry belongs to lag 1.

lags = list(range(1, wbtc_max_lag + 1))
p_values = [gc_results[lag][0]['ssr_ftest'][1] for lag in lags]
plt.figure(figsize=(10,4))
plt.plot(lags, p_values, color='b')
plt.title('P-value of Granger Causality Test for WBTC Yield', fontsize=20)
plt.xlabel('Lags')
plt.ylabel('P-value')
# function that return the best p-value for a granger causality test and its regarding lag

def best_p_value(gc_results):
    """Return the smallest ssr_ftest p-value of a Granger test result and the lag it belongs to.

    Args:
        gc_results: Mapping ``lag -> (tests, ...)`` as returned by
            ``grangercausalitytests``, where ``tests['ssr_ftest'][1]`` is the p-value.
            Any number of lags is accepted (the lags are read from the mapping's keys
            instead of assuming exactly 10).

    Returns:
        Tuple ``(p_value, lag)``. If several lags share the minimum, the smallest lag wins.

    Raises:
        ValueError: if ``gc_results`` is empty.
    """
    # Build the dict in ascending lag order so that ties resolve to the smallest lag.
    p_by_lag = {lag: gc_results[lag][0]['ssr_ftest'][1] for lag in sorted(gc_results)}
    best_lag = min(p_by_lag, key=p_by_lag.get)
    return p_by_lag[best_lag], best_lag

print(best_p_value(gc_results))

In [None]:
# Plot the cleaned WBTC daily return and utilization ratio on the same axes.

plt.figure(figsize=(10,4))
# Label each line where it is drawn so the legend cannot get out of sync with the plot order
# (the previous positional legend list named the two lines the wrong way round).
plt.plot(wbtc_ts2_df['yield'], color='r', label='wbtc_yield')
plt.plot(wbtc_ts2_df['utilization_ratio'], color='b', label='wbtc_utilization_ratio')


plt.legend(fontsize=16)
plt.title('Cleaned WBTC Utilization Ratio and Yield', fontsize=20)




## Next Step
<input type="checkbox"> Dickey-Fuller Test to quantify the degree of stationarity </input>

<input type="checkbox"> Improve stationarity cleaning (seasonal normalization) </input>

<input type="checkbox" checked> Is u-ratio very stationnary </input>

<input type="checkbox"> Trying grangercausality with price changes (daily return) instead of market price </input>

<input type="checkbox"> cETH, cDAI and cUSDT and utilization ratio </input>

<input type="checkbox"> grangercausality in the other side (t2,t1) </input>

<input type="checkbox"> generalizing the process of gc test on others crypto-assets and see if we can classify differents families of crypto-assets (wbtc like or eth like) </input>


In [None]:
# Function that takes : data, a max number of lag, a symbol, two columns (t1,t2) 
# and return the best p-value and its regarding lag for a granger causality test after cleaning the data

def Granger(data, max_lag, symbol, t1, t2, plotting=False):
    """Best Granger-causality p-value of ``t2`` on ``t1`` for one market.

    The frame passed to ``grangercausalitytests`` has columns ``[t1, t2]``; statsmodels
    tests whether the second column helps predict the first, i.e. "t2 -> t1".

    Args:
        data: DataFrame with a ``symbol`` column and the columns named by ``t1`` and ``t2``.
        max_lag: Highest lag tested; lags 1..max_lag are evaluated.
        symbol: Value of the ``symbol`` column selecting the market.
        t1: Name of the column being explained.
        t2: Name of the candidate causing column.
        plotting: When True, also draw the p-value as a function of the lag.

    Returns:
        Tuple ``(p_value, lag)``: the smallest ssr_ftest p-value and the (1-based) lag
        at which it occurs; on ties the smallest lag is returned.
    """
    # Keep only the rows of the requested market.
    data = data[data['symbol'] == symbol]

    # Two-column frame on a fresh 0..n-1 index; rows with a missing value are dropped.
    ts_df = pd.DataFrame(columns=[t1,t2], data=zip(data[t1],data[t2])).dropna()

    # When the ADF test cannot reject a unit root at the 5% level, standardise the column
    # and take first differences. Differencing leaves a NaN in the first row, which the
    # dropna() after the loop removes for both columns at once.
    for column in (t1, t2):
        if adfuller(ts_df[column])[1] > 0.05:
            standardized = ( ts_df[column] - ts_df[column].mean() ) / ts_df[column].std()
            ts_df[column] = standardized.diff()
    ts_df = ts_df.dropna()

    # Test whether t2 Granger-causes t1 for every lag up to max_lag.
    gc_results = grangercausalitytests(ts_df, max_lag,verbose=False)

    # p_values[i] belongs to lag i + 1.
    lags = list(range(1, max_lag + 1))
    p_values = [gc_results[lag][0]['ssr_ftest'][1] for lag in lags]

    if plotting:
        plt.figure(figsize=(10,4))
        # Plot against the real lag values; plotting the bare list would label lag 1 as 0.
        plt.plot(lags, p_values, color='b')
        plt.title('P-value of Granger Causality Test for '+symbol+' '+t2+' on '+t1, fontsize=20)
        plt.xlabel('Lags')
        plt.ylabel('P-value')

    best = min(p_values)
    return best, p_values.index(best) + 1


In [None]:
full_market_price['yield'] = full_market_price.groupby('underlyingSymbol')['underlyingPriceUSD'].pct_change(fill_method=None)
full_market_price['utilization_ratio'] = full_market_price['totalBorrows'] / ( full_market_price['totalBorrows'] + full_market_price['cash'] - full_market_price['reserves'] )
symbols = full_market_price['symbol'].unique()
symbols

In [None]:
stable_coin = ['cUSDC', 'cUSDT', 'cDAI']
volatile = ['cETH', 'cWBTC', 'cCOMP']

focused_coin = stable_coin + volatile
focused_coin

In [58]:
# create a dataframe with the Granger function for each symbol and for each pair of columns among ['underlyingPriceUSD', 'utilization_ratio', 'yield']

# Run the Granger test for every ordered pair of distinct features and every focused market.
gc_features = ['underlyingPriceUSD', 'utilization_ratio', 'yield', 'totalSupply', 'collateralFactor']

granger_list = []
for symbol in focused_coin:
    for t1 in gc_features:
        for t2 in gc_features:
            if t1 == t2:
                continue
            try:
                p_value, lag = Granger(full_market_price, 10, symbol, t1, t2)
            except Exception as err:
                # Best effort: skip pairs the test cannot handle (e.g. constant series), but
                # report why. A bare `except:` would also swallow KeyboardInterrupt.
                print("Error with ", symbol, t1, t2, "->", repr(err))
                continue
            granger_list.append({'symbol': symbol, 't1': t1, 't2': t2, 'p_value': p_value, 'lag': lag})

granger_dfs = pd.DataFrame(granger_list)
granger_dfs.head()

Error with  cUSDC underlyingPriceUSD utilization_ratio
Error with  cUSDC underlyingPriceUSD yield
Error with  cUSDC underlyingPriceUSD totalSupply
Error with  cUSDC underlyingPriceUSD collateralFactor
Error with  cUSDC utilization_ratio underlyingPriceUSD
Error with  cUSDC utilization_ratio yield




Error with  cUSDC yield underlyingPriceUSD
Error with  cUSDC yield utilization_ratio
Error with  cUSDC yield totalSupply
Error with  cUSDC yield collateralFactor
Error with  cUSDC totalSupply underlyingPriceUSD




Error with  cUSDC totalSupply yield




Error with  cUSDC collateralFactor underlyingPriceUSD




Error with  cUSDC collateralFactor yield




Error with  cUSDT underlyingPriceUSD collateralFactor




Error with  cUSDT utilization_ratio collateralFactor




Error with  cUSDT yield collateralFactor




Error with  cUSDT totalSupply collateralFactor
Error with  cUSDT collateralFactor underlyingPriceUSD
Error with  cUSDT collateralFactor utilization_ratio
Error with  cUSDT collateralFactor yield
Error with  cUSDT collateralFactor totalSupply




Unnamed: 0,symbol,t1,t2,p_value,lag
0,cUSDC,utilization_ratio,totalSupply,0.391831,1
1,cUSDC,utilization_ratio,collateralFactor,0.831098,1
2,cUSDC,totalSupply,utilization_ratio,0.787699,1
3,cUSDC,totalSupply,collateralFactor,0.666793,1
4,cUSDC,collateralFactor,utilization_ratio,0.91961,5


In [68]:
# Heatmaps of the best (minimal over lags) Granger p-value for each feature pair (t1, t2),
# aggregated over the rows of granger_dfs (one row per symbol and pair):
# left chart = max(p_value), right chart = mean(p_value).

max_chart = alt.Chart(granger_dfs).mark_rect().encode(
    alt.X('t1:N'),
    alt.Y('t2:N'),
    color=alt.Color('max(p_value):Q', scale=alt.Scale(scheme='darkblue')),
)

mean_chart = alt.Chart(granger_dfs).mark_rect().encode(
    alt.X('t1:N'),
    alt.Y('t2:N'),
    color=alt.Color('mean(p_value):Q', scale=alt.Scale(scheme='darkblue')),
)

max_chart | mean_chart

In [None]:
# for each pair of features draw the histogram of minimal p-values from granger_dfs

for t1 in ['underlyingPriceUSD', 'utilization_ratio', 'yield', 'totalSupply', 'collateralFactor']:
    for t2 in ['underlyingPriceUSD', 'utilization_ratio', 'yield', 'totalSupply', 'collateralFactor']:
        if t1 != t2:
            plt.figure(figsize=(10,4))
            plt.hist(granger_dfs[(granger_dfs['t1'] == t1) & (granger_dfs['t2'] == t2)]['p_value'], bins=np.arange(0,1,0.05))
            plt.title('Histogram of minimal P-values for GCTest of '+t2+' on '+t1, fontsize=10)
            plt.xlabel('P-value')
            plt.ylabel('Frequency')

In [59]:
granger_dfs['t1'].unique()

array(['utilization_ratio', 'totalSupply', 'collateralFactor',
       'underlyingPriceUSD', 'yield'], dtype=object)

In [60]:
# Human-readable labels for the feature names shown on the charts.
feature_labels = {'underlyingPriceUSD':'Market Price', 'utilization_ratio':'Utilization Ratio', 'yield':'Daily Return', 'totalSupply':'Total Supply', 'collateralFactor':'Collateral Factor'}
# Assign the result back: `granger_dfs[col].replace(..., inplace=True)` acts on a temporary
# column selection and does not update the frame under pandas Copy-on-Write.
# Re-running this cell is a no-op because the labels no longer match any key.
for column in ('t1', 't2'):
    granger_dfs[column] = granger_dfs[column].replace(feature_labels)

In [61]:
granger_dfs

Unnamed: 0,symbol,t1,t2,p_value,lag
0,cUSDC,Utilization Ratio,Total Supply,0.391831,1
1,cUSDC,Utilization Ratio,Collateral Factor,0.831098,1
2,cUSDC,Total Supply,Utilization Ratio,0.787699,1
3,cUSDC,Total Supply,Collateral Factor,0.666793,1
4,cUSDC,Collateral Factor,Utilization Ratio,0.919610,5
...,...,...,...,...,...
93,cCOMP,Total Supply,Collateral Factor,0.824141,1
94,cCOMP,Collateral Factor,Market Price,0.669610,1
95,cCOMP,Collateral Factor,Utilization Ratio,0.882412,1
96,cCOMP,Collateral Factor,Daily Return,0.684966,1


In [None]:

# Vizualizing the p-value of the granger causality test for each pair of columns and for each symbol with altair make darker the p-value is low 

alt.Chart(granger_dfs).mark_rect().encode(
    x='t1:N',
    y='t2:N',
    color=alt.Color('p_value:Q', scale=alt.Scale(scheme='darkblue')),
).properties(width=200, height=200).facet('symbol:N', columns=3)

In [62]:
# Configure common options. We specify the aggregation
# as a transform here so we can reuse it in both layers.
base = alt.Chart(granger_dfs).encode(
    alt.X('t1:N', title='Feature 1'),
    alt.Y('t2:N', title='Feature 2'),
)

# Configure heatmap
heatmap = base.mark_rect().encode(
    color=alt.Color('p_value:Q', scale=alt.Scale(scheme='darkblue'), title='Best p-value'),
)

# Configure text
text = base.mark_text(baseline='middle').encode(
    alt.Text('lag:Q', format=".0f"),
    color=alt.condition(
        alt.datum.p_value > 0.5,
        alt.value('black'),
        alt.value('white')
    )
)

# Draw the chart
chart = heatmap + text

chart = chart.properties(width=200, height=200).facet('symbol:N', columns=3)
chart.save('../charts/GCTest-Best-p-values-cross-features.png',ppi=300)


In [None]:
# ganger causality test between WBTC prices and ETH

def GrangerCrossSymbol(data, max_lag, symbol1, symbol2, t1, t2, plotting=False):
    """Best Granger-causality p-value of ``symbol2``'s ``t2`` on ``symbol1``'s ``t1``.

    The frame passed to ``grangercausalitytests`` has columns
    ``[symbol1_t1, symbol2_t2]``; statsmodels tests whether the second column helps
    predict the first.

    Args:
        data: DataFrame with an ``underlyingSymbol`` column and the columns ``t1``/``t2``.
            When it also has a ``date`` column, the two series are aligned on it.
        max_lag: Highest lag tested; lags 1..max_lag are evaluated.
        symbol1: ``underlyingSymbol`` of the explained series.
        symbol2: ``underlyingSymbol`` of the candidate causing series.
        t1: Column taken from ``symbol1``'s rows.
        t2: Column taken from ``symbol2``'s rows.
        plotting: When True, also draw the p-value as a function of the lag.

    Returns:
        Tuple ``(p_value, lag)``: the smallest ssr_ftest p-value and its (1-based) lag.
    """
    # Rows of each asset.
    data1 = data[data['underlyingSymbol'] == symbol1]
    data2 = data[data['underlyingSymbol'] == symbol2]

    c1 = symbol1+'_'+t1
    c2 = symbol2+'_'+t2
    if 'date' in data.columns:
        # Pair observations of the same day. Positional pairing (zip) misaligns the two
        # series whenever the assets do not cover exactly the same dates.
        # NOTE(review): assumes at most one row per date and asset — confirm for other inputs.
        left = data1[['date', t1]].rename(columns={t1: c1})
        right = data2[['date', t2]].rename(columns={t2: c2})
        ts_df = left.merge(right, on='date', how='inner')[[c1, c2]].dropna()
    else:
        # No date information available: fall back to positional pairing.
        ts_df = pd.DataFrame(columns=[c1,c2], data=zip(data1[t1],data2[t2])).dropna()

    # When the ADF test cannot reject a unit root at the 5% level, standardise the column
    # and take first differences; the NaN left in the first row is dropped after the loop.
    for column in (c1, c2):
        if adfuller(ts_df[column])[1] > 0.05:
            standardized = ( ts_df[column] - ts_df[column].mean() ) / ts_df[column].std()
            ts_df[column] = standardized.diff()
    ts_df = ts_df.dropna()

    # Test whether the second column Granger-causes the first for every lag up to max_lag.
    gc_results = grangercausalitytests(ts_df, max_lag,verbose=False)

    # p_values[i] belongs to lag i + 1.
    lags = list(range(1, max_lag + 1))
    p_values = [gc_results[lag][0]['ssr_ftest'][1] for lag in lags]

    if plotting:
        plt.figure(figsize=(10,4))
        # Plot against the real lag values; plotting the bare list would label lag 1 as 0.
        plt.plot(lags, p_values, color='b')
        plt.title('P-value of Granger Causality Test for '+symbol2+' '+t2+' on '+symbol1+' '+t1, fontsize=20)
        plt.xlabel('Lags')
        plt.ylabel('P-value')

    best = min(p_values)
    return best, p_values.index(best) + 1
    

In [None]:
GrangerCrossSymbol(full_market_price, 20, 'WBTC', 'ETH', 'underlyingPriceUSD', 'underlyingPriceUSD', True)

In [None]:
def Granger_p_values(data, max_lag, symbol1, symbol2, t1, t2):
    """Return the ssr_ftest p-values, for lags 1..max_lag, of ``symbol2``'s ``t2`` on ``symbol1``'s ``t1``.

    Args:
        data: DataFrame with a ``symbol`` column and the columns ``t1``/``t2``.
        max_lag: Highest lag tested.
        symbol1: ``symbol`` value of the explained series.
        symbol2: ``symbol`` value of the candidate causing series.
        t1: Column taken from ``symbol1``'s rows (first column of the test frame).
        t2: Column taken from ``symbol2``'s rows (second column of the test frame).

    Returns:
        List of p-values; element ``i`` belongs to lag ``i + 1``.
    """
    first_col = symbol1+'_'+t1
    second_col = symbol2+'_'+t2

    # Rows of each market, paired positionally into a two-column frame.
    rows1 = data[data['symbol'] == symbol1]
    rows2 = data[data['symbol'] == symbol2]
    ts_df = pd.DataFrame(columns=[first_col,second_col], data=zip(rows1[t1],rows2[t2])).dropna()

    # A column whose ADF p-value exceeds 0.05 is standardised and first-differenced;
    # the NaN this leaves in the first row is removed by the dropna() below.
    for col in (first_col, second_col):
        if adfuller(ts_df[col])[1] > 0.05:
            zscore = ( ts_df[col] - ts_df[col].mean() ) / ts_df[col].std()
            ts_df[col] = zscore.diff()
    ts_df = ts_df.dropna()

    gc_results = grangercausalitytests(ts_df, max_lag,verbose=False)

    return [gc_results[lag][0]['ssr_ftest'][1] for lag in range(1, max_lag + 1)]

In [66]:
# Function that takes : data, a max number of lag, a list of symbol, two columns (t1,t2) 
# and return a dataframe with granger causality test p-value for all the lags from 0 to max_lag between t1 and t2 for each symbol after cleaning the data
# then plot with alatair vega the dataframe with different color for each symbol on a same graph

def GrangerSymbols(data, max_lag, symbols, t1, t2, plotting=False):
    """Collect the Granger p-values of ``t2`` on ``t1`` per lag for several markets.

    Args:
        data: DataFrame forwarded to ``Granger_p_values``.
        max_lag: Highest lag tested for each market.
        symbols: Iterable of ``symbol`` values; each market is tested against itself.
        t1: Explained column.
        t2: Candidate causing column.
        plotting: When True, draw one p-value line per symbol with a dashed rule at 0.05,
            save the chart as a PNG under ``../charts/`` and return the chart.

    Returns:
        The Altair chart when ``plotting`` is True, otherwise a DataFrame with columns
        ``symbol``, ``lag`` (1-based) and ``p_value``.
    """
    granger_list = []
    for symbol in symbols:
        try:
            p_values = Granger_p_values(data, max_lag, symbol, symbol, t1, t2)
        except Exception as err:
            # Best effort: skip markets the test cannot handle, but report why.
            # A bare `except:` would also swallow KeyboardInterrupt.
            print("Error with ", symbol, t1, t2, "->", repr(err))
            continue
        # p_values[0] belongs to lag 1, so number the lags from 1 (there is no lag 0).
        granger_list.extend(
            {'symbol': symbol, 'lag': lag, 'p_value': p_value}
            for lag, p_value in enumerate(p_values, start=1)
        )

    granger_dfs = pd.DataFrame(granger_list)

    if plotting:
        chart = alt.Chart(granger_dfs).mark_line().encode(
            x='lag',
            y='p_value:Q',
            color='symbol:N'
        )

        # Dashed reference line at the usual 5% significance level.
        rule = alt.Chart(pd.DataFrame({'y': [0.05]})).mark_rule(color='red', strokeDash=[5, 5]).encode(y='y')
        chart += rule
        chart.save('../charts/GCTest-'+t2+'-causing-'+t1+'.png',ppi=300)
        return chart

    return granger_dfs

In [67]:

for feature in ['underlyingPriceUSD', 'yield', 'totalSupply', 'collateralFactor']:
    GrangerSymbols(full_market_price, 50, focused_coin , 'utilization_ratio', feature, True).display()


Error with  cUSDC utilization_ratio underlyingPriceUSD




Error with  cUSDC utilization_ratio yield








Error with  cUSDT utilization_ratio collateralFactor




- [x] Focus the study on: SC (stable coins) (DAI, USDC, USDT) / VC (volatile coins) (WBTC, ETH, COMP)
- [x] Scaling the plot of p-value in absolute axis (for the y axis)
- [ ] Trying to classify the different patterns on GrangerSymbols plot
- [ ] Discuss the asymmetry between the two sides of the diagonal of the cross-feature best p-values diagram
- [ ] Some interpretation and hypothesis of utilization ratio correlation with other columns
- [x] See Defi Leverage paper [link](https://www.bis.org/publ/work1171.htm)
- [ ] Plot historical events on charts with https://bitcoin.fr/Histoire/ or http://help.cryptosheets.com/en/articles/2262493-how-to-pull-events-and-calendar-data-for-cryptocurrencies-into-excel


In [None]:
import json
import os

import pandas as pd
import requests

# The Coindar access token is a credential: read it from the environment instead of
# committing it to the notebook. The token that used to be hard-coded here should be
# revoked, since it is part of the file history.
token = os.environ["COINDAR_API_TOKEN"]
url = "https://coindar.org/api/v2/coins?access_token=" + token

# Request the Coindar API v2 list of cryptocurrencies.
coin_response = requests.get(url, timeout=30)
coin_response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error body
coin_dump = json.dumps(coin_response.json(), indent=4)
coin_df = pd.DataFrame(json.loads(coin_dump))

coin_df

In [None]:
import json
import os

import pandas as pd
import requests

# The Coindar access token is a credential: read it from the environment instead of
# committing it to the notebook. The token that used to be hard-coded here should be
# revoked, since it is part of the file history.
token = os.environ["COINDAR_API_TOKEN"]
url = "https://coindar.org/api/v2/tags?access_token=" + token

# Coin list from the local CSV file.
coin_df = pd.read_csv("../data/coins.csv")
# Store ids as strings so they can be joined into the comma-separated API filters.
coin_df['id'] = coin_df['id'].astype(str)

# Request the Coindar API v2 list of event tags.
tag_response = requests.get(url, timeout=30)
tag_response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error body
tag_dump = json.dumps(tag_response.json(), indent=4)
tag_df = pd.DataFrame(json.loads(tag_dump))


In [None]:
tag_df

In [None]:

# make a request to coindar API v2 that Returns cryptocurrency events

"""
Parameters :

access_token (required) — access token.

page — page number.
By default: 1

page_size — page size.
By default: 30
Possible values: from 1 to 100

filter_date_start — the lower limit of the event start date in the format yyyy-mm-dd.
By default: date of the first event on Coindar.

filter_date_end — the upper date limit of the event start date in the format yyyy-mm-dd.
By default: date of the last event on Coindar by the time of request.

filter_coins — cryptocurrencies IDs, relating to the requested events.
By default: all cryptocurrencies
Example: 1,2,3,4,10

filter_tags — tags IDs (list of tags IDs can be received by the tags method).
By default: all tags
Example: 1,2,3

sort_by — parameter, defining events sorting.
By default: date_start
Possible values: date_start (start date), date_added (publication date), views (amount of views). Remember, that views accounting started in June 2018.

order_by — sorting order
By default: 0
Possible values: 0 (in the increasing order) or 1 (in the decreasing order)

Example : 
https://coindar.org/api/v2/events?access_token={token}&page=1&page_size=30&filter_date_start=2018-06-01&filter_date_end=2018-07-01&filter_coins=1,4,20,34&filter_tags=1,2,3&sort_by=views&order_by=1

Response example :
[
{
"caption": "Listing on OTCBTC",
"source": "https://coindar.org/en/event/oraclechain-oct-listing-on-otcbtc-11299",
"source_reliable": "true",
"important": "false",
"date_public": "2018-7-09 17:23",
"date_start": "2018-7-11 06:00",
"date_end": "",
"coin_id": "280",
"coin_price_changes": "-12.89",
"tags": "9"
}, 
... ]

"""

def to_id(df, column = 'name', list = ['AMA', 'General']) -> str:
    """Return the ``id`` values of the rows whose ``column`` value is in ``list``, comma-separated.

    Args:
        df: DataFrame with an ``id`` column and the column named by ``column``.
        column: Column matched against ``list``.
        list: Values to look up (the name shadows the builtin but is kept so existing
            keyword callers keep working; the default is never mutated).

    Returns:
        The matching ids joined with ",", in ``df`` row order; "" when nothing matches.
    """
    matches = df[df[column].isin(list)]
    # Cast to str so numeric ids can be joined as well (str.join rejects non-strings).
    return ",".join(matches['id'].astype(str))

def get_events(
    filter_date_start = "2018-06-01",
    filter_date_end = "2018-07-01",
    filter_coins = ['BTC', 'ETH', 'USDT'],
    filter_tags = ['AMA', 'General'],
    sort_by = "date_start",
    order_by = "1",
    page = 1,
    page_size = 99, important = False):
    """Fetch one page of Coindar events and return it as a DataFrame.

    Reads the notebook-level ``token``, ``coin_df`` and ``tag_df``.

    Args:
        filter_date_start: Lower bound of the event start date (yyyy-mm-dd).
        filter_date_end: Upper bound of the event start date (yyyy-mm-dd).
        filter_coins: Coin symbols, translated to Coindar ids through ``coin_df``.
        filter_tags: Tag names, translated to Coindar ids through ``tag_df``.
        sort_by: Sort key sent to the API.
        order_by: Sort order sent to the API.
        page: Page number.
        page_size: Number of events per page.
        important: When True, keep only rows whose ``important`` field equals "true".

    Returns:
        DataFrame built from the JSON response (empty when no event matches).

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    # Translate symbols / tag names into the comma-separated id lists the API expects.
    filter_coins = to_id(coin_df, 'symbol', filter_coins)
    filter_tags = to_id(tag_df, 'name', filter_tags)

    event_response = requests.get(f"https://coindar.org/api/v2/events?access_token={token}&page={page}&page_size={page_size}&filter_date_start={filter_date_start}&filter_date_end={filter_date_end}&filter_coins={filter_coins}&filter_tags={filter_tags}&sort_by={sort_by}&order_by={order_by}", timeout=30)
    # Fail loudly on HTTP errors instead of building a frame from an error body.
    event_response.raise_for_status()

    event_df = pd.DataFrame(event_response.json())

    # An empty result has no 'important' column, so only filter when there are rows.
    if important and not event_df.empty:
        event_df = event_df[event_df['important'] == "true"]

    return event_df


major_event = get_events("2019-05-07","2023-12-31", ['COMP'], tag_df['name'].to_list())


In [None]:
major_event

In [None]:
# chart of wbtc and eth price
import altair as alt

eth_uratio = alt.Chart(market_price[market_price['underlyingSymbol'] == 'ETH']).mark_line(color='blue').encode( x='date:T', y='utilization_ratio:Q').properties(title=alt.Title(text='ETH Utilization Ratio'))
eth_price = alt.Chart(market_price[market_price['underlyingSymbol'] == 'ETH']).mark_line(color='blue').encode( x='date:T', y='underlyingPriceUSD:Q').properties(title=alt.Title(text='ETH Price'))
event = alt.Chart(major_event).mark_rule(color='red').encode(x='date_start:T',color='caption:N', tooltip='caption')

# chart of wbtc filtered on major_events[date_start]

(eth_price + event | eth_uratio + event).save('../charts/eth_price_uratio_coindar.png', ppi=300)

