# Imports and env Variables

In [54]:
import pandas as pd
import numpy as np
from dune_client.client import DuneClient
from flipside import Flipside
import plotly
import datetime as dt
from dotenv import load_dotenv
from prophet import Prophet
import os
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# import yfinance as yf

from utils import flipside_api_results
from sql import trader_classifier_query, prices_and_vol_query

In [55]:
load_dotenv()

True

In [56]:
dune_api_key = os.getenv('DUNE_API_KEY')
flipside_api_key = os.getenv('FLIPSIDE_API_KEY')
dune = DuneClient(dune_api_key)
# print(dune_api_key, flipside_api_key)

# Data Retrieval and Analysis

## Dune Queries

Uniswap Arbitrum Pool Data
https://dune.com/queries/3930878

Arbitrum Gas Data
https://dune.com/queries/3931017

In [57]:
def dune_api_results(query_num, save_csv=False, csv_path=None):
    results = dune.get_latest_result(query_num)
    df = pd.DataFrame(results.result.rows)

    if save_csv and csv_path:
        df.to_csv(csv_path, index=False)
    return df


arb_pool_path = 'data/arb_pool_data.csv'
arb_pool_df = dune_api_results(3930878, True, arb_pool_path)
arb_pool_df

In [58]:
arb_pool_path = 'data/arb_pool_data.csv'
arb_pool_df = pd.read_csv(arb_pool_path)
arb_pool_df.columns = [f'arbitrum_{col}' if col != 'day' else col for col in arb_pool_df.columns]
print(arb_pool_df.columns)

Index(['arbitrum_avg_liquidity', 'day', 'arbitrum_fee_apr',
       'arbitrum_fee_tier', 'arbitrum_fees_usd', 'arbitrum_lp_addr',
       'arbitrum_num_trades', 'arbitrum_token_pair', 'arbitrum_tvl_usd',
       'arbitrum_volume_to_tvl', 'arbitrum_volume_usd'],
      dtype='object')


arbitrum_gas = dune.get_latest_result(3931017)
arbitrum_gas_df = pd.DataFrame(arbitrum_gas.result.rows)
arbitrum_gas_path = 'data/arb_gas.csv'
arbitrum_gas_df.to_csv(arbitrum_gas_path, index=False)

In [59]:
arbitrum_gas_path = 'data/arb_gas.csv'
arbitrum_gas_df = pd.read_csv(arbitrum_gas_path)
if 'Unnamed: 0' in arbitrum_gas_df.columns:
    arbitrum_gas_df = arbitrum_gas_df.drop('Unnamed: 0', axis=1)
arbitrum_gas_df.columns = [f'arbitrum_{col}' if col != 'dt' else col for col in arbitrum_gas_df.columns]
arbitrum_gas_df.columns


Index(['dt', 'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd'], dtype='object')

Uniswap Optimism Pool Data
https://dune.com/queries/3930989

Optimism Gas Data 
https://dune.com/queries/3930989

op_pool_path = 'data/op_pool_data.csv'
op_pool_df = dune_api_results(3930989, True, op_pool_path)
op_pool_df

In [60]:
op_pool_path = 'data/op_pool_data.csv'
op_pool_df = pd.read_csv(op_pool_path)
op_pool_df.columns = [f'optimism_{col}' if col != 'day' else col for col in op_pool_df.columns]
print(op_pool_df.columns)

Index(['optimism_avg_liquidity', 'day', 'optimism_fee_apr',
       'optimism_fee_tier', 'optimism_fees_usd', 'optimism_lp_addr',
       'optimism_num_trades', 'optimism_token_pair', 'optimism_tvl_usd',
       'optimism_volume_to_tvl', 'optimism_volume_usd'],
      dtype='object')


optimism_gas = dune.get_latest_result_dataframe(3931019)
optimism_gas_path = 'data/op_gas.csv'
optimism_gas.to_csv(optimism_gas_path, index=False)

In [61]:
optimism_gas_path = 'data/op_gas.csv'
optimism_gas_df = pd.read_csv(optimism_gas_path)
optimism_gas_df = optimism_gas_df.drop('_col3', axis=1) if '_col3' in optimism_gas_df.columns else optimism_gas_df
optimism_gas_df.columns = [f'optimism_{col}' if col != 'dt' else col for col in optimism_gas_df.columns]
optimism_gas_df.columns

Index(['dt', 'optimism_gas_usd_per_tx', 'optimism_median_gas_usd'], dtype='object')

Uniswap Base Pool Data
https://dune.com/queries/3930954

Base Gas Data
https://dune.com/queries/3931021

base_pool_path = 'data/base_pool_data.csv'
base_pool_df = dune_api_results(3930954, True, base_pool_path)
base_pool_df

In [62]:
base_pool_path = 'data/base_pool_data.csv'
base_pool_df = pd.read_csv(base_pool_path)
base_pool_df.columns = [f'base_{col}' if col != 'day' else col for col in base_pool_df.columns]
print(base_pool_df.columns)

Index(['base_avg_liquidity', 'day', 'base_fee_apr', 'base_fee_tier',
       'base_fees_usd', 'base_lp_addr', 'base_num_trades', 'base_token_pair',
       'base_tvl_usd', 'base_volume_to_tvl', 'base_volume_usd'],
      dtype='object')


base_gas = dune.get_latest_result_dataframe(3931021)
base_gas_path = 'data/base_gas.csv'
base_gas.to_csv(base_gas_path, index=False)

In [63]:
base_gas_path = 'data/base_gas.csv'
base_gas_df = pd.read_csv(base_gas_path)
base_gas_df.columns = [f'base_{col}' if col != 'dt' else col for col in base_gas_df.columns]
base_gas_df.columns

Index(['dt', 'base_gas_usd_per_tx', 'base_median_gas_usd'], dtype='object')

## Flipside Queries 

Trader Classifier - Arbitrum Data
https://flipsidecrypto.xyz/Brandyn/q/7NlPxrKU5KQb/2024-07-20-06-36-pm

classifier_data_path = 'data/classifier.csv'
trader_classifier_data = flipside_api_results(trader_classifier_query, flipside_api_key)
print(trader_classifier_data)
trader_classifier_data.to_csv(classifier_data_path, index=False)

In [64]:
classifier_data_path = 'data/classifier.csv'
trader_classifier_df = pd.read_csv(classifier_data_path)
trader_classifier_df.drop(columns=['__row_index'], inplace=True)
trader_classifier_df.head()

Unnamed: 0,trader_type,dt,tx_count,total_volume_usd,avg_order_size_usd,unique_contracts
0,Professional,2024-07-21T15:00:00.000Z,928,1047810.93,1183.967153,104
1,Retail,2024-07-21T15:00:00.000Z,176,91856.49,540.332294,60
2,Professional,2024-07-21T14:00:00.000Z,7552,12913378.38,1766.53603,252
3,Retail,2024-07-21T14:00:00.000Z,1790,294765.21,170.680492,196
4,Professional,2024-07-21T13:00:00.000Z,4472,5883981.27,1367.731583,211


Crypto Prices - https://flipsidecrypto.xyz/Brandyn/q/mScUOHdMvxki/2024-07-21-12-20-pm

prices_path = 'data/prices_vol.csv'
prices_data = flipside_api_results(prices_and_vol_query, flipside_api_key)
prices_data.to_csv(prices_path, index=False)

In [65]:
prices_path = 'data/prices_vol.csv'
prices_vol_df = pd.read_csv(prices_path)
prices_vol_df.drop(columns=['__row_index'], inplace=True)
prices_vol_df['dt'] = pd.to_datetime(prices_vol_df['dt'])
prices_vol_df.rename(columns={'dt':'day'}, inplace=True)
prices_vol_df.head()

Unnamed: 0,day,symbol,price,arbitrum_vol_ex_uni
0,2024-07-23 01:00:00+00:00,WETH,3444.47,3162515.13
1,2024-07-23 01:00:00+00:00,WBTC,67598.0,3162515.13
2,2024-07-23 00:00:00+00:00,WBTC,67606.0,10836507.71
3,2024-07-23 00:00:00+00:00,WETH,3447.68,10836507.71
4,2024-07-22 23:00:00+00:00,WBTC,67788.0,12103597.64


## Data Cleaning/Processing

Each token pair has several fee tiers and some have differente lp addresses.  

Can aggregate metrics to token pair and/or seperate each fee-tier out, aggregate by addresses

In [66]:
arbitrum_gas_df.rename(columns={'dt':'day'}, inplace=True)
arbitrum_gas_df['day'] = pd.to_datetime(arbitrum_gas_df['day'])

In [67]:
arb_pool_df['day'] = pd.to_datetime(arb_pool_df['day'])

trader_pivot_df = trader_classifier_df.pivot(
    index='day',
    columns='trader_type',
    values=['tx_count', 'total_volume_usd', 'avg_order_size_usd', 'unique_contracts']
)
trader_pivot_df.columns = [f'{col[0]}_{col[1]}' for col in trader_pivot_df.columns]
trader_pivot_df

In [176]:
prices_vol_df_pivot = prices_vol_df.drop(columns=['arbitrum_vol_ex_uni']).pivot(
    index='day',
    columns='symbol',
    values='price'
    
    )

prices_vol_df_pivot.columns = [f'{col[0]}_{col[1]}' for col in prices_vol_df_pivot.columns]
prices_vol_df_pivot.rename(columns={"W_B":"BTC_Price","W_E":"ETH_Price"}, inplace=True)
prices_vol_df_pivot

Unnamed: 0_level_0,BTC_Price,ETH_Price
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-07-01 00:00:00+00:00,30455.140252,1933.301804
2023-07-01 01:00:00+00:00,30444.441517,1932.612190
2023-07-01 02:00:00+00:00,30467.154009,1938.738233
2023-07-01 03:00:00+00:00,30463.502821,1931.559012
2023-07-01 04:00:00+00:00,30413.699209,1921.631704
...,...,...
2024-07-22 21:00:00+00:00,68080.000000,3492.530000
2024-07-22 22:00:00+00:00,68045.000000,3490.770000
2024-07-22 23:00:00+00:00,67788.000000,3461.760000
2024-07-23 00:00:00+00:00,67606.000000,3447.680000


In [177]:
prices_vol_df_pivot = prices_vol_df_pivot.merge(prices_vol_df[['day','arbitrum_vol_ex_uni']],
                                                how='inner', on='day'
                                                
                                                
                                                
                                                )

prices_vol_df_pivot

Unnamed: 0,day,BTC_Price,ETH_Price,arbitrum_vol_ex_uni
0,2023-07-01 00:00:00+00:00,30455.140252,1933.301804,2672462.92
1,2023-07-01 00:00:00+00:00,30455.140252,1933.301804,2672462.92
2,2023-07-01 01:00:00+00:00,30444.441517,1932.612190,1794246.45
3,2023-07-01 01:00:00+00:00,30444.441517,1932.612190,1794246.45
4,2023-07-01 02:00:00+00:00,30467.154009,1938.738233,1328019.33
...,...,...,...,...
18623,2024-07-22 23:00:00+00:00,67788.000000,3461.760000,12103597.64
18624,2024-07-23 00:00:00+00:00,67606.000000,3447.680000,10836507.71
18625,2024-07-23 00:00:00+00:00,67606.000000,3447.680000,10836507.71
18626,2024-07-23 01:00:00+00:00,67598.000000,3444.470000,3162515.13


## Arbitrum

### Summary Statistics

In [69]:
print(arb_pool_df.describe())

       arbitrum_avg_liquidity  arbitrum_fee_apr  arbitrum_fees_usd  \
count            3.468000e+03      3.468000e+03       3.468000e+03   
mean             1.782774e+24      1.458773e-02       2.966866e+02   
std              7.617222e+24      5.095527e-02       7.349982e+02   
min              9.140293e+07      3.241851e-13       6.002412e-15   
25%              6.812052e+15      8.222112e-04       2.920990e-01   
50%              1.903847e+18      4.227947e-03       1.321334e+01   
75%              2.049945e+22      1.363400e-02       2.495379e+02   
max              4.600563e+25      1.606219e+00       8.186259e+03   

       arbitrum_num_trades  arbitrum_tvl_usd  arbitrum_volume_to_tvl  \
count          3468.000000      3.468000e+03            3.468000e+03   
mean            190.908304      9.310402e+06            5.508794e-02   
std             338.143117      1.496615e+07            1.409350e-01   
min               1.000000      6.708087e+00            2.982674e-13   
25%      

In [70]:
print(arbitrum_gas_df.describe())

       arbitrum_gas_usd_per_tx  arbitrum_median_gas_usd
count              2162.000000              2162.000000
mean                  0.023634                 0.012860
std                   0.221630                 0.129315
min                   0.004397                 0.001491
25%                   0.006888                 0.004532
50%                   0.008472                 0.005564
75%                   0.010704                 0.007182
max                   6.944713                 4.181090


### Data Visualizations

#### Aggregated Data

In [71]:
arb_pool_df.columns

Index(['arbitrum_avg_liquidity', 'day', 'arbitrum_fee_apr',
       'arbitrum_fee_tier', 'arbitrum_fees_usd', 'arbitrum_lp_addr',
       'arbitrum_num_trades', 'arbitrum_token_pair', 'arbitrum_tvl_usd',
       'arbitrum_volume_to_tvl', 'arbitrum_volume_usd'],
      dtype='object')

In [72]:
arb_pool_df_copy = arb_pool_df.set_index('day')
arb_pool_df_copy[(arb_pool_df_copy['arbitrum_token_pair']=='USDC-WETH')&(arb_pool_df_copy['arbitrum_fee_tier']=='0.01%')][['arbitrum_fee_tier','arbitrum_tvl_usd']]

Unnamed: 0_level_0,arbitrum_fee_tier,arbitrum_tvl_usd
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-07-14 00:00:00+00:00,0.01%,70089.797979
2024-07-14 01:00:00+00:00,0.01%,70127.520219
2024-07-14 02:00:00+00:00,0.01%,70051.354131
2024-07-14 03:00:00+00:00,0.01%,70068.423443
2024-07-14 04:00:00+00:00,0.01%,70186.064684
...,...,...
2024-07-20 21:00:00+00:00,0.01%,48460.898146
2024-07-20 22:00:00+00:00,0.01%,48491.764876
2024-07-20 23:00:00+00:00,0.01%,48117.867454
2024-07-21 00:00:00+00:00,0.01%,48120.543828


In [73]:
aggregated_arb_hour = arb_pool_df[['day','arbitrum_fees_usd','arbitrum_tvl_usd','arbitrum_volume_usd','arbitrum_num_trades']].groupby('day').sum()
aggregated_arb_hour = aggregated_arb_hour.merge(arbitrum_gas_df, how='left', on='day')

# Averages per hour among token pairs
aggregated_arb_tp = arb_pool_df[['arbitrum_token_pair','arbitrum_fees_usd','arbitrum_tvl_usd','arbitrum_volume_usd','arbitrum_num_trades']].groupby('arbitrum_token_pair').mean() 

In [74]:
aggregated_arb_tp

Unnamed: 0_level_0,arbitrum_fees_usd,arbitrum_tvl_usd,arbitrum_volume_usd,arbitrum_num_trades
arbitrum_token_pair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ARB-WETH,123.049496,7323853.0,236746.2,150.150943
DAI-USDC,5.937179,802553.8,59244.97,14.883721
USDC-WBTC,257.499019,6181301.0,512955.2,217.636364
USDC-WETH,741.293099,15742440.0,1412900.0,380.783333
USDT-WETH,230.414451,8072348.0,410541.0,282.114964
WBTC-WETH,323.289718,20068010.0,627351.0,156.197125
WETH-XAI,25.963443,311109.5,8609.368,32.870968
WETH-ZRO,449.440307,2632211.0,138740.1,123.250794
WETH-wstETH,18.332452,8319976.0,183234.1,18.208738


In [75]:
aggregated_arb_hour.describe()

Unnamed: 0,arbitrum_fees_usd,arbitrum_tvl_usd,arbitrum_volume_usd,arbitrum_num_trades,arbitrum_gas_usd_per_tx,arbitrum_median_gas_usd
count,170.0,170.0,170.0,170.0,170.0,170.0
mean,6052.406474,189932200.0,10374510.0,3894.529412,0.011762,0.007035
std,3545.725661,8890367.0,6642967.0,1644.030155,0.010017,0.003911
min,912.69363,165983600.0,1035510.0,547.0,0.005264,0.001491
25%,3332.511041,183956200.0,5430933.0,2604.5,0.007909,0.005254
50%,5324.330184,190311600.0,8846839.0,3592.0,0.00941,0.006077
75%,7908.212068,196928900.0,14231710.0,4913.75,0.011557,0.007356
max,18511.980815,204910600.0,34207140.0,8707.0,0.087784,0.038661


In [76]:
aggregated_arb_tp.columns

Index(['arbitrum_fees_usd', 'arbitrum_tvl_usd', 'arbitrum_volume_usd',
       'arbitrum_num_trades'],
      dtype='object')

In [77]:
agg_tp_tvl_graph = px.bar(aggregated_arb_tp, x=aggregated_arb_tp.index, y='arbitrum_tvl_usd', color=aggregated_arb_tp.index)
agg_tp_tvl_graph.show()

In [78]:
agg_tp_vol_graph = px.bar(aggregated_arb_tp, x=aggregated_arb_tp.index, y='arbitrum_volume_usd', color=aggregated_arb_tp.index)
agg_tp_vol_graph.show()

In [79]:
agg_tp_fee_graph = px.bar(aggregated_arb_tp, x=aggregated_arb_tp.index, y='arbitrum_fees_usd', color=aggregated_arb_tp.index)
agg_tp_fee_graph.show()

In [80]:
agg_tp_trades_graph = px.bar(aggregated_arb_tp, x=aggregated_arb_tp.index, y='arbitrum_num_trades', color=aggregated_arb_tp.index)
agg_tp_trades_graph.show()

In [81]:
aggregated_arb_hour['arbitrum_volume_to_tvl'] = aggregated_arb_hour['arbitrum_volume_usd'] / aggregated_arb_hour['arbitrum_tvl_usd']
aggregated_arb_hour

Unnamed: 0,day,arbitrum_fees_usd,arbitrum_tvl_usd,arbitrum_volume_usd,arbitrum_num_trades,arbitrum_gas_usd_per_tx,arbitrum_median_gas_usd,arbitrum_volume_to_tvl
0,2024-07-14 00:00:00+00:00,4305.423434,1.770941e+08,7.647457e+06,3429,0.010340,0.006240,0.043183
1,2024-07-14 01:00:00+00:00,2620.052008,1.728960e+08,4.751922e+06,2542,0.007959,0.005059,0.027484
2,2024-07-14 02:00:00+00:00,2509.557949,1.704295e+08,4.138064e+06,2252,0.005898,0.004483,0.024280
3,2024-07-14 03:00:00+00:00,1305.792138,1.703676e+08,2.057988e+06,1547,0.005264,0.004377,0.012080
4,2024-07-14 04:00:00+00:00,5493.702700,1.787923e+08,9.360130e+06,4237,0.006068,0.004599,0.052352
...,...,...,...,...,...,...,...,...
165,2024-07-20 21:00:00+00:00,5024.962017,2.048495e+08,8.463321e+06,3254,0.009192,0.005052,0.041315
166,2024-07-20 22:00:00+00:00,2853.101471,1.965587e+08,3.837792e+06,2190,0.010094,0.005563,0.019525
167,2024-07-20 23:00:00+00:00,5020.902254,1.957108e+08,2.632626e+06,2107,0.009521,0.005383,0.013452
168,2024-07-21 00:00:00+00:00,5235.640922,1.999137e+08,6.726266e+06,2649,0.009502,0.005291,0.033646


In [82]:
aggregated_arb_hour_vttvl_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
aggregated_arb_hour_vttvl_fig.add_trace(
    go.Bar(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_volume_to_tvl'],
        name='Fee Revenue'
    ),
    secondary_y=False
)


aggregated_arb_hour_vttvl_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_vttvl_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_vttvl_fig.update_layout(
    title='Volume/TVL Ratio to Gas',
    barmode='group'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

aggregated_arb_hour_vttvl_fig.update_xaxes(title_text="Date")

aggregated_arb_hour_vttvl_fig.show()

In [83]:
aggregated_arb_hour_fees_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
aggregated_arb_hour_fees_fig.add_trace(
    go.Bar(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_fees_usd'],
        name='Fee Revenue'
    ),
    secondary_y=False
)
aggregated_arb_hour_fees_fig.add_trace(
    go.Bar(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_num_trades'],
        name='# of Trades',
    ),
    secondary_y=False
)


aggregated_arb_hour_fees_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_fees_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_fees_fig.update_layout(
    title='Fee Revenue and Trades to Gas',
    barmode='group'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

aggregated_arb_hour_fees_fig.update_xaxes(title_text="Date")

aggregated_arb_hour_fees_fig.show()

In [84]:
aggregated_arb_hour_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
aggregated_arb_hour_fig.add_trace(
    go.Bar(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_tvl_usd'],
        name='tvl'
    ),
    secondary_y=False
)
aggregated_arb_hour_fig.add_trace(
    go.Bar(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_volume_usd'],
        name='volume'
    ),
    secondary_y=False
)


aggregated_arb_hour_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_fig.update_layout(
    title='TVL and Vol to Gas',
    barmode='group'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

aggregated_arb_hour_fig.update_xaxes(title_text="Date")

aggregated_arb_hour_fig.show()

filtered_cols = ['day','arbitrum_avg_liquidity','arbitrum_fees_usd','arbitrum_tvl_usd','arbitrum_volume_usd','arbitrum_num_trades','arbitrum_volume_to_tvl','arbitrum_token_pair']

#### Individual Token Pairs

In [85]:
def fee_tier_cleaning(df, network):
    fee_tiers = df[f'{network}_fee_tier'].unique()
    merged_df = None
    
    # Determine the complete date range
    min_date = df['day'].min()
    max_date = df['day'].max()
    complete_date_range = pd.date_range(start=min_date, end=max_date, freq='H')

    for fee_tier in fee_tiers:
        # Filter the DataFrame for the specific fee tier
        filtered_df = df[df[f'{network}_fee_tier'] == fee_tier].copy()
        
        # Rename the columns to include the fee tier
        fee_tier_suffix = fee_tier.replace('.', '').replace('%', '')
        filtered_df.columns = [f'{col}_{fee_tier_suffix}' if col not in ['day', f'{network}_gas_usd_per_tx', f'{network}_median_gas_usd'] else col for col in filtered_df.columns]
        
        # Set the 'day' column as the index
        filtered_df.set_index('day', inplace=True)
        
        # Reindex the DataFrame to the complete date range and fill NaN values with 0
        filtered_df = filtered_df.reindex(complete_date_range, fill_value=0).rename_axis('day').reset_index()
        
        # Calculate the net change in liquidity
        liquidity_col = f'{network}_avg_liquidity_{fee_tier_suffix}'
        net_liquidity_col = f'{network}_net_liquidity_{fee_tier_suffix}'
        filtered_df[net_liquidity_col] = filtered_df[liquidity_col] - filtered_df[liquidity_col].shift(1)
        filtered_df[net_liquidity_col].fillna(0, inplace=True)

        # Merge with the main DataFrame
        if merged_df is None:
            merged_df = filtered_df
        else:
            merged_df = pd.merge(merged_df, filtered_df, on='day', how='inner')

    arb_gas_df = arbitrum_gas_df.rename(columns={'dt':'day'})
    arb_gas_df['day'] = pd.to_datetime(arb_gas_df['day'])
    merged_df = merged_df.merge(arb_gas_df, how='left', on=['day'])
        
    return merged_df

In [86]:
# All types of fee tiers in dataset 

fee_tiers_unique = arb_pool_df['arbitrum_fee_tier'].unique()
token_pairs_unique = arb_pool_df['arbitrum_token_pair'].unique()

print(f'token pairs: {token_pairs_unique}')

token pairs: ['WETH-ZRO' 'USDC-WETH' 'USDC-WBTC' 'USDT-WETH' 'WBTC-WETH' 'ARB-WETH'
 'WETH-XAI' 'WETH-wstETH' 'DAI-USDC']


In [87]:
def token_pair_df_cleaned(df, token_pair, network):
    token_pair_df = df[df[f'{network}_token_pair']==f'{token_pair}']
    cleaned_token_pair_df = fee_tier_cleaning(token_pair_df, network)
    return cleaned_token_pair_df

In [88]:
usdc_weth = token_pair_df_cleaned(arb_pool_df, 'USDC-WETH', 'arbitrum') 
usdc_weth.columns


'H' is deprecated and will be removed in a future version, please use 'h' instead.


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].

Index(['day', 'arbitrum_avg_liquidity_001', 'arbitrum_fee_apr_001',
       'arbitrum_fee_tier_001', 'arbitrum_fees_usd_001',
       'arbitrum_lp_addr_001', 'arbitrum_num_trades_001',
       'arbitrum_token_pair_001', 'arbitrum_tvl_usd_001',
       'arbitrum_volume_to_tvl_001', 'arbitrum_volume_usd_001',
       'arbitrum_net_liquidity_001', 'arbitrum_avg_liquidity_005',
       'arbitrum_fee_apr_005', 'arbitrum_fee_tier_005',
       'arbitrum_fees_usd_005', 'arbitrum_lp_addr_005',
       'arbitrum_num_trades_005', 'arbitrum_token_pair_005',
       'arbitrum_tvl_usd_005', 'arbitrum_volume_to_tvl_005',
       'arbitrum_volume_usd_005', 'arbitrum_net_liquidity_005',
       'arbitrum_avg_liquidity_030', 'arbitrum_fee_apr_030',
       'arbitrum_fee_tier_030', 'arbitrum_fees_usd_030',
       'arbitrum_lp_addr_030', 'arbitrum_num_trades_030',
       'arbitrum_token_pair_030', 'arbitrum_tvl_usd_030',
       'arbitrum_volume_to_tvl_030', 'arbitrum_volume_usd_030',
       'arbitrum_net_liquidity_0

In [89]:
arb_pool_df[(arb_pool_df['arbitrum_token_pair']=='DAI-USDC')]['arbitrum_fee_tier'].unique()

array(['0.01%', '0.05%', '0.30%'], dtype=object)

In [90]:
weth_zro_df = arb_pool_df[(arb_pool_df['arbitrum_token_pair']=='WETH-ZRO')]
weth_zro = fee_tier_cleaning(weth_zro_df, 'arbitrum') 


'H' is deprecated and will be removed in a future version, please use 'h' instead.


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].

In [91]:
weth_zro.columns

Index(['day', 'arbitrum_avg_liquidity_030', 'arbitrum_fee_apr_030',
       'arbitrum_fee_tier_030', 'arbitrum_fees_usd_030',
       'arbitrum_lp_addr_030', 'arbitrum_num_trades_030',
       'arbitrum_token_pair_030', 'arbitrum_tvl_usd_030',
       'arbitrum_volume_to_tvl_030', 'arbitrum_volume_usd_030',
       'arbitrum_net_liquidity_030', 'arbitrum_avg_liquidity_100',
       'arbitrum_fee_apr_100', 'arbitrum_fee_tier_100',
       'arbitrum_fees_usd_100', 'arbitrum_lp_addr_100',
       'arbitrum_num_trades_100', 'arbitrum_token_pair_100',
       'arbitrum_tvl_usd_100', 'arbitrum_volume_to_tvl_100',
       'arbitrum_volume_usd_100', 'arbitrum_net_liquidity_100',
       'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd'],
      dtype='object')

In [92]:
dai_usdc_df = arb_pool_df[(arb_pool_df['arbitrum_token_pair']=='DAI-USDC')]
dai_usdc = fee_tier_cleaning(dai_usdc_df, 'arbitrum') 



'H' is deprecated and will be removed in a future version, please use 'h' instead.


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].

In [93]:
dai_usdc.columns

Index(['day', 'arbitrum_avg_liquidity_001', 'arbitrum_fee_apr_001',
       'arbitrum_fee_tier_001', 'arbitrum_fees_usd_001',
       'arbitrum_lp_addr_001', 'arbitrum_num_trades_001',
       'arbitrum_token_pair_001', 'arbitrum_tvl_usd_001',
       'arbitrum_volume_to_tvl_001', 'arbitrum_volume_usd_001',
       'arbitrum_net_liquidity_001', 'arbitrum_avg_liquidity_005',
       'arbitrum_fee_apr_005', 'arbitrum_fee_tier_005',
       'arbitrum_fees_usd_005', 'arbitrum_lp_addr_005',
       'arbitrum_num_trades_005', 'arbitrum_token_pair_005',
       'arbitrum_tvl_usd_005', 'arbitrum_volume_to_tvl_005',
       'arbitrum_volume_usd_005', 'arbitrum_net_liquidity_005',
       'arbitrum_avg_liquidity_030', 'arbitrum_fee_apr_030',
       'arbitrum_fee_tier_030', 'arbitrum_fees_usd_030',
       'arbitrum_lp_addr_030', 'arbitrum_num_trades_030',
       'arbitrum_token_pair_030', 'arbitrum_tvl_usd_030',
       'arbitrum_volume_to_tvl_030', 'arbitrum_volume_usd_030',
       'arbitrum_net_liquidity_0

weth_zro_30['arbitrum_net_liquidity_.30%'] = weth_zro_30['arbitrum_avg_liquidity_.30%'] - weth_zro_30['arbitrum_avg_liquidity_.30%'].shift(1)
weth_zro_30['arbitrum_net_liquidity_.30%'].fillna(0, inplace=True) 
print(weth_zro_30[['day', 'arbitrum_avg_liquidity_.30%', 'arbitrum_net_liquidity_.30%']].head())

weth_zro_100['arbitrum_net_liquidity_1.0%'] = weth_zro_100['arbitrum_avg_liquidity_1.0%'] - weth_zro_100['arbitrum_avg_liquidity_1.0%'].shift(1)
weth_zro_100['arbitrum_net_liquidity_1.0%'].fillna(0, inplace=True) 
print(weth_zro_100[['day', 'arbitrum_avg_liquidity_1.0%', 'arbitrum_net_liquidity_1.0%']].head())

weth_zro = pd.merge(weth_zro_30, weth_zro_100, how='left', on=['day']) 
weth_zro = weth_zro.fillna(0)

In [94]:
def corr_matrix_graph(df, fee_tier):
    fee_tier_cols = [f'arbitrum_avg_liquidity_{fee_tier}', f'arbitrum_fees_usd_{fee_tier}', f'arbitrum_tvl_usd_{fee_tier}', f'arbitrum_volume_usd_{fee_tier}']
    corr_matrix = df[fee_tier_cols].corr()
    fig = go.Figure(data=go.Heatmap(
                   z=corr_matrix.values,
                   x=corr_matrix.columns,
                   y=corr_matrix.columns,
                   colorscale='Viridis',
                   zmin=-1, zmax=1))

    fig.update_layout(
        title='Correlation Matrix',
        xaxis_nticks=36
    )

    fig.show()
    return fig 

In [95]:
def trades_to_gas_chart(df, barmode='group'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
    suffixes = ['_030', '_005', '_001', '_100']
    for suffix in suffixes:
        liq_column = f'arbitrum_num_trades{suffix}'
        if liq_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[liq_column],
                    name=f'{suffix.replace("_", "")}% # of Trades'
                ),
                secondary_y=False
            )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_median_gas_usd'],
            name='Median Gas',
            mode='lines'
        ),
        secondary_y=True
    )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_gas_usd_per_tx'],
            name='Avg Gas',
            mode='lines'
        ),
        secondary_y=True
    )

    df_fig.update_layout(
        title='Trades to Gas Comparison',
        barmode=barmode  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
    )

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()

    return df_fig

In [96]:
def liq_to_gas_chart(df, barmode='group'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
    suffixes = ['_030', '_005', '_001', '_100']
    for suffix in suffixes:
        liq_column = f'arbitrum_avg_liquidity{suffix}'
        if liq_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[liq_column],
                    name=f'{suffix.replace("_", "")}% Liquidity'
                ),
                secondary_y=False
            )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_median_gas_usd'],
            name='Median Gas',
            mode='lines'
        ),
        secondary_y=True
    )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_gas_usd_per_tx'],
            name='Avg Gas',
            mode='lines'
        ),
        secondary_y=True
    )

    df_fig.update_layout(
        title='Liquidity to Gas Comparison',
        barmode=barmode  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
    )

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()

    return df_fig

In [97]:
def net_liq_to_gas_chart(df, barmode='group'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
    suffixes = ['_030', '_005', '_001', '_100']
    for suffix in suffixes:
        net_liq_column = f'arbitrum_net_liquidity{suffix}'
        if net_liq_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[net_liq_column],
                    name=f'{suffix.replace("_", "")}% Net Liquidity Added'
                ),
                secondary_y=False
            )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_median_gas_usd'],
            name='Median Gas',
            mode='lines'
        ),
        secondary_y=True
    )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_gas_usd_per_tx'],
            name='Avg Gas',
            mode='lines'
        ),
        secondary_y=True
    )

    df_fig.update_layout(
        title='Net Liquidity to Gas Comparison',
        barmode=barmode  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
    )

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()

    return df_fig

In [98]:
def vol_tvl_to_gas_chart(df, barmode='group'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
    suffixes = ['_030', '_005', '_001', '_100']
    for suffix in suffixes:
        vol_tvl_column = f'arbitrum_volume_to_tvl{suffix}'
        if vol_tvl_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[vol_tvl_column],
                    name=f'{suffix.replace("_", "")}% Volume to TVL Ratio'
                ),
                secondary_y=False
            )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_median_gas_usd'],
            name='Median Gas',
            mode='lines'
        ),
        secondary_y=True
    )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_gas_usd_per_tx'],
            name='Avg Gas',
            mode='lines'
        ),
        secondary_y=True
    )

    df_fig.update_layout(
        title='Volume/TVL Ratio to Gas Comparison',
        barmode=barmode  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
    )

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()

    return df_fig

In [99]:
def liquidity_to_trades_chart(df, barmode='group'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])

    suffixes = ['_030', '_005', '_001', '_100']
    for suffix in suffixes:
        liquidity_column = f'arbitrum_avg_liquidity{suffix}'
        if liquidity_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[liquidity_column],
                    name=f'{suffix.replace("_", "")}% Liquidity'
                ),
                secondary_y=False
            )
            
    for suffix in suffixes:
        trades_column = f'arbitrum_num_trades{suffix}'
        if trades_column in df.columns:
            df_fig.add_trace(
                go.Scatter(
                x=df['day'],
                y=df[trades_column],
                name=f'{suffix.replace("_", "")}% # Of Trades',
                mode='lines'
            ),
            secondary_y=True
            )
    df_fig.update_layout(title='Liquidity to # of Trades Comparison',
        barmode=barmode)

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()
    return df_fig


In [100]:
def gas_to_rev_chart(df, barmode='group'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
    suffixes = ['_030', '_005', '_001', '_100']
    for suffix in suffixes:
        fee_column = f'arbitrum_fees_usd{suffix}'
        if fee_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[fee_column],
                    name=f'{suffix.replace("_", "")}% Fee Revenue'
                ),
                secondary_y=False
            )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_median_gas_usd'],
            name='Median Gas',
            mode='lines'
        ),
        secondary_y=True
    )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_gas_usd_per_tx'],
            name='Avg Gas',
            mode='lines'
        ),
        secondary_y=True
    )

    df_fig.update_layout(
        title='Fee Revenue to Gas Comparison',
        barmode=barmode  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
    )

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()

    return df_fig

##### Volatile-Volatile Pair Visualizations
- WETH-ZRO

In [101]:
print([col for col in weth_zro.columns if col.startswith('arbitrum_avg_liquidity')])

['arbitrum_avg_liquidity_030', 'arbitrum_avg_liquidity_100']


In [102]:
weth_zro_corr_matrix = corr_matrix_graph(weth_zro, '030')

In [103]:
weth_zro_gas_to_rev_fig = gas_to_rev_chart(weth_zro, barmode='group')

In [104]:
weth_zro_liquidity_to_trades_fig = liquidity_to_trades_chart(weth_zro)

In [105]:
weth_zro_vol_tvl_ratio_fig = vol_tvl_to_gas_chart(weth_zro)

In [106]:
weth_zro_net_liq_to_gas_fig = net_liq_to_gas_chart(weth_zro)

In [107]:
weth_zro_liq_to_gas_fig = liq_to_gas_chart(weth_zro)

In [108]:
weth_zro_trades_to_gas_fig = trades_to_gas_chart(weth_zro)

##### Volatile-Stable Pair Visualizations

- USDC-WETH

In [109]:
usdc_weth_gas_to_rev_fig = gas_to_rev_chart(usdc_weth, barmode='stack')

In [110]:
usdc_weth_liquidity_to_trades_fig = liquidity_to_trades_chart(usdc_weth, barmode='stack')

In [111]:
usdc_weth_vol_tvl_ratio_fig = vol_tvl_to_gas_chart(usdc_weth, barmode='stack')

In [112]:
usdc_weth_net_liq_to_gas_fig = net_liq_to_gas_chart(usdc_weth, barmode='stack')

In [113]:
usdc_weth_liq_to_gas_fig = liq_to_gas_chart(usdc_weth, barmode='stack')

In [114]:
usdc_weth_trades_to_gas_fig = trades_to_gas_chart(usdc_weth, barmode='stack')

##### Stable-Stable Pair Visualizations
- DAI-USDC

In [115]:
dai_usdc_gas_to_rev_fig = gas_to_rev_chart(dai_usdc, barmode='group')

In [116]:
dai_usdc_liquidity_to_trades_fig = liquidity_to_trades_chart(dai_usdc, barmode='stack')

In [117]:
dai_usdc_vol_tvl_ratio_fig = vol_tvl_to_gas_chart(dai_usdc, barmode='stack')

In [118]:
dai_usdc_net_liq_to_gas_fig = net_liq_to_gas_chart(dai_usdc, barmode='stack')

In [119]:
dai_usdc_liq_to_gas_fig = liq_to_gas_chart(dai_usdc)

In [120]:
dai_usdc_trades_to_gas_fig = trades_to_gas_chart(dai_usdc)

## Optimism

## Base

## All Network Aggregation

### Aggregate Pool Data 

In [121]:
vol_vol = ['WBTC-WETH','ARB-WETH','WETH-ZRO','WETH-wstETH','WETH-XAI','OP-WETH','WETH-WLD','SNX-WETH','MAI-WETH',
           'MIGGLES-WETH','BRETT-WETH','AERO-WETH','DEGEN-WETH','TOSHI-WETH']
vol_stable = ['USDC-WETH','USDT-WETH','USDC-WBTC','USDC-WLD','USDbC-WETH']
stable_stable = ['DAI-USDC','USDC-USDC','USDC-USDT']

arb_pool_df.columns

arb_pool_filtered = arb_pool_df.drop(columns=['arbitrum_lp_addr','arbitrum_fee_tier','arbitrum_avg_liquidity'])

arb_vol_vol = arb_pool_filtered[arb_pool_filtered['arbitrum_token_pair'].isin(vol_vol)]
arb_vol_stable = arb_pool_filtered[arb_pool_filtered['arbitrum_token_pair'].isin(vol_stable)]
arb_stable_stable = arb_pool_filtered[arb_pool_filtered['arbitrum_token_pair'].isin(stable_stable)]


In [122]:
base_pool_df['day'] = pd.to_datetime(base_pool_df['day'])
base_pool_df.columns

base_pool_filtered = base_pool_df.drop(columns=['base_lp_addr','base_fee_tier','base_avg_liquidity']) 

base_vol_vol = base_pool_filtered[base_pool_filtered['base_token_pair'].isin(vol_vol)]
base_vol_stable = base_pool_filtered[base_pool_filtered['base_token_pair'].isin(vol_stable)]
base_stable_stable = base_pool_filtered[base_pool_filtered['base_token_pair'].isin(stable_stable)]


In [123]:
op_pool_df['day'] = pd.to_datetime(op_pool_df['day'])
op_pool_df.columns

op_pool_filtered = op_pool_df.drop(columns=['optimism_lp_addr','optimism_fee_tier','optimism_avg_liquidity']) 
op_vol_vol = op_pool_filtered[op_pool_filtered['optimism_token_pair'].isin(vol_vol)]
op_vol_stable = op_pool_filtered[op_pool_filtered['optimism_token_pair'].isin(vol_stable)]
op_stable_stable = op_pool_filtered[op_pool_filtered['optimism_token_pair'].isin(stable_stable)]


In [124]:
merged_stable_stable = pd.merge(op_stable_stable, base_stable_stable, on='day', how='outer')
merged_stable_stable = merged_stable_stable.merge(arb_stable_stable, on='day', how='outer')

merged_vol_vol = pd.merge(op_vol_vol, base_vol_vol, on='day', how='outer')
merged_vol_vol = merged_vol_vol.merge(arb_vol_vol, on='day', how='outer')

merged_vol_stable = pd.merge(op_vol_stable, base_vol_stable, on='day', how='outer')
merged_vol_stable = merged_vol_stable.merge(arb_vol_stable, on='day', how='outer')


In [125]:
merged_vol_vol['fee_apr'] = merged_vol_vol[['optimism_fee_apr', 'base_fee_apr','arbitrum_fee_apr']].mean(axis=1)
merged_vol_vol['fees_usd'] = merged_vol_vol[['optimism_fees_usd', 'base_fees_usd','arbitrum_fees_usd']].sum(axis=1)
merged_vol_vol['num_trades'] = merged_vol_vol[['optimism_num_trades', 'base_num_trades','arbitrum_num_trades']].sum(axis=1)
merged_vol_vol['tvl_usd'] = merged_vol_vol[['optimism_tvl_usd', 'base_tvl_usd','arbitrum_tvl_usd']].sum(axis=1)
merged_vol_vol['volume_to_tvl'] = merged_vol_vol[['optimism_volume_to_tvl', 'base_volume_to_tvl','arbitrum_volume_to_tvl']].mean(axis=1)
merged_vol_vol['volume_usd'] = merged_vol_vol[['optimism_volume_usd', 'base_volume_usd','arbitrum_volume_usd']].sum(axis=1)

In [126]:
merged_vol_stable['fee_apr'] = merged_vol_stable[['optimism_fee_apr', 'base_fee_apr','arbitrum_fee_apr']].mean(axis=1)
merged_vol_stable['fees_usd'] = merged_vol_stable[['optimism_fees_usd', 'base_fees_usd','arbitrum_fees_usd']].sum(axis=1)
merged_vol_stable['num_trades'] = merged_vol_stable[['optimism_num_trades', 'base_num_trades','arbitrum_num_trades']].sum(axis=1)
merged_vol_stable['tvl_usd'] = merged_vol_stable[['optimism_tvl_usd', 'base_tvl_usd','arbitrum_tvl_usd']].sum(axis=1)
merged_vol_stable['volume_to_tvl'] = merged_vol_stable[['optimism_volume_to_tvl', 'base_volume_to_tvl','arbitrum_volume_to_tvl']].mean(axis=1)
merged_vol_stable['volume_usd'] = merged_vol_stable[['optimism_volume_usd', 'base_volume_usd','arbitrum_volume_usd']].sum(axis=1)

In [127]:
merged_stable_stable['fee_apr'] = merged_stable_stable[['optimism_fee_apr', 'base_fee_apr','arbitrum_fee_apr']].mean(axis=1)
merged_stable_stable['fees_usd'] = merged_stable_stable[['optimism_fees_usd', 'base_fees_usd','arbitrum_fees_usd']].sum(axis=1)
merged_stable_stable['num_trades'] = merged_stable_stable[['optimism_num_trades', 'base_num_trades','arbitrum_num_trades']].sum(axis=1)
merged_stable_stable['tvl_usd'] = merged_stable_stable[['optimism_tvl_usd', 'base_tvl_usd','arbitrum_tvl_usd']].sum(axis=1)
merged_stable_stable['volume_to_tvl'] = merged_stable_stable[['optimism_volume_to_tvl', 'base_volume_to_tvl','arbitrum_volume_to_tvl']].mean(axis=1)
merged_stable_stable['volume_usd'] = merged_stable_stable[['optimism_volume_usd', 'base_volume_usd','arbitrum_volume_usd']].sum(axis=1)

In [128]:
agg_funcs = {
    'tvl_usd': 'sum',
    'num_trades': 'sum',
    'fee_apr': 'mean',
    'fees_usd': 'sum',
    'volume_usd': 'sum',
    'volume_to_tvl': 'mean'
}

def aggregate_metrics(df, agg_funcs):
    aggregated_df = df.groupby('day').agg(agg_funcs).reset_index()
    return aggregated_df

In [129]:
merged_stable_stable = merged_stable_stable[['day', 'fee_apr', 'fees_usd', 'num_trades', 'tvl_usd', 'volume_to_tvl', 'volume_usd']]
merged_vol_stable = merged_vol_stable[['day', 'fee_apr', 'fees_usd', 'num_trades', 'tvl_usd', 'volume_to_tvl', 'volume_usd']]
merged_vol_vol = merged_vol_vol[['day', 'fee_apr', 'fees_usd', 'num_trades', 'tvl_usd', 'volume_to_tvl', 'volume_usd']]

agg_stable_stable = aggregate_metrics(merged_stable_stable, agg_funcs)
agg_vol_stable = aggregate_metrics(merged_vol_stable, agg_funcs)
agg_vol_vol = aggregate_metrics(merged_vol_vol, agg_funcs)

### Aggregate Gas

In [130]:
arbitrum_gas_df

Unnamed: 0,day,arbitrum_gas_usd_per_tx,arbitrum_median_gas_usd
0,2024-04-22 00:00:00+00:00,0.010387,0.006123
1,2024-04-22 01:00:00+00:00,0.010970,0.006445
2,2024-04-22 02:00:00+00:00,0.009510,0.005550
3,2024-04-22 03:00:00+00:00,0.010174,0.005812
4,2024-04-22 04:00:00+00:00,0.009947,0.006139
...,...,...,...
2157,2024-07-20 21:00:00+00:00,0.009192,0.005052
2158,2024-07-20 22:00:00+00:00,0.010094,0.005563
2159,2024-07-20 23:00:00+00:00,0.009521,0.005383
2160,2024-07-21 00:00:00+00:00,0.009502,0.005291


In [131]:
optimism_gas_df.rename(columns={"dt":"day"}, inplace=True)
optimism_gas_df['day'] = pd.to_datetime(optimism_gas_df['day']) 

In [132]:
base_gas_df.rename(columns={"dt":"day"}, inplace=True)
base_gas_df['day'] = pd.to_datetime(base_gas_df['day']) 

In [133]:
merged_gas_df = pd.merge(arbitrum_gas_df,optimism_gas_df,how='inner',on='day' )
merged_gas_df = merged_gas_df.merge(base_gas_df, how='inner', on='day')
merged_gas_df.columns

Index(['day', 'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd',
       'optimism_gas_usd_per_tx', 'optimism_median_gas_usd',
       'base_gas_usd_per_tx', 'base_median_gas_usd'],
      dtype='object')

In [134]:
merged_gas_df['avg_gas'] = merged_gas_df[['arbitrum_gas_usd_per_tx','optimism_gas_usd_per_tx','base_gas_usd_per_tx']].mean(axis=1)
merged_gas_df['median_gas'] = merged_gas_df[['arbitrum_median_gas_usd','optimism_median_gas_usd','base_median_gas_usd']].mean(axis=1)
merged_gas_df = merged_gas_df[['day','avg_gas','median_gas']]
agg_gas_df = merged_gas_df.groupby('day').mean()
agg_gas_df


Unnamed: 0_level_0,avg_gas,median_gas
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-04-22 00:00:00+00:00,0.032086,0.006833
2024-04-22 01:00:00+00:00,0.041133,0.007923
2024-04-22 02:00:00+00:00,0.030572,0.007440
2024-04-22 03:00:00+00:00,0.067419,0.010030
2024-04-22 04:00:00+00:00,0.038169,0.010339
...,...,...
2024-07-20 21:00:00+00:00,0.085266,0.014616
2024-07-20 22:00:00+00:00,0.056465,0.014726
2024-07-20 23:00:00+00:00,0.162665,0.011997
2024-07-21 00:00:00+00:00,0.059415,0.013390


### Combined Gas and Pools

In [135]:
agg_stable_stable = agg_stable_stable.merge(agg_gas_df, how='inner', on='day')
agg_vol_stable = agg_vol_stable.merge(agg_gas_df, how='inner', on='day')
agg_vol_vol = agg_vol_vol.merge(agg_gas_df, how='inner', on='day')

## Trader Behavior

In [136]:
trader_classifier_df['dt'] = pd.to_datetime(trader_classifier_df['dt'])
trader_classifier_df.rename(columns={"dt":"day"}, inplace=True)
trader_classifier_df

Unnamed: 0,trader_type,day,tx_count,total_volume_usd,avg_order_size_usd,unique_contracts
0,Professional,2024-07-21 15:00:00+00:00,928,1047810.93,1183.967153,104
1,Retail,2024-07-21 15:00:00+00:00,176,91856.49,540.332294,60
2,Professional,2024-07-21 14:00:00+00:00,7552,12913378.38,1766.536030,252
3,Retail,2024-07-21 14:00:00+00:00,1790,294765.21,170.680492,196
4,Professional,2024-07-21 13:00:00+00:00,4472,5883981.27,1367.731583,211
...,...,...,...,...,...,...
18553,Retail,2023-07-01 02:00:00+00:00,1170,278669.36,432.716398,108
18554,Professional,2023-07-01 01:00:00+00:00,4263,12052488.42,3328.497216,161
18555,Retail,2023-07-01 01:00:00+00:00,1254,301856.49,417.505519,110
18556,Professional,2023-07-01 00:00:00+00:00,4124,11335892.35,3270.597908,176


In [137]:
trader_pivot_df = trader_classifier_df.pivot(
    index='day',
    columns='trader_type',
    values=['tx_count', 'total_volume_usd', 'avg_order_size_usd', 'unique_contracts']
)
trader_pivot_df.columns = [f'{col[0]}_{col[1]}' for col in trader_pivot_df.columns]
trader_pivot_df


Unnamed: 0_level_0,tx_count_Professional,tx_count_Retail,total_volume_usd_Professional,total_volume_usd_Retail,avg_order_size_usd_Professional,avg_order_size_usd_Retail,unique_contracts_Professional,unique_contracts_Retail
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-07-01 00:00:00+00:00,4124.0,1084.0,11335892.35,402822.79,3270.597908,613.124490,176.0,116.0
2023-07-01 01:00:00+00:00,4263.0,1254.0,12052488.42,301856.49,3328.497216,417.505519,161.0,110.0
2023-07-01 02:00:00+00:00,2584.0,1170.0,6595664.68,278669.36,3111.162585,432.716398,135.0,108.0
2023-07-01 03:00:00+00:00,1365.0,1203.0,3831481.06,164496.99,3534.576624,239.442489,127.0,115.0
2023-07-01 04:00:00+00:00,3140.0,1137.0,7284528.45,242939.29,2797.437961,354.655898,157.0,107.0
...,...,...,...,...,...,...,...,...
2024-07-21 11:00:00+00:00,3237.0,1605.0,3745368.28,212027.58,1203.911373,136.703791,206.0,169.0
2024-07-21 12:00:00+00:00,2894.0,1769.0,3148389.76,269737.81,1120.423402,159.419509,176.0,174.0
2024-07-21 13:00:00+00:00,4472.0,1802.0,5883981.27,228260.23,1367.731583,132.401526,211.0,178.0
2024-07-21 14:00:00+00:00,7552.0,1790.0,12913378.38,294765.21,1766.536030,170.680492,252.0,196.0


In [138]:
arbitrum_gas_df.describe()

Unnamed: 0,arbitrum_gas_usd_per_tx,arbitrum_median_gas_usd
count,2162.0,2162.0
mean,0.023634,0.01286
std,0.22163,0.129315
min,0.004397,0.001491
25%,0.006888,0.004532
50%,0.008472,0.005564
75%,0.010704,0.007182
max,6.944713,4.18109


In [139]:
combined_trader_data = pd.merge(trader_pivot_df, arbitrum_gas_df, how='inner', on='day')


In [140]:
combined_trader_data.isna().sum().sum()

np.int64(0)

In [141]:
trader_classifier_tx_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
trader_classifier_tx_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['tx_count_Professional'],
        name='Professional Tx'
    ),
    secondary_y=False
)
trader_classifier_tx_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['tx_count_Retail'],
        name='Retail Tx'
    ),
    secondary_y=False
)


trader_classifier_tx_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_tx_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_tx_fig.update_layout(
    title='Trader Type Tx to Gas',
    barmode='stack'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

trader_classifier_tx_fig.update_xaxes(title_text="Date")

trader_classifier_tx_fig.show()

In [142]:
trader_classifier_vol_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
trader_classifier_vol_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['total_volume_usd_Professional'],
        name='Professional Vol'
    ),
    secondary_y=False
)
trader_classifier_vol_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['total_volume_usd_Retail'],
        name='Retail Vol'
    ),
    secondary_y=False
)


trader_classifier_vol_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_vol_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_vol_fig.update_layout(
    title='Trader Type Vol to Gas',
    barmode='stack'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

trader_classifier_vol_fig.update_xaxes(title_text="Date")

trader_classifier_vol_fig.show()

In [143]:
trader_classifier_order_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
trader_classifier_order_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['avg_order_size_usd_Professional'],
        name='Professional Avg Order Size'
    ),
    secondary_y=False
)
trader_classifier_order_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['avg_order_size_usd_Retail'],
        name='Retail Avg Order Size'
    ),
    secondary_y=False
)


trader_classifier_order_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_order_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_order_fig.update_layout(
    title='Trader Type Avg Order Size to Gas',
    barmode='stack'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

trader_classifier_order_fig.update_xaxes(title_text="Date")

trader_classifier_order_fig.show()

In [144]:
trader_classifier_contracts_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
trader_classifier_contracts_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['unique_contracts_Professional'],
        name='Professional # Unique Contracts'
    ),
    secondary_y=False
)
trader_classifier_contracts_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['unique_contracts_Retail'],
        name='Retail # Unique Contracts'
    ),
    secondary_y=False
)


trader_classifier_contracts_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_contracts_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_contracts_fig.update_layout(
    title='Trader Type # of Contract Interactions to Gas',
    barmode='stack'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

trader_classifier_contracts_fig.update_xaxes(title_text="Date")

trader_classifier_contracts_fig.show()

# Gas Fee Correlation Analyses

## Pearson Correlation

### Arbitrum

#### Aggregate Correlations

In [145]:
aggregated_arb_hour.columns

Index(['day', 'arbitrum_fees_usd', 'arbitrum_tvl_usd', 'arbitrum_volume_usd',
       'arbitrum_num_trades', 'arbitrum_gas_usd_per_tx',
       'arbitrum_median_gas_usd', 'arbitrum_volume_to_tvl'],
      dtype='object')

##### Avg Gas

In [146]:
aggregated_correlations = aggregated_arb_hour.corr()
avg_gas_correlations = aggregated_correlations['arbitrum_gas_usd_per_tx'].sort_values(ascending=False)
for index, value in avg_gas_correlations.items():
    print(f"{index:50} {value}")

arbitrum_gas_usd_per_tx                            1.0
arbitrum_median_gas_usd                            0.9219147473986707
arbitrum_volume_usd                                0.4242897969256834
arbitrum_fees_usd                                  0.41528832482832984
arbitrum_volume_to_tvl                             0.40799098455068333
arbitrum_num_trades                                0.37050902566466637
arbitrum_tvl_usd                                   0.26224181865538926
day                                                0.09103870271185989


##### Median Gas

In [147]:
aggregated_correlations = aggregated_arb_hour.corr()
avg_gas_correlations = aggregated_correlations['arbitrum_median_gas_usd'].sort_values(ascending=False)
for index, value in avg_gas_correlations.items():
    print(f"{index:50} {value}")

arbitrum_median_gas_usd                            1.0
arbitrum_gas_usd_per_tx                            0.9219147473986707
arbitrum_volume_usd                                0.49951630776691286
arbitrum_volume_to_tvl                             0.48199175789463616
arbitrum_fees_usd                                  0.4768814541769718
arbitrum_num_trades                                0.44492753979708344
arbitrum_tvl_usd                                   0.2713983969351232
day                                                0.05780970281930259


#### Volatile-Volatile Pair

In [148]:
numeric_weth_zro = weth_zro.select_dtypes(include=[np.number])
numeric_weth_zro.columns

Index(['arbitrum_avg_liquidity_030', 'arbitrum_fee_apr_030',
       'arbitrum_fees_usd_030', 'arbitrum_num_trades_030',
       'arbitrum_tvl_usd_030', 'arbitrum_volume_to_tvl_030',
       'arbitrum_volume_usd_030', 'arbitrum_net_liquidity_030',
       'arbitrum_avg_liquidity_100', 'arbitrum_fee_apr_100',
       'arbitrum_fees_usd_100', 'arbitrum_num_trades_100',
       'arbitrum_tvl_usd_100', 'arbitrum_volume_to_tvl_100',
       'arbitrum_volume_usd_100', 'arbitrum_net_liquidity_100',
       'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd'],
      dtype='object')

##### Avg Gas

In [149]:
volatile_volatile_correlations = numeric_weth_zro.corr()
avg_gas_vol_vol_correlations = volatile_volatile_correlations['arbitrum_gas_usd_per_tx'].sort_values(ascending=False)
for index, value in avg_gas_vol_vol_correlations.items():
    print(f"{index:50} {value}")

arbitrum_gas_usd_per_tx                            1.0
arbitrum_median_gas_usd                            0.9219147473986707
arbitrum_avg_liquidity_030                         0.25354986008866914
arbitrum_net_liquidity_030                         0.21042355710465704
arbitrum_tvl_usd_030                               0.017530381349635242
arbitrum_fee_apr_030                               -0.051862560348301154
arbitrum_volume_to_tvl_030                         -0.05186256034831916
arbitrum_fees_usd_030                              -0.05353216728653922
arbitrum_volume_usd_030                            -0.053532167286539256
arbitrum_avg_liquidity_100                         -0.05831016881479734
arbitrum_net_liquidity_100                         -0.060491327241270335
arbitrum_num_trades_030                            -0.06552538246788381
arbitrum_volume_usd_100                            -0.09753257858185442
arbitrum_fees_usd_100                              -0.0975325785818545
arbitrum_fe

##### Median Gas

In [150]:
volatile_volatile_correlations = numeric_weth_zro.corr()
avg_gas_vol_vol_correlations = volatile_volatile_correlations['arbitrum_median_gas_usd'].sort_values(ascending=False)
for index, value in avg_gas_vol_vol_correlations.items():
    print(f"{index:50} {value}")

arbitrum_median_gas_usd                            1.0
arbitrum_gas_usd_per_tx                            0.9219147473986707
arbitrum_avg_liquidity_030                         0.3391716995209209
arbitrum_net_liquidity_030                         0.2115459602817172
arbitrum_avg_liquidity_100                         -0.02553279377052578
arbitrum_tvl_usd_030                               -0.030126927728474442
arbitrum_fee_apr_030                               -0.0965406755714302
arbitrum_volume_to_tvl_030                         -0.09654067557144701
arbitrum_fees_usd_030                              -0.10228510132660473
arbitrum_volume_usd_030                            -0.10228510132660476
arbitrum_net_liquidity_100                         -0.10530853063334707
arbitrum_fees_usd_100                              -0.1080173357661392
arbitrum_volume_usd_100                            -0.1080173357661392
arbitrum_num_trades_030                            -0.1115081580477517
arbitrum_fee_apr_1

#### Volatile-Stable Pair

In [151]:
numeric_usdc_weth = usdc_weth.select_dtypes(include=[np.number])
numeric_usdc_weth.columns

Index(['arbitrum_avg_liquidity_001', 'arbitrum_fee_apr_001',
       'arbitrum_fees_usd_001', 'arbitrum_num_trades_001',
       'arbitrum_tvl_usd_001', 'arbitrum_volume_to_tvl_001',
       'arbitrum_volume_usd_001', 'arbitrum_net_liquidity_001',
       'arbitrum_avg_liquidity_005', 'arbitrum_fee_apr_005',
       'arbitrum_fees_usd_005', 'arbitrum_num_trades_005',
       'arbitrum_tvl_usd_005', 'arbitrum_volume_to_tvl_005',
       'arbitrum_volume_usd_005', 'arbitrum_net_liquidity_005',
       'arbitrum_avg_liquidity_030', 'arbitrum_fee_apr_030',
       'arbitrum_fees_usd_030', 'arbitrum_num_trades_030',
       'arbitrum_tvl_usd_030', 'arbitrum_volume_to_tvl_030',
       'arbitrum_volume_usd_030', 'arbitrum_net_liquidity_030',
       'arbitrum_avg_liquidity_100', 'arbitrum_fee_apr_100',
       'arbitrum_fees_usd_100', 'arbitrum_num_trades_100',
       'arbitrum_tvl_usd_100', 'arbitrum_volume_to_tvl_100',
       'arbitrum_volume_usd_100', 'arbitrum_net_liquidity_100',
       'arbitrum_gas

##### Avg Gas

In [152]:
volatile_stable_correlations = numeric_usdc_weth.corr()
avg_gas_vol_stb_correlations = volatile_stable_correlations['arbitrum_gas_usd_per_tx'].sort_values(ascending=False)
for index, value in avg_gas_vol_stb_correlations.items():
    print(f"{index:50} {value}")

arbitrum_gas_usd_per_tx                            1.0
arbitrum_median_gas_usd                            0.9219147473986707
arbitrum_volume_usd_100                            0.4700260513977799
arbitrum_fees_usd_100                              0.4700260513977798
arbitrum_fee_apr_100                               0.45792402761956075
arbitrum_volume_to_tvl_100                         0.4579240276195266
arbitrum_fees_usd_005                              0.4409119569836468
arbitrum_volume_usd_005                            0.44091195698364677
arbitrum_volume_to_tvl_005                         0.4403496460484363
arbitrum_fee_apr_005                               0.44034964604820875
arbitrum_num_trades_005                            0.38073707616052516
arbitrum_num_trades_100                            0.35894404279143305
arbitrum_fee_apr_030                               0.34382203251440707
arbitrum_volume_to_tvl_030                         0.34382203251425403
arbitrum_volume_usd_030     

##### Median Gas

In [153]:
volatile_stable_correlations = numeric_usdc_weth.corr()
avg_gas_vol_stb_correlations = volatile_stable_correlations['arbitrum_median_gas_usd'].sort_values(ascending=False)
for index, value in avg_gas_vol_stb_correlations.items():
    print(f"{index:50} {value}")

arbitrum_median_gas_usd                            1.0
arbitrum_gas_usd_per_tx                            0.9219147473986707
arbitrum_volume_usd_005                            0.5214916410794086
arbitrum_fees_usd_005                              0.5214916410794082
arbitrum_volume_to_tvl_005                         0.5213532090575917
arbitrum_fee_apr_005                               0.5213532090574425
arbitrum_fees_usd_100                              0.5041360220302444
arbitrum_volume_usd_100                            0.5041360220302443
arbitrum_fee_apr_100                               0.4878219027281941
arbitrum_volume_to_tvl_100                         0.48782190272817083
arbitrum_num_trades_005                            0.4620831874842367
arbitrum_num_trades_100                            0.38330827055972777
arbitrum_fee_apr_030                               0.37184953806216436
arbitrum_volume_to_tvl_030                         0.37184953806192683
arbitrum_fees_usd_030          

#### Stable-Stable Pair

In [154]:
numeric_dai_usdc = dai_usdc.select_dtypes(include=[np.number])
numeric_dai_usdc.columns

Index(['arbitrum_avg_liquidity_001', 'arbitrum_fee_apr_001',
       'arbitrum_fees_usd_001', 'arbitrum_num_trades_001',
       'arbitrum_tvl_usd_001', 'arbitrum_volume_to_tvl_001',
       'arbitrum_volume_usd_001', 'arbitrum_net_liquidity_001',
       'arbitrum_avg_liquidity_005', 'arbitrum_fee_apr_005',
       'arbitrum_fees_usd_005', 'arbitrum_num_trades_005',
       'arbitrum_tvl_usd_005', 'arbitrum_volume_to_tvl_005',
       'arbitrum_volume_usd_005', 'arbitrum_net_liquidity_005',
       'arbitrum_avg_liquidity_030', 'arbitrum_fee_apr_030',
       'arbitrum_fees_usd_030', 'arbitrum_num_trades_030',
       'arbitrum_tvl_usd_030', 'arbitrum_volume_to_tvl_030',
       'arbitrum_volume_usd_030', 'arbitrum_net_liquidity_030',
       'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd'],
      dtype='object')

##### Avg Gas

In [155]:
stable_stable_correlations = numeric_dai_usdc.corr()
avg_gas_stb_stb_correlations = stable_stable_correlations['arbitrum_gas_usd_per_tx'].sort_values(ascending=False)
for index, value in avg_gas_stb_stb_correlations.items():
    print(f"{index:50} {value}")

arbitrum_gas_usd_per_tx                            1.0
arbitrum_median_gas_usd                            0.9219147473986707
arbitrum_avg_liquidity_001                         0.12730769752960172
arbitrum_tvl_usd_001                               0.12702007492519254
arbitrum_num_trades_001                            0.1097571037535162
arbitrum_net_liquidity_001                         0.0966246441697817
arbitrum_avg_liquidity_005                         0.09132628086178862
arbitrum_tvl_usd_005                               0.08729345948838327
arbitrum_fee_apr_005                               0.04273233854987819
arbitrum_volume_to_tvl_005                         0.04273233854973981
arbitrum_volume_usd_005                            0.042731153988575056
arbitrum_fees_usd_005                              0.04273115398857502
arbitrum_net_liquidity_005                         0.032751672942148376
arbitrum_fees_usd_001                              0.01138860930518961
arbitrum_volume_usd_001

##### Median Gas

In [156]:
stable_stable_correlations = numeric_dai_usdc.corr()
avg_gas_stb_stb_correlations = stable_stable_correlations['arbitrum_median_gas_usd'].sort_values(ascending=False)
for index, value in avg_gas_stb_stb_correlations.items():
    print(f"{index:50} {value}")

arbitrum_median_gas_usd                            1.0
arbitrum_gas_usd_per_tx                            0.9219147473986707
arbitrum_avg_liquidity_001                         0.16461007302190472
arbitrum_net_liquidity_001                         0.11581642662334317
arbitrum_tvl_usd_001                               0.11161078630659658
arbitrum_num_trades_001                            0.09823470599977475
arbitrum_avg_liquidity_005                         0.08183807364520998
arbitrum_tvl_usd_005                               0.07342033318636268
arbitrum_net_liquidity_005                         0.01689660681676992
arbitrum_volume_usd_005                            0.009952323851284528
arbitrum_fees_usd_005                              0.009952323851284497
arbitrum_fee_apr_005                               0.009951992001027395
arbitrum_volume_to_tvl_005                         0.009951992000977438
arbitrum_volume_usd_001                            0.0035851562419646345
arbitrum_fees_usd

### Aggregated Across Networks

#### Volatile-Volatile Pair

##### Avg Gas

In [157]:
agg_vol_vol_correlations = agg_vol_vol.corr()
agg_avg_gas_vol_vol_correlations = agg_vol_vol_correlations['avg_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_vol_vol_correlations.items():
    print(f"{index:50} {value}")

avg_gas                                            1.0
median_gas                                         0.7556881810808027
day                                                0.6349357401388742
fees_usd                                           0.4641661414650002
num_trades                                         0.24758714924538514
volume_usd                                         0.23513111586108928
tvl_usd                                            0.19710522465586838
fee_apr                                            0.1388552795661063
volume_to_tvl                                      -0.008537467197076324


##### Median Gas

In [158]:
agg_vol_vol_correlations = agg_vol_vol.corr()
agg_avg_gas_vol_vol_correlations = agg_vol_vol_correlations['median_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_vol_vol_correlations.items():
    print(f"{index:50} {value}")

median_gas                                         1.0
avg_gas                                            0.7556881810808027
day                                                0.7088782070530033
fees_usd                                           0.6713923646322593
num_trades                                         0.47421910479145085
volume_usd                                         0.42396820245968536
tvl_usd                                            0.3813483069075064
fee_apr                                            0.17192325674652204
volume_to_tvl                                      0.02417820954193431


#### Volatile-Stable Pair

##### Avg Gas

In [159]:
agg_vol_stable_correlations = agg_vol_stable.corr()
agg_avg_gas_vol_stb_correlations = agg_vol_stable_correlations['avg_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_vol_stb_correlations.items():
    print(f"{index:50} {value}")

avg_gas                                            1.0
median_gas                                         0.7556881810808027
day                                                0.6349357401388742
volume_to_tvl                                      0.14433603447796456
fees_usd                                           0.13681852676242903
num_trades                                         0.11477085153311657
volume_usd                                         0.10076710631015846
fee_apr                                            0.08825889001878766
tvl_usd                                            -0.2577297595719947


##### Median Gas

In [160]:
agg_vol_stable_correlations = agg_vol_stable.corr()
agg_avg_gas_vol_stb_correlations = agg_vol_stable_correlations['median_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_vol_stb_correlations.items():
    print(f"{index:50} {value}")

median_gas                                         1.0
avg_gas                                            0.7556881810808027
day                                                0.7088782070530033
volume_to_tvl                                      0.3038295895141003
num_trades                                         0.2840103450529939
fees_usd                                           0.27462290903342956
volume_usd                                         0.2596204240464476
fee_apr                                            0.1326250830489416
tvl_usd                                            -0.2154830528359862


#### Stable-Stable Pair

##### Avg Gas

In [161]:
agg_stable_stable_correlations = agg_stable_stable.corr()
agg_avg_gas_stb_stb_correlations = agg_stable_stable_correlations['avg_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_stb_stb_correlations.items():
    print(f"{index:50} {value}")

avg_gas                                            1.0
median_gas                                         0.7556881810808027
day                                                0.6349357401388742
volume_usd                                         0.2348138854250011
fees_usd                                           0.23461802966768375
tvl_usd                                            0.19109866207681434
volume_to_tvl                                      0.10929782283829688
num_trades                                         0.07397000720382309
fee_apr                                            -0.02254151114657157


##### Median Gas

In [162]:
agg_stable_stable_correlations = agg_stable_stable.corr()
agg_avg_gas_stb_stb_correlations = agg_stable_stable_correlations['median_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_stb_stb_correlations.items():
    print(f"{index:50} {value}")

median_gas                                         1.0
avg_gas                                            0.7556881810808027
day                                                0.7088782070530033
tvl_usd                                            0.2340635146259494
fees_usd                                           0.21940140535631358
volume_usd                                         0.21937800308445796
num_trades                                         0.16483781533510958
volume_to_tvl                                      0.08789957241293926
fee_apr                                            -0.046386271126893294


### Trader Classifier

#### Avg Gas

In [163]:
agg_trader_correlations = combined_trader_data.corr()
agg_avg_gas_trader_correlations = agg_trader_correlations['arbitrum_gas_usd_per_tx'].sort_values(ascending=False)
for index, value in agg_avg_gas_trader_correlations.items():
    print(f"{index:50} {value}")

arbitrum_gas_usd_per_tx                            1.0
arbitrum_median_gas_usd                            0.979236564615026
avg_order_size_usd_Professional                    0.310516802297874
avg_order_size_usd_Retail                          0.26060108805056065
total_volume_usd_Retail                            0.1921841901445734
tx_count_Retail                                    0.14835145999790766
total_volume_usd_Professional                      0.12720543393156963
tx_count_Professional                              0.044145634621361146
day                                                0.030522173314481066
unique_contracts_Professional                      -0.031806421361717775
unique_contracts_Retail                            -0.07169943478120121


#### Median Gas

In [164]:
agg_trader_correlations = combined_trader_data.corr()
agg_avg_gas_trader_correlations = agg_trader_correlations['arbitrum_median_gas_usd'].sort_values(ascending=False)
for index, value in agg_avg_gas_trader_correlations.items():
    print(f"{index:50} {value}")

arbitrum_median_gas_usd                            1.0
arbitrum_gas_usd_per_tx                            0.979236564615026
avg_order_size_usd_Professional                    0.2563694480545029
avg_order_size_usd_Retail                          0.21403421781193524
total_volume_usd_Retail                            0.15487955252868985
tx_count_Retail                                    0.1300906385398528
total_volume_usd_Professional                      0.06123288731210473
day                                                0.02557411266112926
tx_count_Professional                              0.004095209303444803
unique_contracts_Professional                      -0.05762517716873923
unique_contracts_Retail                            -0.08720646063151573


# Sim Target Variable Correlation Analyses
- Want to look at the potential to simulate individual vaults; potentially move on to aggregated metrics

## Volume Target

### Arbitrum Aggregated

In [165]:
aggregated_arb_hour.columns

Index(['day', 'arbitrum_fees_usd', 'arbitrum_tvl_usd', 'arbitrum_volume_usd',
       'arbitrum_num_trades', 'arbitrum_gas_usd_per_tx',
       'arbitrum_median_gas_usd', 'arbitrum_volume_to_tvl'],
      dtype='object')

In [166]:
combined_trader_data.columns

Index(['day', 'tx_count_Professional', 'tx_count_Retail',
       'total_volume_usd_Professional', 'total_volume_usd_Retail',
       'avg_order_size_usd_Professional', 'avg_order_size_usd_Retail',
       'unique_contracts_Professional', 'unique_contracts_Retail',
       'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd'],
      dtype='object')

In [167]:
prices_vol_df_pivot.columns

Index(['BTC_Price', 'ETH_Price'], dtype='object')

In [173]:
# Combine pool data with trader data
pool_trader_arb_data = pd.merge(aggregated_arb_hour.drop(columns=['arbitrum_gas_usd_per_tx',
       'arbitrum_median_gas_usd']), combined_trader_data, how='inner', on='day')
pool_trader_arb_data = pool_trader_arb_data.merge(prices_vol_df_pivot, how='left', on='day')

In [174]:
correlation_volume = pool_trader_arb_data.corr()['arbitrum_volume_usd'].sort_values(ascending=False)
print("Correlation with Volume:")
print(correlation_volume)

Correlation with Volume:
arbitrum_volume_usd                1.000000
arbitrum_volume_to_tvl             0.997916
total_volume_usd_Professional      0.995487
arbitrum_fees_usd                  0.974048
arbitrum_num_trades                0.962131
tx_count_Professional              0.955153
unique_contracts_Professional      0.820068
avg_order_size_usd_Professional    0.815792
total_volume_usd_Retail            0.675692
avg_order_size_usd_Retail          0.574786
unique_contracts_Retail            0.569104
arbitrum_median_gas_usd            0.499516
arbitrum_gas_usd_per_tx            0.424290
arbitrum_tvl_usd                   0.208078
tx_count_Retail                    0.142296
ETH_Price                          0.113486
arbitrum_vol_ex_uni                0.032681
BTC_Price                          0.015187
day                               -0.133610
Name: arbitrum_volume_usd, dtype: float64


### All Network Aggregated

#### Volatile-Volatile Pair

#### Volatile-Stable Pair

#### Stable-Stable Pair

## Fee Target

### Arbitrum Aggregated 

In [424]:
correlation_fees  = pool_trader_arb_data.corr()['arbitrum_fees_usd'].sort_values(ascending=False)
print("Correlation with Fees:")
print(correlation_fees)

Correlation with Fees:
arbitrum_fees_usd                  1.000000
arbitrum_volume_usd                0.974048
total_volume_usd_Professional      0.969017
arbitrum_num_trades                0.958059
tx_count_Professional              0.940292
unique_contracts_Professional      0.799207
avg_order_size_usd_Professional    0.765309
total_volume_usd_Retail            0.669009
unique_contracts_Retail            0.558453
avg_order_size_usd_Retail          0.553124
arbitrum_median_gas_usd            0.476881
arbitrum_gas_usd_per_tx            0.415288
arbitrum_tvl_usd                   0.250206
tx_count_Retail                    0.164652
day                               -0.059278
Name: arbitrum_fees_usd, dtype: float64


# ML Models

- For simulation, need to use non-dependent variables for x
- From inital correlation analyses on volume, fees, can use:
    - unique_contracts
    - avg_order_size
    - BTC, ETH prices
    - Avg, Median Gas price
    - Potentially tx count

## Linear Regression

### Individual Pairs

#### Volatile-Volatile Pair

Strongest Correlations (>20):

arbitrum_avg_liquidity_030                         0.25354986008866914
arbitrum_net_liquidity_030                         0.21042355710465704

avg_order_size_usd_Professional                    0.310516802297874
avg_order_size_usd_Retail                          0.26060108805056065

In [405]:
# We will try volume, fees as y/target variable

weth_zro.columns

Index(['day', 'arbitrum_avg_liquidity_030', 'arbitrum_fee_apr_030',
       'arbitrum_fee_tier_030', 'arbitrum_fees_usd_030',
       'arbitrum_lp_addr_030', 'arbitrum_num_trades_030',
       'arbitrum_token_pair_030', 'arbitrum_tvl_usd_030',
       'arbitrum_volume_to_tvl_030', 'arbitrum_volume_usd_030',
       'arbitrum_net_liquidity_030', 'arbitrum_avg_liquidity_100',
       'arbitrum_fee_apr_100', 'arbitrum_fee_tier_100',
       'arbitrum_fees_usd_100', 'arbitrum_lp_addr_100',
       'arbitrum_num_trades_100', 'arbitrum_token_pair_100',
       'arbitrum_tvl_usd_100', 'arbitrum_volume_to_tvl_100',
       'arbitrum_volume_usd_100', 'arbitrum_net_liquidity_100',
       'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd'],
      dtype='object')

In [407]:
combined_trader_data.columns

Index(['day', 'tx_count_Professional', 'tx_count_Retail',
       'total_volume_usd_Professional', 'total_volume_usd_Retail',
       'avg_order_size_usd_Professional', 'avg_order_size_usd_Retail',
       'unique_contracts_Professional', 'unique_contracts_Retail',
       'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd'],
      dtype='object')

In [408]:
vol_vol_ml = weth_zro[['day','arbitrum_avg_liquidity_030','arbitrum_net_liquidity_030','arbitrum_gas_usd_per_tx','arbitrum_median_gas_usd','arbitrum_volume_usd_030','arbitrum_num_trades_030']].copy()
vol_vol_ml = vol_vol_ml.merge(combined_trader_data[['day','avg_order_size_usd_Professional','avg_order_size_usd_Retail','tx_count_Professional', 'tx_count_Retail']], how='inner', on='day')
vol_vol_ml

Unnamed: 0,day,arbitrum_avg_liquidity_030,arbitrum_net_liquidity_030,arbitrum_gas_usd_per_tx,arbitrum_median_gas_usd,arbitrum_volume_usd_030,arbitrum_num_trades_030,avg_order_size_usd_Professional,avg_order_size_usd_Retail,tx_count_Professional,tx_count_Retail
0,2024-07-14 00:00:00+00:00,1.854139e+23,0.000000e+00,0.010340,0.006240,1.288951e+05,119,1543.167251,403.018389,6016.0,923.0
1,2024-07-14 01:00:00+00:00,1.760607e+23,-9.353201e+21,0.007959,0.005059,8.117022e+04,77,1448.775580,235.406901,4269.0,824.0
2,2024-07-14 02:00:00+00:00,1.613444e+23,-1.471624e+22,0.005898,0.004483,1.404074e+05,126,1370.081850,144.306807,3969.0,864.0
3,2024-07-14 03:00:00+00:00,1.508496e+23,-1.049480e+22,0.005264,0.004377,1.126164e+05,118,1139.371061,153.185287,2485.0,867.0
4,2024-07-14 04:00:00+00:00,1.610693e+23,1.021968e+22,0.006068,0.004599,2.334772e+05,237,1602.822830,159.502398,7053.0,1057.0
...,...,...,...,...,...,...,...,...,...,...,...
165,2024-07-20 21:00:00+00:00,1.557008e+23,-9.652175e+21,0.009192,0.005052,2.957397e+05,338,2064.634795,191.595372,5147.0,1440.0
166,2024-07-20 22:00:00+00:00,1.832527e+23,2.755196e+22,0.010094,0.005563,3.553526e+05,370,1557.006407,178.816927,3560.0,1221.0
167,2024-07-20 23:00:00+00:00,1.394003e+23,-4.385244e+22,0.009521,0.005383,1.318420e+06,958,1143.463802,238.774257,2531.0,1270.0
168,2024-07-21 00:00:00+00:00,1.747431e+23,3.534280e+22,0.009502,0.005291,7.531432e+05,642,1814.341095,255.054595,4848.0,1076.0


In [412]:
vol_vol_ml.set_index('day', inplace=True)

In [413]:
X = vol_vol_ml.drop(columns=['arbitrum_volume_usd_030'])  # Features
y = vol_vol_ml['arbitrum_volume_usd_030']  # Target variable

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

Mean Squared Error: 70151589950.59904
R-squared: 0.12636214669042112


## Prophet

## Logistic Regression