# Imports and env Variables

In [143]:
import pandas as pd
import numpy as np
from dune_client.client import DuneClient
from flipside import Flipside
import plotly
import datetime as dt
from dotenv import load_dotenv
from prophet import Prophet
import os
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

import random

from sklearn.model_selection import train_test_split, GridSearchCV, TimeSeriesSplit
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# import yfinance as yf

from utils import flipside_api_results
from sql import trader_classifier_query, prices_and_vol_query

In [144]:
def set_random_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    # torch.manual_seed(seed)
    # if torch.cuda.is_available():
    #     torch.cuda.manual_seed(seed)
    #     torch.cuda.manual_seed_all(seed)

In [145]:
seed = 20

set_random_seed(seed)

In [146]:
load_dotenv()

True

In [147]:
dune_api_key = os.getenv('DUNE_API_KEY')
flipside_api_key = os.getenv('FLIPSIDE_API_KEY')
dune = DuneClient(dune_api_key)
# print(dune_api_key, flipside_api_key)

# Data Retrieval and Analysis

## Dune Queries

Uniswap Arbitrum Pool Data
https://dune.com/queries/3930878

Arbitrum Gas Data
https://dune.com/queries/3931017

In [148]:
def dune_api_results(query_num, save_csv=False, csv_path=None):
    results = dune.get_latest_result(query_num)
    df = pd.DataFrame(results.result.rows)

    if save_csv and csv_path:
        df.to_csv(csv_path, index=False)
    return df


arb_pool_path = 'data/arb_pool_data.csv'
arb_pool_df = dune_api_results(3930878, True, arb_pool_path)
arb_pool_df

In [149]:
arb_pool_path = 'data/arb_pool_data.csv'
arb_pool_df = pd.read_csv(arb_pool_path)
arb_pool_df.columns = [f'arbitrum_{col}' if col != 'day' else col for col in arb_pool_df.columns]
print(arb_pool_df.columns)

Index(['arbitrum_avg_liquidity', 'day', 'arbitrum_fee_apr',
       'arbitrum_fee_tier', 'arbitrum_fees_usd', 'arbitrum_lp_addr',
       'arbitrum_num_trades', 'arbitrum_token_pair', 'arbitrum_tvl_usd',
       'arbitrum_volume_to_tvl', 'arbitrum_volume_usd'],
      dtype='object')


arbitrum_gas = dune.get_latest_result(3931017)
arbitrum_gas_df = pd.DataFrame(arbitrum_gas.result.rows)
arbitrum_gas_path = 'data/arb_gas.csv'
arbitrum_gas_df.to_csv(arbitrum_gas_path, index=False)

In [150]:
arbitrum_gas_path = 'data/arb_gas.csv'
arbitrum_gas_df = pd.read_csv(arbitrum_gas_path)
if 'Unnamed: 0' in arbitrum_gas_df.columns:
    arbitrum_gas_df = arbitrum_gas_df.drop('Unnamed: 0', axis=1)
arbitrum_gas_df.columns = [f'arbitrum_{col}' if col != 'dt' else col for col in arbitrum_gas_df.columns]
arbitrum_gas_df.columns


Index(['dt', 'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd'], dtype='object')

Uniswap Optimism Pool Data
https://dune.com/queries/3930989

Optimism Gas Data 
https://dune.com/queries/3930989

op_pool_path = 'data/op_pool_data.csv'
op_pool_df = dune_api_results(3930989, True, op_pool_path)
op_pool_df

In [151]:
op_pool_path = 'data/op_pool_data.csv'
op_pool_df = pd.read_csv(op_pool_path)
op_pool_df.columns = [f'optimism_{col}' if col != 'day' else col for col in op_pool_df.columns]
print(op_pool_df.columns)

Index(['optimism_avg_liquidity', 'day', 'optimism_fee_apr',
       'optimism_fee_tier', 'optimism_fees_usd', 'optimism_lp_addr',
       'optimism_num_trades', 'optimism_token_pair', 'optimism_tvl_usd',
       'optimism_volume_to_tvl', 'optimism_volume_usd'],
      dtype='object')


optimism_gas = dune.get_latest_result_dataframe(3931019)
optimism_gas_path = 'data/op_gas.csv'
optimism_gas.to_csv(optimism_gas_path, index=False)

In [152]:
optimism_gas_path = 'data/op_gas.csv'
optimism_gas_df = pd.read_csv(optimism_gas_path)
optimism_gas_df = optimism_gas_df.drop('_col3', axis=1) if '_col3' in optimism_gas_df.columns else optimism_gas_df
optimism_gas_df.columns = [f'optimism_{col}' if col != 'dt' else col for col in optimism_gas_df.columns]
optimism_gas_df.columns

Index(['dt', 'optimism_gas_usd_per_tx', 'optimism_median_gas_usd'], dtype='object')

Uniswap Base Pool Data
https://dune.com/queries/3930954

Base Gas Data
https://dune.com/queries/3931021

base_pool_path = 'data/base_pool_data.csv'
base_pool_df = dune_api_results(3930954, True, base_pool_path)
base_pool_df

In [153]:
base_pool_path = 'data/base_pool_data.csv'
base_pool_df = pd.read_csv(base_pool_path)
base_pool_df.columns = [f'base_{col}' if col != 'day' else col for col in base_pool_df.columns]
print(base_pool_df.columns)

Index(['base_avg_liquidity', 'day', 'base_fee_apr', 'base_fee_tier',
       'base_fees_usd', 'base_lp_addr', 'base_num_trades', 'base_token_pair',
       'base_tvl_usd', 'base_volume_to_tvl', 'base_volume_usd'],
      dtype='object')


base_gas = dune.get_latest_result_dataframe(3931021)
base_gas_path = 'data/base_gas.csv'
base_gas.to_csv(base_gas_path, index=False)

In [154]:
base_gas_path = 'data/base_gas.csv'
base_gas_df = pd.read_csv(base_gas_path)
base_gas_df.columns = [f'base_{col}' if col != 'dt' else col for col in base_gas_df.columns]
base_gas_df.columns

Index(['dt', 'base_gas_usd_per_tx', 'base_median_gas_usd'], dtype='object')

eth_gas = dune.get_latest_result_dataframe(3947206)
eth_gas_path = 'data/eth_gas.csv'
eth_gas.to_csv(eth_gas_path, index=False)

In [155]:
eth_gas_path = 'data/eth_gas.csv'
eth_gas_df = pd.read_csv(eth_gas_path)
eth_gas_df = eth_gas_df.drop('_col3', axis=1) if '_col3' in eth_gas_df.columns else eth_gas_df
eth_gas_df.columns = [f'eth_{col}' if col != 'dt' else col for col in eth_gas_df.columns]
eth_gas_df.columns

Index(['dt', 'eth_gas_usd_per_tx', 'eth_median_gas_usd'], dtype='object')

## Flipside Queries 

Trader Classifier - Arbitrum Data
https://flipsidecrypto.xyz/Brandyn/q/7NlPxrKU5KQb/2024-07-20-06-36-pm

classifier_data_path = 'data/classifier.csv'
trader_classifier_data = flipside_api_results(trader_classifier_query, flipside_api_key)
print(trader_classifier_data)
trader_classifier_data.to_csv(classifier_data_path, index=False)

In [156]:
classifier_data_path = 'data/classifier.csv'
trader_classifier_df = pd.read_csv(classifier_data_path)
trader_classifier_df.drop(columns=['__row_index'], inplace=True)
trader_classifier_df.head()

Unnamed: 0,trader_type,dt,tx_count,total_volume_usd,avg_order_size_usd,unique_contracts
0,Professional,2024-07-21T15:00:00.000Z,928,1047810.93,1183.967153,104
1,Retail,2024-07-21T15:00:00.000Z,176,91856.49,540.332294,60
2,Professional,2024-07-21T14:00:00.000Z,7552,12913378.38,1766.53603,252
3,Retail,2024-07-21T14:00:00.000Z,1790,294765.21,170.680492,196
4,Professional,2024-07-21T13:00:00.000Z,4472,5883981.27,1367.731583,211


Crypto Prices - https://flipsidecrypto.xyz/Brandyn/q/mScUOHdMvxki/2024-07-21-12-20-pm

prices_path = 'data/prices_vol.csv'
prices_data = flipside_api_results(prices_and_vol_query, flipside_api_key)
prices_data.to_csv(prices_path, index=False)

In [157]:
prices_path = 'data/prices_vol.csv'
prices_vol_df = pd.read_csv(prices_path)
prices_vol_df.drop(columns=['__row_index'], inplace=True)
prices_vol_df['dt'] = pd.to_datetime(prices_vol_df['dt'])
prices_vol_df.rename(columns={'dt':'day'}, inplace=True)
prices_vol_df.head()

Unnamed: 0,day,symbol,price,arbitrum_vol_ex_uni
0,2024-07-23 01:00:00+00:00,WETH,3444.47,3162515.13
1,2024-07-23 01:00:00+00:00,WBTC,67598.0,3162515.13
2,2024-07-23 00:00:00+00:00,WBTC,67606.0,10836507.71
3,2024-07-23 00:00:00+00:00,WETH,3447.68,10836507.71
4,2024-07-22 23:00:00+00:00,WBTC,67788.0,12103597.64


## Data Cleaning/Processing

Each token pair has several fee tiers and some have differente lp addresses.  

Can aggregate metrics to token pair and/or seperate each fee-tier out, aggregate by addresses

In [158]:
def label_gas_fee(gas_fee, low_threshold, high_threshold):
    if gas_fee < low_threshold:
        return 0  # Low gas
    elif gas_fee > high_threshold:
        return 2  # High gas
    else:
        return 1  # Normal gas

In [159]:
def gas_classifier(df):
    """Categorizes gas fees by hour into separate columns for low, normal, and high categories."""
    for col in df.columns:
        if col.endswith('gas_usd_per_tx') or col.endswith('median_gas_usd') or col == 'avg_gas' or col == 'median_gas':
            low_threshold = df[col].quantile(0.25)
            high_threshold = df[col].quantile(0.75)
            
            # Create binary columns for each category
            df[f'{col}_low_gas'] = df[col].apply(lambda x: 1 if x < low_threshold else 0)
            df[f'{col}_normal_gas'] = df[col].apply(lambda x: 1 if low_threshold <= x <= high_threshold else 0)
            df[f'{col}_high_gas'] = df[col].apply(lambda x: 1 if x > high_threshold else 0)
    
    return df

In [160]:
base_gas_df = gas_classifier(base_gas_df)
optimism_gas_df = gas_classifier(optimism_gas_df)
arbitrum_gas_df = gas_classifier(arbitrum_gas_df)


In [161]:
arbitrum_gas_df.rename(columns={'dt':'day'}, inplace=True)
arbitrum_gas_df['day'] = pd.to_datetime(arbitrum_gas_df['day'])

In [162]:
arbitrum_gas_df['day']

0      2024-04-22 00:00:00+00:00
1      2024-04-22 01:00:00+00:00
2      2024-04-22 02:00:00+00:00
3      2024-04-22 03:00:00+00:00
4      2024-04-22 04:00:00+00:00
                  ...           
2157   2024-07-20 21:00:00+00:00
2158   2024-07-20 22:00:00+00:00
2159   2024-07-20 23:00:00+00:00
2160   2024-07-21 00:00:00+00:00
2161   2024-07-21 01:00:00+00:00
Name: day, Length: 2162, dtype: datetime64[ns, UTC]

In [163]:
arb_pool_df['day'] = pd.to_datetime(arb_pool_df['day'])

trader_pivot_df = trader_classifier_df.pivot(
    index='day',
    columns='trader_type',
    values=['tx_count', 'total_volume_usd', 'avg_order_size_usd', 'unique_contracts']
)
trader_pivot_df.columns = [f'{col[0]}_{col[1]}' for col in trader_pivot_df.columns]
trader_pivot_df

In [164]:
prices_vol_df_pivot = prices_vol_df.drop(columns=['arbitrum_vol_ex_uni']).pivot(
    index='day',
    columns='symbol',
    values='price'
    
    )

prices_vol_df_pivot.columns = [f'{col[0]}_{col[1]}' for col in prices_vol_df_pivot.columns]
prices_vol_df_pivot.rename(columns={"W_B":"BTC_Price","W_E":"ETH_Price"}, inplace=True)
prices_vol_df_pivot

Unnamed: 0_level_0,BTC_Price,ETH_Price
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-07-01 00:00:00+00:00,30455.140252,1933.301804
2023-07-01 01:00:00+00:00,30444.441517,1932.612190
2023-07-01 02:00:00+00:00,30467.154009,1938.738233
2023-07-01 03:00:00+00:00,30463.502821,1931.559012
2023-07-01 04:00:00+00:00,30413.699209,1921.631704
...,...,...
2024-07-22 21:00:00+00:00,68080.000000,3492.530000
2024-07-22 22:00:00+00:00,68045.000000,3490.770000
2024-07-22 23:00:00+00:00,67788.000000,3461.760000
2024-07-23 00:00:00+00:00,67606.000000,3447.680000


In [165]:
prices_vol_df_pivot = prices_vol_df_pivot.merge(prices_vol_df[['day','arbitrum_vol_ex_uni']],
                                                how='inner', on='day'
                                                
                                                
                                                
                                                )



for col in prices_vol_df_pivot.columns:
    if col != 'day':
        prices_vol_df_pivot[f'{col}_pct_change'] = prices_vol_df_pivot[f'{col}'].pct_change()

In [166]:
prices_vol_df_pivot


Unnamed: 0,day,BTC_Price,ETH_Price,arbitrum_vol_ex_uni,BTC_Price_pct_change,ETH_Price_pct_change,arbitrum_vol_ex_uni_pct_change
0,2023-07-01 00:00:00+00:00,30455.140252,1933.301804,2672462.92,,,
1,2023-07-01 00:00:00+00:00,30455.140252,1933.301804,2672462.92,0.000000,0.000000,0.000000
2,2023-07-01 01:00:00+00:00,30444.441517,1932.612190,1794246.45,-0.000351,-0.000357,-0.328617
3,2023-07-01 01:00:00+00:00,30444.441517,1932.612190,1794246.45,0.000000,0.000000,0.000000
4,2023-07-01 02:00:00+00:00,30467.154009,1938.738233,1328019.33,0.000746,0.003170,-0.259846
...,...,...,...,...,...,...,...
18623,2024-07-22 23:00:00+00:00,67788.000000,3461.760000,12103597.64,0.000000,0.000000,0.000000
18624,2024-07-23 00:00:00+00:00,67606.000000,3447.680000,10836507.71,-0.002685,-0.004067,-0.104687
18625,2024-07-23 00:00:00+00:00,67606.000000,3447.680000,10836507.71,0.000000,0.000000,0.000000
18626,2024-07-23 01:00:00+00:00,67598.000000,3444.470000,3162515.13,-0.000118,-0.000931,-0.708161


## Arbitrum

### Summary Statistics

In [167]:
arb_num_rows, arb_num_columns = arb_pool_df.shape
arb_total_datapoints = arb_num_rows * arb_num_columns
print(f'Total number of datapoints: {arb_total_datapoints}')

Total number of datapoints: 38148


In [168]:
op_num_rows, op_num_columns = op_pool_df.shape
op_total_datapoints = op_num_rows * op_num_columns
print(f'Total number of datapoints: {op_total_datapoints}')

Total number of datapoints: 39017


In [169]:
base_num_rows, base_num_columns = base_pool_df.shape
base_total_datapoints = base_num_rows * base_num_columns
print(f'Total number of datapoints: {base_total_datapoints}')

Total number of datapoints: 36454


In [170]:
print(arb_pool_df.describe())

       arbitrum_avg_liquidity  arbitrum_fee_apr  arbitrum_fees_usd  \
count            3.468000e+03      3.468000e+03       3.468000e+03   
mean             1.782774e+24      1.458773e-02       2.966866e+02   
std              7.617222e+24      5.095527e-02       7.349982e+02   
min              9.140293e+07      3.241851e-13       6.002412e-15   
25%              6.812052e+15      8.222112e-04       2.920990e-01   
50%              1.903847e+18      4.227947e-03       1.321334e+01   
75%              2.049945e+22      1.363400e-02       2.495379e+02   
max              4.600563e+25      1.606219e+00       8.186259e+03   

       arbitrum_num_trades  arbitrum_tvl_usd  arbitrum_volume_to_tvl  \
count          3468.000000      3.468000e+03            3.468000e+03   
mean            190.908304      9.310402e+06            5.508794e-02   
std             338.143117      1.496615e+07            1.409350e-01   
min               1.000000      6.708087e+00            2.982674e-13   
25%      

In [171]:
print(arbitrum_gas_df.describe())

       arbitrum_gas_usd_per_tx  arbitrum_median_gas_usd  \
count              2162.000000              2162.000000   
mean                  0.023634                 0.012860   
std                   0.221630                 0.129315   
min                   0.004397                 0.001491   
25%                   0.006888                 0.004532   
50%                   0.008472                 0.005564   
75%                   0.010704                 0.007182   
max                   6.944713                 4.181090   

       arbitrum_gas_usd_per_tx_low_gas  arbitrum_gas_usd_per_tx_normal_gas  \
count                      2162.000000                         2162.000000   
mean                          0.250231                            0.499537   
std                           0.433246                            0.500115   
min                           0.000000                            0.000000   
25%                           0.000000                            0.000000   


### Data Visualizations

#### Aggregated Data

In [172]:
arb_pool_df.columns

arb_ranked_vol_stable = arb_pool_df[arb_pool_df['arbitrum_token_pair']=='USDC-WETH'] 
arb_ranked_vol_stable

Unnamed: 0,arbitrum_avg_liquidity,day,arbitrum_fee_apr,arbitrum_fee_tier,arbitrum_fees_usd,arbitrum_lp_addr,arbitrum_num_trades,arbitrum_token_pair,arbitrum_tvl_usd,arbitrum_volume_to_tvl,arbitrum_volume_usd
1,2.075720e+16,2024-07-14 00:00:00+00:00,0.015918,0.01%,3.056607,0x6f38e884725a116c9c7fbf208e79fe8828a2595f,287,USDC-WETH,7.008980e+04,0.436099,3.056607e+04
3,4.911041e+18,2024-07-14 00:00:00+00:00,0.012910,0.05%,1705.827484,0xc6962004f452be9203591991d15f6b388e09e8d0,845,USDC-WETH,4.822924e+07,0.070738,3.411655e+06
6,3.276466e+17,2024-07-14 00:00:00+00:00,0.006978,0.30%,66.002818,0xc473e2aee3441bf9240be85eb122abb059a3b57c,17,USDC-WETH,3.452572e+06,0.006372,2.200094e+04
20,2.004641e+16,2024-07-14 01:00:00+00:00,0.012878,0.01%,2.474180,0x6f38e884725a116c9c7fbf208e79fe8828a2595f,240,USDC-WETH,7.012752e+04,0.352812,2.474180e+04
22,5.013677e+18,2024-07-14 01:00:00+00:00,0.009503,0.05%,1251.246970,0xc6962004f452be9203591991d15f6b388e09e8d0,736,USDC-WETH,4.805992e+07,0.052070,2.502494e+06
...,...,...,...,...,...,...,...,...,...,...,...
3440,8.079644e+17,2024-07-21 00:00:00+00:00,0.022752,0.01%,2.999552,0x6f38e884725a116c9c7fbf208e79fe8828a2595f,113,USDC-WETH,4.812054e+04,0.623341,2.999552e+04
3443,6.643308e+18,2024-07-21 00:00:00+00:00,0.009120,0.05%,1365.480618,0xc6962004f452be9203591991d15f6b388e09e8d0,584,USDC-WETH,5.464729e+07,0.049974,2.730961e+06
3444,4.054406e+17,2024-07-21 00:00:00+00:00,0.008527,0.30%,87.044104,0xc473e2aee3441bf9240be85eb122abb059a3b57c,24,USDC-WETH,3.726082e+06,0.007787,2.901470e+04
3459,7.035460e+18,2024-07-21 01:00:00+00:00,0.001572,0.05%,235.281160,0xc6962004f452be9203591991d15f6b388e09e8d0,157,USDC-WETH,5.462336e+07,0.008615,4.705623e+05


In [173]:
arb_ranked_vol_stable = arb_ranked_vol_stable.groupby('arbitrum_fee_tier')['arbitrum_volume_usd'].sum()
arb_ranked_vol_stable.sort_values(inplace=True)
arb_ranked_vol_stable = arb_ranked_vol_stable.apply(lambda x: f'{x:,.2f}')
arb_ranked_vol_stable

arbitrum_fee_tier
1.00%         30,417.65
0.01%      2,588,022.22
0.30%      8,660,812.75
0.05%    836,460,884.57
Name: arbitrum_volume_usd, dtype: object

In [174]:
arb_pool_df_copy = arb_pool_df.set_index('day')
arb_pool_df_copy[(arb_pool_df_copy['arbitrum_token_pair']=='USDC-WETH')&(arb_pool_df_copy['arbitrum_fee_tier']=='0.01%')][['arbitrum_fee_tier','arbitrum_tvl_usd']]

Unnamed: 0_level_0,arbitrum_fee_tier,arbitrum_tvl_usd
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-07-14 00:00:00+00:00,0.01%,70089.797979
2024-07-14 01:00:00+00:00,0.01%,70127.520219
2024-07-14 02:00:00+00:00,0.01%,70051.354131
2024-07-14 03:00:00+00:00,0.01%,70068.423443
2024-07-14 04:00:00+00:00,0.01%,70186.064684
...,...,...
2024-07-20 21:00:00+00:00,0.01%,48460.898146
2024-07-20 22:00:00+00:00,0.01%,48491.764876
2024-07-20 23:00:00+00:00,0.01%,48117.867454
2024-07-21 00:00:00+00:00,0.01%,48120.543828


In [175]:
aggregated_arb_hour = arb_pool_df[['day','arbitrum_fees_usd','arbitrum_tvl_usd','arbitrum_volume_usd','arbitrum_num_trades']].groupby('day').sum()
aggregated_arb_hour = aggregated_arb_hour.merge(arbitrum_gas_df, how='left', on='day')

# Averages per hour among token pairs
aggregated_arb_tp = arb_pool_df[['arbitrum_token_pair','arbitrum_fees_usd','arbitrum_tvl_usd','arbitrum_volume_usd','arbitrum_num_trades']].groupby('arbitrum_token_pair').mean() 

In [176]:
aggregated_arb_tp

Unnamed: 0_level_0,arbitrum_fees_usd,arbitrum_tvl_usd,arbitrum_volume_usd,arbitrum_num_trades
arbitrum_token_pair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ARB-WETH,123.049496,7323853.0,236746.2,150.150943
DAI-USDC,5.937179,802553.8,59244.97,14.883721
USDC-WBTC,257.499019,6181301.0,512955.2,217.636364
USDC-WETH,741.293099,15742440.0,1412900.0,380.783333
USDT-WETH,230.414451,8072348.0,410541.0,282.114964
WBTC-WETH,323.289718,20068010.0,627351.0,156.197125
WETH-XAI,25.963443,311109.5,8609.368,32.870968
WETH-ZRO,449.440307,2632211.0,138740.1,123.250794
WETH-wstETH,18.332452,8319976.0,183234.1,18.208738


In [177]:
aggregated_arb_hour.describe()

Unnamed: 0,arbitrum_fees_usd,arbitrum_tvl_usd,arbitrum_volume_usd,arbitrum_num_trades,arbitrum_gas_usd_per_tx,arbitrum_median_gas_usd,arbitrum_gas_usd_per_tx_low_gas,arbitrum_gas_usd_per_tx_normal_gas,arbitrum_gas_usd_per_tx_high_gas,arbitrum_median_gas_usd_low_gas,arbitrum_median_gas_usd_normal_gas,arbitrum_median_gas_usd_high_gas
count,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0
mean,6052.406474,189932200.0,10374510.0,3894.529412,0.011762,0.007035,0.094118,0.611765,0.294118,0.088235,0.617647,0.294118
std,3545.725661,8890367.0,6642967.0,1644.030155,0.010017,0.003911,0.292855,0.488788,0.456991,0.284475,0.487398,0.456991
min,912.69363,165983600.0,1035510.0,547.0,0.005264,0.001491,0.0,0.0,0.0,0.0,0.0,0.0
25%,3332.511041,183956200.0,5430933.0,2604.5,0.007909,0.005254,0.0,0.0,0.0,0.0,0.0,0.0
50%,5324.330184,190311600.0,8846839.0,3592.0,0.00941,0.006077,0.0,1.0,0.0,0.0,1.0,0.0
75%,7908.212068,196928900.0,14231710.0,4913.75,0.011557,0.007356,0.0,1.0,1.0,0.0,1.0,1.0
max,18511.980815,204910600.0,34207140.0,8707.0,0.087784,0.038661,1.0,1.0,1.0,1.0,1.0,1.0


In [178]:
aggregated_arb_tp.columns

Index(['arbitrum_fees_usd', 'arbitrum_tvl_usd', 'arbitrum_volume_usd',
       'arbitrum_num_trades'],
      dtype='object')

In [179]:
agg_tp_tvl_graph = px.bar(aggregated_arb_tp, x=aggregated_arb_tp.index, y='arbitrum_tvl_usd', color=aggregated_arb_tp.index, title='Total Value Locked')
agg_tp_tvl_graph.show()

In [180]:
agg_tp_vol_graph = px.bar(aggregated_arb_tp, x=aggregated_arb_tp.index, y='arbitrum_volume_usd', color=aggregated_arb_tp.index,title=
                          'Volume (USD)')
agg_tp_vol_graph.show()

In [181]:
agg_tp_fee_graph = px.bar(aggregated_arb_tp, x=aggregated_arb_tp.index, y='arbitrum_fees_usd', color=aggregated_arb_tp.index, title='Fees (USD)')
agg_tp_fee_graph.show()

In [182]:
agg_tp_trades_graph = px.bar(aggregated_arb_tp, x=aggregated_arb_tp.index, y='arbitrum_num_trades', color=aggregated_arb_tp.index, title='Number of Trades')
agg_tp_trades_graph.show()

In [183]:
aggregated_arb_hour['arbitrum_volume_to_tvl'] = aggregated_arb_hour['arbitrum_volume_usd'] / aggregated_arb_hour['arbitrum_tvl_usd']
aggregated_arb_hour

Unnamed: 0,day,arbitrum_fees_usd,arbitrum_tvl_usd,arbitrum_volume_usd,arbitrum_num_trades,arbitrum_gas_usd_per_tx,arbitrum_median_gas_usd,arbitrum_gas_usd_per_tx_low_gas,arbitrum_gas_usd_per_tx_normal_gas,arbitrum_gas_usd_per_tx_high_gas,arbitrum_median_gas_usd_low_gas,arbitrum_median_gas_usd_normal_gas,arbitrum_median_gas_usd_high_gas,arbitrum_volume_to_tvl
0,2024-07-14 00:00:00+00:00,4305.423434,1.770941e+08,7.647457e+06,3429,0.010340,0.006240,0,1,0,0,1,0,0.043183
1,2024-07-14 01:00:00+00:00,2620.052008,1.728960e+08,4.751922e+06,2542,0.007959,0.005059,0,1,0,0,1,0,0.027484
2,2024-07-14 02:00:00+00:00,2509.557949,1.704295e+08,4.138064e+06,2252,0.005898,0.004483,1,0,0,1,0,0,0.024280
3,2024-07-14 03:00:00+00:00,1305.792138,1.703676e+08,2.057988e+06,1547,0.005264,0.004377,1,0,0,1,0,0,0.012080
4,2024-07-14 04:00:00+00:00,5493.702700,1.787923e+08,9.360130e+06,4237,0.006068,0.004599,1,0,0,0,1,0,0.052352
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,2024-07-20 21:00:00+00:00,5024.962017,2.048495e+08,8.463321e+06,3254,0.009192,0.005052,0,1,0,0,1,0,0.041315
166,2024-07-20 22:00:00+00:00,2853.101471,1.965587e+08,3.837792e+06,2190,0.010094,0.005563,0,1,0,0,1,0,0.019525
167,2024-07-20 23:00:00+00:00,5020.902254,1.957108e+08,2.632626e+06,2107,0.009521,0.005383,0,1,0,0,1,0,0.013452
168,2024-07-21 00:00:00+00:00,5235.640922,1.999137e+08,6.726266e+06,2649,0.009502,0.005291,0,1,0,0,1,0,0.033646


In [184]:
aggregated_arb_hour_vttvl_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
aggregated_arb_hour_vttvl_fig.add_trace(
    go.Bar(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_volume_to_tvl'],
        name='Volume/TVL'
    ),
    secondary_y=False
)


aggregated_arb_hour_vttvl_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_vttvl_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_vttvl_fig.update_layout(
    title='Volume/TVL Ratio to Gas',
    barmode='group'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

aggregated_arb_hour_vttvl_fig.update_xaxes(title_text="Date")

aggregated_arb_hour_vttvl_fig.show()

In [185]:
aggregated_arb_hour_fees_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
aggregated_arb_hour_fees_fig.add_trace(
    go.Bar(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_fees_usd'],
        name='Fee Revenue'
    ),
    secondary_y=False
)
aggregated_arb_hour_fees_fig.add_trace(
    go.Bar(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_num_trades'],
        name='# of Trades',
    ),
    secondary_y=False
)


aggregated_arb_hour_fees_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_fees_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_fees_fig.update_layout(
    title='Fee Revenue and Trades to Gas',
    barmode='group'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

aggregated_arb_hour_fees_fig.update_xaxes(title_text="Date")

aggregated_arb_hour_fees_fig.show()

In [186]:
aggregated_arb_hour_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
aggregated_arb_hour_fig.add_trace(
    go.Bar(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_tvl_usd'],
        name='tvl'
    ),
    secondary_y=False
)
aggregated_arb_hour_fig.add_trace(
    go.Bar(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_volume_usd'],
        name='volume'
    ),
    secondary_y=False
)


aggregated_arb_hour_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_fig.add_trace(
    go.Scatter(
        x=aggregated_arb_hour['day'],
        y=aggregated_arb_hour['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

aggregated_arb_hour_fig.update_layout(
    title='TVL and Vol to Gas',
    barmode='group'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

aggregated_arb_hour_fig.update_xaxes(title_text="Date")

aggregated_arb_hour_fig.show()

filtered_cols = ['day','arbitrum_avg_liquidity','arbitrum_fees_usd','arbitrum_tvl_usd','arbitrum_volume_usd','arbitrum_num_trades','arbitrum_volume_to_tvl','arbitrum_token_pair']

#### Individual Token Pairs

In [187]:
def fee_tier_cleaning(df, network):
    fee_tiers = df[f'{network}_fee_tier'].unique()
    merged_df = None
    
    # Determine the complete date range
    min_date = df['day'].min()
    max_date = df['day'].max()
    complete_date_range = pd.date_range(start=min_date, end=max_date, freq='H')

    for fee_tier in fee_tiers:
        # Filter the DataFrame for the specific fee tier
        filtered_df = df[df[f'{network}_fee_tier'] == fee_tier].copy()
        
        # Rename the columns to include the fee tier
        fee_tier_suffix = fee_tier.replace('%', '')
        filtered_df.columns = [f'{col}_{fee_tier_suffix}' if col not in ['day', f'{network}_gas_usd_per_tx', f'{network}_median_gas_usd'] else col for col in filtered_df.columns]
        
        # Set the 'day' column as the index
        filtered_df.set_index('day', inplace=True)
        
        # Reindex the DataFrame to the complete date range and fill NaN values with 0
        filtered_df = filtered_df.reindex(complete_date_range, fill_value=0).rename_axis('day').reset_index()
        
        # Calculate the net change in liquidity
        liquidity_col = f'{network}_avg_liquidity_{fee_tier_suffix}'
        net_liquidity_col = f'{network}_net_liquidity_{fee_tier_suffix}'
        filtered_df[net_liquidity_col] = filtered_df[liquidity_col] - filtered_df[liquidity_col].shift(1)
        filtered_df[net_liquidity_col].fillna(0, inplace=True)
        
        added_liquidity_col = f'{network}_liquidity_added_{fee_tier_suffix}'
        removed_liquidity_col = f'{network}_liquidity_removed_{fee_tier_suffix}'
        filtered_df[added_liquidity_col] = filtered_df[net_liquidity_col].apply(lambda x: x if x > 0 else 0)
        filtered_df[removed_liquidity_col] = filtered_df[net_liquidity_col].apply(lambda x: -x if x < 0 else 0)

        # Merge with the main DataFrame
        if merged_df is None:
            merged_df = filtered_df
        else:
            merged_df = pd.merge(merged_df, filtered_df, on='day', how='inner')

    arb_gas_df = arbitrum_gas_df.rename(columns={'dt':'day'})
    arb_gas_df['day'] = pd.to_datetime(arb_gas_df['day'])
    merged_df = merged_df.merge(arb_gas_df, how='left', on=['day'])
        
    return merged_df

In [188]:
arb_pool_df.columns

Index(['arbitrum_avg_liquidity', 'day', 'arbitrum_fee_apr',
       'arbitrum_fee_tier', 'arbitrum_fees_usd', 'arbitrum_lp_addr',
       'arbitrum_num_trades', 'arbitrum_token_pair', 'arbitrum_tvl_usd',
       'arbitrum_volume_to_tvl', 'arbitrum_volume_usd'],
      dtype='object')

In [189]:
# All types of fee tiers in dataset 

fee_tiers_unique = arb_pool_df['arbitrum_fee_tier'].unique()
token_pairs_unique = arb_pool_df['arbitrum_token_pair'].unique()

print(f'token pairs: {token_pairs_unique}')

token pairs: ['WETH-ZRO' 'USDC-WETH' 'USDC-WBTC' 'USDT-WETH' 'WBTC-WETH' 'ARB-WETH'
 'WETH-XAI' 'WETH-wstETH' 'DAI-USDC']


In [190]:
def token_pair_df_cleaned(df, token_pair, network):
    token_pair_df = df[df[f'{network}_token_pair']==f'{token_pair}']
    cleaned_token_pair_df = fee_tier_cleaning(token_pair_df, network)
    return cleaned_token_pair_df

In [191]:
usdc_weth = token_pair_df_cleaned(arb_pool_df, 'USDC-WETH', 'arbitrum') 
usdc_weth.columns


'H' is deprecated and will be removed in a future version, please use 'h' instead.


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].

Index(['day', 'arbitrum_avg_liquidity_0.01', 'arbitrum_fee_apr_0.01',
       'arbitrum_fee_tier_0.01', 'arbitrum_fees_usd_0.01',
       'arbitrum_lp_addr_0.01', 'arbitrum_num_trades_0.01',
       'arbitrum_token_pair_0.01', 'arbitrum_tvl_usd_0.01',
       'arbitrum_volume_to_tvl_0.01', 'arbitrum_volume_usd_0.01',
       'arbitrum_net_liquidity_0.01', 'arbitrum_liquidity_added_0.01',
       'arbitrum_liquidity_removed_0.01', 'arbitrum_avg_liquidity_0.05',
       'arbitrum_fee_apr_0.05', 'arbitrum_fee_tier_0.05',
       'arbitrum_fees_usd_0.05', 'arbitrum_lp_addr_0.05',
       'arbitrum_num_trades_0.05', 'arbitrum_token_pair_0.05',
       'arbitrum_tvl_usd_0.05', 'arbitrum_volume_to_tvl_0.05',
       'arbitrum_volume_usd_0.05', 'arbitrum_net_liquidity_0.05',
       'arbitrum_liquidity_added_0.05', 'arbitrum_liquidity_removed_0.05',
       'arbitrum_avg_liquidity_0.30', 'arbitrum_fee_apr_0.30',
       'arbitrum_fee_tier_0.30', 'arbitrum_fees_usd_0.30',
       'arbitrum_lp_addr_0.30', 'arb

In [192]:
arb_pool_df[(arb_pool_df['arbitrum_token_pair']=='DAI-USDC')]['arbitrum_fee_tier'].unique()

array(['0.01%', '0.05%', '0.30%'], dtype=object)

In [193]:
weth_zro_df = arb_pool_df[(arb_pool_df['arbitrum_token_pair']=='WETH-ZRO')]
weth_zro = fee_tier_cleaning(weth_zro_df, 'arbitrum') 


'H' is deprecated and will be removed in a future version, please use 'h' instead.


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].

In [194]:
weth_zro.columns

Index(['day', 'arbitrum_avg_liquidity_0.30', 'arbitrum_fee_apr_0.30',
       'arbitrum_fee_tier_0.30', 'arbitrum_fees_usd_0.30',
       'arbitrum_lp_addr_0.30', 'arbitrum_num_trades_0.30',
       'arbitrum_token_pair_0.30', 'arbitrum_tvl_usd_0.30',
       'arbitrum_volume_to_tvl_0.30', 'arbitrum_volume_usd_0.30',
       'arbitrum_net_liquidity_0.30', 'arbitrum_liquidity_added_0.30',
       'arbitrum_liquidity_removed_0.30', 'arbitrum_avg_liquidity_1.00',
       'arbitrum_fee_apr_1.00', 'arbitrum_fee_tier_1.00',
       'arbitrum_fees_usd_1.00', 'arbitrum_lp_addr_1.00',
       'arbitrum_num_trades_1.00', 'arbitrum_token_pair_1.00',
       'arbitrum_tvl_usd_1.00', 'arbitrum_volume_to_tvl_1.00',
       'arbitrum_volume_usd_1.00', 'arbitrum_net_liquidity_1.00',
       'arbitrum_liquidity_added_1.00', 'arbitrum_liquidity_removed_1.00',
       'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd',
       'arbitrum_gas_usd_per_tx_low_gas', 'arbitrum_gas_usd_per_tx_normal_gas',
       'arbitrum_

In [195]:
dai_usdc_df = arb_pool_df[(arb_pool_df['arbitrum_token_pair']=='DAI-USDC')]
dai_usdc = fee_tier_cleaning(dai_usdc_df, 'arbitrum') 



'H' is deprecated and will be removed in a future version, please use 'h' instead.


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].

In [196]:
dai_usdc.columns

Index(['day', 'arbitrum_avg_liquidity_0.01', 'arbitrum_fee_apr_0.01',
       'arbitrum_fee_tier_0.01', 'arbitrum_fees_usd_0.01',
       'arbitrum_lp_addr_0.01', 'arbitrum_num_trades_0.01',
       'arbitrum_token_pair_0.01', 'arbitrum_tvl_usd_0.01',
       'arbitrum_volume_to_tvl_0.01', 'arbitrum_volume_usd_0.01',
       'arbitrum_net_liquidity_0.01', 'arbitrum_liquidity_added_0.01',
       'arbitrum_liquidity_removed_0.01', 'arbitrum_avg_liquidity_0.05',
       'arbitrum_fee_apr_0.05', 'arbitrum_fee_tier_0.05',
       'arbitrum_fees_usd_0.05', 'arbitrum_lp_addr_0.05',
       'arbitrum_num_trades_0.05', 'arbitrum_token_pair_0.05',
       'arbitrum_tvl_usd_0.05', 'arbitrum_volume_to_tvl_0.05',
       'arbitrum_volume_usd_0.05', 'arbitrum_net_liquidity_0.05',
       'arbitrum_liquidity_added_0.05', 'arbitrum_liquidity_removed_0.05',
       'arbitrum_avg_liquidity_0.30', 'arbitrum_fee_apr_0.30',
       'arbitrum_fee_tier_0.30', 'arbitrum_fees_usd_0.30',
       'arbitrum_lp_addr_0.30', 'arb

weth_zro_30['arbitrum_net_liquidity_.30%'] = weth_zro_30['arbitrum_avg_liquidity_.30%'] - weth_zro_30['arbitrum_avg_liquidity_.30%'].shift(1)
weth_zro_30['arbitrum_net_liquidity_.30%'].fillna(0, inplace=True) 
print(weth_zro_30[['day', 'arbitrum_avg_liquidity_.30%', 'arbitrum_net_liquidity_.30%']].head())

weth_zro_100['arbitrum_net_liquidity_1.0%'] = weth_zro_100['arbitrum_avg_liquidity_1.0%'] - weth_zro_100['arbitrum_avg_liquidity_1.0%'].shift(1)
weth_zro_100['arbitrum_net_liquidity_1.0%'].fillna(0, inplace=True) 
print(weth_zro_100[['day', 'arbitrum_avg_liquidity_1.0%', 'arbitrum_net_liquidity_1.0%']].head())

weth_zro = pd.merge(weth_zro_30, weth_zro_100, how='left', on=['day']) 
weth_zro = weth_zro.fillna(0)

In [197]:
bar_colors = ['#1f77b4',  # Blue
              'yellow',  # Replaced Orange with Bright Red-Orange
              '#2ca02c',  # Green
              '#d62728',  # Red
              '#9467bd',  # Purple
              '#8c564b',  # Brown
              '#e377c2',  # Pink
              '#7f7f7f']  # Gray


line_colors = ['#ff6f61',  # Replaced Light Green with Coral
               '#c5b0d5',  # Light Purple
               '#ff9896',  # Light Red
               '#c49c94',  # Light Brown
               '#f7b6d2',  # Light Pink
               '#c7c7c7',  # Light Gray
               '#e5e5e5',  # Very Light Gray
               '#d62728']  # Red


In [198]:
suffixes = ['_0.30', '_0.05', '_0.01', '_1.00']

In [199]:
def corr_matrix_graph(df, fee_tier):
    fee_tier_cols = [f'arbitrum_avg_liquidity_{fee_tier}', f'arbitrum_fees_usd_{fee_tier}', f'arbitrum_tvl_usd_{fee_tier}', f'arbitrum_volume_usd_{fee_tier}']
    corr_matrix = df[fee_tier_cols].corr()
    fig = go.Figure(data=go.Heatmap(
                   z=corr_matrix.values,
                   x=corr_matrix.columns,
                   y=corr_matrix.columns,
                   colorscale='Viridis',
                   zmin=-1, zmax=1))

    fig.update_layout(
        title='Correlation Matrix',
        xaxis_nticks=36
    )

    fig.show()
    return fig 

In [200]:
def trades_to_gas_chart(df, barmode='stack'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
    # Add bar traces
    for idx, suffix in enumerate(suffixes):
        trades_column = f'arbitrum_num_trades{suffix}'
        if trades_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[trades_column],
                    name=f'{suffix.replace("_", "")}% # of Trades',
                    marker_color=bar_colors[idx]  # Set the color for the bar
                ),
                secondary_y=False
            )

    # Ensure line_colors has enough colors for your lines
    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_median_gas_usd'],
            name='Median Gas',
            mode='lines',
            line=dict(color=line_colors[0])  # Set the color for the Median Gas line
        ),
        secondary_y=True
    )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_gas_usd_per_tx'],
            name='Avg Gas',
            mode='lines',
            line=dict(color=line_colors[1])  # Set the color for the Avg Gas line
        ),
        secondary_y=True
    )

    df_fig.update_layout(
        title='Trades to Gas Comparison',
        barmode=barmode  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
    )

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()

    return df_fig


In [201]:
def liq_to_gas_chart(df, barmode='stack'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])
        
    # Add bar traces
    for idx, suffix in enumerate(suffixes):
        liq_column = f'arbitrum_avg_liquidity{suffix}'
        if liq_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[liq_column],
                    name=f'{suffix.replace("_", "")}% Liquidity',
                    marker_color=bar_colors[idx]  # Set the color for the bar
                ),
                secondary_y=False
            )

    # Add line traces
    # Ensure line_colors has enough colors for your lines
    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_median_gas_usd'],
            name='Median Gas',
            mode='lines',
            line=dict(color=line_colors[0])  # Set the color for the Median Gas line
        ),
        secondary_y=True
    )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_gas_usd_per_tx'],
            name='Avg Gas',
            mode='lines',
            line=dict(color=line_colors[1])  # Set the color for the Avg Gas line
        ),
        secondary_y=True
    )

    df_fig.update_layout(
        title='Liquidity to Gas Comparison',
        barmode=barmode  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
    )

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()

    return df_fig


In [202]:
def net_liq_to_gas_chart(df, barmode='stack'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])
        
    # Add bar traces
    for idx, suffix in enumerate(suffixes):
        net_liq_column = f'arbitrum_net_liquidity{suffix}'
        if net_liq_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[net_liq_column],
                    name=f'{suffix.replace("_", "")}% Net Liquidity Added',
                    marker_color=bar_colors[idx]  # Set the color for the bar
                ),
                secondary_y=False
            )

    # Add line traces
    # Note: Ensure line_colors has at least two colors
    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_median_gas_usd'],
            name='Median Gas',
            mode='lines',
            line=dict(color=line_colors[0])  # Set the color for the Median Gas line
        ),
        secondary_y=True
    )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_gas_usd_per_tx'],
            name='Avg Gas',
            mode='lines',
            line=dict(color=line_colors[1])  # Set the color for the Avg Gas line
        ),
        secondary_y=True
    )

    df_fig.update_layout(
        title='Net Liquidity to Gas Comparison',
        barmode=barmode  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
    )

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()

    return df_fig


In [203]:
def vol_tvl_to_gas_chart(df, barmode='stack'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])
        
    # Add bar traces
    for idx, suffix in enumerate(suffixes):
        vol_tvl_column = f'arbitrum_volume_to_tvl{suffix}'
        if vol_tvl_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[vol_tvl_column],
                    name=f'{suffix.replace("_", "")}% Volume to TVL Ratio',
                    marker_color=bar_colors[idx]  # Set the color for the bar
                ),
                secondary_y=False
            )

    # Add line traces
    # Note: idx should not be reused here as there are only two lines and idx may be out of range for line_colors
    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_median_gas_usd'],
            name='Median Gas',
            mode='lines',
            line=dict(color=line_colors[0])  # Set the color for the Median Gas line
        ),
        secondary_y=True
    )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_gas_usd_per_tx'],
            name='Avg Gas',
            mode='lines',
            line=dict(color=line_colors[1])  # Set the color for the Avg Gas line
        ),
        secondary_y=True
    )

    df_fig.update_layout(
        title='Volume/TVL Ratio to Gas Comparison',
        barmode=barmode  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
    )

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()

    return df_fig


In [204]:
def liquidity_to_trades_chart(df, barmode='stack'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Add bar traces
    for idx, suffix in enumerate(suffixes):
        liquidity_column = f'arbitrum_avg_liquidity{suffix}'
        if liquidity_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[liquidity_column],
                    name=f'{suffix.replace("_", "")}% Liquidity',
                    marker_color=bar_colors[idx]  # Set the color for the bar
                ),
                secondary_y=False
            )
    
    # Add line traces
    for idx, suffix in enumerate(suffixes):
        trades_column = f'arbitrum_num_trades{suffix}'
        if trades_column in df.columns:
            df_fig.add_trace(
                go.Scatter(
                    x=df['day'],
                    y=df[trades_column],
                    name=f'{suffix.replace("_", "")}% # Of Trades',
                    mode='lines',
                    line=dict(color=line_colors[idx])  # Set the color for the line
                ),
                secondary_y=True
            )
    
    df_fig.update_layout(
        title='Liquidity to # of Trades Comparison',
        barmode=barmode
    )

    df_fig.update_xaxes(title_text="Date")
    
    df_fig.show()
    return df_fig


In [205]:
def gas_to_rev_chart(df, barmode='stack'):
    df_fig = make_subplots(specs=[[{"secondary_y": True}]])
        
    for idx, suffix in enumerate(suffixes):
        fee_column = f'arbitrum_fees_usd{suffix}'
        if fee_column in df.columns:
            df_fig.add_trace(
                go.Bar(
                    x=df['day'],
                    y=df[fee_column],
                    name=f'{suffix.replace("_", "")}% Fee Revenue',
                    marker_color=bar_colors[idx % len(bar_colors)]  # Use color based on index
                ),
                secondary_y=False
            )

    # Ensure `idx` is set correctly for line traces
    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_median_gas_usd'],
            name='Median Gas',
            mode='lines',
            line=dict(color=line_colors[0])  # Use the first color in the line colors
        ),
        secondary_y=True
    )

    df_fig.add_trace(
        go.Scatter(
            x=df['day'],
            y=df['arbitrum_gas_usd_per_tx'],
            name='Avg Gas',
            mode='lines',
            line=dict(color=line_colors[1])  # Use the second color in the line colors
        ),
        secondary_y=True
    )

    df_fig.update_layout(
        title='Fee Revenue to Gas Comparison',
        barmode=barmode  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
    )

    df_fig.update_xaxes(title_text="Date")

    df_fig.show()

    return df_fig

##### Volatile-Volatile Pair Visualizations
- WETH-ZRO

In [206]:
print([col for col in weth_zro.columns if col.startswith('arbitrum_avg_liquidity')])

['arbitrum_avg_liquidity_0.30', 'arbitrum_avg_liquidity_1.00']


weth_zro_corr_matrix = corr_matrix_graph(weth_zro, '030')

In [207]:
weth_zro.columns

Index(['day', 'arbitrum_avg_liquidity_0.30', 'arbitrum_fee_apr_0.30',
       'arbitrum_fee_tier_0.30', 'arbitrum_fees_usd_0.30',
       'arbitrum_lp_addr_0.30', 'arbitrum_num_trades_0.30',
       'arbitrum_token_pair_0.30', 'arbitrum_tvl_usd_0.30',
       'arbitrum_volume_to_tvl_0.30', 'arbitrum_volume_usd_0.30',
       'arbitrum_net_liquidity_0.30', 'arbitrum_liquidity_added_0.30',
       'arbitrum_liquidity_removed_0.30', 'arbitrum_avg_liquidity_1.00',
       'arbitrum_fee_apr_1.00', 'arbitrum_fee_tier_1.00',
       'arbitrum_fees_usd_1.00', 'arbitrum_lp_addr_1.00',
       'arbitrum_num_trades_1.00', 'arbitrum_token_pair_1.00',
       'arbitrum_tvl_usd_1.00', 'arbitrum_volume_to_tvl_1.00',
       'arbitrum_volume_usd_1.00', 'arbitrum_net_liquidity_1.00',
       'arbitrum_liquidity_added_1.00', 'arbitrum_liquidity_removed_1.00',
       'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd',
       'arbitrum_gas_usd_per_tx_low_gas', 'arbitrum_gas_usd_per_tx_normal_gas',
       'arbitrum_

In [208]:
weth_zro_gas_to_rev_fig = gas_to_rev_chart(weth_zro, barmode='stack')

In [209]:
weth_zro_liquidity_to_trades_fig = liquidity_to_trades_chart(weth_zro)

In [210]:
weth_zro_vol_tvl_ratio_fig = vol_tvl_to_gas_chart(weth_zro)

In [211]:
weth_zro_net_liq_to_gas_fig = net_liq_to_gas_chart(weth_zro, 'stack')

In [212]:
weth_zro_liq_to_gas_fig = liq_to_gas_chart(weth_zro)

In [213]:
weth_zro_trades_to_gas_fig = trades_to_gas_chart(weth_zro)

##### Volatile-Stable Pair Visualizations

- USDC-WETH

In [214]:
liquidity_cols = [col for col in usdc_weth.columns if col.startswith('arbitrum_avg_liquidity')]
usdc_weth_liq_ranked = usdc_weth[liquidity_cols]
usdc_weth_liq_ranked


Unnamed: 0,arbitrum_avg_liquidity_0.01,arbitrum_avg_liquidity_0.05,arbitrum_avg_liquidity_0.30,arbitrum_avg_liquidity_1.00
0,2.075720e+16,4.911041e+18,3.276466e+17,0.000000e+00
1,2.004641e+16,5.013677e+18,3.281683e+17,0.000000e+00
2,1.967474e+16,4.963207e+18,3.281485e+17,0.000000e+00
3,2.611485e+16,5.491211e+18,3.281728e+17,0.000000e+00
4,1.827394e+16,4.962887e+18,3.278365e+17,0.000000e+00
...,...,...,...,...
165,3.309061e+17,7.040240e+18,4.054977e+17,0.000000e+00
166,3.169404e+17,6.908095e+18,4.054185e+17,5.130116e+15
167,2.425884e+17,6.909834e+18,0.000000e+00,0.000000e+00
168,8.079644e+17,6.643308e+18,4.054406e+17,0.000000e+00


In [215]:
usdc_weth_gas_to_rev_fig = gas_to_rev_chart(usdc_weth, barmode='stack')

In [216]:
usdc_weth_liquidity_to_trades_fig = liquidity_to_trades_chart(usdc_weth, barmode='stack')

In [217]:
usdc_weth_vol_tvl_ratio_fig = vol_tvl_to_gas_chart(usdc_weth, barmode='stack')

In [218]:
usdc_weth_net_liq_to_gas_fig = net_liq_to_gas_chart(usdc_weth, barmode='stack')

In [219]:
usdc_weth_liq_to_gas_fig = liq_to_gas_chart(usdc_weth, barmode='stack')

In [220]:
usdc_weth_trades_to_gas_fig = trades_to_gas_chart(usdc_weth, barmode='stack')

##### Stable-Stable Pair Visualizations
- DAI-USDC

In [221]:
dai_usdc_gas_to_rev_fig = gas_to_rev_chart(dai_usdc, barmode='stack')

In [222]:
dai_usdc_liquidity_to_trades_fig = liquidity_to_trades_chart(dai_usdc, barmode='stack')

In [223]:
dai_usdc_vol_tvl_ratio_fig = vol_tvl_to_gas_chart(dai_usdc, barmode='stack')

In [224]:
dai_usdc_net_liq_to_gas_fig = net_liq_to_gas_chart(dai_usdc, barmode='stack')

In [225]:
dai_usdc_liq_to_gas_fig = liq_to_gas_chart(dai_usdc, 'stack')

In [226]:
dai_usdc_trades_to_gas_fig = trades_to_gas_chart(dai_usdc, 'stack')

## Optimism

## Base

## All Network Aggregation

### Aggregate Pool Data 

In [227]:
vol_vol = ['WBTC-WETH','ARB-WETH','WETH-ZRO','WETH-wstETH','WETH-XAI','OP-WETH','WETH-WLD','SNX-WETH','MAI-WETH',
           'MIGGLES-WETH','BRETT-WETH','AERO-WETH','DEGEN-WETH','TOSHI-WETH']
vol_stable = ['USDC-WETH','USDT-WETH','USDC-WBTC','USDC-WLD','USDbC-WETH']
stable_stable = ['DAI-USDC','USDC-USDC','USDC-USDT']

arb_pool_df.columns

arb_pool_filtered = arb_pool_df.drop(columns=['arbitrum_lp_addr','arbitrum_fee_tier','arbitrum_avg_liquidity'])

arb_vol_vol = arb_pool_filtered[arb_pool_filtered['arbitrum_token_pair'].isin(vol_vol)]
arb_vol_stable = arb_pool_filtered[arb_pool_filtered['arbitrum_token_pair'].isin(vol_stable)]
arb_stable_stable = arb_pool_filtered[arb_pool_filtered['arbitrum_token_pair'].isin(stable_stable)]


In [228]:
base_pool_df['day'] = pd.to_datetime(base_pool_df['day'])
base_pool_df.columns

base_pool_filtered = base_pool_df.drop(columns=['base_lp_addr','base_fee_tier','base_avg_liquidity']) 

base_vol_vol = base_pool_filtered[base_pool_filtered['base_token_pair'].isin(vol_vol)]
base_vol_stable = base_pool_filtered[base_pool_filtered['base_token_pair'].isin(vol_stable)]
base_stable_stable = base_pool_filtered[base_pool_filtered['base_token_pair'].isin(stable_stable)]


In [229]:
op_pool_df['day'] = pd.to_datetime(op_pool_df['day'])
op_pool_df.columns

op_pool_filtered = op_pool_df.drop(columns=['optimism_lp_addr','optimism_fee_tier','optimism_avg_liquidity']) 
op_vol_vol = op_pool_filtered[op_pool_filtered['optimism_token_pair'].isin(vol_vol)]
op_vol_stable = op_pool_filtered[op_pool_filtered['optimism_token_pair'].isin(vol_stable)]
op_stable_stable = op_pool_filtered[op_pool_filtered['optimism_token_pair'].isin(stable_stable)]


In [230]:
merged_stable_stable = pd.merge(op_stable_stable, base_stable_stable, on='day', how='outer')
merged_stable_stable = merged_stable_stable.merge(arb_stable_stable, on='day', how='outer')

merged_vol_vol = pd.merge(op_vol_vol, base_vol_vol, on='day', how='outer')
merged_vol_vol = merged_vol_vol.merge(arb_vol_vol, on='day', how='outer')

merged_vol_stable = pd.merge(op_vol_stable, base_vol_stable, on='day', how='outer')
merged_vol_stable = merged_vol_stable.merge(arb_vol_stable, on='day', how='outer')


In [231]:
merged_vol_vol['fee_apr'] = merged_vol_vol[['optimism_fee_apr', 'base_fee_apr','arbitrum_fee_apr']].mean(axis=1)
merged_vol_vol['fees_usd'] = merged_vol_vol[['optimism_fees_usd', 'base_fees_usd','arbitrum_fees_usd']].sum(axis=1)
merged_vol_vol['num_trades'] = merged_vol_vol[['optimism_num_trades', 'base_num_trades','arbitrum_num_trades']].sum(axis=1)
merged_vol_vol['tvl_usd'] = merged_vol_vol[['optimism_tvl_usd', 'base_tvl_usd','arbitrum_tvl_usd']].sum(axis=1)
merged_vol_vol['volume_to_tvl'] = merged_vol_vol[['optimism_volume_to_tvl', 'base_volume_to_tvl','arbitrum_volume_to_tvl']].mean(axis=1)
merged_vol_vol['volume_usd'] = merged_vol_vol[['optimism_volume_usd', 'base_volume_usd','arbitrum_volume_usd']].sum(axis=1)

In [232]:
merged_vol_stable['fee_apr'] = merged_vol_stable[['optimism_fee_apr', 'base_fee_apr','arbitrum_fee_apr']].mean(axis=1)
merged_vol_stable['fees_usd'] = merged_vol_stable[['optimism_fees_usd', 'base_fees_usd','arbitrum_fees_usd']].sum(axis=1)
merged_vol_stable['num_trades'] = merged_vol_stable[['optimism_num_trades', 'base_num_trades','arbitrum_num_trades']].sum(axis=1)
merged_vol_stable['tvl_usd'] = merged_vol_stable[['optimism_tvl_usd', 'base_tvl_usd','arbitrum_tvl_usd']].sum(axis=1)
merged_vol_stable['volume_to_tvl'] = merged_vol_stable[['optimism_volume_to_tvl', 'base_volume_to_tvl','arbitrum_volume_to_tvl']].mean(axis=1)
merged_vol_stable['volume_usd'] = merged_vol_stable[['optimism_volume_usd', 'base_volume_usd','arbitrum_volume_usd']].sum(axis=1)

In [233]:
merged_stable_stable['fee_apr'] = merged_stable_stable[['optimism_fee_apr', 'base_fee_apr','arbitrum_fee_apr']].mean(axis=1)
merged_stable_stable['fees_usd'] = merged_stable_stable[['optimism_fees_usd', 'base_fees_usd','arbitrum_fees_usd']].sum(axis=1)
merged_stable_stable['num_trades'] = merged_stable_stable[['optimism_num_trades', 'base_num_trades','arbitrum_num_trades']].sum(axis=1)
merged_stable_stable['tvl_usd'] = merged_stable_stable[['optimism_tvl_usd', 'base_tvl_usd','arbitrum_tvl_usd']].sum(axis=1)
merged_stable_stable['volume_to_tvl'] = merged_stable_stable[['optimism_volume_to_tvl', 'base_volume_to_tvl','arbitrum_volume_to_tvl']].mean(axis=1)
merged_stable_stable['volume_usd'] = merged_stable_stable[['optimism_volume_usd', 'base_volume_usd','arbitrum_volume_usd']].sum(axis=1)

In [234]:
agg_funcs = {
    'tvl_usd': 'sum',
    'num_trades': 'sum',
    'fee_apr': 'mean',
    'fees_usd': 'sum',
    'volume_usd': 'sum',
    'volume_to_tvl': 'mean'
}

def aggregate_metrics(df, agg_funcs):
    aggregated_df = df.groupby('day').agg(agg_funcs).reset_index()
    return aggregated_df

In [235]:
merged_stable_stable = merged_stable_stable[['day', 'fee_apr', 'fees_usd', 'num_trades', 'tvl_usd', 'volume_to_tvl', 'volume_usd']]
merged_vol_stable = merged_vol_stable[['day', 'fee_apr', 'fees_usd', 'num_trades', 'tvl_usd', 'volume_to_tvl', 'volume_usd']]
merged_vol_vol = merged_vol_vol[['day', 'fee_apr', 'fees_usd', 'num_trades', 'tvl_usd', 'volume_to_tvl', 'volume_usd']]

agg_stable_stable = aggregate_metrics(merged_stable_stable, agg_funcs)
agg_vol_stable = aggregate_metrics(merged_vol_stable, agg_funcs)
agg_vol_vol = aggregate_metrics(merged_vol_vol, agg_funcs)

In [236]:
agg_vol_vol

Unnamed: 0,day,tvl_usd,num_trades,fee_apr,fees_usd,volume_usd,volume_to_tvl
0,2024-07-14 00:00:00+00:00,1.625222e+10,289900.0,0.011635,4.146620e+05,3.954600e+08,0.032466
1,2024-07-14 01:00:00+00:00,1.245797e+10,172582.0,0.009540,1.940160e+05,1.429114e+08,0.017695
2,2024-07-14 02:00:00+00:00,1.647946e+10,343310.0,0.018335,5.366296e+05,2.818946e+08,0.060604
3,2024-07-14 03:00:00+00:00,1.190710e+10,104842.0,0.007529,1.686688e+05,1.051103e+08,0.011546
4,2024-07-14 04:00:00+00:00,1.851575e+10,263032.0,0.009214,4.603534e+05,4.647452e+08,0.019965
...,...,...,...,...,...,...,...
165,2024-07-20 21:00:00+00:00,2.064725e+10,234200.0,0.055589,7.585593e+05,3.836818e+08,0.051636
166,2024-07-20 22:00:00+00:00,1.400969e+10,206552.0,0.079238,1.075701e+06,2.604299e+08,0.053920
167,2024-07-20 23:00:00+00:00,1.843391e+10,228481.0,0.043500,9.674947e+05,2.507662e+08,0.027315
168,2024-07-21 00:00:00+00:00,1.576911e+10,214247.0,0.074008,1.346892e+06,3.732021e+08,0.046817


### Aggregate Gas

In [237]:
arbitrum_gas_df

Unnamed: 0,day,arbitrum_gas_usd_per_tx,arbitrum_median_gas_usd,arbitrum_gas_usd_per_tx_low_gas,arbitrum_gas_usd_per_tx_normal_gas,arbitrum_gas_usd_per_tx_high_gas,arbitrum_median_gas_usd_low_gas,arbitrum_median_gas_usd_normal_gas,arbitrum_median_gas_usd_high_gas
0,2024-04-22 00:00:00+00:00,0.010387,0.006123,0,1,0,0,1,0
1,2024-04-22 01:00:00+00:00,0.010970,0.006445,0,0,1,0,1,0
2,2024-04-22 02:00:00+00:00,0.009510,0.005550,0,1,0,0,1,0
3,2024-04-22 03:00:00+00:00,0.010174,0.005812,0,1,0,0,1,0
4,2024-04-22 04:00:00+00:00,0.009947,0.006139,0,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...
2157,2024-07-20 21:00:00+00:00,0.009192,0.005052,0,1,0,0,1,0
2158,2024-07-20 22:00:00+00:00,0.010094,0.005563,0,1,0,0,1,0
2159,2024-07-20 23:00:00+00:00,0.009521,0.005383,0,1,0,0,1,0
2160,2024-07-21 00:00:00+00:00,0.009502,0.005291,0,1,0,0,1,0


In [238]:
optimism_gas_df.rename(columns={"dt":"day"}, inplace=True)
optimism_gas_df['day'] = pd.to_datetime(optimism_gas_df['day']) 

In [239]:
base_gas_df.rename(columns={"dt":"day"}, inplace=True)
base_gas_df['day'] = pd.to_datetime(base_gas_df['day']) 

In [240]:
merged_gas_df = pd.merge(arbitrum_gas_df,optimism_gas_df,how='inner',on='day' )
merged_gas_df = merged_gas_df.merge(base_gas_df, how='inner', on='day')
merged_gas_df.columns

Index(['day', 'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd',
       'arbitrum_gas_usd_per_tx_low_gas', 'arbitrum_gas_usd_per_tx_normal_gas',
       'arbitrum_gas_usd_per_tx_high_gas', 'arbitrum_median_gas_usd_low_gas',
       'arbitrum_median_gas_usd_normal_gas',
       'arbitrum_median_gas_usd_high_gas', 'optimism_gas_usd_per_tx',
       'optimism_median_gas_usd', 'optimism_gas_usd_per_tx_low_gas',
       'optimism_gas_usd_per_tx_normal_gas',
       'optimism_gas_usd_per_tx_high_gas', 'optimism_median_gas_usd_low_gas',
       'optimism_median_gas_usd_normal_gas',
       'optimism_median_gas_usd_high_gas', 'base_gas_usd_per_tx',
       'base_median_gas_usd', 'base_gas_usd_per_tx_low_gas',
       'base_gas_usd_per_tx_normal_gas', 'base_gas_usd_per_tx_high_gas',
       'base_median_gas_usd_low_gas', 'base_median_gas_usd_normal_gas',
       'base_median_gas_usd_high_gas'],
      dtype='object')

In [241]:
merged_gas_df['avg_gas'] = merged_gas_df[['arbitrum_gas_usd_per_tx','optimism_gas_usd_per_tx','base_gas_usd_per_tx']].mean(axis=1)
merged_gas_df['median_gas'] = merged_gas_df[['arbitrum_median_gas_usd','optimism_median_gas_usd','base_median_gas_usd']].mean(axis=1)
merged_gas_df = merged_gas_df[['day','avg_gas','median_gas']]

In [242]:
agg_gas_df = merged_gas_df.groupby('day').mean()
agg_gas_df = gas_classifier(agg_gas_df)

#### L1 and L2 Gas Comparison

In [243]:
l2_agg = agg_gas_df[['avg_gas','median_gas']]
l2_agg.reset_index(inplace=True)

In [244]:
merged_gas_df

Unnamed: 0,day,avg_gas,median_gas
0,2024-04-22 00:00:00+00:00,0.032086,0.006833
1,2024-04-22 01:00:00+00:00,0.041133,0.007923
2,2024-04-22 02:00:00+00:00,0.030572,0.007440
3,2024-04-22 03:00:00+00:00,0.067419,0.010030
4,2024-04-22 04:00:00+00:00,0.038169,0.010339
...,...,...,...
2157,2024-07-20 21:00:00+00:00,0.085266,0.014616
2158,2024-07-20 22:00:00+00:00,0.056465,0.014726
2159,2024-07-20 23:00:00+00:00,0.162665,0.011997
2160,2024-07-21 00:00:00+00:00,0.059415,0.013390


In [245]:
eth_gas_df.sort_values(by='dt', inplace=True)
eth_gas_df.rename(columns={'dt':'day'}, inplace=True)

In [246]:
eth_gas_df

Unnamed: 0,day,eth_gas_usd_per_tx,eth_median_gas_usd
72,2024-04-29 00:00:00.000 UTC,3.185942,1.421670
36,2024-04-30 00:00:00.000 UTC,4.452919,1.855182
8,2024-05-01 00:00:00.000 UTC,3.208347,1.421428
10,2024-05-02 00:00:00.000 UTC,2.741343,1.237304
42,2024-05-03 00:00:00.000 UTC,2.702320,1.130987
...,...,...,...
2,2024-07-23 00:00:00.000 UTC,2.631963,0.970493
34,2024-07-24 00:00:00.000 UTC,2.369506,0.852330
41,2024-07-25 00:00:00.000 UTC,2.340331,0.845032
45,2024-07-26 00:00:00.000 UTC,1.499424,0.526533


In [247]:
base_gas_df = base_gas_df.rename(columns={'dt':'day'})
base_gas_df['day'] = pd.to_datetime(base_gas_df['day'])

In [248]:
eth_gas_df['day'] = pd.to_datetime(eth_gas_df['day'])

desc_gas_df = pd.merge(eth_gas_df,arbitrum_gas_df[['day','arbitrum_gas_usd_per_tx','arbitrum_median_gas_usd']],
                       how='inner', on='day')
desc_gas_df = pd.merge(desc_gas_df, optimism_gas_df[['day','optimism_gas_usd_per_tx','optimism_median_gas_usd']],
                       how='inner', on='day')
desc_gas_df = pd.merge(desc_gas_df, base_gas_df[['day','base_gas_usd_per_tx','base_median_gas_usd']],
                       how='inner', on='day')
desc_gas_df = pd.merge(desc_gas_df, merged_gas_df[['day','avg_gas','median_gas']],
                       how='inner', on='day')

In [249]:
desc_gas_df

Unnamed: 0,day,eth_gas_usd_per_tx,eth_median_gas_usd,arbitrum_gas_usd_per_tx,arbitrum_median_gas_usd,optimism_gas_usd_per_tx,optimism_median_gas_usd,base_gas_usd_per_tx,base_median_gas_usd,avg_gas,median_gas
0,2024-04-29 00:00:00+00:00,3.185942,1.421670,0.009756,0.004944,0.107252,0.027768,0.064878,0.018544,0.060628,0.017085
1,2024-04-30 00:00:00+00:00,4.452919,1.855182,0.010127,0.005228,0.085072,0.018183,0.064134,0.016113,0.053111,0.013175
2,2024-05-01 00:00:00+00:00,3.208347,1.421428,0.010425,0.005779,0.085598,0.020022,0.066837,0.015063,0.054287,0.013622
3,2024-05-02 00:00:00+00:00,2.741343,1.237304,0.009378,0.004628,0.077186,0.021256,0.088339,0.013878,0.058301,0.013254
4,2024-05-03 00:00:00+00:00,2.702320,1.130987,0.007915,0.004382,0.134850,0.012204,0.075886,0.026621,0.072884,0.014402
...,...,...,...,...,...,...,...,...,...,...,...
79,2024-07-17 00:00:00+00:00,4.547521,1.845973,0.009696,0.005991,0.119131,0.025731,0.021095,0.003423,0.049974,0.011715
80,2024-07-18 00:00:00+00:00,4.804253,1.869219,0.008712,0.005965,0.119192,0.033435,0.028094,0.005637,0.051999,0.015013
81,2024-07-19 00:00:00+00:00,3.444182,1.287751,0.007343,0.006156,0.125296,0.032297,0.026221,0.003512,0.052953,0.013988
82,2024-07-20 00:00:00+00:00,2.087966,0.732594,0.009370,0.005517,0.128515,0.031127,0.054633,0.005302,0.064173,0.013982


##### AVG Gas Comparison

In [250]:
# Calculate means
eth_avg_gas_mean = desc_gas_df['eth_gas_usd_per_tx'].mean()
arbitrum_avg_gas_mean = desc_gas_df['arbitrum_gas_usd_per_tx'].mean()
optimism_avg_gas_mean = desc_gas_df['optimism_gas_usd_per_tx'].mean()
base_avg_gas_mean = desc_gas_df['base_gas_usd_per_tx'].mean()
l2_avg_gas_mean = desc_gas_df['avg_gas'].mean()

# Calculate percentage differences
arbitrum_vs_eth = (eth_avg_gas_mean - arbitrum_avg_gas_mean) / eth_avg_gas_mean * 100
optimism_vs_eth = (eth_avg_gas_mean - optimism_avg_gas_mean) / eth_avg_gas_mean * 100
base_vs_eth = (eth_avg_gas_mean - base_avg_gas_mean) / eth_avg_gas_mean * 100
l2_agg_vs_eth = (eth_avg_gas_mean - l2_avg_gas_mean) / eth_avg_gas_mean * 100


# Print results
print(f'ETH avg gas: ${eth_avg_gas_mean:.2f}')
print(f'Arbitrum avg gas: ${arbitrum_avg_gas_mean:.4f}')
print(f'Optimism avg gas: ${optimism_avg_gas_mean:.4f}')
print(f'Base avg gas: ${base_avg_gas_mean:.4f}')
print(f'L2 avg gas: ${l2_avg_gas_mean:.4f}')
print(f'Arbitrum vs ETH: {arbitrum_vs_eth:.2f}% cheaper')
print(f'Optimism vs ETH: {optimism_vs_eth:.2f}% cheaper')
print(f'Base vs ETH: {base_vs_eth:.2f}% cheaper')
print(f'L2 Agg vs ETH: {l2_agg_vs_eth:.2f}% cheaper')


ETH avg gas: $3.52
Arbitrum avg gas: $0.0133
Optimism avg gas: $0.0957
Base avg gas: $0.0579
L2 avg gas: $0.0556
Arbitrum vs ETH: 99.62% cheaper
Optimism vs ETH: 97.28% cheaper
Base vs ETH: 98.36% cheaper
L2 Agg vs ETH: 98.42% cheaper


In [251]:
# Data for plotting
labels = ['ETH', 'Arbitrum', 'Optimism', 'Base', 'L2 Aggregated']
avg_gas = [eth_avg_gas_mean, arbitrum_avg_gas_mean, optimism_avg_gas_mean, base_avg_gas_mean, l2_avg_gas_mean]

# Create a bar chart
fig = go.Figure(data=[go.Bar(x=labels, y=avg_gas, text=[f'${x:.4f}' for x in avg_gas], textposition='auto')])

# Update layout
fig.update_layout(
    title='Average Gas Fees Comparison',
    xaxis_title='Network',
    yaxis_title='Average Gas Fee (USD)',
    yaxis=dict(tickprefix='$', tickformat=',.2f'),
    template='plotly_white'
)

# Show the plot
fig.show()

### Combined Gas and Pools

In [252]:
agg_vol_vol

Unnamed: 0,day,tvl_usd,num_trades,fee_apr,fees_usd,volume_usd,volume_to_tvl
0,2024-07-14 00:00:00+00:00,1.625222e+10,289900.0,0.011635,4.146620e+05,3.954600e+08,0.032466
1,2024-07-14 01:00:00+00:00,1.245797e+10,172582.0,0.009540,1.940160e+05,1.429114e+08,0.017695
2,2024-07-14 02:00:00+00:00,1.647946e+10,343310.0,0.018335,5.366296e+05,2.818946e+08,0.060604
3,2024-07-14 03:00:00+00:00,1.190710e+10,104842.0,0.007529,1.686688e+05,1.051103e+08,0.011546
4,2024-07-14 04:00:00+00:00,1.851575e+10,263032.0,0.009214,4.603534e+05,4.647452e+08,0.019965
...,...,...,...,...,...,...,...
165,2024-07-20 21:00:00+00:00,2.064725e+10,234200.0,0.055589,7.585593e+05,3.836818e+08,0.051636
166,2024-07-20 22:00:00+00:00,1.400969e+10,206552.0,0.079238,1.075701e+06,2.604299e+08,0.053920
167,2024-07-20 23:00:00+00:00,1.843391e+10,228481.0,0.043500,9.674947e+05,2.507662e+08,0.027315
168,2024-07-21 00:00:00+00:00,1.576911e+10,214247.0,0.074008,1.346892e+06,3.732021e+08,0.046817


In [253]:
agg_stable_stable = agg_stable_stable.merge(agg_gas_df, how='inner', on='day')
agg_vol_stable = agg_vol_stable.merge(agg_gas_df, how='inner', on='day')
agg_vol_vol = agg_vol_vol.merge(agg_gas_df, how='inner', on='day')

In [254]:
agg_vol_vol

Unnamed: 0,day,tvl_usd,num_trades,fee_apr,fees_usd,volume_usd,volume_to_tvl,avg_gas,median_gas,avg_gas_low_gas,avg_gas_normal_gas,avg_gas_high_gas,median_gas_low_gas,median_gas_normal_gas,median_gas_high_gas
0,2024-07-14 00:00:00+00:00,1.625222e+10,289900.0,0.011635,4.146620e+05,3.954600e+08,0.032466,0.016990,0.003415,1,0,0,1,0,0
1,2024-07-14 01:00:00+00:00,1.245797e+10,172582.0,0.009540,1.940160e+05,1.429114e+08,0.017695,0.017306,0.002803,1,0,0,1,0,0
2,2024-07-14 02:00:00+00:00,1.647946e+10,343310.0,0.018335,5.366296e+05,2.818946e+08,0.060604,0.017207,0.002690,1,0,0,1,0,0
3,2024-07-14 03:00:00+00:00,1.190710e+10,104842.0,0.007529,1.686688e+05,1.051103e+08,0.011546,0.012653,0.002732,1,0,0,1,0,0
4,2024-07-14 04:00:00+00:00,1.851575e+10,263032.0,0.009214,4.603534e+05,4.647452e+08,0.019965,0.014438,0.002655,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,2024-07-20 21:00:00+00:00,2.064725e+10,234200.0,0.055589,7.585593e+05,3.836818e+08,0.051636,0.085266,0.014616,0,0,1,0,1,0
166,2024-07-20 22:00:00+00:00,1.400969e+10,206552.0,0.079238,1.075701e+06,2.604299e+08,0.053920,0.056465,0.014726,0,1,0,0,1,0
167,2024-07-20 23:00:00+00:00,1.843391e+10,228481.0,0.043500,9.674947e+05,2.507662e+08,0.027315,0.162665,0.011997,0,0,1,0,1,0
168,2024-07-21 00:00:00+00:00,1.576911e+10,214247.0,0.074008,1.346892e+06,3.732021e+08,0.046817,0.059415,0.013390,0,1,0,0,1,0


## Trader Behavior

In [255]:
trader_classifier_df['dt'] = pd.to_datetime(trader_classifier_df['dt'])
trader_classifier_df.rename(columns={"dt":"day"}, inplace=True)
trader_classifier_df

Unnamed: 0,trader_type,day,tx_count,total_volume_usd,avg_order_size_usd,unique_contracts
0,Professional,2024-07-21 15:00:00+00:00,928,1047810.93,1183.967153,104
1,Retail,2024-07-21 15:00:00+00:00,176,91856.49,540.332294,60
2,Professional,2024-07-21 14:00:00+00:00,7552,12913378.38,1766.536030,252
3,Retail,2024-07-21 14:00:00+00:00,1790,294765.21,170.680492,196
4,Professional,2024-07-21 13:00:00+00:00,4472,5883981.27,1367.731583,211
...,...,...,...,...,...,...
18553,Retail,2023-07-01 02:00:00+00:00,1170,278669.36,432.716398,108
18554,Professional,2023-07-01 01:00:00+00:00,4263,12052488.42,3328.497216,161
18555,Retail,2023-07-01 01:00:00+00:00,1254,301856.49,417.505519,110
18556,Professional,2023-07-01 00:00:00+00:00,4124,11335892.35,3270.597908,176


In [256]:
trader_pivot_df = trader_classifier_df.pivot(
    index='day',
    columns='trader_type',
    values=['tx_count', 'total_volume_usd', 'avg_order_size_usd', 'unique_contracts']
)
trader_pivot_df.columns = [f'{col[0]}_{col[1]}' for col in trader_pivot_df.columns]
trader_pivot_df.describe()


Unnamed: 0,tx_count_Professional,tx_count_Retail,total_volume_usd_Professional,total_volume_usd_Retail,avg_order_size_usd_Professional,avg_order_size_usd_Retail,unique_contracts_Professional,unique_contracts_Retail
count,9280.0,9278.0,9280.0,9278.0,9280.0,9278.0,9280.0,9278.0
mean,4294.61056,1225.897176,10733240.0,408151.4,2798.526942,437.026216,177.43125,134.548071
std,3870.219867,706.629295,13150370.0,351692.3,1248.233905,230.56515,61.317602,37.795907
min,1.0,22.0,26.38,15569.71,26.38,34.247021,1.0,14.0
25%,1687.75,733.0,3762952.0,194714.5,1885.668535,267.275442,132.0,106.0
50%,3035.5,1032.0,7268443.0,320713.8,2578.328731,390.336434,167.0,130.0
75%,5587.25,1549.75,13147560.0,524499.5,3475.973406,557.089698,213.25,158.0
max,36615.0,10168.0,315693600.0,8755907.0,29912.725717,2448.575457,603.0,274.0


In [257]:
arbitrum_gas_df.describe()

Unnamed: 0,arbitrum_gas_usd_per_tx,arbitrum_median_gas_usd,arbitrum_gas_usd_per_tx_low_gas,arbitrum_gas_usd_per_tx_normal_gas,arbitrum_gas_usd_per_tx_high_gas,arbitrum_median_gas_usd_low_gas,arbitrum_median_gas_usd_normal_gas,arbitrum_median_gas_usd_high_gas
count,2162.0,2162.0,2162.0,2162.0,2162.0,2162.0,2162.0,2162.0
mean,0.023634,0.01286,0.250231,0.499537,0.250231,0.250231,0.499537,0.250231
std,0.22163,0.129315,0.433246,0.500115,0.433246,0.433246,0.500115,0.433246
min,0.004397,0.001491,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.006888,0.004532,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.008472,0.005564,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.010704,0.007182,0.75,1.0,0.75,0.75,1.0,0.75
max,6.944713,4.18109,1.0,1.0,1.0,1.0,1.0,1.0


In [258]:
combined_trader_data = pd.merge(trader_pivot_df, arbitrum_gas_df, how='inner', on='day')


In [259]:
combined_trader_data.isna().sum().sum()

np.int64(0)

In [260]:
trader_classifier_tx_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
trader_classifier_tx_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['tx_count_Professional'],
        name='Professional Tx'
    ),
    secondary_y=False
)
trader_classifier_tx_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['tx_count_Retail'],
        name='Retail Tx'
    ),
    secondary_y=False
)


trader_classifier_tx_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_tx_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_tx_fig.update_layout(
    title='Trader Type Tx to Gas',
    barmode='stack'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

trader_classifier_tx_fig.update_xaxes(title_text="Date")

trader_classifier_tx_fig.show()

In [261]:
trader_classifier_vol_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
trader_classifier_vol_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['total_volume_usd_Professional'],
        name='Professional Vol'
    ),
    secondary_y=False
)
trader_classifier_vol_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['total_volume_usd_Retail'],
        name='Retail Vol'
    ),
    secondary_y=False
)


trader_classifier_vol_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_vol_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_vol_fig.update_layout(
    title='Trader Type Vol to Gas',
    barmode='stack'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

trader_classifier_vol_fig.update_xaxes(title_text="Date")

trader_classifier_vol_fig.show()

In [262]:
trader_classifier_order_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
trader_classifier_order_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['avg_order_size_usd_Professional'],
        name='Professional Avg Order Size'
    ),
    secondary_y=False
)
trader_classifier_order_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['avg_order_size_usd_Retail'],
        name='Retail Avg Order Size'
    ),
    secondary_y=False
)


trader_classifier_order_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_order_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_order_fig.update_layout(
    title='Trader Type Avg Order Size to Gas',
    barmode='stack'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

trader_classifier_order_fig.update_xaxes(title_text="Date")

trader_classifier_order_fig.show()

In [263]:
trader_classifier_contracts_fig = make_subplots(specs=[[{"secondary_y": True}]])
    
trader_classifier_contracts_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['unique_contracts_Professional'],
        name='Professional # Unique Contracts'
    ),
    secondary_y=False
)
trader_classifier_contracts_fig.add_trace(
    go.Bar(
        x=combined_trader_data['day'],
        y=combined_trader_data['unique_contracts_Retail'],
        name='Retail # Unique Contracts'
    ),
    secondary_y=False
)


trader_classifier_contracts_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_median_gas_usd'],
        name='Median Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_contracts_fig.add_trace(
    go.Scatter(
        x=combined_trader_data['day'],
        y=combined_trader_data['arbitrum_gas_usd_per_tx'],
        name='Avg Gas',
        mode='lines'
    ),
    secondary_y=True
)

trader_classifier_contracts_fig.update_layout(
    title='Trader Type # of Contract Interactions to Gas',
    barmode='stack'  # Set the bar mode to either 'group' for side-by-side or 'stack' for stacked
)

trader_classifier_contracts_fig.update_xaxes(title_text="Date")

trader_classifier_contracts_fig.show()

# Gas Fee Correlation Analyses

## Pearson Correlation

### Arbitrum

#### Aggregate Correlations

In [264]:
aggregated_arb_hour.columns

Index(['day', 'arbitrum_fees_usd', 'arbitrum_tvl_usd', 'arbitrum_volume_usd',
       'arbitrum_num_trades', 'arbitrum_gas_usd_per_tx',
       'arbitrum_median_gas_usd', 'arbitrum_gas_usd_per_tx_low_gas',
       'arbitrum_gas_usd_per_tx_normal_gas',
       'arbitrum_gas_usd_per_tx_high_gas', 'arbitrum_median_gas_usd_low_gas',
       'arbitrum_median_gas_usd_normal_gas',
       'arbitrum_median_gas_usd_high_gas', 'arbitrum_volume_to_tvl'],
      dtype='object')

##### Avg Gas

In [265]:
aggregated_correlations = aggregated_arb_hour.corr()
avg_gas_correlations = aggregated_correlations['arbitrum_gas_usd_per_tx'].sort_values(ascending=False)
for index, value in avg_gas_correlations.items():
    print(f"{index:50} {value}")

arbitrum_gas_usd_per_tx                            1.0
arbitrum_median_gas_usd                            0.9219147473986707
arbitrum_gas_usd_per_tx_high_gas                   0.5117455965240303
arbitrum_median_gas_usd_high_gas                   0.47593217534905663
arbitrum_volume_usd                                0.4242897969256834
arbitrum_fees_usd                                  0.41528832482832984
arbitrum_volume_to_tvl                             0.40799098455068333
arbitrum_num_trades                                0.37050902566466637
arbitrum_tvl_usd                                   0.26224181865538926
day                                                0.09103870271185989
arbitrum_median_gas_usd_low_gas                    -0.15956977053706986
arbitrum_gas_usd_per_tx_low_gas                    -0.17660963839556185
arbitrum_median_gas_usd_normal_gas                 -0.3531063510394347
arbitrum_gas_usd_per_tx_normal_gas                 -0.3726402781583098


##### Median Gas

In [266]:
aggregated_correlations = aggregated_arb_hour.corr()
avg_gas_correlations = aggregated_correlations['arbitrum_median_gas_usd'].sort_values(ascending=False)
for index, value in avg_gas_correlations.items():
    print(f"{index:50} {value}")

arbitrum_median_gas_usd                            1.0
arbitrum_gas_usd_per_tx                            0.9219147473986707
arbitrum_median_gas_usd_high_gas                   0.6039378924265673
arbitrum_gas_usd_per_tx_high_gas                   0.5727797482053371
arbitrum_volume_usd                                0.49951630776691286
arbitrum_volume_to_tvl                             0.48199175789463616
arbitrum_fees_usd                                  0.4768814541769718
arbitrum_num_trades                                0.44492753979708344
arbitrum_tvl_usd                                   0.2713983969351232
day                                                0.05780970281930259
arbitrum_gas_usd_per_tx_low_gas                    -0.2259354202010674
arbitrum_median_gas_usd_low_gas                    -0.27523878460518936
arbitrum_gas_usd_per_tx_normal_gas                 -0.40015069433550965
arbitrum_median_gas_usd_normal_gas                 -0.4056149516315369


#### Volatile-Volatile Pair

In [267]:
numeric_weth_zro = weth_zro.select_dtypes(include=[np.number])
numeric_weth_zro.columns

Index(['arbitrum_avg_liquidity_0.30', 'arbitrum_fee_apr_0.30',
       'arbitrum_fees_usd_0.30', 'arbitrum_num_trades_0.30',
       'arbitrum_tvl_usd_0.30', 'arbitrum_volume_to_tvl_0.30',
       'arbitrum_volume_usd_0.30', 'arbitrum_net_liquidity_0.30',
       'arbitrum_liquidity_added_0.30', 'arbitrum_liquidity_removed_0.30',
       'arbitrum_avg_liquidity_1.00', 'arbitrum_fee_apr_1.00',
       'arbitrum_fees_usd_1.00', 'arbitrum_num_trades_1.00',
       'arbitrum_tvl_usd_1.00', 'arbitrum_volume_to_tvl_1.00',
       'arbitrum_volume_usd_1.00', 'arbitrum_net_liquidity_1.00',
       'arbitrum_liquidity_added_1.00', 'arbitrum_liquidity_removed_1.00',
       'arbitrum_gas_usd_per_tx', 'arbitrum_median_gas_usd',
       'arbitrum_gas_usd_per_tx_low_gas', 'arbitrum_gas_usd_per_tx_normal_gas',
       'arbitrum_gas_usd_per_tx_high_gas', 'arbitrum_median_gas_usd_low_gas',
       'arbitrum_median_gas_usd_normal_gas',
       'arbitrum_median_gas_usd_high_gas'],
      dtype='object')

##### Avg Gas

In [268]:
volatile_volatile_correlations = numeric_weth_zro.corr()
avg_gas_vol_vol_correlations = volatile_volatile_correlations['arbitrum_gas_usd_per_tx'].sort_values(ascending=False)
for index, value in avg_gas_vol_vol_correlations.items():
    print(f"{index:50} {value}")

arbitrum_gas_usd_per_tx                            1.0
arbitrum_median_gas_usd                            0.9219147473986707
arbitrum_gas_usd_per_tx_high_gas                   0.5117455965240303
arbitrum_median_gas_usd_high_gas                   0.47593217534905663
arbitrum_liquidity_added_0.30                      0.3179650115897875
arbitrum_avg_liquidity_0.30                        0.25354986008866914
arbitrum_net_liquidity_0.30                        0.21042355710465704
arbitrum_liquidity_removed_1.00                    0.19308819929507967
arbitrum_liquidity_added_1.00                      0.1039651831920677
arbitrum_tvl_usd_0.30                              0.017530381349635242
arbitrum_liquidity_removed_0.30                    -0.04385077750067204
arbitrum_fee_apr_0.30                              -0.051862560348301154
arbitrum_volume_to_tvl_0.30                        -0.05186256034831916
arbitrum_fees_usd_0.30                             -0.05353216728653922
arbitrum_volume_usd_

##### Median Gas

In [269]:
volatile_volatile_correlations = numeric_weth_zro.corr()
avg_gas_vol_vol_correlations = volatile_volatile_correlations['arbitrum_median_gas_usd'].sort_values(ascending=False)
for index, value in avg_gas_vol_vol_correlations.items():
    print(f"{index:50} {value}")

arbitrum_median_gas_usd                            1.0
arbitrum_gas_usd_per_tx                            0.9219147473986707
arbitrum_median_gas_usd_high_gas                   0.6039378924265673
arbitrum_gas_usd_per_tx_high_gas                   0.5727797482053371
arbitrum_liquidity_added_0.30                      0.3628984503119458
arbitrum_avg_liquidity_0.30                        0.3391716995209209
arbitrum_liquidity_removed_1.00                    0.2327869341425121
arbitrum_net_liquidity_0.30                        0.2115459602817172
arbitrum_liquidity_added_1.00                      0.07470707599296915
arbitrum_liquidity_removed_0.30                    -0.011199381448984467
arbitrum_avg_liquidity_1.00                        -0.02553279377052578
arbitrum_tvl_usd_0.30                              -0.030126927728474442
arbitrum_fee_apr_0.30                              -0.0965406755714302
arbitrum_volume_to_tvl_0.30                        -0.09654067557144701
arbitrum_fees_usd_0.30 

#### Volatile-Stable Pair

In [270]:
numeric_usdc_weth = usdc_weth.select_dtypes(include=[np.number])
numeric_usdc_weth.columns

Index(['arbitrum_avg_liquidity_0.01', 'arbitrum_fee_apr_0.01',
       'arbitrum_fees_usd_0.01', 'arbitrum_num_trades_0.01',
       'arbitrum_tvl_usd_0.01', 'arbitrum_volume_to_tvl_0.01',
       'arbitrum_volume_usd_0.01', 'arbitrum_net_liquidity_0.01',
       'arbitrum_liquidity_added_0.01', 'arbitrum_liquidity_removed_0.01',
       'arbitrum_avg_liquidity_0.05', 'arbitrum_fee_apr_0.05',
       'arbitrum_fees_usd_0.05', 'arbitrum_num_trades_0.05',
       'arbitrum_tvl_usd_0.05', 'arbitrum_volume_to_tvl_0.05',
       'arbitrum_volume_usd_0.05', 'arbitrum_net_liquidity_0.05',
       'arbitrum_liquidity_added_0.05', 'arbitrum_liquidity_removed_0.05',
       'arbitrum_avg_liquidity_0.30', 'arbitrum_fee_apr_0.30',
       'arbitrum_fees_usd_0.30', 'arbitrum_num_trades_0.30',
       'arbitrum_tvl_usd_0.30', 'arbitrum_volume_to_tvl_0.30',
       'arbitrum_volume_usd_0.30', 'arbitrum_net_liquidity_0.30',
       'arbitrum_liquidity_added_0.30', 'arbitrum_liquidity_removed_0.30',
       'arbitrum

##### Avg Gas

In [271]:
volatile_stable_correlations = numeric_usdc_weth.corr()
avg_gas_vol_stb_correlations = volatile_stable_correlations['arbitrum_gas_usd_per_tx'].sort_values(ascending=False)
for index, value in avg_gas_vol_stb_correlations.items():
    print(f"{index:50} {value}")

arbitrum_gas_usd_per_tx                            1.0
arbitrum_median_gas_usd                            0.9219147473986707
arbitrum_gas_usd_per_tx_high_gas                   0.5117455965240303
arbitrum_median_gas_usd_high_gas                   0.47593217534905663
arbitrum_volume_usd_1.00                           0.4700260513977799
arbitrum_fees_usd_1.00                             0.4700260513977798
arbitrum_fee_apr_1.00                              0.45792402761956075
arbitrum_volume_to_tvl_1.00                        0.4579240276195266
arbitrum_fees_usd_0.05                             0.4409119569836468
arbitrum_volume_usd_0.05                           0.44091195698364677
arbitrum_volume_to_tvl_0.05                        0.4403496460484363
arbitrum_fee_apr_0.05                              0.44034964604820875
arbitrum_num_trades_0.05                           0.38073707616052516
arbitrum_num_trades_1.00                           0.35894404279143305
arbitrum_fee_apr_0.30        

##### Median Gas

In [272]:
volatile_stable_correlations = numeric_usdc_weth.corr()
avg_gas_vol_stb_correlations = volatile_stable_correlations['arbitrum_median_gas_usd'].sort_values(ascending=False)
for index, value in avg_gas_vol_stb_correlations.items():
    print(f"{index:50} {value}")

arbitrum_median_gas_usd                            1.0
arbitrum_gas_usd_per_tx                            0.9219147473986707
arbitrum_median_gas_usd_high_gas                   0.6039378924265673
arbitrum_gas_usd_per_tx_high_gas                   0.5727797482053371
arbitrum_volume_usd_0.05                           0.5214916410794086
arbitrum_fees_usd_0.05                             0.5214916410794082
arbitrum_volume_to_tvl_0.05                        0.5213532090575917
arbitrum_fee_apr_0.05                              0.5213532090574425
arbitrum_fees_usd_1.00                             0.5041360220302444
arbitrum_volume_usd_1.00                           0.5041360220302443
arbitrum_fee_apr_1.00                              0.4878219027281941
arbitrum_volume_to_tvl_1.00                        0.48782190272817083
arbitrum_num_trades_0.05                           0.4620831874842367
arbitrum_num_trades_1.00                           0.38330827055972777
arbitrum_fee_apr_0.30            

#### Stable-Stable Pair

In [273]:
numeric_dai_usdc = dai_usdc.select_dtypes(include=[np.number])
numeric_dai_usdc.columns

Index(['arbitrum_avg_liquidity_0.01', 'arbitrum_fee_apr_0.01',
       'arbitrum_fees_usd_0.01', 'arbitrum_num_trades_0.01',
       'arbitrum_tvl_usd_0.01', 'arbitrum_volume_to_tvl_0.01',
       'arbitrum_volume_usd_0.01', 'arbitrum_net_liquidity_0.01',
       'arbitrum_liquidity_added_0.01', 'arbitrum_liquidity_removed_0.01',
       'arbitrum_avg_liquidity_0.05', 'arbitrum_fee_apr_0.05',
       'arbitrum_fees_usd_0.05', 'arbitrum_num_trades_0.05',
       'arbitrum_tvl_usd_0.05', 'arbitrum_volume_to_tvl_0.05',
       'arbitrum_volume_usd_0.05', 'arbitrum_net_liquidity_0.05',
       'arbitrum_liquidity_added_0.05', 'arbitrum_liquidity_removed_0.05',
       'arbitrum_avg_liquidity_0.30', 'arbitrum_fee_apr_0.30',
       'arbitrum_fees_usd_0.30', 'arbitrum_num_trades_0.30',
       'arbitrum_tvl_usd_0.30', 'arbitrum_volume_to_tvl_0.30',
       'arbitrum_volume_usd_0.30', 'arbitrum_net_liquidity_0.30',
       'arbitrum_liquidity_added_0.30', 'arbitrum_liquidity_removed_0.30',
       'arbitrum

##### Avg Gas

In [274]:
stable_stable_correlations = numeric_dai_usdc.corr()
avg_gas_stb_stb_correlations = stable_stable_correlations['arbitrum_gas_usd_per_tx'].sort_values(ascending=False)
for index, value in avg_gas_stb_stb_correlations.items():
    print(f"{index:50} {value}")

arbitrum_gas_usd_per_tx                            1.0
arbitrum_median_gas_usd                            0.9219147473986707
arbitrum_gas_usd_per_tx_high_gas                   0.5117455965240303
arbitrum_median_gas_usd_high_gas                   0.47593217534905663
arbitrum_avg_liquidity_0.01                        0.12730769752960172
arbitrum_tvl_usd_0.01                              0.12702007492519254
arbitrum_num_trades_0.01                           0.1097571037535162
arbitrum_liquidity_added_0.01                      0.10551649328279306
arbitrum_net_liquidity_0.01                        0.0966246441697817
arbitrum_avg_liquidity_0.05                        0.09132628086178862
arbitrum_tvl_usd_0.05                              0.08729345948838327
arbitrum_liquidity_added_0.05                      0.07573248355487032
arbitrum_fee_apr_0.05                              0.04273233854987819
arbitrum_volume_to_tvl_0.05                        0.04273233854973981
arbitrum_volume_usd_0.05  

##### Median Gas

In [275]:
stable_stable_correlations = numeric_dai_usdc.corr()
avg_gas_stb_stb_correlations = stable_stable_correlations['arbitrum_median_gas_usd'].sort_values(ascending=False)
for index, value in avg_gas_stb_stb_correlations.items():
    print(f"{index:50} {value}")

arbitrum_median_gas_usd                            1.0
arbitrum_gas_usd_per_tx                            0.9219147473986707
arbitrum_median_gas_usd_high_gas                   0.6039378924265673
arbitrum_gas_usd_per_tx_high_gas                   0.5727797482053371
arbitrum_avg_liquidity_0.01                        0.16461007302190472
arbitrum_liquidity_added_0.01                      0.11790146491693729
arbitrum_net_liquidity_0.01                        0.11581642662334317
arbitrum_tvl_usd_0.01                              0.11161078630659658
arbitrum_num_trades_0.01                           0.09823470599977475
arbitrum_avg_liquidity_0.05                        0.08183807364520998
arbitrum_tvl_usd_0.05                              0.07342033318636268
arbitrum_liquidity_added_0.05                      0.03156681361528122
arbitrum_net_liquidity_0.05                        0.01689660681676992
arbitrum_volume_usd_0.05                           0.009952323851284528
arbitrum_fees_usd_0.05  

### Aggregated Across Networks

#### Volatile-Volatile Pair

##### Avg Gas

In [276]:
agg_vol_vol_correlations = agg_vol_vol.corr()
agg_avg_gas_vol_vol_correlations = agg_vol_vol_correlations['avg_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_vol_vol_correlations.items():
    print(f"{index:50} {value}")

avg_gas                                            1.0
median_gas                                         0.7556881810808027
median_gas_normal_gas                              0.6554577702698278
day                                                0.6349357401388742
avg_gas_high_gas                                   0.5342374259462243
fees_usd                                           0.4641661414650002
avg_gas_normal_gas                                 0.4529383062145439
num_trades                                         0.24758714924538514
volume_usd                                         0.23513111586108928
tvl_usd                                            0.19710522465586838
median_gas_high_gas                                0.17269786009112764
fee_apr                                            0.1388552795661063
volume_to_tvl                                      -0.008537467197076324
median_gas_low_gas                                 -0.7673731083105018
avg_gas_low_gas            

In [277]:
agg_avg_gas_low_vol_vol_correlations = agg_vol_vol_correlations['avg_gas_low_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_low_vol_vol_correlations.items():
    print(f"{index:50} {value}")

avg_gas_low_gas                                    1.0
median_gas_low_gas                                 0.9706326079286993
volume_to_tvl                                      0.029620465965998963
median_gas_high_gas                                -0.13333333333333316
fee_apr                                            -0.14564345792830918
avg_gas_high_gas                                   -0.1578152729534074
volume_usd                                         -0.22773291918386307
tvl_usd                                            -0.25597036233449016
num_trades                                         -0.2808244658473438
fees_usd                                           -0.5080925064426968
day                                                -0.7269305274518
avg_gas                                            -0.7804281466643787
avg_gas_normal_gas                                 -0.8560707580902075
median_gas_normal_gas                              -0.8675985665774656
median_gas           

##### Median Gas

In [278]:
agg_vol_vol_correlations = agg_vol_vol.corr()
agg_avg_gas_vol_vol_correlations = agg_vol_vol_correlations['median_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_vol_vol_correlations.items():
    print(f"{index:50} {value}")

median_gas                                         1.0
avg_gas                                            0.7556881810808027
day                                                0.7088782070530033
avg_gas_normal_gas                                 0.6835475602025434
median_gas_normal_gas                              0.6741654106229642
fees_usd                                           0.6713923646322593
num_trades                                         0.47421910479145085
volume_usd                                         0.42396820245968536
median_gas_high_gas                                0.4064664536479723
tvl_usd                                            0.3813483069075064
avg_gas_high_gas                                   0.26731261648965965
fee_apr                                            0.17192325674652204
volume_to_tvl                                      0.02417820954193431
avg_gas_low_gas                                    -0.877262678290724
median_gas_low_gas            

#### Volatile-Stable Pair

##### Avg Gas

In [279]:
agg_vol_stable_correlations = agg_vol_stable.corr()
agg_avg_gas_vol_stb_correlations = agg_vol_stable_correlations['avg_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_vol_stb_correlations.items():
    print(f"{index:50} {value}")

avg_gas                                            1.0
median_gas                                         0.7556881810808027
median_gas_normal_gas                              0.6554577702698278
day                                                0.6349357401388742
avg_gas_high_gas                                   0.5342374259462243
avg_gas_normal_gas                                 0.4529383062145439
median_gas_high_gas                                0.17269786009112764
volume_to_tvl                                      0.14433603447796456
fees_usd                                           0.13681852676242903
num_trades                                         0.11477085153311657
volume_usd                                         0.10076710631015846
fee_apr                                            0.08825889001878766
tvl_usd                                            -0.2577297595719947
median_gas_low_gas                                 -0.7673731083105018
avg_gas_low_gas            

##### Median Gas

In [280]:
agg_vol_stable_correlations = agg_vol_stable.corr()
agg_avg_gas_vol_stb_correlations = agg_vol_stable_correlations['median_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_vol_stb_correlations.items():
    print(f"{index:50} {value}")

median_gas                                         1.0
avg_gas                                            0.7556881810808027
day                                                0.7088782070530033
avg_gas_normal_gas                                 0.6835475602025434
median_gas_normal_gas                              0.6741654106229642
median_gas_high_gas                                0.4064664536479723
volume_to_tvl                                      0.3038295895141003
num_trades                                         0.2840103450529939
fees_usd                                           0.27462290903342956
avg_gas_high_gas                                   0.26731261648965965
volume_usd                                         0.2596204240464476
fee_apr                                            0.1326250830489416
tvl_usd                                            -0.2154830528359862
avg_gas_low_gas                                    -0.877262678290724
median_gas_low_gas              

#### Stable-Stable Pair

##### Avg Gas

In [281]:
agg_stable_stable_correlations = agg_stable_stable.corr()
agg_avg_gas_stb_stb_correlations = agg_stable_stable_correlations['avg_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_stb_stb_correlations.items():
    print(f"{index:50} {value}")

avg_gas                                            1.0
median_gas                                         0.7556881810808027
median_gas_normal_gas                              0.6554577702698278
day                                                0.6349357401388742
avg_gas_high_gas                                   0.5342374259462243
avg_gas_normal_gas                                 0.4529383062145439
volume_usd                                         0.2348138854250011
fees_usd                                           0.23461802966768375
tvl_usd                                            0.19109866207681434
median_gas_high_gas                                0.17269786009112764
volume_to_tvl                                      0.10929782283829688
num_trades                                         0.07397000720382309
fee_apr                                            -0.02254151114657157
median_gas_low_gas                                 -0.7673731083105018
avg_gas_low_gas            

##### Median Gas

In [282]:
agg_stable_stable_correlations = agg_stable_stable.corr()
agg_avg_gas_stb_stb_correlations = agg_stable_stable_correlations['median_gas'].sort_values(ascending=False)
for index, value in agg_avg_gas_stb_stb_correlations.items():
    print(f"{index:50} {value}")

median_gas                                         1.0
avg_gas                                            0.7556881810808027
day                                                0.7088782070530033
avg_gas_normal_gas                                 0.6835475602025434
median_gas_normal_gas                              0.6741654106229642
median_gas_high_gas                                0.4064664536479723
avg_gas_high_gas                                   0.26731261648965965
tvl_usd                                            0.2340635146259494
fees_usd                                           0.21940140535631358
volume_usd                                         0.21937800308445796
num_trades                                         0.16483781533510958
volume_to_tvl                                      0.08789957241293926
fee_apr                                            -0.046386271126893294
avg_gas_low_gas                                    -0.877262678290724
median_gas_low_gas         

### Trader Classifier

#### Avg Gas

In [283]:
agg_trader_correlations = combined_trader_data.corr()
agg_avg_gas_trader_correlations = agg_trader_correlations['arbitrum_gas_usd_per_tx'].sort_values(ascending=False)
for index, value in agg_avg_gas_trader_correlations.items():
    print(f"{index:50} {value}")

arbitrum_gas_usd_per_tx                            1.0
arbitrum_median_gas_usd                            0.979236564615026
avg_order_size_usd_Professional                    0.310516802297874
avg_order_size_usd_Retail                          0.26060108805056065
total_volume_usd_Retail                            0.1921841901445734
tx_count_Retail                                    0.14835145999790766
total_volume_usd_Professional                      0.12720543393156963
arbitrum_gas_usd_per_tx_high_gas                   0.12438140734562872
arbitrum_median_gas_usd_high_gas                   0.1223934244317744
tx_count_Professional                              0.044145634621361146
day                                                0.030522173314481066
unique_contracts_Professional                      -0.031806421361717775
arbitrum_median_gas_usd_low_gas                    -0.045271846958974966
arbitrum_gas_usd_per_tx_low_gas                    -0.04604271790497039
arbitrum_median_gas_u

#### Median Gas

In [284]:
agg_trader_correlations = combined_trader_data.corr()
agg_avg_gas_trader_correlations = agg_trader_correlations['arbitrum_median_gas_usd'].sort_values(ascending=False)
for index, value in agg_avg_gas_trader_correlations.items():
    print(f"{index:50} {value}")

arbitrum_median_gas_usd                            1.0
arbitrum_gas_usd_per_tx                            0.979236564615026
avg_order_size_usd_Professional                    0.2563694480545029
avg_order_size_usd_Retail                          0.21403421781193524
total_volume_usd_Retail                            0.15487955252868985
tx_count_Retail                                    0.1300906385398528
arbitrum_median_gas_usd_high_gas                   0.10351285297529192
arbitrum_gas_usd_per_tx_high_gas                   0.10216120195088553
total_volume_usd_Professional                      0.06123288731210473
day                                                0.02557411266112926
tx_count_Professional                              0.004095209303444803
arbitrum_gas_usd_per_tx_low_gas                    -0.03839778770490451
arbitrum_median_gas_usd_low_gas                    -0.03928770384666949
arbitrum_gas_usd_per_tx_normal_gas                 -0.055237777376914404
arbitrum_median_gas_u

# Sim Target Variable Correlation Analyses
- Want to look at the potential to simulate individual vaults; potentially move on to aggregated metrics

## Volume Target

### Arbitrum Aggregated

aggregated_arb_hour.columns

combined_trader_data.columns

prices_vol_df_pivot.columns

gas_cols = ['arbitrum_gas_usd_per_tx',
       'arbitrum_median_gas_usd','arbitrum_median_gas_usd_low_gas','arbitrum_median_gas_usd_normal_gas',
       'arbitrum_median_gas_usd_high_gas', 'arbitrum_gas_usd_per_tx_low_gas','arbitrum_gas_usd_per_tx_normal_gas',
       'arbitrum_gas_usd_per_tx_high_gas']

# Combine pool data with trader data
pool_trader_arb_data = pd.merge(aggregated_arb_hour.drop(columns=gas_cols), combined_trader_data, how='inner', on='day')
pool_trader_arb_data = pool_trader_arb_data.merge(prices_vol_df_pivot, how='left', on='day')

aggregated_arb_hour[['arbitrum_gas_usd_per_tx_low_gas',
       'arbitrum_gas_usd_per_tx_normal_gas',
       'arbitrum_gas_usd_per_tx_high_gas', 'arbitrum_median_gas_usd_low_gas',
       'arbitrum_median_gas_usd_normal_gas',
       'arbitrum_median_gas_usd_high_gas']]

correlation_volume = pool_trader_arb_data.corr()['arbitrum_volume_usd'].sort_values(ascending=False)
print("Correlation with Volume:")
print(correlation_volume)

### All Network Aggregated

#### Volatile-Volatile Pair

agg_vol_vol = agg_vol_vol.merge(combined_trader_data[['day','avg_order_size_usd_Professional','avg_order_size_usd_Retail','tx_count_Professional', 'tx_count_Retail']], how='inner', on='day')
agg_vol_vol = agg_vol_vol.merge(prices_vol_df_pivot, on='day', how='inner')

agg_vol_vol.columns

agg_vol_vol_correlation_volume = agg_vol_vol.corr()['volume_usd'].sort_values(ascending=False)
print("Correlation with Volume:")
print(agg_vol_vol_correlation_volume)

#### Volatile-Stable Pair

agg_vol_stable = agg_vol_stable.merge(combined_trader_data[['day','avg_order_size_usd_Professional','avg_order_size_usd_Retail','tx_count_Professional', 'tx_count_Retail']], how='inner', on='day')
agg_vol_stable = agg_vol_stable.merge(prices_vol_df_pivot, on='day', how='inner')

agg_vol_stable_correlation_volume = agg_vol_stable.corr()['volume_usd'].sort_values(ascending=False)
print("Correlation with Volume:")
print(agg_vol_stable_correlation_volume)

#### Stable-Stable Pair

agg_stable_stable = agg_stable_stable.merge(combined_trader_data[['day','avg_order_size_usd_Professional','avg_order_size_usd_Retail','tx_count_Professional', 'tx_count_Retail']], how='inner', on='day')
agg_stable_stable = agg_stable_stable.merge(prices_vol_df_pivot, on='day', how='inner')

agg_stable_stable_correlation_volume = agg_stable_stable.corr()['volume_usd'].sort_values(ascending=False)
print("Correlation with Volume:")
print(agg_stable_stable_correlation_volume)

## Fee Target

### Arbitrum Aggregated 

correlation_fees  = pool_trader_arb_data.corr()['arbitrum_fees_usd'].sort_values(ascending=False)
print("Correlation with Fees:")
print(correlation_fees)

### All Networks Aggregated

#### Volatile-Volatile Pair

agg_vol_vol_correlation_fees = agg_vol_vol.corr()['fees_usd'].sort_values(ascending=False)
print("Correlation with Volume:")
print(agg_vol_vol_correlation_fees)

#### Volatile-Stable Pair

agg_vol_stable_correlation_fees = agg_vol_stable.corr()['fees_usd'].sort_values(ascending=False)
print("Correlation with Fees:")
print(agg_vol_stable_correlation_fees)

#### Stable-Stable Pair

agg_stable_stable_correlation_fees = agg_stable_stable.corr()['fees_usd'].sort_values(ascending=False)
print("Correlation with fees:")
print(agg_stable_stable_correlation_fees)

# ML Models

- For simulation, need to use non-dependent variables for x
- From inital correlation analyses on volume, fees, can use:
    - unique_contracts
    - avg_order_size
    - BTC, ETH prices
    - Avg, Median Gas price
    - Potentially tx count

## Linear Regression

### Individual Pairs

#### Volatile-Volatile Pair

Strongest Correlations (>20):

arbitrum_avg_liquidity_030                         0.25354986008866914
arbitrum_net_liquidity_030                         0.21042355710465704

avg_order_size_usd_Professional                    0.310516802297874
avg_order_size_usd_Retail                          0.26060108805056065

# We will try volume, fees as y/target variable

weth_zro.columns

combined_trader_data.columns

vol_vol_ml = weth_zro[['day','arbitrum_avg_liquidity_030','arbitrum_net_liquidity_030','arbitrum_gas_usd_per_tx','arbitrum_median_gas_usd','arbitrum_volume_usd_030','arbitrum_num_trades_030']].copy()
vol_vol_ml = vol_vol_ml.merge(combined_trader_data[['day','avg_order_size_usd_Professional','avg_order_size_usd_Retail','tx_count_Professional', 'tx_count_Retail']], how='inner', on='day')
vol_vol_ml

vol_vol_ml.set_index('day', inplace=True)

X = vol_vol_ml.drop(columns=['arbitrum_volume_usd_030'])  # Features
y = vol_vol_ml['arbitrum_volume_usd_030']  # Target variable

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

### Arbitrum Aggregated

### All Networks Aggregated 

#### Volatile-Volatile Pair

agg_vol_vol.columns

vol_vol_ml = weth_zro[['day','arbitrum_avg_liquidity_030','arbitrum_net_liquidity_030','arbitrum_gas_usd_per_tx','arbitrum_median_gas_usd','arbitrum_volume_usd_030','arbitrum_num_trades_030']].copy()
vol_vol_ml = vol_vol_ml.merge(combined_trader_data[['day','avg_order_size_usd_Professional','avg_order_size_usd_Retail','tx_count_Professional', 'tx_count_Retail']], how='inner', on='day')
vol_vol_ml

Volume Target Correlation with Volume:
volume_usd                         1.000000
num_trades                         0.882988
tx_count_Professional              0.842997
avg_order_size_usd_Professional    0.741148
tvl_usd                            0.673111
fees_usd                           0.595892
median_gas_high_gas                0.464012
avg_order_size_usd_Retail          0.431865
median_gas                         0.423968
tx_count_Retail                    0.262624
avg_gas                            0.235131
ETH_Price                          0.215284
avg_gas_high_gas                   0.162728
BTC_Price                          0.148784
avg_gas_normal_gas                 0.128592
ETH_Price_pct_change               0.073444
arbitrum_vol_ex_uni_pct_change     0.073299
day                                0.038370
BTC_Price_pct_change               0.037381
fee_apr                            0.037227
arbitrum_vol_ex_uni                0.022481
median_gas_normal_gas              0.008413
volume_to_tvl                     -0.027635
avg_gas_low_gas                   -0.227733
median_gas_low_gas                -0.228500
Name: volume_usd, dtype: float64


Fee Target Correlation with Volume:
fees_usd                           1.000000
num_trades                         0.678453
median_gas                         0.671392
volume_usd                         0.595892
tvl_usd                            0.572199
avg_gas                            0.464166
BTC_Price                          0.463283
ETH_Price                          0.462361
median_gas_high_gas                0.432432
avg_gas_normal_gas                 0.416532
tx_count_Retail                    0.386533
tx_count_Professional              0.364055
avg_order_size_usd_Professional    0.358951
median_gas_normal_gas              0.303175
avg_gas_high_gas                   0.115397
ETH_Price_pct_change               0.064218
BTC_Price_pct_change               0.052825
fee_apr                            0.034760
arbitrum_vol_ex_uni_pct_change     0.024686
avg_order_size_usd_Retail         -0.014818
volume_to_tvl                     -0.062159
arbitrum_vol_ex_uni               -0.149911
avg_gas_low_gas                   -0.508093
median_gas_low_gas                -0.521866
Name: fees_usd, dtype: float64


vol_vol_low_corr_cols_volume = ['ETH_Price','avg_gas_high_gas','BTC_Price','avg_gas_normal_gas','ETH_Price_pct_change','arbitrum_vol_ex_uni_pct_change',
                 'BTC_Price_pct_change','fee_apr','arbitrum_vol_ex_uni','median_gas_normal_gas','volume_to_tvl','avg_gas_low_gas',
                 'median_gas_low_gas']

vol_vol_low_corr_cols_fees = ['median_gas_normal_gas','avg_gas_high_gas','ETH_Price_pct_change','BTC_Price_pct_change','fee_apr','arbitrum_vol_ex_uni_pct_change','avg_order_size_usd_Retail','volume_to_tvl','arbitrum_vol_ex_uni','avg_gas_low_gas','median_gas_low_gas']

agg_vol_target = ['volume_usd']
agg_fee_target = ['fees_usd']

agg_vol_vol.set_index('day', inplace=True)



X = agg_vol_vol.drop(columns=agg_vol_target + vol_vol_low_corr_cols_volume)  # Features
y = agg_vol_vol['volume_usd']  # Target variable

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = Ridge(50)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

##### Linear Regression Time Seres CV

###### All Features as X

X = agg_vol_vol.drop(columns=['volume_usd'])  # Features
y = agg_vol_vol['volume_usd']  # Target variable

# Initialize TimeSeriesSplit with 5 splits
tscv = TimeSeriesSplit(n_splits=5)

mse_list = []
mae_list = []
r2_list = []

# Perform time series cross-validation
for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation
    mse = mean_squared_error(y_test, y_pred)
    # mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mse_list.append(mse)
    # mae_list.append(mae)
    r2_list.append(r2)

    print(f'Fold {fold}:')
    print(f'Mean Squared Error: {mse}')
    # print(f'Mean Absolute Error: {mae}')
    print(f'R-squared: {r2}\n')

# Average scores
avg_mse = np.mean(mse_list)
# avg_mae = np.mean(mae_list)
avg_r2 = np.mean(r2_list)

print(f'Average Mean Squared Error: {avg_mse}')
# print(f'Average Mean Absolute Error: {avg_mae}')
print(f'Average R-squared: {avg_r2}')

###### Refined Feature Selection

#Volume Target

X = agg_vol_vol.drop(columns=agg_vol_target + vol_vol_low_corr_cols_volume)  # Features
y = agg_vol_vol['volume_usd']  # Target variable

# Initialize TimeSeriesSplit with 5 splits
tscv = TimeSeriesSplit(n_splits=5)

mse_list = []
mae_list = []
r2_list = []

# Perform time series cross-validation
for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train model
    model = Ridge(alpha=50)
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation
    mse = mean_squared_error(y_test, y_pred)
    # mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mse_list.append(mse)
    # mae_list.append(mae)
    r2_list.append(r2)

    print(f'Fold {fold}:')
    print(f'Mean Squared Error: {mse}')
    # print(f'Mean Absolute Error: {mae}')
    print(f'R-squared: {r2}\n')

# Average scores
avg_mse = np.mean(mse_list)
# avg_mae = np.mean(mae_list)
avg_r2 = np.mean(r2_list)

print(f'Average Mean Squared Error: {avg_mse}')
# print(f'Average Mean Absolute Error: {avg_mae}')
print(f'Average R-squared: {avg_r2}')

#Fees Target

X = agg_vol_vol.drop(columns=agg_fee_target + vol_vol_low_corr_cols_fees)  # Features
y = agg_vol_vol[agg_fee_target]  # Target variable

# Initialize TimeSeriesSplit with 5 splits
tscv = TimeSeriesSplit(n_splits=5)

mse_list = []
mae_list = []
r2_list = []

# Perform time series cross-validation
for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train model
    model = Ridge(alpha=0.1)
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation
    mse = mean_squared_error(y_test, y_pred)
    # mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mse_list.append(mse)
    # mae_list.append(mae)
    r2_list.append(r2)

    print(f'Fold {fold}:')
    print(f'Mean Squared Error: {mse}')
    # print(f'Mean Absolute Error: {mae}')
    print(f'R-squared: {r2}\n')

# Average scores
avg_mse = np.mean(mse_list)
# avg_mae = np.mean(mae_list)
avg_r2 = np.mean(r2_list)

print(f'Average Mean Squared Error: {avg_mse}')
# print(f'Average Mean Absolute Error: {avg_mae}')
print(f'Average R-squared: {avg_r2}')

##### Ridge Regression

X = agg_vol_vol.drop(columns=agg_vol_target + vol_vol_low_corr_cols_volume)  # Features
y = agg_vol_vol['volume_usd']  # Target variable

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model
ridge = Ridge()

# Define the parameters to tune
parameters = {'alpha': [0.1, 1.0, 10.0, 50, 100.0, 1000.0]}  # Example range, adjust as needed

# Use GridSearchCV to find the best hyperparameters
grid_search = GridSearchCV(ridge, parameters, cv=5, scoring='r2')
grid_search.fit(X_train, y_train)

# Get the best model
best_ridge = grid_search.best_estimator_

# Predictions
y_pred = best_ridge.predict(X_test)

# Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Best alpha: {grid_search.best_params_["alpha"]}')
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

#### Volatile-Stable Pair

Correlation with Volume:
volume_usd                         1.000000
fees_usd                           0.972520
num_trades                         0.958054
tx_count_Professional              0.911002
volume_to_tvl                      0.789622
avg_order_size_usd_Professional    0.741186
fee_apr                            0.621249
avg_order_size_usd_Retail          0.544163
median_gas_high_gas                0.473416
tvl_usd                            0.416362
median_gas                         0.259620
tx_count_Retail                    0.192901
avg_gas_high_gas                   0.153987
avg_gas                            0.100767
arbitrum_vol_ex_uni_pct_change     0.071804
ETH_Price                          0.071547
arbitrum_vol_ex_uni                0.067584
ETH_Price_pct_change               0.027398
median_gas_low_gas                -0.015314
BTC_Price_pct_change              -0.017254
BTC_Price                         -0.019240
avg_gas_low_gas                   -0.019713
avg_gas_normal_gas                -0.062095
day                               -0.127373
median_gas_normal_gas             -0.199656
Name: volume_usd, dtype: float64


Correlation with Fees:
fees_usd                           1.000000
volume_usd                         0.972520
num_trades                         0.950782
tx_count_Professional              0.908798
volume_to_tvl                      0.778951
avg_order_size_usd_Professional    0.729755
fee_apr                            0.697881
avg_order_size_usd_Retail          0.552634
median_gas_high_gas                0.450443
tvl_usd                            0.359282
median_gas                         0.274623
tx_count_Retail                    0.189163
avg_gas_high_gas                   0.162495
avg_gas                            0.136819
ETH_Price                          0.112305
arbitrum_vol_ex_uni                0.061885
arbitrum_vol_ex_uni_pct_change     0.059863
ETH_Price_pct_change               0.029574
BTC_Price                          0.018951
BTC_Price_pct_change              -0.005122
avg_gas_normal_gas                -0.029428
median_gas_low_gas                -0.049074
avg_gas_low_gas                   -0.059259
median_gas_normal_gas             -0.156982
Name: fees_usd, dtype: float64


vol_stable_low_corr_cols_volume = ['tx_count_Retail','avg_gas_high_gas','avg_gas','arbitrum_vol_ex_uni_pct_change','ETH_Price','arbitrum_vol_ex_uni','ETH_Price_pct_change','median_gas_low_gas','BTC_Price_pct_change','BTC_Price','avg_gas_low_gas','avg_gas_normal_gas','median_gas_normal_gas']
vol_stable_low_corr_cols_fees = ['tx_count_Retail','avg_gas_high_gas','avg_gas','ETH_Price','arbitrum_vol_ex_uni','arbitrum_vol_ex_uni_pct_change','ETH_Price_pct_change','BTC_Price','BTC_Price_pct_change','avg_gas_normal_gas','median_gas_low_gas','avg_gas_low_gas','median_gas_normal_gas']

agg_vol_stable.set_index('day', inplace=True)

#Volume Target

X = agg_vol_stable.drop(columns=agg_vol_target + vol_stable_low_corr_cols_volume)  # Features
y = agg_vol_stable['volume_usd']  # Target variable

# Initialize TimeSeriesSplit with 5 splits
tscv = TimeSeriesSplit(n_splits=5)

mse_list = []
mae_list = []
r2_list = []

# Perform time series cross-validation
for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train model
    model = Ridge(alpha=50)
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation
    mse = mean_squared_error(y_test, y_pred)
    # mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mse_list.append(mse)
    # mae_list.append(mae)
    r2_list.append(r2)

    print(f'Fold {fold}:')
    print(f'Mean Squared Error: {mse}')
    # print(f'Mean Absolute Error: {mae}')
    print(f'R-squared: {r2}\n')

# Average scores
avg_mse = np.mean(mse_list)
# avg_mae = np.mean(mae_list)
avg_r2 = np.mean(r2_list)

print(f'Average Mean Squared Error: {avg_mse}')
# print(f'Average Mean Absolute Error: {avg_mae}')
print(f'Average R-squared: {avg_r2}')

#Fees Target

X = agg_vol_stable.drop(columns=agg_fee_target + vol_stable_low_corr_cols_fees)  # Features
y = agg_vol_stable[agg_fee_target]  # Target variable

# Initialize TimeSeriesSplit with 5 splits
tscv = TimeSeriesSplit(n_splits=5)

mse_list = []
mae_list = []
r2_list = []

# Perform time series cross-validation
for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train model
    model = Ridge(alpha=50)
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation
    mse = mean_squared_error(y_test, y_pred)
    # mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mse_list.append(mse)
    # mae_list.append(mae)
    r2_list.append(r2)

    print(f'Fold {fold}:')
    print(f'Mean Squared Error: {mse}')
    # print(f'Mean Absolute Error: {mae}')
    print(f'R-squared: {r2}\n')

# Average scores
avg_mse = np.mean(mse_list)
# avg_mae = np.mean(mae_list)
avg_r2 = np.mean(r2_list)

print(f'Average Mean Squared Error: {avg_mse}')
# print(f'Average Mean Absolute Error: {avg_mae}')
print(f'Average R-squared: {avg_r2}')

#### Stable-Stable Pair

Correlation with Volume:
volume_usd                         1.000000
fees_usd                           0.999994
volume_to_tvl                      0.774103
num_trades                         0.770749
tvl_usd                            0.593093
tx_count_Professional              0.575269
fee_apr                            0.523088
avg_order_size_usd_Professional    0.447907
avg_gas_high_gas                   0.304616
avg_order_size_usd_Retail          0.299142
avg_gas                            0.234814
median_gas                         0.219378
ETH_Price                          0.172865
median_gas_high_gas                0.157583
tx_count_Retail                    0.092969
BTC_Price                          0.090799
ETH_Price_pct_change               0.052281
median_gas_normal_gas              0.049627
arbitrum_vol_ex_uni_pct_change     0.033932
BTC_Price_pct_change              -0.006928
avg_gas_normal_gas                -0.040930
arbitrum_vol_ex_uni               -0.046451
day                               -0.057700
avg_gas_low_gas                   -0.126255
median_gas_low_gas                -0.126522
Name: volume_usd, dtype: float64


Correlation with fees:
fees_usd                           1.000000
volume_usd                         0.999994
volume_to_tvl                      0.773835
num_trades                         0.771279
tvl_usd                            0.594116
tx_count_Professional              0.575257
fee_apr                            0.523389
avg_order_size_usd_Professional    0.447944
avg_gas_high_gas                   0.304174
avg_order_size_usd_Retail          0.299282
avg_gas                            0.234618
median_gas                         0.219401
ETH_Price                          0.173008
median_gas_high_gas                0.157438
tx_count_Retail                    0.093133
BTC_Price                          0.091019
ETH_Price_pct_change               0.052142
median_gas_normal_gas              0.049623
arbitrum_vol_ex_uni_pct_change     0.033864
BTC_Price_pct_change              -0.006979
avg_gas_normal_gas                -0.040779
arbitrum_vol_ex_uni               -0.046622
avg_gas_low_gas                   -0.126169
median_gas_low_gas                -0.126449
Name: fees_usd, dtype: float64

stable_stable_low_corr_cols_volume = ['ETH_Price','median_gas_high_gas','tx_count_Retail','BTC_Price','ETH_Price_pct_change','median_gas_normal_gas','arbitrum_vol_ex_uni_pct_change','BTC_Price_pct_change','avg_gas_normal_gas','arbitrum_vol_ex_uni','avg_gas_low_gas','median_gas_low_gas']
stable_stable_low_corr_cols_fees = ['ETH_Price','median_gas_high_gas','tx_count_Retail','BTC_Price','ETH_Price_pct_change','median_gas_normal_gas','arbitrum_vol_ex_uni_pct_change','BTC_Price_pct_change','avg_gas_normal_gas','arbitrum_vol_ex_uni','avg_gas_low_gas','median_gas_low_gas']

agg_stable_stable.set_index('day', inplace=True)

# Volume Target

X = agg_stable_stable.drop(columns=agg_vol_target + stable_stable_low_corr_cols_volume)  # Features
y = agg_stable_stable['volume_usd']  # Target variable

# Initialize TimeSeriesSplit with 5 splits
tscv = TimeSeriesSplit(n_splits=5)

mse_list = []
mae_list = []
r2_list = []

# Perform time series cross-validation
for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train model
    model = Ridge(alpha=50)
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation
    mse = mean_squared_error(y_test, y_pred)
    # mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mse_list.append(mse)
    # mae_list.append(mae)
    r2_list.append(r2)

    print(f'Fold {fold}:')
    print(f'Mean Squared Error: {mse}')
    # print(f'Mean Absolute Error: {mae}')
    print(f'R-squared: {r2}\n')

# Average scores
avg_mse = np.mean(mse_list)
# avg_mae = np.mean(mae_list)
avg_r2 = np.mean(r2_list)

print(f'Average Mean Squared Error: {avg_mse}')
# print(f'Average Mean Absolute Error: {avg_mae}')
print(f'Average R-squared: {avg_r2}')

# Fees Target

X = agg_stable_stable.drop(columns=agg_fee_target + stable_stable_low_corr_cols_fees)  # Features
y = agg_stable_stable[agg_fee_target]  # Target variable

# Initialize TimeSeriesSplit with 5 splits
tscv = TimeSeriesSplit(n_splits=5)

mse_list = []
mae_list = []
r2_list = []

# Perform time series cross-validation
for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train model
    model = Ridge(alpha=50)
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation
    mse = mean_squared_error(y_test, y_pred)
    # mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mse_list.append(mse)
    # mae_list.append(mae)
    r2_list.append(r2)

    print(f'Fold {fold}:')
    print(f'Mean Squared Error: {mse}')
    # print(f'Mean Absolute Error: {mae}')
    print(f'R-squared: {r2}\n')

# Average scores
avg_mse = np.mean(mse_list)
# avg_mae = np.mean(mae_list)
avg_r2 = np.mean(r2_list)

print(f'Average Mean Squared Error: {avg_mse}')
# print(f'Average Mean Absolute Error: {avg_mae}')
print(f'Average R-squared: {avg_r2}')

## Prophet

### All Networks Aggregated 

## Logistic Regression

# LSTM

# Simulation

## Simulation Model

X = agg_vol_vol.drop(columns=agg_vol_target + vol_vol_low_corr_cols_volume)  # Features
y = agg_vol_vol['volume_usd']  # Target variable

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model
ridge = Ridge()

# Define the parameters to tune
parameters = {'alpha': [0.1, 1.0, 10.0, 50, 100.0, 1000.0]}  # Example range, adjust as needed

# Use GridSearchCV to find the best hyperparameters
grid_search = GridSearchCV(ridge, parameters, cv=5, scoring='r2')
grid_search.fit(X_train, y_train)

# Get the best model
best_ridge = grid_search.best_estimator_

# Predictions
y_pred = best_ridge.predict(X_test)

# Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Best alpha: {grid_search.best_params_["alpha"]}')
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Define features and target variable
X = agg_vol_vol.drop(columns=agg_vol_target + vol_vol_low_corr_cols_volume)  # Features
y = agg_vol_vol['volume_usd']  # Target variable

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter tuning using GridSearchCV
# param_grid = {'alpha': np.logspace(-4, 4, 10)}  # Example grid
ridge = Ridge(alpha=50)
# ridge_cv = GridSearchCV(ridge, param_grid, scoring='neg_mean_squared_error', cv=5)
ridge.fit(X_train, y_train)

# Best model
vol_vol_model = ridge

# # Print best parameters and corresponding score
# print(f'Best alpha: {ridge_cv.best_params_["alpha"]}')
# print(f'Best MSE (from GridSearchCV): {-ridge_cv.best_score_}')

# # Predictions
# y_pred = vol_vol_model.predict(X_test)

# # Evaluation
# mse = mean_squared_error(y_test, y_pred)
# r2 = r2_score(y_test, y_pred)

# print(f'Mean Squared Error: {mse}')
# print(f'R-squared: {r2}')


agg_vol_vol.drop(columns=agg_vol_target + vol_vol_low_corr_cols_volume).columns

def simulate_gas_subsidy(model, X, y, gas_change_factor=0.9):
    """
    Simulate the impact of a gas subsidy by adjusting the gas-related features.
    
    Parameters:
    - model: The fitted regression model (e.g., vol_vol_model)
    - X: DataFrame, feature matrix
    - y: Series, target variable
    - gas_change_factor: float, the factor by which to adjust the gas-related features (e.g., 0.9 for a 10% reduction)
    
    Returns:
    - simulated_pred: Series, predicted values after simulating gas subsidy
    """
    # Copy the feature matrix to avoid modifying the original data
    X_simulated = X.copy()
    
    # Identify and adjust gas-related features
    gas_cols = [col for col in X.columns if 'gas' in col.lower()]
    for col in gas_cols:
        X_simulated[col] *= gas_change_factor
    
    # Predict the target variable with the modified features
    simulated_pred = model.predict(X_simulated)
    
    # Evaluate the change in the target variable
    original_pred = model.predict(X)
    change = simulated_pred - original_pred
    
    print(f'Average change in target variable: {np.mean(change)}')
    
    return simulated_pred


X = agg_vol_vol.drop(columns=agg_vol_target + vol_vol_low_corr_cols_volume)  # Features
y = agg_vol_vol[agg_vol_target] # Volume target

vol_vol_simulated_volume = simulate_gas_subsidy(vol_vol_model, X, y, gas_change_factor=0.9)