In [1]:
from web3 import Web3
import os
from dotenv import load_dotenv
import pandas as pd
import prophet
import numpy as np
import datetime as dt
from datetime import timedelta
from prophet import Prophet
from eth_account import Account
# from web3.middleware import geth_poa_middleware
from eth_abi import decode
from eth_utils import decode_hex, to_text

from itertools import product

import requests
import random
import json

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import joblib

from dune_client.client import DuneClient

import plotly.graph_objs as go

from diskcache import Cache

from chartengineer import ChartMaker

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from gas_accountant import (dune_api_results, get_tx_and_log_with_pagination, process_transaction, get_eth_balances,
                            get_price_timeseries)

In [3]:
load_dotenv()

ETHERSCAN_KEY = os.getenv("ETHERSCAN_KEY")
COINGECKO_API_KEY = os.getenv("COINGECKO_API_KEY")
DUNE_API_KEY = os.getenv('DUNE_API_KEY')
FLIPSIDE_API_KEY=os.getenv('FLIPSIDE_API_KEY')
dune = DuneClient(DUNE_API_KEY)

GAS_ACCOUNTANT = os.getenv("GAS_ACCOUNTANT")
GAS_RESERVE = os.getenv('GAS_RESERVE')
ACCOUNT_ADDRESS = os.getenv("ACCOUNT_ADDRESS")
PRIVATE_KEY = os.getenv("PRIVATE_KEY")
YIELD_FARM_ADDRESS = os.getenv("YIELD_FARM_ADDRESS")
STAKING_CONTRACT = os.getenv("STAKING_CONTRACT")
SEPOLIA_GATEWAY = os.getenv("SEPOLIA_GATEWAY")

BOT_1_ADDRESS=os.getenv("BOT_1_ADDRESS")
BOT_2_ADDRESS=os.getenv("BOT_2_ADDRESS")

In [4]:
os.chdir('..')

api = True

In [None]:
base_cache_dir = os.getcwd()
cache = Cache(os.path.join(base_cache_dir, 'cache')) 
gas_reserve_data = cache.get(f'gas_reserve_spent', pd.DataFrame())

abi_path = r'abi'
abi_paths = []  # Assuming GAS_ACCOUNTANT_ABI_PATH is predefined

for file in os.listdir(abi_path):
    if file.endswith('.json') and "metadata" not in file:  # Exclude metadata files
        abi_paths.append(os.path.join(abi_path, file))  # Add full path

print(abi_paths)  # Debug: Check the final list

abis = {}

for path in abi_paths:
    filename = os.path.basename(path)  # Extract filename (e.g., "YieldVault.json")
    name = os.path.splitext(filename)[0]  # Remove .json extension (e.g., "YieldVault")

    with open(path, "r") as file:
        abis[name] = json.load(file)  # Use name as key

print(abis)  # Debug output

w3 = Web3(Web3.HTTPProvider(SEPOLIA_GATEWAY))

ACCOUNT = Account.from_key(PRIVATE_KEY)

w3.eth.default_account = ACCOUNT.address

['abi\\erc20_abi.json', 'abi\\staking_abi.json', 'abi\\vault_abi.json']
{'erc20_abi': [{'constant': True, 'inputs': [], 'name': 'name', 'outputs': [{'name': '', 'type': 'string'}], 'payable': False, 'stateMutability': 'view', 'type': 'function'}, {'constant': False, 'inputs': [{'name': '_spender', 'type': 'address'}, {'name': '_value', 'type': 'uint256'}], 'name': 'approve', 'outputs': [{'name': '', 'type': 'bool'}], 'payable': False, 'stateMutability': 'nonpayable', 'type': 'function'}, {'constant': True, 'inputs': [], 'name': 'totalSupply', 'outputs': [{'name': '', 'type': 'uint256'}], 'payable': False, 'stateMutability': 'view', 'type': 'function'}, {'constant': False, 'inputs': [{'name': '_from', 'type': 'address'}, {'name': '_to', 'type': 'address'}, {'name': '_value', 'type': 'uint256'}], 'name': 'transferFrom', 'outputs': [{'name': '', 'type': 'bool'}], 'payable': False, 'stateMutability': 'nonpayable', 'type': 'function'}, {'constant': True, 'inputs': [], 'name': 'decimals', 'o

In [6]:
def update_eth_price_data(new_data):
    historical_data = cache.get(f'Prices', pd.DataFrame())
    historical_data = pd.concat([historical_data, new_data]).reset_index(drop=True)
    historical_data.drop_duplicates(subset='timestamp', keep='last', inplace=True)
    cache.set(f'Prices', historical_data)

def update_balance_data(values):
    print(f'values: {values}')
    # values = pd.DataFrame([values])
    historical_port_values = cache.get(f'balance_data')
    historical_port_values = pd.concat([historical_port_values, values]).reset_index(drop=True)
    print(f'historical_port_values: {historical_port_values}')
    # breakpoint()
    historical_port_values.drop_duplicates(subset='hour', keep='first', inplace=True)
    cache.set(f'balance_data', historical_port_values)

def update_forecast_gas_data(values):
    # print(f'values at update price: {values}')

    # Ensure the 'hour' column exists by resetting index if necessary
    if isinstance(values.index, pd.DatetimeIndex):
        values = values.reset_index().rename(columns={'index': 'hour'})
    
    if 'hour' not in values.columns:
        raise ValueError("The provided DataFrame must have a 'hour' column.")

    oracle_prices = cache.get(f'forecast_price_data', pd.DataFrame())
    # breakpoint()

    # Concatenate the new values with the existing oracle_prices
    oracle_prices = pd.concat([oracle_prices, values]).drop_duplicates(subset='hour', keep='first').reset_index(drop=True)

    oracle_prices = oracle_prices.set_index('hour')
    oracle_prices.index = pd.to_datetime(oracle_prices.index)
    oracle_prices.resample('H').ffill()

    # breakpoint()
    
    # Cache the updated oracle_prices
    cache.set(f'forecast_price_data', oracle_prices.reset_index())

    print(f'Updated forecast_price_data:\n{oracle_prices}')

def update_historical_gas_data(values):
    # print(f'values at update price: {values}')

    # Ensure the 'hour' column exists by resetting index if necessary
    if isinstance(values.index, pd.DatetimeIndex):
        values = values.reset_index().rename(columns={'index': 'hour'})
    
    if 'hour' not in values.columns:
        raise ValueError("The provided DataFrame must have a 'hour' column.")

    oracle_prices = cache.get(f'historical_gas_data', pd.DataFrame())
    # breakpoint()

    # Concatenate the new values with the existing oracle_prices
    oracle_prices = pd.concat([oracle_prices, values]).drop_duplicates(subset='hour', keep='first').reset_index(drop=True)

    # breakpoint()

    oracle_prices = oracle_prices.set_index('hour')
    oracle_prices.index = pd.to_datetime(oracle_prices.index)
    oracle_prices.resample('H').ffill()
    
    # Cache the updated oracle_prices
    cache.set(f'historical_gas_data', oracle_prices.reset_index())

    print(f'Updated historical_gas_data:\n{oracle_prices}')

def update_contract_data(data):
    print(f'new data: {data}')
    print(len(data))
    model_actions = cache.get(f'contract_data', pd.DataFrame())
    print(len(model_actions))
    print(f'model actions before update: {data}')
    model_actions = pd.concat([model_actions, data])
    print(len(model_actions))
    model_actions.drop_duplicates(subset=['transaction_hash'], keep='last', inplace=True)
    print(len(model_actions))
    cache.set(f'contract_data', model_actions)

In [7]:
# def get_eth_balances(api_key, addresses):
#     """
#     Fetch balances for multiple Ethereum addresses from Etherscan (Sepolia).
    
#     :param api_key: Your Etherscan API key as a string.
#     :param addresses: A list of Ethereum addresses.
#     :return: JSON response with balances.
#     """
#     base_url = "https://api-sepolia.etherscan.io/api"
    
#     # Convert list of addresses to comma-separated string
#     addresses_str = ",".join(addresses)
    
#     # Construct the API request URL
#     params = {
#         "module": "account",
#         "action": "balancemulti",
#         "address": addresses_str,
#         "tag": "latest",
#         "apikey": api_key
#     }
    
#     # Make the request
#     response = requests.get(base_url, params=params)
    
#     # Return the JSON response
#     return response.json()

# Gas Price Forecasting

## Short Term Query: https://dune.com/queries/4650506

## Long Term Query: https://dune.com/queries/4622627

In [8]:
def get_sepolia_gas_history(api, term='long_term'):
    lt_sepolia_gas_data_path = 'data/sepolia_gas_metrics.csv'
    st_sepolia_gas_data_path = 'data/current_sepolia_gas_metrics.csv'

    if api:
        if term == 'long_term':
            query = 4622627  # 2 Years worth
            sepolia_gas_data = dune_api_results(query, True, lt_sepolia_gas_data_path)
            sepolia_gas_data.to_csv(lt_sepolia_gas_data_path, index=False)  
        
        elif term == 'short_term':
            query = 4650506  # Runs daily at midnight for latest prices
            sepolia_gas_data = dune_api_results(query, True, st_sepolia_gas_data_path)
            sepolia_gas_data.to_csv(st_sepolia_gas_data_path, index=False)

        else:
            print('Pass "long_term" or "short_term" as parameters')
            return None

    else:
        if term == 'long_term':
            sepolia_gas_data = pd.read_csv(lt_sepolia_gas_data_path)
        elif term == 'short_term':
            sepolia_gas_data = pd.read_csv(st_sepolia_gas_data_path)
        else:
            print('Pass "long_term" or "short_term" as parameters')
            return None

    # Convert 'hour' column to datetime format
    sepolia_gas_data['hour'] = pd.to_datetime(sepolia_gas_data['hour'])

    return sepolia_gas_data

In [9]:
import matplotlib.pyplot as plt

In [10]:
# def set_global_seed(env, seed=20):
#     random.seed(seed)
#     os.environ["PYTHONHASHSEED"] = str(seed)
#     np.random.seed(seed)


In [11]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

def create_chart(df, columns, title, y2_col=None, tickprefix=dict(y1=None, y2=None), ticksuffix=dict(y1=None, y2=None),show_legend=False,auto_title=True,show=True,save_directory=None):
    # Create a subplot with a secondary y-axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Define the color cycle
    colors = ['blue', 'red', None]  # Blue, red, then default Plotly colors
    color_index = 0  # To track the current color in the cycle

    for col in columns:
        line_color = colors[color_index]  # Get the current color from the cycle
        color_index = (color_index + 1) % len(colors)  # Update to the next color, cycling back

        if col != y2_col:
            # Add traces to primary y-axis
            fig.add_trace(
                go.Scatter(
                    x=df.index,
                    y=df[col],
                    name=col.replace("_"," ").upper(),
                    line=dict(color=line_color) if line_color else {},
                    showlegend=show_legend
                ),
                secondary_y=False,  # Plot on primary y-axis
            )
        else:
            # Add traces to secondary y-axis
            fig.add_trace(
                go.Scatter(
                    x=df.index,
                    y=df[y2_col],
                    name=y2_col.replace("_"," ").upper(),
                    line=dict(color=line_color) if line_color else {},
                    showlegend=show_legend
                ),
                secondary_y=True,  # Plot on secondary y-axis
            )

    # Update the layout with titles
    fig.update_layout(
        title=title,
        xaxis_title="Date",
        legend=dict(x=0.01, y=0.99),
        template='plotly_white',
        hovermode='x unified',
        font=dict(color='black')
    )

    if auto_title:
        y1_title_text = columns[0].replace("_"," ").upper()
        y2_title_text = y2_col.replace("_"," ").upper() if y2_col else None
    else:
        y1_title_text = None
        y2_title_text = None

    # Set y-axes titles and colors
    fig.update_yaxes(title_text=y1_title_text, secondary_y=False, color='blue', tickprefix=tickprefix['y1'], ticksuffix=ticksuffix['y1'])
    fig.update_yaxes(title_text=y2_title_text, secondary_y=True, color='red', tickprefix=tickprefix['y2'], ticksuffix=ticksuffix['y2'])
    fig.update_xaxes(tickfont=dict(color='black'))

    # Show the plot
    if show:
        fig.show()

    return fig

In [12]:
def evaluate_prophet_with_regressors(df, regressors, target, train_size=0.75, freq='H',model_path='prophet_model.pkl',save=False):
    """
    Trains a Prophet model with multiple regressors and evaluates its forecast.

    Parameters:
    - df: DataFrame with ['ds', 'y'] (target variable) and regressors.
    - regressors: List of column names to use as extra regressors.
    - train_size: Proportion of the dataset to use for training (default: 0.75).
    - freq: Frequency of the dataset ('H' for hourly).

    Returns:
    - Dictionary with evaluation metrics.
    """

    # Ensure correct column names
    df = df.rename(columns={'hour': 'ds', 'median_gas_price': 'y'})

    # Sort data by timestamp
    df = df.sort_values('ds')

    # Verify all regressors exist
    missing_cols = [col for col in regressors if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing regressors in dataframe: {missing_cols}")

    # Split train/test data
    split_idx = int(len(df) * train_size)
    train_df = df.iloc[:split_idx].copy()  # Training data
    test_df = df.iloc[split_idx:].copy()   # Testing data

    # Initialize Prophet model
    m = Prophet()

    # Add regressors to Prophet
    for reg in regressors:
        m.add_regressor(reg)

    # Fit model with training data
    m.fit(train_df)

    # Save model using joblib
    if save:
        joblib.dump(m, model_path)
        print(f"✅ Model saved at: {model_path}")

    # Create future dataframe
    future = m.make_future_dataframe(periods=len(test_df), freq=freq)

    # ✅ Fix: Assign correct-sized regressors to the future dataframe
    for reg in regressors:
        if reg in df.columns:
            future[reg] = df[reg].iloc[-len(future):].values
        else:
            future[reg] = 0  # Default to zero if missing

    # Predict
    forecast = m.predict(future)

    # Merge predictions with actual test values
    merged_df = test_df.merge(forecast[['ds', 'yhat']], on='ds', how='inner')

    fig = m.plot(forecast)
    plt.title(f'Forecast for {target}')
    plt.show()

    # Extract actual and predicted values
    y_true = merged_df['y'].values
    y_pred = merged_df['yhat'].values

    # Calculate evaluation metrics
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100  # Mean Absolute Percentage Error

    # Print results
    print(f"R² Score: {r2:.4f}")
    print(f"MAE: {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAPE: {mape:.2f}%")

    return {
        "r2_score": r2,
        "mae": mae,
        "rmse": rmse,
        "mape": mape,
    }, merged_df


In [None]:
def tune_prophet_hyperparameters(target_df, train_size=0.75, freq="H"):
    """
    Tunes Prophet's seasonality hyperparameters using a 75/25 train-test split.

    Parameters:
    - target_df: DataFrame containing ['ds', 'y'] columns.
    - train_size: Proportion of data to use for training (default: 0.75).
    - freq: Frequency of the dataset (default: 'H' for hourly data).

    Returns:
    - A dictionary with the best model, best parameters, and evaluation metrics.
    """

    # Sort data by date
    target_df = target_df.sort_values("ds")

    # Split into train (75%) and test (25%) based on `train_size`
    split_idx = int(len(target_df) * train_size)
    train_df = target_df.iloc[:split_idx]
    test_df = target_df.iloc[split_idx:]

    # Define parameter grid
    param_grid = {
        "yearly_seasonality": [True, False],
        "weekly_seasonality": [True, False],
        "daily_seasonality": [True, False],
        "seasonality_mode": ["additive", "multiplicative"]
    }

    # Generate all possible parameter combinations
    param_combinations = list(product(*param_grid.values()))

    # Initialize best model tracking
    best_mape = float("inf")
    best_params = None
    best_model = None
    best_metrics = {}

    for params in param_combinations:
        try:
            # Initialize Prophet model with given parameters
            model = Prophet(
                yearly_seasonality=params[0],
                weekly_seasonality=params[1],
                daily_seasonality=params[2],
                seasonality_mode=params[3]
            )
            model.fit(train_df)  # Fit on training data

            # Create future dataframe for the test period
            future = model.make_future_dataframe(periods=len(test_df), freq=freq)
            forecast = model.predict(future)

            # Merge forecast with actual test values
            merged_df = test_df.merge(forecast[['ds', 'yhat']], on='ds', how='inner')

            # Extract actual and predicted values
            y_true = merged_df['y'].values
            y_pred = merged_df['yhat'].values

            # Compute evaluation metrics
            r2 = r2_score(y_true, y_pred)
            mae = mean_absolute_error(y_true, y_pred)
            rmse = np.sqrt(mean_squared_error(y_true, y_pred))
            mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100  # Mean Absolute Percentage Error

            # Save best model if MAPE is lower
            if mape < best_mape:
                best_mape = mape
                best_params = params
                best_model = model
                best_metrics = {"r2": r2, "mae": mae, "rmse": rmse, "mape": mape}

        except Exception as e:
            print(f"Error with parameters {params}: {e}")

    # Print best parameters
    print("Best Parameters:", dict(zip(param_grid.keys(), best_params)))
    print(f"Best MAPE: {best_mape:.2f}%")
    print(f"Best R² Score: {best_metrics['r2']:.4f}")
    print(f"Best RMSE: {best_metrics['rmse']:.4f}")
    print(f"Best MAE: {best_metrics['mae']:.4f}")

    return {
        "best_model": best_model,
        "best_params": dict(zip(param_grid.keys(), best_params)),
        "metrics": best_metrics
    }


In [14]:
def prepare_model_dataset(long_term_sepolia_gas, target):

    target_df = long_term_sepolia_gas.set_index('hour')[[target]]

    target_df.reset_index(inplace=True)
    target_df.rename(columns={"hour":"ds","median_gas_price":"y"},inplace=True)

    target_df['ds'] = pd.to_datetime(target_df['ds'])
    print(target_df.dtypes)

    target_df['ds'] = target_df['ds'].dt.strftime('%Y-%m-%d %H:00:00')
    target_df['ds'] = pd.to_datetime(target_df['ds'])

    # Load Data
    df_gas = long_term_sepolia_gas.copy()

    # Rename timestamp column for consistency
    df_gas.rename(columns={'hour': 'ds'}, inplace=True)

    # Sort by time
    df_gas = df_gas.sort_values(by="ds")

    # Compute rolling averages
    df_gas["median_gas_price_7d"] = df_gas["median_gas_price"].rolling(window=7 * 24, min_periods=1).mean()
    df_gas["median_gas_price_30d"] = df_gas["median_gas_price"].rolling(window=30 * 24, min_periods=1).mean()

    # Drop NaN values (first few rows may have missing rolling averages)
    df_gas.dropna(inplace=True)

    # Print to check data structure
    print(df_gas.head())

    df_gas.rename(columns={target:'y'},inplace=True)
    df_gas['ds'] = pd.to_datetime(df_gas['ds']).dt.strftime('%Y-%m-%d %H:00:00')
    df_gas['ds'] = pd.to_datetime(df_gas['ds'])

    # Define regressors to use
    regressor_columns = ["max_gas_price", "min_gas_price", "p25_gas_price", "p75_gas_price", 
                        "median_gas_price_7d", "median_gas_price_30d"]

    return df_gas, regressor_columns, target_df

In [None]:
def forecast_gas_prices(model, df, regressors, start_date, forecast_hours=24):
    """
    Uses an existing Prophet model trained on the last `train_hours` of data
    and forecasts `forecast_hours + any missing hours`, starting from the last data timestamp.

    Parameters:
    - model: A fitted Prophet model.
    - df: DataFrame containing historical gas price data.
    - regressors: List of additional columns to use as extra regressors.
    - forecast_hours: Number of hours to forecast (default: 24).

    Returns:
    - Forecast DataFrame with predicted hourly gas prices.
    """

    # Ensure correct column names
    df = df.rename(columns={'hour': 'ds', 'median_gas_price': 'y'})

    # Sort data by timestamp
    df = df.sort_values('ds')

    # Get the last timestamp in the dataset
    last_timestamp = df['ds'].max()

    # Get current UTC time rounded to the nearest hour
    now_utc = pd.to_datetime(start_date)

    # Calculate the gap between now and the last data point
    time_gap = (now_utc - last_timestamp).total_seconds() / 3600  # Convert to hours

    # Adjust forecast hours to account for the missing gap
    adjusted_forecast_hours = forecast_hours + int(time_gap)

    print(f"📌 Last data point: {last_timestamp}")
    print(f"📌 Time gap to now: {time_gap:.1f} hours")
    print(f"📌 Forecasting {adjusted_forecast_hours} hours from {last_timestamp}")

    # Create future dataframe starting **right after the last data point**
    future_dates = pd.date_range(start=last_timestamp + pd.Timedelta(hours=1), 
                                 periods=adjusted_forecast_hours, freq='H')

    future = pd.DataFrame({'ds': future_dates})

    # ✅ Assign regressor values for future periods
    for reg in regressors:
        if reg in df.columns:
            # Use the last known regressor value
            future[reg] = df[reg].iloc[-1]

    # Predict
    forecast = model.predict(future)

    return forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]


In [16]:
def preapre_for_live_foreast(short_term_sepolia_gas):

    forecast_sepolia = short_term_sepolia_gas.copy()

    forecast_sepolia = forecast_sepolia.sort_values(by="hour")

    # Compute rolling averages
    forecast_sepolia["median_gas_price_7d"] = forecast_sepolia["median_gas_price"].rolling(window=7 * 24, min_periods=1).mean()
    forecast_sepolia["median_gas_price_30d"] = forecast_sepolia["median_gas_price"].rolling(window=30 * 24, min_periods=1).mean()

    # Drop NaN values (first few rows may have missing rolling averages)
    forecast_sepolia.dropna(inplace=True)

    forecast_sepolia= forecast_sepolia.rename(columns={"hour":"ds","median_gas_price":"y"}).copy()

    # Print to check data structure
    print(forecast_sepolia.head())

    forecast_sepolia['ds'] = pd.to_datetime(forecast_sepolia['ds'])
    forecast_sepolia['ds'] = pd.to_datetime(forecast_sepolia['ds']
                                            ).dt.tz_localize(None)
    return forecast_sepolia

In [173]:
api = True
train_model = False
tune_hyperparameters = False

long_term_sepolia_gas = get_sepolia_gas_history(False,'long_term')
short_term_sepolia_gas = get_sepolia_gas_history(api,'short_term')

target = 'median_gas_price'
days = 30

model_path = os.path.join(base_cache_dir,'AI\prophet_model.pkl')

data_start_date = dt.datetime.now(dt.timezone.utc) - timedelta(hours=5)
data_start_date = data_start_date.strftime('%Y-%m-%d %H:00:00')

today_utc = dt.datetime.now(dt.timezone.utc) 
formatted_today_utc = today_utc.strftime('%Y-%m-%d %H:00:00')


invalid escape sequence '\p'


invalid escape sequence '\p'


invalid escape sequence '\p'



In [172]:
df_gas, regressor_columns, target_df = prepare_model_dataset(long_term_sepolia_gas, target)

if train_model:

    # Train and evaluate Prophet with regressors
    results, results_df = evaluate_prophet_with_regressors(df_gas, regressor_columns, target, train_size=0.75, freq='H',model_path=model_path,save=True)

    # Print results
    print(results)

    y_fig = create_chart(results_df.set_index('ds'),['y','yhat'],title='Actual vs Predicted',auto_title=False,show_legend=True)


ds    datetime64[ns, UTC]
y                 float64
dtype: object
                             ds  max_gas_price  median_gas_price  \
17299 2023-04-02 00:00:00+00:00   428571428571      1.501494e+09   
17298 2023-04-02 01:00:00+00:00   428571428571      1.613547e+09   
17297 2023-04-02 02:00:00+00:00   500000000007      1.841365e+09   
17296 2023-04-02 03:00:00+00:00   300000000000      2.210777e+09   
17295 2023-04-02 04:00:00+00:00   428571428571      1.605303e+09   

       min_gas_price  p25_gas_price  p75_gas_price  median_gas_price_7d  \
17299              7   1.500000e+09   4.480613e+09         1.501494e+09   
17298              7   1.500000e+09   4.500000e+09         1.557520e+09   
17297              7   1.500000e+09   4.500000e+09         1.652135e+09   
17296              7   1.500000e+09   4.500000e+09         1.791796e+09   
17295              7   1.500000e+09   4.500000e+09         1.754497e+09   

       median_gas_price_30d  
17299          1.501494e+09  
17298         

2025-05-09 17:10:55,106 DEBUG cmdstanpy cmd: where.exe tbb.dll
cwd: None
2025-05-09 17:10:55,271 DEBUG cmdstanpy TBB already found in load path
2025-05-09 17:10:55,339 INFO prophet Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
2025-05-09 17:10:55,391 DEBUG cmdstanpy input tempfile: C:\Users\brand\AppData\Local\Temp\tmp9li0w3wl\7fir9ak_.json
2025-05-09 17:10:55,794 DEBUG cmdstanpy input tempfile: C:\Users\brand\AppData\Local\Temp\tmp9li0w3wl\236hpyvy.json
2025-05-09 17:10:55,805 DEBUG cmdstanpy idx 0
2025-05-09 17:10:55,807 DEBUG cmdstanpy running CmdStan, num_threads: None
2025-05-09 17:10:55,809 DEBUG cmdstanpy CmdStan args: ['C:\\Users\\brand\\projects\\gas_accountant\\.venv\\Lib\\site-packages\\prophet\\stan_model\\prophet_model.bin', 'random', 'seed=16421', 'data', 'file=C:\\Users\\brand\\AppData\\Local\\Temp\\tmp9li0w3wl\\7fir9ak_.json', 'init=C:\\Users\\brand\\AppData\\Local\\Temp\\tmp9li0w3wl\\236hpyvy.json', 'output', 'file=C:\\Users\\

RuntimeError: Error during optimization! Command 'C:\Users\brand\projects\gas_accountant\.venv\Lib\site-packages\prophet\stan_model\prophet_model.bin random seed=63480 data file=C:\Users\brand\AppData\Local\Temp\tmp9li0w3wl\a0835fgk.json init=C:\Users\brand\AppData\Local\Temp\tmp9li0w3wl\1kvlohpr.json output file=C:\Users\brand\AppData\Local\Temp\tmp9li0w3wl\prophet_modelkemcskt8\prophet_model-20250509171101.csv method=optimize algorithm=newton iter=10000' failed: 

In [174]:
if tune_hyperparameters:
    best_result = tune_prophet_hyperparameters(target_df.reset_index())

In [175]:
# Loading the model 

prophet_model = joblib.load(model_path)
print("✅ Model loaded successfully!")

✅ Model loaded successfully!


In [21]:
# import os

# def update_prophet_model(df, model_path=r'E:\Projects\gas_accountant\AI\prophet_model.pkl', update_threshold=24):
#     """
#     Loads an existing Prophet model, updates it if new data is available, and saves it.

#     - update_threshold: Minimum number of new hours before refitting.
#     """
#     if os.path.exists(model_path):
#         prophet_model = joblib.load(model_path)
#         last_train_ds = prophet_model.history['ds'].max()
#     else:
#         prophet_model = Prophet()
#         last_train_ds = None

#     # Check if enough new data is available
#     new_data = df[df['ds'] > last_train_ds] if last_train_ds else df

#     if len(new_data) >= update_threshold:
#         print(f"🔄 Retraining model with {len(new_data)} new data points...")
#         prophet_model.fit(df)  # Retrain with updated data
#         joblib.dump(prophet_model, model_path)
#         print("✅ Model updated and saved!")
#     else:
#         print("✅ Using existing model (No significant new data).")

#     return prophet_model


In [176]:
forecast_sepolia = preapre_for_live_foreast(short_term_sepolia_gas)

                          ds max_gas_price             y min_gas_price  \
38 2025-05-08 00:00:00+00:00  500000000000  7.695511e+07          9548   
37 2025-05-08 01:00:00+00:00  110934534761  1.249613e+09         48852   
36 2025-05-08 02:00:00+00:00  200000000000  1.428428e+09       1000000   
35 2025-05-08 03:00:00+00:00  300000000000  9.985097e+08       3366976   
34 2025-05-08 04:00:00+00:00  238932654565  2.315091e+08      38009384   

    p25_gas_price  p75_gas_price  median_gas_price_7d  median_gas_price_30d  
38   1.036419e+06   1.500033e+09         7.695511e+07          7.695511e+07  
37   2.273143e+06   1.752028e+09         6.632839e+08          6.632839e+08  
36   3.733200e+06   1.629754e+09         9.183319e+08          9.183319e+08  
35   1.577748e+08   1.682994e+09         9.383763e+08          9.383763e+08  
34   1.193250e+08   1.479846e+09         7.970029e+08          7.970029e+08  


In [177]:
# ✅ Load or update the Prophet model
# model = update_prophet_model(forecast_sepolia, model_path=model_path, update_threshold=24)

# ✅ Forecast the next 24 hours, ensuring it starts from the **current hour**
forecast = forecast_gas_prices(prophet_model, forecast_sepolia,regressor_columns, start_date=formatted_today_utc, forecast_hours=24)


📌 Last data point: 2025-05-09 14:00:00
📌 Time gap to now: 8.0 hours
📌 Forecasting 32 hours from 2025-05-09 14:00:00



'H' is deprecated and will be removed in a future version, please use 'h' instead.



In [178]:
y_fig = create_chart(forecast.set_index('ds'),['yhat'],title=f"Predicted Gas Prices <br> Through {forecast['ds'].max()}",auto_title=False,show_legend=False,show=False)
y_fig.add_shape(
    type="line",
    x0=formatted_today_utc, x1=formatted_today_utc,
    y0=min(forecast['yhat']), y1=max(forecast['yhat']),
    line=dict(color="black", width=2, dash="dash")
)

In [25]:
foreast_and_real = pd.concat([forecast[['ds','yhat']].rename(columns={'yhat':'y'}),forecast_sepolia[['ds','y']]]).drop_duplicates(subset='ds').sort_values(by='ds')

In [26]:
cache.clear()

6

In [27]:
update_historical_gas_data(foreast_and_real.rename(columns={'ds':'hour'}))
update_forecast_gas_data(forecast.rename(columns={'ds':'hour'}))

forecast_gas = cache.get(f'forecast_price_data')
historical_gas = cache.get(f'historical_gas_data')

Updated historical_gas_data:
                                y
hour                             
2025-05-08 00:00:00  7.695511e+07
2025-05-08 01:00:00  1.249613e+09
2025-05-08 02:00:00  1.428428e+09
2025-05-08 03:00:00  9.985097e+08
2025-05-08 04:00:00  2.315091e+08
...                           ...
2025-05-10 16:00:00  2.616510e+09
2025-05-10 17:00:00  2.657911e+09
2025-05-10 18:00:00  2.568283e+09
2025-05-10 19:00:00  2.392087e+09
2025-05-10 20:00:00  2.265439e+09

[69 rows x 1 columns]
Updated forecast_price_data:
                             yhat    yhat_lower    yhat_upper
hour                                                         
2025-05-09 15:00:00  2.468528e+09 -4.380123e+09  9.483114e+09
2025-05-09 16:00:00  2.546280e+09 -4.209847e+09  8.939748e+09
2025-05-09 17:00:00  2.589398e+09 -4.468431e+09  9.636154e+09
2025-05-09 18:00:00  2.502911e+09 -4.494764e+09  9.262555e+09
2025-05-09 19:00:00  2.331131e+09 -4.691875e+09  8.792226e+09
2025-05-09 20:00:00  2.210007e+09 -4.574008


'H' is deprecated and will be removed in a future version, please use 'h' instead.


'H' is deprecated and will be removed in a future version, please use 'h' instead.



# Tracking the Test Protocol Gas Reverse

In [28]:
GAS_RESERVE

'0x21229F20F71882Fb61AD31Fe5501FAe26C621830'

## We can track internal TX of gas reserve contract to see how much it spends

In [29]:
# # tx_history = get_tx_with_pagination(GAS_RESERVE, current_block - 10000, current_block - 1, ETHERSCAN_KEY)
# internal_tx_history = get_tx_and_log_with_pagination(GAS_RESERVE, last_from_block, 'latest', ETHERSCAN_KEY, action='txlistinternal')

# if len(internal_tx_history) > 0:

#     processed_transactions = [process_transaction(tx) for tx in internal_tx_history]
    
#     # Convert to DataFrame
#     df = pd.DataFrame(processed_transactions)
    
#     dataset = df.copy()
#     dataset = dataset[dataset['from']==GAS_RESERVE.lower()]
#     dataset['hour'] = dataset['timestamp'].dt.strftime('%Y-%m-%d %H:00:00')
    
#     earliest_date = dataset['hour'].min()
    
#     eth_query = token_prices(['0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2'],'ethereum',earliest_date)
#     eth_df = flipside_api_results(api_key=FLIPSIDE_API_KEY,query=eth_query)
#     eth_df['hour'] = pd.to_datetime(eth_df['hour']).dt.strftime('%Y-%m-%d %H:00:00')
#     eth_df.set_index('hour',inplace=True)
#     eth_df_wide = data_processing(eth_df.reset_index())
#     eth_df_wide.index = eth_df_wide.index.strftime('%Y-%m-%d %H:00:00')
#     update_eth_price_data(eth_df_wide)

### Here we combine gas history to get full tx history

In [30]:
# short_term_sepolia_gas['hour'] = pd.to_datetime(short_term_sepolia_gas['hour']).dt.strftime('%Y-%m-%d %H:00:00')
# dataset_merged = dataset.merge(
#     short_term_sepolia_gas,
#     on='hour',
#     how='left'
# )
# print(dataset)

In [31]:
# dataset_merged['hour']

In [32]:
# dataset_merged['gasPrice'] = dataset_merged['gasPrice'].fillna(dataset_merged['median_gas_price'])
# dataset_merged['tx_fee'] = dataset_merged['tx_fee'].fillna((dataset_merged['gasPrice'] * dataset_merged['gasUsed']) / 1e18)

In [33]:
# eth_df_wide.reset_index(inplace=True)
# eth_df_wide.rename(columns={'dt':'hour'},inplace=True)

In [34]:
# dataset_merged

In [35]:
# eth_df_wide.columns

In [36]:
# dataset

In [37]:
# dataset_merged=dataset_merged.merge(
#     eth_df_wide,
#     on='hour',
#     how='left'
# )

In [38]:
# dataset_merged['tx_fee_usd'] = dataset_merged['tx_fee'] * dataset_merged['WETH_price']
# dataset_merged['tx_fee_usd']

In [39]:
# dataset_merged['transaction_hash'].values[0]

In [40]:
#USE logs instead

# update_contract_data(dataset_merged)

## We can also read the event logs to see gas requested, requester, and time requested

In [41]:
# w3.eth.block_number

In [42]:
# logs = get_tx_and_log_with_pagination(GAS_RESERVE, w3.eth.block_number - 1000000, w3.eth.block_number, ETHERSCAN_KEY,module='logs', action='getLogs')

In [43]:
# parsed_logs = [parse_gas_log(log) for log in logs]
# for log in parsed_logs:
#     print(log)

In [44]:
# df = pd.DataFrame(parsed_logs)

# # Convert timestamp to datetime format
# df["timestamp"] = pd.to_datetime(df["timestamp"])

# # Set timestamp as the index
# df.set_index("timestamp", inplace=True)

# # Sort by timestamp
# df = df.sort_index()

# # Display the first few rows
# print(df.head())

# df.reset_index(inplace=True)
# df['hour'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:00:00')

# df_merged = df.merge(
#     short_term_sepolia_gas,
#     on='hour',
#     how='left'
# )
# print(df_merged)

# df_merged=df_merged.merge(
#     eth_df_wide,
#     on='hour',
#     how='left'
# )

In [45]:
# df_merged['address'] = GAS_RESERVE

In [46]:
# update_contract_data(df_merged)

## Here we combine gas history to get full tx history

# Tracking AI Agent Gas Costs

In [47]:
current_block = w3.eth.block_number
print(f"Current block number: {current_block}")

last_from_block = cache.get('last_from_block',None)
print(f'last_from_block: {last_from_block}')

if last_from_block is None:
    last_from_block = current_block

last_from_block

Current block number: 8291913
last_from_block: None


8291913

In [48]:
BOT_1_ADDRESS

'0x21efbeE92E732D9d87Ae7b67E0aae7a972bd23F8'

In [49]:
BOT_2_ADDRESS

'0xc26204ecE1f23d993200E31A5C68bD58DDd0c6b6'

In [50]:
CHAINSETTLE_NODE = os.getenv('CHAINSETTLE_NODE')
DEX_ADDRESS = os.getenv('DEX_ADDRESS')

In [51]:
addresses = [CHAINSETTLE_NODE,DEX_ADDRESS]
addresses_lower = [addr.lower() for addr in addresses]

address_df = pd.DataFrame()

for address in addresses:
    tx_history = get_tx_and_log_with_pagination(address, last_from_block, 'latest', ETHERSCAN_KEY)

    processed_transactions = [process_transaction(tx) for tx in tx_history]

    # Convert to DataFrame
    df = pd.DataFrame(processed_transactions)

    address_df = pd.concat([address_df,df], ignore_index=True)

getting fresh data
{'status': '1', 'message': 'OK', 'result': [{'blockNumber': '8175365', 'timeStamp': '1745362944', 'hash': '0x4245fa4f6a681233de5a7e52247b1daae68557493f61ff1da7cd9d8f564d9a40', 'nonce': '412818', 'blockHash': '0x531d3ca866df30d47188e8e5bddec3491099e5d1a28f70e56f577ac51b261518', 'transactionIndex': '132', 'from': '0xc0f3833b7e7216eecd9f6bc2c7927a7aa36ab58b', 'to': '0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3', 'value': '50000000000000000', 'gas': '21000', 'gasPrice': '1000934', 'isError': '0', 'txreceipt_status': '1', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '21888647', 'gasUsed': '21000', 'confirmations': '116547', 'methodId': '0x', 'functionName': ''}, {'blockNumber': '8175379', 'timeStamp': '1745363112', 'hash': '0xb9b42d346b4f95536d44526d7e226c4f5b710eb2f867e74205bb883c6c5d64d4', 'nonce': '29', 'blockHash': '0x790621968f01897cf03408ded2367edc4f85778d577ce582555a057884ea6f8e', 'transactionIndex': '33', 'from': '0x2083b0413869f7b5b9e0ea27d20cb6dd3535

In [52]:
cache.set('last_from_block',current_block)

True

In [53]:
address_df.sort_values(by='timestamp',inplace=True)

In [54]:
dataset = address_df.copy()
dataset = dataset[dataset['from'].str.lower().isin(addresses_lower)] # only where the contracts are the sender (spend gas)
dataset['hour'] = dataset['timestamp'].dt.strftime('%Y-%m-%d %H:00:00')

In [55]:
earliest_date = cache.get('earliest_date',None)

if earliest_date is None:

    earliest_date = dataset['hour'].min()

In [56]:
days_back = (pd.to_datetime(formatted_today_utc) - pd.to_datetime(str(earliest_date))).days

In [57]:
eth_price_df = get_price_timeseries(days=days_back)
eth_price_df

Unnamed: 0_level_0,price
timestamp,Unnamed: 1_level_1
2025-02-01 00:00:00,3296.390635
2025-02-02 00:00:00,3125.038680
2025-02-03 00:00:00,2862.697619
2025-02-04 00:00:00,2877.813824
2025-02-05 00:00:00,2740.380976
...,...
2025-05-06 00:00:00,1820.004460
2025-05-07 00:00:00,1816.168694
2025-05-08 00:00:00,1810.316927
2025-05-09 00:00:00,2197.561189


In [58]:
update_eth_price_data(eth_price_df.reset_index())

In [59]:
cache.set('earliest_date',eth_price_df.index.max())

True

In [60]:
eth_price_df = cache.get(f'Prices')
eth_price_df = eth_price_df.set_index('timestamp')

In [61]:
historical_gas = historical_gas.sort_values(by='hour')

In [62]:
DEX_ADDRESS 

'0x24E1F4029BbC228B74fE221f29821CF64366C2Fa'

In [63]:
dataset[dataset['from']==DEX_ADDRESS.lower()]

Unnamed: 0,blockNumber,timestamp,transaction_hash,from,to,gas,gasPrice,gasUsed,tx_fee,contractAddress,hour
239,7612241,2025-01-31 19:11:00,0x4305292f6f9b5b6b0a098886b8055f66e930ea3679d2...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,Contract Deployment,1104335,1.008046e+10,1094481,0.011033,0x16a70cd06b19880018491233c0ef27ec5bc0e98b,2025-01-31 19:00:00
240,7612274,2025-01-31 19:17:48,0x6621a708c0ee5dc4a7d0bae37cf34b3991b401d76995...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,Contract Deployment,1104335,1.071536e+10,1094481,0.011728,0x5c6054a4f5f3b6f7485de77b18ec40708944ffc6,2025-01-31 19:00:00
241,7612287,2025-01-31 19:20:48,0x598db0ae58d790eb5eec42f400f59052b65f570cb6a0...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,Contract Deployment,1104335,1.038476e+10,1094481,0.011366,0x3fc47744b859e08c59b71c883bdc204eba100b60,2025-01-31 19:00:00
242,7612290,2025-01-31 19:21:24,0x32648451dab350c5ac996febe9dab6701683b1977813...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,Contract Deployment,1104335,1.026866e+10,1094481,0.011239,0x86ce29079cc2b017dbf3285fce40c911000a0f45,2025-01-31 19:00:00
243,7639601,2025-02-04 17:33:48,0x06ee4ad775f5457041f4804c2dc6821ddc7b2ed63bd6...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0x86ce29079cc2b017dbf3285fce40c911000a0f45,77977,3.295607e+10,51592,0.001700,,2025-02-04 17:00:00
...,...,...,...,...,...,...,...,...,...,...,...
356,8290650,2025-05-09 16:23:12,0x1ee1ef4cb655910b69308a6b775b71b06266ac623fbd...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0x3fc47744b859e08c59b71c883bdc204eba100b60,34863,2.000000e+10,26904,0.000538,,2025-05-09 16:00:00
357,8290656,2025-05-09 16:24:24,0x1383dfe81775757f17c8f2b7ba14c9260ae0f45b2754...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0x86ce29079cc2b017dbf3285fce40c911000a0f45,34851,2.000000e+10,34492,0.000690,,2025-05-09 16:00:00
358,8291535,2025-05-09 19:20:36,0x345c0b469fa6ed0c6954b731396bf2da79ea074c2a4f...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0x3fc47744b859e08c59b71c883bdc204eba100b60,34863,2.000000e+10,26904,0.000538,,2025-05-09 19:00:00
359,8291536,2025-05-09 19:20:48,0xfd423fdf3ac56716e4dfe13079119c2131d024f336f8...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0x3fc47744b859e08c59b71c883bdc204eba100b60,34863,2.000000e+10,34504,0.000690,,2025-05-09 19:00:00


In [64]:
dataset[dataset['from']==CHAINSETTLE_NODE.lower()]

Unnamed: 0,blockNumber,timestamp,transaction_hash,from,to,gas,gasPrice,gasUsed,tx_fee,contractAddress,hour
2,8189178,2025-04-24 23:58:00,0xb1d71ec4cfc9876f3186c8bb8e500b7f75e2a2802cbf...,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,Contract Deployment,741627,1.500799e+09,734629,0.001103,0x837351ffa11ff3c4058d0c1a5d47f4b8b87c67cc,2025-04-24 23:00:00
3,8189672,2025-04-25 01:42:12,0x4d0eee635a1988017f5bc543530b673e8b2389cf1a9f...,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,Contract Deployment,741627,1.507380e+09,734629,0.001107,0x8924aa4f75634cd3c53258c5c998a137fe170b4b,2025-04-25 01:00:00
4,8189723,2025-04-25 01:53:24,0x8d101178638a6efa2690747fe3acedcfb12482f14b21...,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,0x8924aa4f75634cd3c53258c5c998a137fe170b4b,117394,2.007358e+09,96862,0.000194,,2025-04-25 01:00:00
5,8189763,2025-04-25 02:01:48,0x1464a90856de75ebe4b9acea8ba152fbcc4f223a2129...,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,0x8924aa4f75634cd3c53258c5c998a137fe170b4b,45031,2.014007e+09,37162,0.000075,,2025-04-25 02:00:00
6,8189773,2025-04-25 02:03:48,0x1804bc8c67bf5c23c1adbfc3ab954df115120bdf58f7...,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,0x8924aa4f75634cd3c53258c5c998a137fe170b4b,45031,2.014523e+09,37162,0.000075,,2025-04-25 02:00:00
...,...,...,...,...,...,...,...,...,...,...,...
232,8284214,2025-05-08 18:51:12,0xc1b6cc7f49a0fbe7c76309edb5270465b840ef406bd9...,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,0xebdbcab457ef0cb6d3d3cc83db172c22c67846f6,89738,2.280846e+09,73906,0.000169,,2025-05-08 18:00:00
233,8285353,2025-05-08 22:39:24,0x535ec2f0a1c064b0355e3fcf182acfa96935b7eba30b...,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,Contract Deployment,875466,2.001008e+09,722652,0.001446,0xb668afa18c94ab722d89bc9c86e15fa980c952fe,2025-05-08 22:00:00
234,8285354,2025-05-08 22:39:36,0xe664998a9ebb94d351d5d057c9ad20ab85d67f11a6fc...,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,Contract Deployment,2430435,2.000983e+09,2008257,0.004018,0xc329e9a7c73ae00e31d3e892030574582f470b34,2025-05-08 22:00:00
235,8285355,2025-05-08 22:39:48,0xfda16dddf7be2e03d7d9e062b6028c55055c7c9968e1...,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,0xb668afa18c94ab722d89bc9c86e15fa980c952fe,110340,2.000941e+09,91006,0.000182,,2025-05-08 22:00:00


In [65]:
# historical_gas['hour'] = pd.to_datetime(historical_gas['hour']).dt.strftime('%Y-%m-%d %H:00:00')
# dataset_merged = dataset.merge(
#     historical_gas,
#     on='hour',
#     how='left'
# ).dropna()
# print(dataset_merged)

In [66]:
dataset = dataset.sort_values(by='hour')

In [67]:
dataset.index.duplicated().sum()

np.int64(0)

In [68]:
update_contract_data(dataset)

new data:      blockNumber           timestamp  \
239      7612241 2025-01-31 19:11:00   
240      7612274 2025-01-31 19:17:48   
241      7612287 2025-01-31 19:20:48   
242      7612290 2025-01-31 19:21:24   
243      7639601 2025-02-04 17:33:48   
..           ...                 ...   
355      8290645 2025-05-09 16:22:12   
357      8290656 2025-05-09 16:24:24   
359      8291536 2025-05-09 19:20:48   
358      8291535 2025-05-09 19:20:36   
360      8291540 2025-05-09 19:21:36   

                                      transaction_hash  \
239  0x4305292f6f9b5b6b0a098886b8055f66e930ea3679d2...   
240  0x6621a708c0ee5dc4a7d0bae37cf34b3991b401d76995...   
241  0x598db0ae58d790eb5eec42f400f59052b65f570cb6a0...   
242  0x32648451dab350c5ac996febe9dab6701683b1977813...   
243  0x06ee4ad775f5457041f4804c2dc6821ddc7b2ed63bd6...   
..                                                 ...   
355  0x369b1c548bb7932c43a05ed12d53a5adf7f91a45847c...   
357  0x1383dfe81775757f17c8f2b7ba14c9260ae0f4

# Burn Rate Calculations

In [94]:
y_fig

In [136]:
contract_data = cache.get(f'contract_data')

balance_data = cache.get(f'balance_data')

forecast_data = cache.get('forecast_price_data')

In [137]:
balance_data

In [138]:
contract_data

Unnamed: 0,blockNumber,timestamp,transaction_hash,from,to,gas,gasPrice,gasUsed,tx_fee,contractAddress,hour
239,7612241,2025-01-31 19:11:00,0x4305292f6f9b5b6b0a098886b8055f66e930ea3679d2...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,Contract Deployment,1104335,1.008046e+10,1094481,0.011033,0x16a70cd06b19880018491233c0ef27ec5bc0e98b,2025-01-31 19:00:00
240,7612274,2025-01-31 19:17:48,0x6621a708c0ee5dc4a7d0bae37cf34b3991b401d76995...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,Contract Deployment,1104335,1.071536e+10,1094481,0.011728,0x5c6054a4f5f3b6f7485de77b18ec40708944ffc6,2025-01-31 19:00:00
241,7612287,2025-01-31 19:20:48,0x598db0ae58d790eb5eec42f400f59052b65f570cb6a0...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,Contract Deployment,1104335,1.038476e+10,1094481,0.011366,0x3fc47744b859e08c59b71c883bdc204eba100b60,2025-01-31 19:00:00
242,7612290,2025-01-31 19:21:24,0x32648451dab350c5ac996febe9dab6701683b1977813...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,Contract Deployment,1104335,1.026866e+10,1094481,0.011239,0x86ce29079cc2b017dbf3285fce40c911000a0f45,2025-01-31 19:00:00
243,7639601,2025-02-04 17:33:48,0x06ee4ad775f5457041f4804c2dc6821ddc7b2ed63bd6...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0x86ce29079cc2b017dbf3285fce40c911000a0f45,77977,3.295607e+10,51592,0.001700,,2025-02-04 17:00:00
...,...,...,...,...,...,...,...,...,...,...,...
355,8290645,2025-05-09 16:22:12,0x369b1c548bb7932c43a05ed12d53a5adf7f91a45847c...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0x3fc47744b859e08c59b71c883bdc204eba100b60,34875,2.000000e+10,34516,0.000690,,2025-05-09 16:00:00
357,8290656,2025-05-09 16:24:24,0x1383dfe81775757f17c8f2b7ba14c9260ae0f45b2754...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0x86ce29079cc2b017dbf3285fce40c911000a0f45,34851,2.000000e+10,34492,0.000690,,2025-05-09 16:00:00
359,8291536,2025-05-09 19:20:48,0xfd423fdf3ac56716e4dfe13079119c2131d024f336f8...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0x3fc47744b859e08c59b71c883bdc204eba100b60,34863,2.000000e+10,34504,0.000690,,2025-05-09 19:00:00
358,8291535,2025-05-09 19:20:36,0x345c0b469fa6ed0c6954b731396bf2da79ea074c2a4f...,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0x3fc47744b859e08c59b71c883bdc204eba100b60,34863,2.000000e+10,26904,0.000538,,2025-05-09 19:00:00


In [139]:
contract_data['hour'] = pd.to_datetime(contract_data['hour'])

In [146]:
contract_data[['timestamp','from','tx_fee']]

Unnamed: 0,timestamp,from,tx_fee
239,2025-01-31 19:11:00,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0.011033
240,2025-01-31 19:17:48,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0.011728
241,2025-01-31 19:20:48,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0.011366
242,2025-01-31 19:21:24,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0.011239
243,2025-02-04 17:33:48,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0.001700
...,...,...,...
355,2025-05-09 16:22:12,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0.000690
357,2025-05-09 16:24:24,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0.000690
359,2025-05-09 19:20:48,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0.000690
358,2025-05-09 19:20:36,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0.000538


In [153]:
filled = (
    contract_data
      .set_index('hour')
      .groupby('from', group_keys=True)
      .resample('H')[['tx_fee']].sum()
      .fillna(0)        # fill any remaining NaNs with 0
      .reset_index()
)



'H' is deprecated and will be removed in a future version, please use 'h' instead.



In [154]:
filled

Unnamed: 0,from,hour,tx_fee
0,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,2025-01-31 19:00:00,0.045365
1,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,2025-01-31 20:00:00,0.000000
2,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,2025-01-31 21:00:00,0.000000
3,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,2025-01-31 22:00:00,0.000000
4,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,2025-01-31 23:00:00,0.000000
...,...,...,...
2684,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,2025-05-08 18:00:00,0.006906
2685,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,2025-05-08 19:00:00,0.000000
2686,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,2025-05-08 20:00:00,0.000000
2687,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,2025-05-08 21:00:00,0.000000


In [None]:
burn_rate = filled[filled['hour'] <= dt.datetime.now() - timedelta(days=7)].groupby('from')[['tx_fee']].mean() # avg houlry burn last 7 days
burn_rate

Unnamed: 0_level_0,tx_fee
from,Unnamed: 1_level_1
0x24e1f4029bbc228b74fe221f29821cf64366c2fa,8.5e-05
0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,0.002368


In [162]:
contract_data['from'].unique()

array(['0x24e1f4029bbc228b74fe221f29821cf64366c2fa',
       '0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3'], dtype=object)

In [163]:
addresses = contract_data['from'].unique()

balance_dict = {
    addr: w3.from_wei(
        w3.eth.get_balance(w3.to_checksum_address(addr)),
        'ether'
    )
    for addr in addresses
}

# Now you can do:
print(balance_dict)
# {
#   '0x24e1f4029bbc228b74fe221f29821cf64366c2fa': 0.003444284611634465,
#   '0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3': …,
# }


{'0x24e1f4029bbc228b74fe221f29821cf64366c2fa': Decimal('0.003444284611634465'), '0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3': Decimal('0.264342927117024646')}


In [164]:
balance_data = pd.DataFrame([balance_dict]).melt().rename(columns={'variable':'address','value':'eth_balance'}).set_index('address').sort_values(by='eth_balance',ascending=False)

In [165]:
balance_data

Unnamed: 0_level_0,eth_balance
address,Unnamed: 1_level_1
0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,0.2643429271170246
0x24e1f4029bbc228b74fe221f29821cf64366c2fa,0.0034442846116344


In [166]:
burn_rate

Unnamed: 0_level_0,tx_fee
from,Unnamed: 1_level_1
0x24e1f4029bbc228b74fe221f29821cf64366c2fa,8.5e-05
0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,0.002368


In [167]:
HOURS_PER_YEAR = 365 * 24

# 1) Align and merge your two tables as before
burn = burn_rate.rename_axis("address").reset_index()
bal  = balance_data.rename_axis("address").reset_index()
df   = pd.merge(burn, bal, on="address", how="inner")

# 2) Cast the Decimal columns to float
df["tx_fee"]     = df["tx_fee"].astype(float)
df["eth_balance"] = df["eth_balance"].apply(float)

# 3) Compute the burn-rate ratio:
#    ratio = (hourly burn) / (balance ÷ hours_per_year)
df["burn_rate_ratio"] = df["tx_fee"] / (df["eth_balance"] / HOURS_PER_YEAR)

# (Optional) % of balance per year
df["burn_rate_pct"] = df["burn_rate_ratio"] * 100

print(df[["address","tx_fee","eth_balance","burn_rate_ratio"]])


                                      address    tx_fee  eth_balance  \
0  0x24e1f4029bbc228b74fe221f29821cf64366c2fa  0.000085     0.003444   
1  0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3  0.002368     0.264343   

   burn_rate_ratio  
0       214.942291  
1        78.487617  


In [169]:
df["runway_years"]  = 1 / df["burn_rate_ratio"]
df["runway_days"]   = df["runway_years"] * 365
df["runway_hours"]  = df["runway_years"] * 365 * 24

In [170]:
df

Unnamed: 0,address,tx_fee,eth_balance,burn_rate_ratio,burn_rate_pct,runway_years,runway_days,runway_hours
0,0x24e1f4029bbc228b74fe221f29821cf64366c2fa,8.5e-05,0.003444,214.942291,21494.229144,0.004652,1.69813,40.755125
1,0x6fbc41ea9cff9f1c2dcc8f61e190623d0b1cd7b3,0.002368,0.264343,78.487617,7848.761715,0.012741,4.650415,111.609962


In [None]:
sustainable_ceiling = balance_data['balance_ETH'].values[0] / (365*24)

In [None]:
# 1️⃣ Calculate average hourly ETH spent on gas
hourly_gas_spent = abs(contract_data['gas_amount_eth'].mean())

# 2️⃣ Calculate total ETH balance
eth_balance = balance_data['balance_ETH'].values[0]

# 3️⃣ Calculate ETH cash runway (hours)
eth_runway_hours = eth_balance / hourly_gas_spent

# 4️⃣ Convert to days, months, and years
eth_runway_days = eth_runway_hours / 24
eth_runway_months = eth_runway_days / 30
eth_runway_years = eth_runway_months / 12

# 5️⃣ Burn rate ratio: ETH spent per hour compared to sustainable ETH burn
burn_rate_ratio = hourly_gas_spent / (eth_balance / (365 * 24))

# ✅ Print results
print(f"ETH Burn Rate: {hourly_gas_spent}")
print(f'ETH Balance {eth_balance}')
print(f"Sustainable Ceiling: {sustainable_ceiling}")
print(f"ETH Gas Runway: {eth_runway_hours:.2f} hours")
print(f"ETH Gas Runway: {eth_runway_days:.2f} days")
print(f"ETH Gas Runway: {eth_runway_months:.2f} months")
print(f"ETH Gas Runway: {eth_runway_years:.2f} years")
print(f"ETH Burn Rate Ratio: {burn_rate_ratio:.4f}")


In [None]:
balance_data

In [None]:
burn_rate_ratio_timeseries = {
    'date':formatted_today_utc,
    'burn_rate_ratio':burn_rate_ratio,
    'burn_rate ETH (avg hourly spend)':contract_data['gas_amount_eth'].mean(),
    'balance': balance_data['balance_ETH'].values[0]
}

In [None]:
pd.DataFrame([burn_rate_ratio_timeseries])

In [None]:
dataset_merged.columns

In [None]:
dataset_merged[['tx_fee_usd','tx_fee']]

In [None]:
eth_data = get_eth_balances(ETHERSCAN_KEY, [ACCOUNT_ADDRESS,BOT_1_ADDRESS,BOT_2_ADDRESS])

In [None]:
eth_data

In [None]:
pd.DataFrame(eth_data['result'])

In [None]:
current_balances = pd.DataFrame(eth_data['result'])
current_balances['balance'] = pd.to_numeric(current_balances['balance'], errors='coerce')

# Convert from wei to ETH
current_balances['balance_ETH'] = current_balances['balance'] / 1e18

In [None]:
current_balances['hour'] = formatted_today_utc

In [None]:
current_balances

In [None]:
update_balance_data(current_balances)

In [None]:
len(forecast_data.index)

In [None]:
len(forecast_data[forecast_data['hour'] >= formatted_today_utc].index)

In [None]:
hourly_burn_wei = contract_data['gas_amount_eth'].mean() * 1e18
hourly_burn_wei


In [None]:
contract_data["gas_amount_eth"] * 1e18

In [None]:
contract_data["rolling_burn_wei"] = (
    contract_data["gas_amount_eth"] * 1e18
).rolling(window=24, min_periods=1).mean()

In [None]:
forecast_period = forecast_data[forecast_data['hour'] >= formatted_today_utc]

In [None]:
# Extend rolling average trend for the forecast period
future_burn_trend = (
    contract_data["rolling_burn_wei"].iloc[-24:].mean()
)  # Use the last 24-hour average as a baseline

# Apply this rolling average trend to future timestamps
estimated_rolling_burn_df = pd.DataFrame(
    index=forecast_period['hour'],
    data=[future_burn_trend] * len(forecast_period),
    columns=["estimated_burn_wei"],
)


In [None]:
estimated_rolling_burn_df

In [None]:
estimated_static_future_burn = pd.DataFrame(index=forecast_data[forecast_data['hour'] >= formatted_today_utc]['hour'],data=future_burn_trend,columns=['estimated burn'])
estimated_static_future_burn

In [None]:
# estimated_burn_df = pd.DataFrame(index=forecast_period["hour"])

# # Assign last N rolling values, allowing for missing values
# # estimated_burn_df["estimated_burn_wei"] = contract_data["rolling_burn_wei"].iloc[-len(forecast_period):].values

# # Interpolate missing values smoothly
# estimated_burn_df["estimated_burn_wei"] = contract_data["rolling_burn_wei"].interpolate(method="linear")


In [None]:
estimated_rolling_burn_df

In [None]:
# ✅ Convert gas price from Gwei to Wei
forecasted_costs = estimated_rolling_burn_df.merge(
    forecast_data[["hour", "yhat"]], on="hour", how="left"
)

# ✅ Convert `yhat` from Gwei to Wei before multiplying
forecasted_costs["yhat_wei"] = forecasted_costs["yhat"] * 1e9  # Convert Gwei → Wei

# ✅ Calculate estimated gas cost in ETH (Burn * Gas Price in Wei)
forecasted_costs["estimated_gas_cost_eth"] = (
    forecasted_costs["estimated_burn_wei"] * forecasted_costs["yhat_wei"]
) / 1e18  # Convert back to ETH


In [None]:
# ✅ Convert `yhat` from Gwei → ETH
forecasted_costs["yhat_eth"] = forecasted_costs["yhat"] / 1e9  # Correct conversion

# ✅ Compute estimated gas cost in ETH
forecasted_costs["estimated_gas_cost_eth"] = (
    (forecasted_costs["estimated_burn_wei"] / 1e18) * forecasted_costs["yhat_eth"]
)
forecasted_costs["estimated_gas_cost_eth"]

In [None]:
forecasted_costs["yhat_eth"]

In [None]:
forecasted_costs

In [None]:
forecasted_costs["yhat"]

In [None]:
forecasted_costs["estimated_burn_wei"] / 1e18

In [None]:
forecasted_costs["estimated_gas_cost_eth"]

In [None]:
forecasted_costs["estimated_burn_wei"]

In [None]:
forecasted_costs["estimated_gas_cost_eth"]

In [None]:
create_chart(forecasted_costs.set_index('hour'),title='forecasted contract costs',columns=['estimated_gas_cost_eth'])

# Gas Reserve

In [None]:
forecasted_costs

In [None]:
# forecast_data[]

In [None]:
# gas_reserve_logs["gas_amount_wei"] = gas_reserve_logs["gas_amount_eth"] * 1e18


In [None]:
# df_merged['hour'] = pd.to_datetime(df_merged['hour'])

In [None]:
# gas_burn_data = df_merged.merge(forecast_data, on="hour", how="left")

# # Estimate ETH spent using forecasted gas prices
# gas_burn_data["estimated_burn_eth"] = gas_burn_data["gas_amount_eth"] * gas_burn_data["yhat"]

In [None]:
# os.chdir('notebooks')