In [1]:
from web3 import Web3
import json
from tqdm import tqdm
import os
import pandas as pd

# The Infura project key is a credential, so it is read from the environment
# instead of being committed with the notebook.
infura_api_key = os.environ.get('INFURA_API_KEY')
if not infura_api_key:
    raise RuntimeError(
        "Set the INFURA_API_KEY environment variable to your Infura project key "
        "before running this notebook."
    )

infura_url = f'https://mainnet.infura.io/v3/{infura_api_key}'
# HTTP JSON-RPC client used by the cells below to fetch blocks.
web3 = Web3(Web3.HTTPProvider(infura_url))



In [2]:
# The merged CEX/DEX dataset is stored as three pickled chunks.
# NOTE: unpickling executes arbitrary code; only load files you trust.
data_parts = [
    'data/Merged_CEX_DEX_v2_p1.pkl',
    'data/Merged_CEX_DEX_v2_p2.pkl',
    'data/Merged_CEX_DEX_v2_p3.pkl',
]


def _read_part(path):
    """Read one pickled chunk and convert its 'time' column with pd.to_datetime."""
    frame = pd.read_pickle(path)
    return frame.assign(time=pd.to_datetime(frame['time']))


# Stack the chunks (original row indices are kept) and order rows by time.
data = pd.concat([_read_part(path) for path in data_parts]).sort_values('time')

In [3]:
gas_price_eth = {}

# Load existing data if available so an interrupted run can be resumed.
if os.path.exists('gas_prices.json'):
    with open('gas_prices.json', 'r') as file:
        gas_price_eth = json.load(file)


def _save_gas_prices(prices, path='gas_prices.json'):
    """Write `prices` to `path` as JSON without ever exposing a partial file.

    The data is dumped to a sibling temporary file first and then moved over
    the target with os.replace, so an interrupt in the middle of json.dump
    cannot leave a truncated cache behind.
    """
    tmp_path = f'{path}.tmp'
    with open(tmp_path, 'w') as file:
        json.dump(prices, file)
    os.replace(tmp_path, path)


# Cache keys are the str() form of the block number (JSON object keys are
# always strings), so the "already fetched" check must compare on that same
# string form. Comparing the raw block numbers against the string keys never
# matches and would re-fetch every cached block.
unique_blocks = [
    block_number
    for block_number in data.blockNumber.unique()
    if str(block_number) not in gas_price_eth
]

SAVE_EVERY = 10  # flush the cache after this many newly fetched blocks
unsaved_blocks = 0

try:
    for block_number in tqdm(unique_blocks):
        try:
            block = web3.eth.get_block(int(block_number), full_transactions=True)
            if block and 'transactions' in block:
                transactions = block['transactions']
                # gasPrice values are scaled by 1e-18 before averaging
                # (presumably wei -> ETH, going by the variable name).
                total_gas_price = sum(tx['gasPrice'] * 10**-18 for tx in transactions)
                # A block without transactions is recorded with price 0.
                average_gas_price = total_gas_price / len(transactions) if transactions else 0
                gas_price_eth[str(block_number)] = float(average_gas_price)
                unsaved_blocks += 1
        except Exception as e:
            # Best effort: report the failing block and keep going; it stays
            # out of the cache and is retried on the next run.
            print(f"Error processing block {block_number}: {e}")

        # Periodically save the dictionary to avoid losing data. Counting new
        # entries (rather than testing the dict length) avoids rewriting the
        # file on every failed block.
        if unsaved_blocks >= SAVE_EVERY:
            _save_gas_prices(gas_price_eth)
            unsaved_blocks = 0
finally:
    # Final save to ensure no data is lost; this also runs when the loop is
    # stopped with KeyboardInterrupt.
    _save_gas_prices(gas_price_eth)


  0%|          | 7/120132 [00:03<14:53:29,  2.24it/s]


KeyboardInterrupt: 

In [12]:
from joblib import Parallel, delayed
from web3 import Web3
import json
import pandas as pd

INFURA_KEYS = ['a22a463543fe4798ac48207582c43bef', 'a35a2a764b154f5b8b3f37695d08fc44',
               '778d27622a1e4b9bb01a3cd903a6bc9e', '4d08ab7e29294be397e15ec5503401ca'] 

# Function to load and preprocess data
def load_and_preprocess_data():
    """Load the pickled dataset parts and return them as one time-sorted frame.

    Each part is read with pd.read_pickle, its 'time' column is converted with
    pd.to_datetime, and the parts are concatenated (original row indices are
    kept) and sorted by 'time'.

    Returns:
        pandas.DataFrame with the rows of all parts ordered by 'time'.
    """
    part_paths = (
        'data/Merged_CEX_DEX_v2_p1.pkl',
        'data/Merged_CEX_DEX_v2_p2.pkl',
        'data/Merged_CEX_DEX_v2_p3.pkl',
    )

    frames = []
    for path in part_paths:
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        frame = pd.read_pickle(path)
        frames.append(frame.assign(time=pd.to_datetime(frame['time'])))

    combined = pd.concat(frames)
    return combined.sort_values('time')

# Function to split blocks into N parts
def split_blocks(blocks, n):
    """Lazily yield `n` consecutive slices of `blocks` with near-equal sizes.

    The first ``len(blocks) % n`` slices hold one element more than the rest,
    so slice lengths differ by at most one and their concatenation reproduces
    `blocks`.

    Args:
        blocks: Sliceable sequence to partition.
        n: Number of slices to produce.

    Returns:
        A generator over the `n` slices, in order.
    """
    base_size, extra = divmod(len(blocks), n)
    part_indices = range(n)

    def _slices():
        start = 0
        for index in part_indices:
            # The leading `extra` parts absorb the remainder, one element each.
            stop = start + base_size + (1 if index < extra else 0)
            yield blocks[start:stop]
            start = stop

    return _slices()


# Function to fetch and calculate gas prices for a given list of blocks using a specific API key
def fetch_gas_prices(block_numbers, api_key):
    """Fetch the given blocks from Infura and average their transaction gas prices.

    A dedicated Web3 HTTP client is built from `api_key`. For every block that
    is returned with a 'transactions' entry, each transaction's gasPrice is
    scaled by 10**-18 (presumably wei -> ETH) and the values are averaged; a
    block without transactions is recorded as 0. A block whose processing
    raises is reported on stdout and left out of the result.

    Args:
        block_numbers: Iterable of block numbers (each is passed through int()).
        api_key: Infura project key used to build the endpoint URL.

    Returns:
        dict mapping str(block_number) to the average gas price as a float.
    """
    endpoint = f'https://mainnet.infura.io/v3/{api_key}'
    client = Web3(Web3.HTTPProvider(endpoint))
    prices_by_block = {}

    for number in block_numbers:
        try:
            block = client.eth.get_block(int(number), full_transactions=True)
            if not block or 'transactions' not in block:
                continue

            txs = block['transactions']
            summed_price = sum(tx['gasPrice'] * 10**-18 for tx in txs)
            if txs:
                mean_price = summed_price / len(txs)
            else:
                mean_price = 0
            prices_by_block[str(number)] = float(mean_price)
        except Exception as e:
            print(f"Error processing block {number}: {e}")

    return prices_by_block

# Main script: fetch gas prices for every block in the dataset, using one
# parallel worker per Infura API key, and write the merged result to disk.
if __name__ == "__main__":
    # Work is split into len(INFURA_KEYS) segments; with no keys that split
    # fails with an opaque ZeroDivisionError, so fail early with a clear message
    # before the dataset is loaded.
    if not INFURA_KEYS:
        raise RuntimeError(
            "INFURA_KEYS is empty: at least one Infura API key is required to fetch blocks."
        )

    data = load_and_preprocess_data()  # Load and preprocess your data here
    unique_blocks = list(data.blockNumber.unique())
    block_segments = list(split_blocks(unique_blocks, len(INFURA_KEYS)))

    # Use Joblib to run fetch_gas_prices in parallel; each segment is paired
    # with its own API key.
    results = Parallel(n_jobs=len(INFURA_KEYS))(
        delayed(fetch_gas_prices)(blocks, api_key)
        for blocks, api_key in zip(block_segments, INFURA_KEYS)
    )

    # Merge the per-worker dictionaries; segments come from a de-duplicated
    # block list, so they do not share keys.
    all_gas_prices = {}
    for result in results:
        all_gas_prices.update(result)

    # Save the combined results
    with open('gas_prices_final.json', 'w') as file:
        json.dump(all_gas_prices, file)



In [10]:
import numpy as np

# Reload the cached gas prices written by the fetch loop.
with open('gas_prices.json', 'r') as file:
    gas_price_eth = json.load(file)

# Report how many distinct block keys the cache currently holds.
print(np.unique(list(gas_price_eth)).size)

53711
