In [1]:
from web3 import Web3
import json
from tqdm import tqdm
import os
import pandas as pd

infura_url = 'https://mainnet.infura.io/v3/b7b06ad6a7304e2197efa10b79e1c867'
web3 = Web3(Web3.HTTPProvider(infura_url))



In [2]:
data_parts = ['data/Merged_CEX_DEX_v2_p1.pkl', 'data/Merged_CEX_DEX_v2_p2.pkl', 'data/Merged_CEX_DEX_v2_p3.pkl']
data = pd.concat([pd.read_pickle(part).assign(time=lambda df: pd.to_datetime(df['time'])) for part in data_parts])
data = data.sort_values('time')

In [3]:
gas_price_eth = {}

# Load existing data if available
if os.path.exists('gas_prices.json'):
    with open('gas_prices.json', 'r') as file:
        gas_price_eth = json.load(file)

unique_blocks = set(data.blockNumber.unique()) - set(gas_price_eth.keys())

for block_number in tqdm(unique_blocks):
    try:
        block = web3.eth.get_block(int(block_number), full_transactions=True)
        if block and 'transactions' in block:
            total_gas_price = sum(tx['gasPrice'] * 10**-18 for tx in block['transactions'])
            average_gas_price = total_gas_price / len(block['transactions']) if block['transactions'] else 0
            gas_price_eth[str(block_number)] = float(average_gas_price)
    except Exception as e:
        print(f"Error processing block {block_number}: {e}")

    # Periodically save the dictionary to avoid losing data
    if len(gas_price_eth) % 10 == 0:
        with open('gas_prices.json', 'w') as file:
            json.dump(gas_price_eth, file)

# Final save to ensure no data is lost
with open('gas_prices.json', 'w') as file:
    json.dump(gas_price_eth, file)


  0%|          | 7/120132 [00:03<14:53:29,  2.24it/s]


KeyboardInterrupt: 

In [14]:
from joblib import Parallel, delayed
from web3 import Web3
import json
import pandas as pd

INFURA_KEYS = ['a22a463543fe4798ac48207582c43bef', 'a35a2a764b154f5b8b3f37695d08fc44',
               '778d27622a1e4b9bb01a3cd903a6bc9e', '4d08ab7e29294be397e15ec5503401ca'] 

def save_intermediate_results(gas_prices, filename='gas_prices_intermediate.json'):
    try:
        # Attempt to load existing data if the file exists
        if os.path.exists(filename):
            with open(filename, 'r') as file:
                existing_data = json.load(file)
        else:
            existing_data = {}
        
        # Update existing data with new gas prices
        existing_data.update(gas_prices)
        
        # Write the updated data back to the file
        with open(filename, 'w') as file:
            json.dump(existing_data, file)
    except Exception as e:
        print(f"Error saving intermediate results: {e}")


# Function to load and preprocess data
def load_and_preprocess_data():
    data_parts = ['data/Merged_CEX_DEX_v2_p1.pkl', 'data/Merged_CEX_DEX_v2_p2.pkl', 'data/Merged_CEX_DEX_v2_p3.pkl']
    data = pd.concat([pd.read_pickle(part).assign(time=lambda df: pd.to_datetime(df['time'])) for part in data_parts])
    return data.sort_values('time')

# Function to split blocks into N parts
def split_blocks(blocks, n):
    k, m = divmod(len(blocks), n)
    return (blocks[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))


def fetch_gas_prices(block_numbers, api_key, filename):
    infura_url = f'https://mainnet.infura.io/v3/{api_key}'
    web3 = Web3(Web3.HTTPProvider(infura_url))
    gas_price_eth = {}
    
    # Initialize a counter to track every 10 blocks
    counter = 0
    
    for block_number in block_numbers:
        try:
            block = web3.eth.get_block(int(block_number), full_transactions=True)
            if block and 'transactions' in block:
                total_gas_price = sum(tx['gasPrice'] * 10**-18 for tx in block['transactions'])
                average_gas_price = total_gas_price / len(block['transactions']) if block['transactions'] else 0
                gas_price_eth[str(block_number)] = float(average_gas_price)
                counter += 1
                
                # Check if the counter has reached a multiple of 10
                if counter % 10 == 0:
                    save_intermediate_results(gas_price_eth, filename)
                    print(f"Saved intermediate results after {counter} blocks.")
                    
        except Exception as e:
            print(f"Error processing block {block_number}: {e}")
    
    # Ensure any remaining results are saved
    if counter % 10 != 0:
        save_intermediate_results(gas_price_eth, filename)
        print(f"Saved final intermediate results after {counter} blocks.")
    
    return gas_price_eth


# Main script
if __name__ == "__main__":
    data = load_and_preprocess_data()  # Assume this function is defined
    unique_blocks = list(data.blockNumber.unique())
    block_segments = list(split_blocks(unique_blocks, len(INFURA_KEYS)))
    
    # Adjust the call to include a filename for intermediate saves
    results = Parallel(n_jobs=len(INFURA_KEYS))(
        delayed(fetch_gas_prices)(blocks, INFURA_KEYS[i], f'gas_prices_batch_{i}.json') 
        for i, blocks in enumerate(block_segments)
    )
    
    all_gas_prices = {}
    for result in results:
        all_gas_prices.update(result)
    
    # Optionally, consolidate all intermediate saves into a final file
    with open('gas_prices_final.json', 'w') as file:
        json.dump(all_gas_prices, file)





Saved intermediate results after 10 blocks.
Saved intermediate results after 10 blocks.
Saved intermediate results after 10 blocks.
Saved intermediate results after 10 blocks.
Saved intermediate results after 20 blocks.
Saved intermediate results after 20 blocks.
Saved intermediate results after 20 blocks.
Saved intermediate results after 20 blocks.
Saved intermediate results after 30 blocks.
Saved intermediate results after 30 blocks.
Saved intermediate results after 30 blocks.
Saved intermediate results after 30 blocks.
Saved intermediate results after 40 blocks.
Saved intermediate results after 40 blocks.
Saved intermediate results after 40 blocks.
Saved intermediate results after 40 blocks.
Saved intermediate results after 50 blocks.
Saved intermediate results after 50 blocks.
Saved intermediate results after 50 blocks.
Saved intermediate results after 50 blocks.
Saved intermediate results after 60 blocks.
Saved intermediate results after 60 blocks.
Saved intermediate results a

In [10]:
with open('gas_prices.json', 'r') as file:
    gas_price_eth = json.load(file)
import numpy as np
print(len(np.unique(list(gas_price_eth.keys()))))

53711
