In [None]:
from web3 import Web3
import pandas as pd
import datetime
from concurrent.futures import ProcessPoolExecutor
import numpy as np

In [None]:
import os

# Infura API keys. Each one can be overridden through an environment variable so the
# notebook can be run with different credentials without editing this cell.
# NOTE(review): the fallback literals below are credentials stored in plain text in the
# notebook; consider rotating them and removing the fallbacks.
#key_1 = "4d08ab7e29294be397e15ec5503401ca"
key_2 = os.environ.get("INFURA_KEY_2", "d4d8ddd18ef6438ea49fd3d5006b7eba")
key_3 = os.environ.get("INFURA_KEY_3", "b7b06ad6a7304e2197efa10b79e1c867")
key_4 = os.environ.get("INFURA_KEY_4", "58bce48fe80b451c96fee8ef35bbcce8")

# Base endpoint URL; it does not contain an API key.
provider = "https://mainnet.infura.io/v3/"

In [None]:
# Sequential (single-process) download of every transaction in the block range.
# `provider` is only the base URL, so an API key is appended here, the same way
# fetch_transactions builds its endpoint.
w3 = Web3(Web3.HTTPProvider(provider + key_2))

start_block = 18935888
end_block   = 18993488

# One dict per transaction; turned into a DataFrame once the loop has finished.
transactions_data = []

for block_number in range(start_block, end_block + 1):
    block = w3.eth.get_block(block_number, full_transactions=True)
    # `datetime` is the module in this cell (`import datetime` in the first cell).
    block_timestamp = datetime.datetime.utcfromtimestamp(block.timestamp)

    for tx in block.transactions:
        # gasUsed is only available on the receipt, hence one extra request per transaction.
        tx_receipt = w3.eth.get_transaction_receipt(tx.hash)

        transactions_data.append({
            'block_number': block_number,
            'tx_hash': tx.hash.hex(),
            'gas_price': tx.gasPrice,
            'gas_used': tx_receipt.gasUsed,
            'timestamp': block_timestamp
        })

df_transactions = pd.DataFrame(transactions_data)

# astype(int) drops any fractional gwei left after the wei -> gwei conversion.
df_transactions['gas_price_gwei'] = df_transactions['gas_price'].apply(lambda x: w3.from_wei(x, 'gwei')).astype(int)
df_transactions['tx_hash']        = df_transactions['tx_hash'].astype(str)
df_transactions['timestamp']      = pd.to_datetime(df_transactions['timestamp'])
df_transactions = df_transactions.set_index('timestamp').sort_index()

df_transactions.to_pickle('ethereum_transactions.pkl')

# Report only after the frame exists and has actually been written.
print("Dataframe saved. Number of transactions:", len(df_transactions))

In [None]:
# Show the transactions DataFrame built in the previous cell as this cell's output.
df_transactions

In [None]:
# Appears to be a manual check that fetches a single block with one API key.
# NOTE(review): fetch_transactions is defined in a later cell, so on a fresh kernel
# (Restart & Run All) this cell raises NameError. The API key is also hard-coded here.
fetch_transactions([18935888], "d4d8ddd18ef6438ea49fd3d5006b7eba")

In [None]:
from web3 import Web3
import pandas as pd
from datetime import datetime
import numpy as np
from joblib import Parallel, delayed
import logging, os, json


# Log lines carry a timestamp and level so progress can be followed during the fetch.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Directory that receives one file per fetched block.
data_dir = 'block_data'
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs(data_dir, exist_ok=True)

def _json_default(value):
    """Convert values the json module cannot encode (date/datetime) to ISO-8601 text."""
    if hasattr(value, 'isoformat'):
        return value.isoformat()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def save_block_data(block_number, data, directory=None):
    """Save block transaction data to a JSON file.

    Args:
        block_number: Block number; the file is named ``<block_number>.json``.
        data: Transaction records to store. Values exposing ``isoformat()``
            (such as ``datetime``) are written as ISO-8601 strings.
        directory: Directory to write into. Defaults to the module-level ``data_dir``.

    Failures are logged instead of raised, so a failed save does not abort the caller.
    """
    target_dir = data_dir if directory is None else directory
    try:
        # Serialise before opening the file: if encoding fails, no truncated
        # JSON file is left behind on disk.
        payload = json.dumps(data, default=_json_default)
        file_path = os.path.join(target_dir, f'{block_number}.json')
        with open(file_path, 'w') as file:
            file.write(payload)
        logging.info(f"Block {block_number} data saved.")
    except Exception as e:
        logging.error(f"Error saving data for block {block_number}: {e}")
        
        

def load_block_data(block_number, directory=None):
    """Report whether usable cached data exists for a block.

    Looks for ``<block_number>.json``, the file name save_block_data writes.

    Args:
        block_number: Block number to look up.
        directory: Directory to search. Defaults to the module-level ``data_dir``.

    Returns:
        True if the file exists and contains valid JSON, False otherwise.
    """
    target_dir = data_dir if directory is None else directory
    file_path = os.path.join(target_dir, f'{block_number}.json')
    try:
        # Parse the file instead of only testing for existence, so a truncated
        # file from an interrupted or failed save is not treated as cached.
        with open(file_path) as file:
            json.load(file)
    except (OSError, ValueError):
        return False
    return True


def fetch_transactions(block_numbers, key):
    """Fetch every transaction of the given blocks through Infura.

    Each block's transactions are handed to save_block_data under that block's
    own number as soon as the block has been processed.

    Args:
        block_numbers: Iterable of block numbers to fetch; may be empty.
        key: Infura API key appended to the mainnet endpoint URL.

    Returns:
        A list with one dict per transaction (keys: block_number, tx_hash,
        gas_price, gas_used, timestamp), in the order the blocks were given.
    """
    # Each call builds its own client, so parallel workers do not share a connection.
    local_w3 = Web3(Web3.HTTPProvider("https://mainnet.infura.io/v3/" + key))
    transactions_data = []
    for block_number in block_numbers:
        block = local_w3.eth.get_block(block_number, full_transactions=True)
        block_timestamp = datetime.utcfromtimestamp(block.timestamp)
        # gasUsed is only available on the receipt, hence one extra request per transaction.
        block_transactions = [
            {
                'block_number': block_number,
                'tx_hash': tx.hash.hex(),
                'gas_price': tx.gasPrice,
                'gas_used': local_w3.eth.get_transaction_receipt(tx.hash).gasUsed,
                'timestamp': block_timestamp
            }
            for tx in block.transactions
        ]
        # Save per block: each file then holds exactly that block's transactions.
        save_block_data(block_number, block_transactions)
        transactions_data.extend(block_transactions)
    return transactions_data

def _load_cached_transactions(block_numbers):
    """Read transaction records back from the per-block JSON files in data_dir."""
    cached = []
    for block_number in block_numbers:
        file_path = os.path.join(data_dir, f'{block_number}.json')
        try:
            with open(file_path) as file:
                records = json.load(file)
        except (OSError, ValueError) as e:
            logging.warning(f"Could not read cached data for block {block_number}: {e}")
            continue
        for record in records:
            # JSON has no datetime type; timestamps stored as text are parsed again here.
            if isinstance(record, dict) and isinstance(record.get('timestamp'), str):
                record['timestamp'] = pd.Timestamp(record['timestamp'])
            cached.append(record)
    return cached


def main(start_block, end_block, keys):
    """Fetch all transactions in [start_block, end_block] and pickle them as a DataFrame.

    Blocks for which load_block_data reports cached data are read from disk; the
    remaining blocks are split into one chunk per API key and fetched in parallel.

    Args:
        start_block: First block number (inclusive).
        end_block: Last block number (inclusive).
        keys: Non-empty sequence of Infura API keys; one parallel job per key.

    Raises:
        ValueError: If ``keys`` is empty.
    """
    if not keys:
        raise ValueError("At least one API key is required.")

    num_jobs = len(keys)
    block_range = np.arange(start_block, end_block + 1)

    # Partition the range into blocks already on disk and blocks still to fetch.
    cached_blocks, pending_blocks = [], []
    for block in block_range.tolist():
        (cached_blocks if load_block_data(block) else pending_blocks).append(block)

    # One job per key; empty chunks (fewer pending blocks than keys) are skipped.
    jobs = [
        delayed(fetch_transactions)(chunk.tolist(), key)
        for chunk, key in zip(np.array_split(pending_blocks, num_jobs), keys)
        if len(chunk)
    ]
    results = Parallel(n_jobs=num_jobs)(jobs) if jobs else []

    # Cached records first, then the flattened per-job results.
    all_transactions = _load_cached_transactions(cached_blocks)
    all_transactions.extend(item for sublist in results for item in sublist)

    if not all_transactions:
        logging.warning(f"No transactions found for blocks {start_block}-{end_block}; nothing saved.")
        return

    # A transaction hash is unique, so this only removes records that were read twice.
    df_transactions = pd.DataFrame(all_transactions).drop_duplicates(subset='tx_hash')

    # web3.py v6 renamed fromWei to from_wei; use whichever this installation provides.
    from_wei = getattr(Web3, 'from_wei', None) or Web3.fromWei
    df_transactions['gas_price_gwei'] = df_transactions['gas_price'].apply(lambda x: from_wei(x, 'gwei')).astype(int)
    df_transactions['tx_hash'] = df_transactions['tx_hash'].astype(str)
    df_transactions['timestamp'] = pd.to_datetime(df_transactions['timestamp'])
    df_transactions = df_transactions.set_index('timestamp').sort_index()

    # Save the DataFrame to a .pkl file
    df_transactions.to_pickle('ethereum_transactions.pkl')
    print(f"Dataframe saved. Number of transactions: {len(df_transactions)}")

if __name__ == '__main__':
    start_block = 18935888
    end_block   = 18993488
    # API keys can be supplied as a comma-separated list in INFURA_API_KEYS.
    # NOTE(review): the fallback literals are credentials stored in plain text;
    # consider rotating them and removing the fallback.
    env_keys = [k.strip() for k in os.environ.get("INFURA_API_KEYS", "").split(",") if k.strip()]
    provider_keys = env_keys or [
        "d4d8ddd18ef6438ea49fd3d5006b7eba",
        "b7b06ad6a7304e2197efa10b79e1c867",
        "58bce48fe80b451c96fee8ef35bbcce8",
    ]
    main(start_block, end_block, provider_keys)
