In [1]:
import pandas as pd
import json
from web3 import Web3
from datetime import datetime, timedelta
import time
import sys
import json
from collections import Counter

# Map token names to contract addresses
token_address_map = {
    'rETH': '0xae78736Cd615f374D3085123A210448E74Fc6393'
    # fill with rest
}

# Map market name to "Pool" contract address and abi filepath
contract_address_abi_map = {
    'AAVE': ('0x87870Bca3F3fD6335C3F4ce8392D69350B4fA4E2', './AAVE_Pool_ABI.json'),
    'COMPOUND': ('0xA17581A9E3356d9A858b789D68B4d866e593aE94', './Compound_ABI.json')
    # fill with rest
}

# Connect to ETH blockchain with infura API key
w3 = Web3(Web3.HTTPProvider('https://mainnet.infura.io/v3/7e4f5238262543919688a59d0fef4a1d'))

In [33]:
def get_initial_depo_blockNum(market, market_contract, user_wallet_address, token = 'rETH'):
    """
    Finds the lowest block number for rETH supply events related to a given user wallet address for a particular market.

    Parameters:
    - market: The specific market from our list of supported markets to analyze.
    - market_contract: The market pool contract instance from Web3.py.
    - user_wallet_address: The wallet address of the user as a string.
    - token: The specific reserve token to analyze (Defaults to rETH).

    Returns:
    - The lowest block number for the given user wallet address. Returns None if the user has no supply events.
    """
    # Lookup token address
    token_address = token_address_map[token]
    
    # Make sure the requested market is supported
    if market in contract_address_abi_map.keys():
    
        if market == 'AAVE':
            # For some reason it doesnt work when user filter vs onBehalfOf
            log_filter = {
                'reserve': token_address, #rETH token address
                'onBehalfOf': user_wallet_address,
            }
            
            # Get logs for all supply events for rETH on behalf of this particular user_wallet_address
            logs = market_contract.events.Supply().get_logs(fromBlock='earliest',
                                                               toBlock='latest',
                                                               argument_filters=log_filter)
            # In the case that there were no events
            if not logs:
                return 0
            
        elif market == 'COMPOUND':
            log_filter = {
                'asset': token_address, #rETH token address
                'from': user_wallet_address,
            }
            
            # Get logs for all supply events for rETH on behalf of this particular user_wallet_address
            logs = market_contract.events.SupplyCollateral().get_logs(fromBlock='earliest',
                                                               toBlock='latest',
                                                               argument_filters=log_filter)
            # In the case that there were no events
            if not logs:
                return 0
    
    # The market wasn't in our dictionary of supported markets
    else:
        print("UNSUPPORTED MARKET ENTERED")
        return
    
    return min(log['blockNumber'] for log in logs)

In [37]:
def fetch_logs(market, market_contract, user_address_filter, event_name, block_step = 100000):
    """
    Fetches logs for transactions filtered by user address and event type, from the latest block down to initial rETH deposit.

    Parameters:
    - market: The particular market we want to fetch logs from.
    - market_contract: The contract object to fetch logs from.
    - user_address_filter: The user wallet address to filter the logs.
    - event_name: The name of the event to fetch {'Borrow', 'Repay', 'Withdraw', 'FlashLoan'}
    - block_step: The number of blocks to step back in each iteration (default is 100000).

    Returns:
    - A dataframe of formatted logs.
    """
    
    # Get target block
    target_block = get_initial_depo_blockNum(market, market_contract, user_address_filter)
    
    if target_block == 0:
        print(f'User: {user_address_filter} returned no events of type: {event_name}')
        return []
    
    print(f'For User: {user_address_filter}\nFrom Block: {target_block}')

    # Dynamic print
    print(f'Fetching {event_name} events', end='')
    sys.stdout.flush()

    # Variables for looping
    logged_results = []
    target_log_count = float('inf')
    current_block = w3.eth.block_number    # Latest Block Number

    # Time
    start = time.time()

    # Loop counter
    iter_count = 0
    
    # Use while loop based on market --> possible event types (No reason to search for aave market events when we know we're looking for compound events)
    if market == 'AAVE':
        # Loop until target number reached or at first block
        while (len(logged_results) < target_log_count and current_block > target_block):
            #time.sleep(0.01)
            print('.', end='')

            from_block = max(current_block - block_step, target_block)

            try:
                # Access event
                event = getattr(market_contract.events, event_name)()

                # Fetch logs for the current block -- AAVE
                
                if (event_name == 'FlashLoan'):
                    current_logs = event.get_logs(
                        fromBlock=from_block,
                        toBlock=current_block,
                        argument_filters={'initiator': user_address_filter}
                    )
                elif (event_name == 'Borrow'):
                    current_logs = event.get_logs(
                        fromBlock=from_block,
                        toBlock=current_block,
                        argument_filters={'onBehalfOf': user_address_filter}
                    )
                elif (event_name == 'Withdraw'):
                    current_logs = event.get_logs(
                        fromBlock=from_block,
                        toBlock=current_block,
                        argument_filters={'to': user_address_filter}
                    )
                elif (event_name == 'Repay'):
                    current_logs = event.get_logs(
                        fromBlock=from_block,
                        toBlock=current_block,
                        argument_filters={'user': user_address_filter}
                    )

                # Format event logs
                for event in current_logs:
                    formatted_log = format_event_log(event, event_name)

                    # Add to aggregate log list
                    logged_results.append(formatted_log)

                # Decrement block number
                current_block = from_block - 1

            except Exception as e:
                # Print exception and block number and break
                print(f"\nError fetching logs for block {current_block}: {e}")
                break

            # Increment iter
            iter_count += 1
            
    elif market == 'COMPOUND':
        # Loop until target number reached or at first block
        while (len(logged_results) < target_log_count and current_block > target_block):
            #time.sleep(0.01)
            print('.', end='')

            from_block = max(current_block - block_step, target_block)

            try:
                # Access event
                event = getattr(market_contract.events, event_name)()
                
                # Fetch logs from the current block -- COMPOUND
                if (event_name == 'WithdrawCollateral'):
                    current_logs = event.get_logs(
                        fromBlock=from_block,
                        toBlock=current_block,
                        argument_filters={'to': user_address_filter}
                    )
                elif (event_name == 'SupplyCollateral'):
                    current_logs = event.get_logs(
                        fromBlock=from_block,
                        toBlock=current_block,
                        argument_filters={'from': user_address_filter}
                    )
                elif (event_name == 'BuyCollateral'):
                    current_logs = event.get_logs(
                        fromBlock=from_block,
                        toBlock=current_block,
                        argument_filters={'buyer': user_address_filter}
                    )
                elif (event_name == 'AbsorbCollateral'):
                    current_logs = event.get_logs(
                        fromBlock=from_block,
                        toBlock=current_block,
                        argument_filters={'borrower': user_address_filter}
                    )
                    
                # Format event logs
                for event in current_logs:
                    formatted_log = format_event_log(event, event_name)

                    # Add to aggregate log list
                    logged_results.append(formatted_log)

                # Decrement block number
                current_block = from_block - 1
                
            except Exception as e:
                # Print exception and block number and break
                print(f"\nError fetching logs for block {current_block}: {e}")
                break

            # Increment iter
            iter_count += 1

    # Print time
    print('\nFetching complete.')
    print(f'Time Elapsed: {time.time() - start}\n')

    # Trim list
    if len(logged_results) > target_log_count:
           logged_results = logged_results[-target_log_count:]

    # Return dataframed logs
    return pd.DataFrame(logged_results)



def format_event_log(event, event_name):
    """
    Formats the log of an event based on its type.

    Parameters:
    - event: The event log to format.
    - event_name: The name of the event.

    Returns:
    - A dictionary containing formatted log attributes.
    """
    
    # Format common attrs
    log = {
        'Event Type': event_name,
        'Transaction Hash': event['transactionHash'].hex(),
        'Address': event['address'],
        'Block Hash': event['blockHash'].hex(),
        'Block Number': event['blockNumber'],
    }
    
    # if else tree for event specific attrs -- AAVE
    if event_name == 'Withdraw':
        log.update({
            'Reserve': event['args']['reserve'],
            'User': event['args']['user'],
            'To': event['args']['to'],
            'Amount': event['args']['amount']
        })
        
    elif event_name == 'Borrow':
        log.update({
            'Reserve': event['args']['reserve'],
            'On Behalf Of': event['args']['onBehalfOf'],
            'User': event['args']['user'],
            'Amount': event['args']['amount'],
            'Borrow Rate': event['args']['borrowRate']
        })
        
    elif event_name == 'Repay':
        log.update({
            'Reserve': event['args']['reserve'],
            'User': event['args']['user'],
            'Repayer': event['args']['repayer'],
            'Amount': event['args']['amount'],
            'useAtokens': str(event['args']['useATokens']),
            
        })
        
    elif event_name == 'FlashLoan':
        log.update({
            'Target': event['args']['target'],
            'Asset': event['args']['asset'],
            'Referral Code': str(event['args']['referralCode']),
            'Initiator': event['args']['initiator'],
            'Amount': event['args']['amount'],
            'Premium': event['args']['premium']
        })
        
    # Start part of if else tree for Compound
    elif event_name == 'WithdrawCollateral':
        log.update({
            'Reserve': event['args']['asset'],
            'User': event['args']['src'],
            'To': event['args']['to'],
            'Amount': event['args']['amount']
        })
        
    elif event_name == 'SupplyCollateral':
        log.update({
            'Reserve': event['args']['asset'],
            'User': event['args']['from'],
            'To': event['args']['dst'],
            'Amount': event['args']['amount']
        })
        
    elif event_name == 'BuyCollateral':
        log.update({
            'Reserve': event['args']['asset'],
            'User': event['args']['buyer'],
            'Amount': event['args']['baseAmount'],
            'Collateral Amount': event['args']['collateralAmount']
        })
        
    elif event_name == 'AbsorbCollateral':
        log.update({
            'Reserve': event['args']['asset'],
            'User': event['args']['buyer'],
            'Amount': event['args']['baseAmount'],
            'Collateral Amount': event['args']['collateralAmount']
        })
    
    return log

In [53]:
def user_history(user_addresses, markets):
    """
    Fetches the interaction history of users with specified lending and borrowing markets.
    
    Parameters:
    - user_addresses: List of user wallet addresses.
    - markets: List of market names to scrape.
    
    Returns:
    - A dictionary mapping each user address to a list of dataframes (one for each interaction type).
    """
    interaction_history = {user: [] for user in user_addresses}
    aave_event_types = ['Withdraw', 'Borrow', 'Repay', 'FlashLoan']
    compound_event_types = ['WithdrawCollateral', 'SupplyCollateral', 'BuyCollateral', 'AbsorbCollateral']
    # Loop markets
    for market in markets:
        # Verify market entry is valid
        if market not in contract_address_abi_map.keys():
            print("UNSUPPORTED MARKET ENTERED")
            return []
                  
        # Lookup contract address and abi filepath
        contract_address, abi_filepath = contract_address_abi_map[market]
        
        # Web3 setup
        with open(abi_filepath) as f:
            abi = json.load(f)

        smart_contract = w3.eth.contract(address=contract_address, abi=abi)
        
        if market == 'AAVE':
            # Loop user addresses
            for user_address in user_addresses:
                # Loop event types
                for event_type in aave_event_types:
                    logs_df = fetch_logs(market, smart_contract, user_address, event_type, block_step = 250000)
                    
                    if logs_df.empty:
                        pass
                    else:
                        interaction_history[user_address].append(logs_df)
                  
        elif market == 'COMPOUND':
            # Loop user addresses
            for user_address in user_addresses:
                # Loop event types
                for event_type in compound_event_types:
                    logs_df = fetch_logs(market, smart_contract, user_address, event_type, block_step = 250000)
                    
                    if logs_df.empty:
                        pass
                    else:
                        interaction_history[user_address].append(logs_df)
                  
            
    return interaction_history
                

In [14]:
# Open the file in read mode
with open('./walletaddresses_last3months.txt', 'r') as file:
    # Initialize an empty list to store lines starting with '0x'
    wallet_addresses = []
    
    # Iterate through each line in the file
    for line in file:
        # Check if the line starts with '0x'
        if line.startswith('0x'):
            # If it does, append it to the list
            wallet_addresses.append(line.strip())  # strip() removes leading/trailing whitespaces
    
# Print the list of lines starting with '0x'
print(wallet_addresses)

['0xdE13a331adF3B9b6F32017BC63dFf2f61A926a5a', '0x4C14c8F88Eea50Eb30Dc4b2a035327146578c58F', '0x3Fd21B1850397f9ded94Db32fbaFB632E07FedF5', '0x4D0dDD57787d6C3AE8818e0A6E15aE9228d9a23c', '0x0418656Fdf3646968489b8e6aF6C25562803c69D', '0x4984D35DA85DC841Ae865BF1e39CDA5f1A193Ad8', '0x4984D35DA85DC841Ae865BF1e39CDA5f1A193Ad8', '0xADC0A53095A0af87F3aa29FE0715B5c28016364e', '0x1D9e9719C793B5A4a30Ec47816AF4B531d4f9D09', '0x2c537ac6209b5E392207783DF4671fdb932aFf44', '0x2c537ac6209b5E392207783DF4671fdb932aFf44', '0xF0ca1C00765e7e20767e2529ADc001a261AE2c43', '0x0cAcfC8C96B84BC5698fCdCB5ffD536B4a246170', '0xe5bD9c9792E13278FfD391Fed86Cf435869b0154', '0xADC0A53095A0af87F3aa29FE0715B5c28016364e', '0x92ef5c305647987A225bcD483aee3B82c24F72B9', '0xb497070466Dc15FA6420b4781bB0352257146495', '0xdae0b3CE40F66ec01383B73bc261f50b1585d44b', '0xADC0A53095A0af87F3aa29FE0715B5c28016364e', '0x587B938A8B21ea570325E91BDd01B161783A75e8', '0x2d9B182d340A9189457e83d6034cca98Fc81cc6a', '0x0cAcfC8C96B84BC5698fCdCB5ffD53

In [54]:
# TEST USER HISTORY
compound_recent_suppliers = {
'0x121E1573B7C6c15Aa4C036d54BFd2974Cf38a163',
'0xA490A0346808dDA91aea6698cB19e4697D9fc5cc',
'0xAF198e3593B61bE3469d856c57961DAA49BE4852',
'0xA86322a72Bd98c269E0D72Dc79594A25E3461b0B',
'0xc36ba2e21D0eb8BBAB48FBf90Db39F0813F56F2C',
'0x0E4D77edB9bf3fFcE7E5910EDce70e1B9F5a9A89',
'0xa8C6E8aA1cB02d2e8B6aa57d958915bDb2223320',
'0x266EedfA566C52584590E3f9B4532Ab051176793',
'0xd0670Bdab37812660B462259b56c27CC25Dd5e41',
}
interaction_list = user_history(compound_recent_suppliers, ['COMPOUND'])

For User: 0xAF198e3593B61bE3469d856c57961DAA49BE4852
From Block: 19010871
Fetching WithdrawCollateral events...
Fetching complete.
Time Elapsed: 0.1459944248199463

For User: 0xAF198e3593B61bE3469d856c57961DAA49BE4852
From Block: 19010871
Fetching SupplyCollateral events...
Fetching complete.
Time Elapsed: 0.156996488571167

For User: 0xAF198e3593B61bE3469d856c57961DAA49BE4852
From Block: 19010871
Fetching BuyCollateral events...
Fetching complete.
Time Elapsed: 0.15199518203735352

For User: 0xAF198e3593B61bE3469d856c57961DAA49BE4852
From Block: 19010871
Fetching AbsorbCollateral events...
Fetching complete.
Time Elapsed: 0.17212748527526855

For User: 0xa8C6E8aA1cB02d2e8B6aa57d958915bDb2223320
From Block: 19307784
Fetching WithdrawCollateral events.
Fetching complete.
Time Elapsed: 0.05200386047363281

For User: 0xa8C6E8aA1cB02d2e8B6aa57d958915bDb2223320
From Block: 19307784
Fetching SupplyCollateral events.
Fetching complete.
Time Elapsed: 0.04799938201904297

For User: 0xa8C6E8aA1c

In [55]:
all_dfs = []

for key, dfs_list in interaction_list.items():
    # Concatenate all DataFrames in the list for the current wallet address
    combined_df = pd.concat(dfs_list, ignore_index=True)
    all_dfs.append(combined_df)

# Concatenate all combined DataFrames into one
final_df = pd.concat(all_dfs, ignore_index=True)
final_df

Unnamed: 0,Event Type,Transaction Hash,Address,Block Hash,Block Number,Reserve,User,To,Amount
0,SupplyCollateral,0xa86b714122e0943398a8c771fbd05a7c54a001d70569...,0xA17581A9E3356d9A858b789D68B4d866e593aE94,0x0e16c9868727e8af8c8d50a66e2a2a2fd28b18e578c3...,19010871,0xae78736Cd615f374D3085123A210448E74Fc6393,0xAF198e3593B61bE3469d856c57961DAA49BE4852,0xAF198e3593B61bE3469d856c57961DAA49BE4852,1010000000000000
1,SupplyCollateral,0x5c4fb033800da2290e683ec0317b28a703744c24a8e8...,0xA17581A9E3356d9A858b789D68B4d866e593aE94,0x87d9c8b53dd56297953fbd45c4b62477a824487b4d6c...,19307784,0xae78736Cd615f374D3085123A210448E74Fc6393,0xa8C6E8aA1cB02d2e8B6aa57d958915bDb2223320,0xa8C6E8aA1cB02d2e8B6aa57d958915bDb2223320,26603873708152547177
2,SupplyCollateral,0xf959fe4e16300b0ab9a38d722c3879550a2e237ac9c0...,0xA17581A9E3356d9A858b789D68B4d866e593aE94,0x7132b2073b10be41e30583b3bb006ef2dfc94484cd89...,19138889,0xae78736Cd615f374D3085123A210448E74Fc6393,0xc36ba2e21D0eb8BBAB48FBf90Db39F0813F56F2C,0xc36ba2e21D0eb8BBAB48FBf90Db39F0813F56F2C,116405793348203100206
3,SupplyCollateral,0xad6b16f15ba6dbf07509e39edea7e5518a63f7f88ebf...,0xA17581A9E3356d9A858b789D68B4d866e593aE94,0xe3ac56d0d58e8bf2abb0eb1170ead72f708326bd8bfd...,19495406,0xae78736Cd615f374D3085123A210448E74Fc6393,0xd0670Bdab37812660B462259b56c27CC25Dd5e41,0xd0670Bdab37812660B462259b56c27CC25Dd5e41,90000000000000000
4,WithdrawCollateral,0x1089536487e09db5ce3315e8b3b2110132f5054ee911...,0xA17581A9E3356d9A858b789D68B4d866e593aE94,0x68647616b630722044927ed27ca026a26f3de64bda05...,19394564,0xae78736Cd615f374D3085123A210448E74Fc6393,0x266EedfA566C52584590E3f9B4532Ab051176793,0x266EedfA566C52584590E3f9B4532Ab051176793,4680622357910485500
5,SupplyCollateral,0x155104f5e675fd9d5a9e45e051ff6c2e9d5b737b593c...,0xA17581A9E3356d9A858b789D68B4d866e593aE94,0xeb863dec050e419a79591f00ddc331ef771609021822...,19394555,0xae78736Cd615f374D3085123A210448E74Fc6393,0x266EedfA566C52584590E3f9B4532Ab051176793,0x266EedfA566C52584590E3f9B4532Ab051176793,4680622368966520665
6,SupplyCollateral,0x81d2e9db87affcac1f3e7f9bf16bdf531298b3c80c8d...,0xA17581A9E3356d9A858b789D68B4d866e593aE94,0xc3f1143d8d57de487c3c87b70910b4f1b42e611d3b19...,18911780,0xae78736Cd615f374D3085123A210448E74Fc6393,0x121E1573B7C6c15Aa4C036d54BFd2974Cf38a163,0x121E1573B7C6c15Aa4C036d54BFd2974Cf38a163,8112001731285519
7,WithdrawCollateral,0x9e68c2755b5d1710ff11a2bbdee17613188ae9308d24...,0xA17581A9E3356d9A858b789D68B4d866e593aE94,0x193239c27bb6e7f4fc2e24084c5b71304d82e740b933...,19061942,0xae78736Cd615f374D3085123A210448E74Fc6393,0xA86322a72Bd98c269E0D72Dc79594A25E3461b0B,0xA86322a72Bd98c269E0D72Dc79594A25E3461b0B,109999997981971825
8,SupplyCollateral,0x46b1221adc9042a6064237de3e348add7f77d4319747...,0xA17581A9E3356d9A858b789D68B4d866e593aE94,0xb34511b65f49d4cdc80305ad95904e28e7de16b07032...,19040097,0xae78736Cd615f374D3085123A210448E74Fc6393,0xA86322a72Bd98c269E0D72Dc79594A25E3461b0B,0xA86322a72Bd98c269E0D72Dc79594A25E3461b0B,10000000000000000
9,SupplyCollateral,0x7619425f2bd027d0813f26af3ae128b446b9ca09275c...,0xA17581A9E3356d9A858b789D68B4d866e593aE94,0x857b1bf3809402b9b818d5af88504d5071f188b7f9ee...,19047872,0xae78736Cd615f374D3085123A210448E74Fc6393,0xA86322a72Bd98c269E0D72Dc79594A25E3461b0B,0xA86322a72Bd98c269E0D72Dc79594A25E3461b0B,100000000000000000
