In [62]:
import requests                         # For api get requests.
import datetime                         # For converting UTX into readable DateTime.
import toml                             # For interpreting Config file(s).
import json                             # For interpreting mining pool data ("pools.json").
import time                             # For safely managing API request timings to avoid getting timed out.
from IPython.display import Markdown    # For improving visual experience of cell outputs.
from json import JSONDecodeError        # For handling API request return issues.
from collections import Counter         # For counting address transaction frequencies within a block.

In [63]:
# Import the api configuration string data.
with open('blockchain_api_config.toml', 'r') as f:
    api_config = toml.loads(f.read())

In [64]:
# Import known mining pool addresses.
with open('pools.json', 'r') as f:
    mining_pools = json.load(f)

<h3>Note to examiners:</h3>
<p>It is <b>highly recommended that you do not run this file.</b> The latter half of the project makes api calls many thousands of times as part of the logic to locate the highest wallet balance of all addresses who participated in transactions within the most recent block.</p>
<br>
<p>As a result, the script can take well over <b>40 minutes</b> to run in its entirety, depending on the number of transactions in the latest block.</p>

<h1>At the time of your writing, what is the height of the most recent block mined and what is the
difficulty level?</h1>

In [65]:
def get_difficulty(_hash : str) -> int:
    """
    Gets the difficulty of a blockchain block in terms of the 0-requirement for a successful hash.
    :param _hash: (str) The block hash.
    :return: (int) The difficulty of the block.
    """
    # Initialise return variable.
    _difficulty : int = 0
    
    # Loop through the characters in the hash string received and count how many 0s. When a 0 isn't encountered, return _difficulty.
    for _letter in _hash:
        if _letter == "0":
            _difficulty += 1
            
        else:
            return _difficulty

In [66]:
# Get the latest block.
latest_block = requests.get(api_config['latest_block']['root']).json()
# Use the plaintext query 'getdifficulty' for getting the current abstract difficulty of the Bitcoin blockchain.
hash_difficulty = requests.get(api_config['blockchain_api']['root'] + "q/getdifficulty").json()

# We can easily access the height attribute in the json returned from our get request.
height = latest_block['height']
# Using my custom function, we can count how many 0's are required for a hash to be considered a valid block hash.
difficulty = get_difficulty(latest_block['hash'])

# Output the results.
Markdown(f"The height of the block is: <b>{height}</b>  "
         f"\nThe difficulty-level is: <b>{round(hash_difficulty/1000000000000, 2)} trillion</b>.  "
         f"\nThis is represented by <b>{difficulty}</b> initial '0' digits required for the hash of the block to be considered a successful hash.  "
         f"\nThe hash of the latest block at runtime is: <b>{latest_block['hash']}</b>")

The height of the block is: <b>870432</b>  
The difficulty-level is: <b>101.65 trillion</b>.  
This is represented by <b>19</b> initial '0' digits required for the hash of the block to be considered a successful hash.  
The hash of the latest block at runtime is: <b>0000000000000000000266da76f3c49b8f49772c172bf4d108231a01a5d0d72e</b>

<h1>What is the address of the miner of the block ? Can you unmask the identity of the
address?</h1>

In [67]:
def get_miner_addr(_tx : dict) -> str:
    """
    Takes a single transaction and returns the address of the miner.
    :param _tx: (dict) Transaction to get the miner address from.
    :return: (string) The address of the miner.
    """
    
    # Coinbase-transactions have "dust" outputs and OP_RETURN outputs.
    # The dust outputs can fund specific purposes for the network like secondary layers, ...
    #   OP_RETURN outputs can trigger script events by sending 0 satoshi to the wallet address, I believe.
    # In either event, these transactions can muddy, from a code point of view, which address is our miner's address.
    # To solve this, we need some simple logic to select the right address.
    
    # From what I have seen, the miner's address comes second in the list of output addresses, so we could just index that, ...
    #   but just to be certain, this logic will make sure it's correct.
    # This could be improved with some configuration for the mining reward, considering that is not a static value and changes ...
    #   every halving, but for our purposes this fixed value will suffice.
    
    # Look through all the potential output wallet addresses.
    for _out in _tx['out']:
        
        # If an address is available, and the value of satoshi in the transaction is equal to more than 3btc (current mining reward ~3.1btc) ...
        #   then we want to return that address, since that is the address which was sent the block reward.
        if (_out['addr']) and (int(_out['value']) > 300000000):
            return str(_out['addr'])
        
        # If the output of this transaction we are currently looking at doesn't fulfill those requirements, ...
        #   do the next iteration of the loop.
        else:
            pass
        
    print("Error finding address for output.")

In [68]:
def unmask_pool_name(_addr : str) -> str:
    """
    Checks known addresses of mining pools and returns that name if it exists.
    :param _addr: (str) Address of mining pool. Eg: 15MdAHnkxt9TMC2Rj595hsg8Hnv693pPBB
    :return: (str) The name of the mining pool. Eg: Mara Pool.
    
    If pool name is not located, returns 'Unknown'
    """
    
    # Loop through our known payout addresses which are linked to known mining pools.
    for _ in list(mining_pools['payout_addresses'].keys()):
        # If the current address is the address we pass to the function, return the name of the mining pool.
        if _addr == _:
            return mining_pools['payout_addresses'][_]['name']
        
    else:
        # If we cannot find the pool name, return 'Unknown', since that value seems to occupy 40+% of mined blocks.
        return 'Unknown'

In [69]:
# Access our transaction data and locate the first transaction which is our coinbase transaction in BTC blockchain.
txs = latest_block['txIndexes']
tx = requests.get(api_config['single_transaction']['root'] + str(txs[0])).json()

miner_addr = None

# Slight error checking in the illogical case that a non-coinbase transaction is obtained instead.
if len(tx['inputs']) == 1:
    miner_addr = get_miner_addr(tx)
else:
    print("Error: Coinbase Transaction not the selected transaction")
    
# See if we can identify if any known mining pools mined the block.
pool_name = unmask_pool_name(miner_addr)

Markdown(f"The Miner's Address is: <b>{miner_addr}</b>  "
         f"\nThe pool name is <b>{pool_name}</b>.")

The Miner's Address is: <b>bc1qwzrryqr3ja8w7hnja2spmkgfdcgvqwp5swz4af4ngsjecfz0w0pqud7k38</b>  
The pool name is <b>Unknown</b>.

In [70]:
# Proof that the unmasking function does work with a known address which should return Mara Pool.
# Here is proof it should return Mara Pool: https://www.blockchain.com/explorer/addresses/btc/15MdAHnkxt9TMC2Rj595hsg8Hnv693pPBB
r = unmask_pool_name("15MdAHnkxt9TMC2Rj595hsg8Hnv693pPBB")
print(r)
del r

MARA Pool


<h1>How many transactions does it contain?</h1>

In [71]:
# Since txs variable is a List type, we can just call the length of the list.
tx_num = len(txs)

Markdown(f"The number of transactions in the most recent block is: <b>{tx_num}</b>")

The number of transactions in the most recent block is: <b>6585</b>

<h1>Which address has the most balance?</h1>

<h3>Logic to solve this problem:</h3>

<p>
The issue presented by this question is that there is no endpoint for the blockchain api which gets the greatest balance of all addresses included within transactions of the block.
<br>
<p>
To solve this issue, we will get transaction data on each transaction within a block using the transaction endpoint. This will take a long time, due to API latency and good API practice to not overwhelm the API endpoint. We could just take a random value sample of the block and find the greatest balance of all of those addresses, but this approach doesn't give us the literal greatest balance of the addresses within the block, so I will commit to getting data on all transactions on the block.
</p>
<p>
<b>
The Logic of my approach:
</b>
</p>
<p>
> Get our transaction data.<br>
> Remove duplicate addresses so that we aren't sending balance requests for addresses multiple times.<br>
> Rather than sending a balance API request for each address, concatenating addresses into chunks of 50 will send fewer requests.<br>
> Look through each address of that chunk and check if the balance of that address is the greatest seen balance.<br>
> If it is, we record the wallet address and the associated balance. Otherwise, we keep searching through chunks of addresses.<br>
</p>
<p>
For better API handling, I will wrap the requests.get() function in a safe_requests() method which will attempt to handle response codes which indicate we are sending too many requests and use the time.sleep() function to control the sending of requests, such that we don't encounter more errors.
</p>

</p>

<i>Defining Functions</i>

In [91]:
def safe_request(url : str, retries : int = 3, wait : int = 2):
    """
    Introduces error handling for API requests. Accounts for reaching max request limit and codes that aren't expected.
    :param url: (str) The url to request.
    :param retries: (int) The number of retries before giving up.
    :param wait: (int) The number of seconds to wait between retries.
    :return: (dict or None) The response or None if it fails.
    """
    
    # _attempt represents an attempt out of the number of attempts the function allows, default = 3.
    for _attempt in range(retries):
        
        # We need try/catch blocks to stop errors disrupting the request process.
        try:
            response = requests.get(url)
            
            # If we get a positive get request status code, return the dict version of the response object.
            if response.status_code == 200:
                try:
                    return response.json()
                
                # If we encounter a JSONDecodeError then we have had something wrong returned to us from the API.
                except JSONDecodeError:
                    print("Error with decoding JSON from parsed URL.")
                    return None
            
            # If we have a status code of 429, we have exceeded our limit of tries within this time period and need to wait.    
            elif response.status_code == 429:
                # Define our retry_after variable as what we are told to retry after by the response from the API.
                retry_after = int(response.headers.get("Retry-After", 1))
                print(f"Rate has been limited, retrying after {retry_after} seconds.")
                time.sleep(retry_after)
            
            # If neither of the status codes seen above are returned, print the debug help and move to the next attempt.    
            else:
                print(f"Unexpected Status Code: {str(response.status_code)}")
        
        # If our try block returns an exception that isn't the JSONDecodeError, we'll raise a RequestException here.       
        except requests.exceptions.RequestException as e:
            print(f"Request Failed: {e}")
        
        # If our try on this attempt fails, wait the sleep amount of time and retry.    
        time.sleep(wait)
    
    return None
            

SyntaxError: f-string: single '}' is not allowed (1610145407.py, line 36)

In [73]:
def get_block_addresses(_current_block : dict, _in : bool) -> list:
    """
    Takes the current block and finds all addresses referenced within the block; addresses who either sent btc (_in=True) or received btc (_in=False).
    :param _current_block: (dict) The dictionary representing the current block.
    :param _in: (bool) True if you want to look at transaction inputs. False if you want to look at transaction outputs.
    :return: (list) Return a list of all the addresses of the given _in parameter.
    """
    # Initialise key variables.
    _addresses = []
    _txs = _current_block['txIndexes']
    
    # If we are looking at addresses that sent money in the block:
    if _in:
        # Look through each transaction except our coinbase-transaction:
        for _tx in _txs[1:]:
            # Get the relevant transaction data.
            _req = safe_request(api_config['single_transaction']['root'] + str(_tx))
                
            if not _req:
                continue
                
            # For each input from transaction, add that address to our address variable to be returned by the function.
            for _input in _req['inputs']:
                _addresses.append(_input['prev_out']['addr'])
    
    # If we are looking at address that received money in the block:            
    else:
        # Look through each transaction:
        for _tx in _txs:
            # Get the relevant transaction data.
            _req = safe_request(api_config['single_transaction']['root'] + str(_tx))
            
            if not _req:
                continue
                
            # For each output from our list of outputs:
            for _out in _req['out']:
                
                # If the value sent to that address is 0, it's for triggering scripts so ignore it.
                if _out['value'] == 0:
                    continue
                # Otherwise, if there is an address value in the output data we're currently looking at:
                elif 'addr' in _out.keys():
                    # Add that address to our list of addresses to be returned.
                    _addresses.append(_out['addr'])
                else:
                    continue
                
    return _addresses        

In [74]:
def addresses_to_str(_addresses : list) -> str:
    """
    Takes a list of addresses and converts it to a single string containing said addresses separated by pipes.
    :param _addresses: (list) List of addresses to convert.
    :return: (str) The string of all concatenated addresses separated by pipes.
    """
    # Initialise our return string.
    _rtrn_str : str = ""
    
    # For each address in our list:
    for _addr in _addresses:
        
        # If this is the first time of the loop, insert the first address into the string.
        if _rtrn_str == "":
            _rtrn_str = str(_addr)
        
        # Otherwise, we need to concatenate the return string with our new address with a pipe delimiter.    
        else:
            _rtrn_str += "|" + str(_addr)
            
    return _rtrn_str

In [75]:
def chunk_addresses(_addresses : list, _chunk_size : int = 50) -> list:
    """
    Takes a list of addresses and splits it into chunks of _chunk_size.
    For example, if you pass a list of 1000 it will return blocks of addresses equal to _chunk_size, default 50.
    
    This function should only be used in a loop as it returns a generator that yields chunks of addresses.
    
    :param _addresses: (list) List of addresses to split.
    :param _chunk_size: (int) The value of addresses to be grouped and returned.
    :return: (list) Acts as a generator. Returns a list containing _chunk_size number of addresses. 
    """
    # From 0 to the number of addresses we have, iterate in steps equal to our _chunk_size variable (default at 50)
    for i in range(0, len(_addresses), _chunk_size):
        # Yield, for further iteration, the index slice of the addresses equal to our chunk size.
        yield _addresses[i:i + _chunk_size]

In [76]:
def get_max_balance(_current_block : dict, _in : bool, _chunk_size : int = 50) -> dict:
    """
    Takes the current block and finds the maximum balance of the addresses in the block.
    :param _current_block: (dict) The dictionary representing the current block.
    :param _in: (bool) True if you want to look at transaction inputs. False if you want to look at transaction outputs.
    :param _chunk_size: (int) The value of addresses to be grouped and returned. Higher means more latency time but less compute time.
    :return: (dict) The address which held the maximum balance as the key, and the maximum balance as the value.
    """
    
    # Get our list of addresses.
    _addresses = get_block_addresses(_current_block, _in)
    
    # We get a lot of repeat addresses through the get_block_addresses() function.
    # Since we aren't worried about transaction order, and we don't want to get the max balance for an address twice, ...
    #   we can remove them to minimise api get requests.
    _unique_addr = list(set(_addresses))
    
    # Initialise our important counter/container variables.
    rtrn_dict : dict = {}
    max_balance : float = 0.0
    max_addr : str = ""
    
    # Iterate over our generator object returned from chunk_addresses() function.
    for _addr_chunk in chunk_addresses(_unique_addr, _chunk_size):
        # Turn our addresses into concatenated string which can be used in an api get request.
        _address_str = addresses_to_str(_addr_chunk)
        
        # Get the balance request of multiple addresses using pipe delimiter.
        balance_data = safe_request(api_config['balance']['root'] + _address_str)
        
        # For each address, look at the final balance of the address and see if it is the greatest value seen.
        for _addr, _bal_info in balance_data.items():
            balance = _bal_info['final_balance']
            max_balance = max(max_balance, balance)
            
            # If the max_balance is the balance for the current address, store that address.
            if balance == max_balance:
                max_addr = _addr
    
    
    rtrn_dict[max_addr] = max_balance        
            
    return rtrn_dict

In [77]:
def get_exchange_rates() -> dict:
    # Very simply gets exchangerates from the blockchain.
    return requests.get("https://blockchain.info/ticker").json()

In [78]:
def calc_currency_conversion(_exchange_data : dict, _currency : list, _btc : float) -> float:
    """
    Takes the exchange rate data and calculates the currency conversion.
    :param _exchange_data: (dict) The exchange rate data.
    :param _currency: (list) The currency to convert and crucially the time period for the data.
    :param _btc: (float) The btc value to convert. Crucially, not in satoshi.
    :return: (float) The currency conversion.
    """
    return _btc * int(_exchange_data[_currency[0]][_currency[1]])

In [79]:
def format_float_to_str(_float : float) -> str:
    # Converts a float value to string using string formatting.
    return "{:,.2f}".format(_float)

<i>End of defining functions</i>

In [80]:
# Gather exchange-rate data.
exchange_data = get_exchange_rates()

In [81]:
# Get the address with the highest balance, out of all the addresses that sent money in transactions within the current block.
in_max = get_max_balance(latest_block, True, 50)

# This cell may output 'Unexpected Status Code' but this is part of the safe_request() method and part of the retrying process.

Unexpected Status Code.
Unexpected Status Code.
Unexpected Status Code.
Unexpected Status Code.


In [82]:
# Unpack the dictionary of wallet_address : balance.
in_max_addr, in_max_balance = next(iter(in_max.items()))

# Just to clarify the logic of the line above:
#   in_max.items() returns a view object which will show, in tuple format, the data available in the in_max dict.
#   iter(in_max.items()) creates an iterable object which includes the tuples within the view object.
#   next(iter(in_max.items())) calls the next tuple (the first and only tuple) and returns that.
#   Since we only have one key-value pair in the dictionary, we can simply unpack this tuple to get our address and btc balance of that address.
# I have chosen this over multiple lines of code which call the values() and keys() methods separately for brevity's sake.

In [83]:
# Turn the satoshi balance returned by the get_max_balance() function into btc.
in_max_btc = in_max_balance / 100000000

# Find the converted value of the btc into USD using current exchange-rate data.
in_max_usd = calc_currency_conversion(exchange_data, ["USD", "15m"], in_max_btc)

# Format the result using more human-readable friendly approach.
in_max_usd_f = format_float_to_str(in_max_usd)

# Output the results.
Markdown(f"Max btc balance of addresses who sent money in this block is: <b>{in_max_btc}</b>  "
         f"\nThe USD conversion equates to: <b>${in_max_usd_f}</b>  "
         f"\nThe address with this value of bitcoin was: <b>{in_max_addr}</b>")

Max btc balance of addresses who sent money in this block is: <b>20427.30867759</b>  
The USD conversion equates to: <b>$1,825,323,021.50</b>  
The address with this value of bitcoin was: <b>bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h</b>

In [84]:
# Same process as before, but for wallet addresses which received btc in transactions in the current block.
out_max = get_max_balance(latest_block, False, 50)

# This cell may output 'Unexpected Status Code' but this is part of the safe_request() method and part of the retrying process.

In [85]:
# Unpack our address and max balance. Reference 3 cells above for a breakdown of the logic of this cell.
out_max_addr, out_max_balance = next(iter(out_max.items()))

In [86]:
# Turn satoshi into btc, convert it into USD using exchange-rate data and format it for human consumption, as before.
out_max_btc = out_max_balance / 100000000
out_max_usd = calc_currency_conversion(exchange_data, ["USD", "15m"], out_max_btc)
out_max_usd_f = format_float_to_str(out_max_usd)

# Output the results.
Markdown(f"Max btc balance of addresses who received money in this block is: <b>{out_max_btc}</b>  "
         f"\nThe USD conversion equates to: <b>${out_max_usd_f}</b>  "
         f"\nThe address with this value of bitcoin was: <b>{out_max_addr}</b>")

Max btc balance of addresses who received money in this block is: <b>19885.40971951</b>  
The USD conversion equates to: <b>$1,776,900,556.31</b>  
The address with this value of bitcoin was: <b>bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h</b>

In [87]:
# We have obtained the greatest value of the addresses who sent bitcoin and the addresses which received bitcoin.
# Now we have to determine which is greatest out of all of them and print that value and address.

# For assertions, Markdown() can't be used, or at least I had issues with it, so here I have to print to terminal.
if out_max_btc > in_max_btc:
    print(f"Therefor, the address with the greatest input balance is {out_max_addr} with {out_max_btc} btc. They sent the most money in this block.")
    
elif in_max_btc > out_max_btc:
    print(f"Therefor, the address with the greatest balance is {in_max_addr} with {in_max_btc} btc. They received the most money in this block.")
    
else:
    print(f"The input and output addresses with the greatest balance in this block had the same value!\n"
          f"The addresses tied with {in_max_btc} btc\n"
          f"The addresses were: {in_max_addr} and {out_max_addr}")

Therefor, the address with the greatest balance is bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h with 20427.30867759 btc


<h1>Which address has most number of transactions?</h1>

In [88]:
# We can call the get_block_addresses() method again here with the True and False _in parameters to get all transaction data within the block.
total_addr = get_block_addresses(latest_block, True)
total_addr += get_block_addresses(latest_block, False)

# This cell may output 'Unexpected Status Code' but this is part of the safe_request() method and part of the retrying process.

In [89]:
# Python has a built-in Counter object which is perfect for iterable objects like our string.
#   Counter(total_addr) will return the number of times an address in our total_addr list is reference.
#       Like this: {"bc1qr4dl5wa7kl8yu792dceg9z5knl2gkn220lk7a9" : 3, "wallet_address..." : 17, ...}
addr_count = Counter(total_addr)

# We can use the Counter().most_common(n) method to get n most common occurrences from our Counter object.
#   .most_common(n) returns a list of tuples, and we will only have 1 tuple, so we need to index the list with [0] to unpack our tuple.
max_addr, max_count = addr_count.most_common(1)[0]

Markdown(f"The address with the most transactions in this block was: {max_addr}  "
         f"\nThey were involved in transactions (potentially as both senders and receivers) a total of {max_count} times in this block!")

The address with the most transactions in this block was: bc1p22y8nq8tx2w4tv0lgnlzf9vammdarpcv43kmh4ttkkka388t5g6s6mh50x  
They were involved in transactions (potentially as both senders and receivers) a total of 2243 times in this block!

<h1>When did this address become active? (first transaction on the network)</h1>

In [90]:
# blockchain.com/api has an endpoint for this in its plaintext query api.
# We can pass an address to the addressfirstseen endpoint, and it will return a UTC timestamp of when the address was first seen on the blockchain.
seen = requests.get(api_config['blockchain_api']['root'] + 'q/addressfirstseen/' + str(max_addr)).json()

# Convert the UTC timestamp to datetime object for string formatting.
seen_dt = datetime.datetime.utcfromtimestamp(seen)

# Output our results.
Markdown(f"This address saw its first activity on the Bitcoin blockchain on "
         f"{seen_dt.strftime('%d/%m/%y')} at {seen_dt.strftime('%H:%M')}.")

This address saw its first activity on the Bitcoin blockchain on 04/02/24 at 14:03.