## Setting up the environment for the project


In [None]:
# Required libraries
import requests
from bs4 import BeautifulSoup
import urllib.robotparser
import pandas as pd

## Configuration


In [None]:
# Root URL of the WalletExplorer site; request URLs in this notebook are built from it.
BASE_URL = "https://www.walletexplorer.com"

# HTTP headers passed via `headers=HEADERS` to the requests sent to WalletExplorer.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    "Accept-Language": "it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7",
    "Content-Type": "application/x-www-form-urlencoded",
    # The site itself is declared as the referring page.
    "Referer": BASE_URL,
}

### Check the robots.txt file


In [21]:

# Robots.txt permission check for a single URL
def is_allowed(url):
    """Tell whether robots.txt permits fetching ``url``.

    The check is delegated to the module-level ``rp`` parser and is made
    for the wildcard user agent ``'*'``. ``rp`` is looked up when the
    function is called, so it must have been created before the first call.

    Args:
        url: The URL to test.

    Returns:
        The value returned by ``rp.can_fetch('*', url)``.
    """
    permitted = rp.can_fetch('*', url)
    return permitted
# Function to scrape a URL if allowed by robots.txt
def scrape_url(url):
    """Fetch ``url`` and print its status code and body, if robots.txt allows it.

    The request is sent with the shared ``HEADERS`` and a 60-second timeout,
    matching the other requests in this notebook, so a stalled connection
    cannot hang the cell indefinitely.

    Args:
        url: The URL to download.

    Returns:
        None. Everything is reported through ``print``: the status code and
        response text on success, or a message when the URL is blocked by
        robots.txt or the request fails.
    """
    if not is_allowed(url):
        print(f"Scraping blocked by robots.txt: {url}")
        return

    try:
        response = requests.get(url, headers=HEADERS, timeout=60)
    except requests.exceptions.RequestException as e:
        # Report connection problems and timeouts the same way the other
        # helpers do, instead of aborting the cell with a traceback.
        print(f"Error while accessing {url}: {e}")
        return

    # Process the response
    print(response.status_code)
    print(response.text)




# Parse the robots.txt file
rp = urllib.robotparser.RobotFileParser()
rp.set_url(BASE_URL + '/robots.txt')
try:
    rp.read()
except OSError as e:
    # Connection-level failures surface as urllib's URLError, an OSError
    # subclass. Catching it lets the cell continue to the check below
    # instead of stopping with a traceback.
    print(f"robots.txt could not be downloaded: {e}")

# mtime() is falsy when no robots.txt content has been parsed.
if not rp.mtime():
    print("robots.txt could not be read or is not present.")


robots.txt could not be read or is not present.


## Extracting DeepBit.net and DiceOnCrack.com wallet addresses

The two functions below form a small scraping pipeline: the first fetches the WalletExplorer main page, and the second uses that page's search form to find a service (DeepBit.net or DiceOnCrack.com) and extract its wallet addresses.


In [22]:
def get_walletexplorer_page():
    """Download the WalletExplorer main page, which contains the search form.

    The request goes to ``BASE_URL`` with the shared ``HEADERS`` and a
    60-second timeout.

    Returns:
        The ``requests`` response for the main page, or ``None`` when the
        request fails or returns an HTTP error status (the error is printed).
    """
    try:
        homepage = requests.get(BASE_URL, headers=HEADERS, timeout=60)
        # Turn 4xx/5xx answers into exceptions so they share the error path.
        homepage.raise_for_status()
    except requests.exceptions.RequestException as err:
        print("Error while accessing WalletExplorer:", err)
        return None

    print("WalletExplorer has been successfully accessed")
    return homepage

In [23]:
def get_wallet_address(html_page,service_name):
    """Search WalletExplorer for ``service_name`` and return its wallet addresses.

    The function works in three steps:
      1. find the search form (``<form class="main">``) in ``html_page`` and
         submit ``service_name`` to it as the ``wallet`` query parameter;
      2. in the search result, follow the link inside ``<span class="showother">``
         to the wallet addresses page;
      3. read the first ``<table>`` of that page and collect the text of the
         link found in the first cell of each row.

    Only the single addresses page fetched in step 2 is read.

    Args:
        html_page: Response of the WalletExplorer main page; only its
            ``.text`` attribute is used.
        service_name: Name of the service to search for (e.g. "DeepBit.net").

    Returns:
        A list of address strings, or ``None`` if a request fails or an
        expected page element is missing (a message is printed in that case).
    """
    # Local import so this cell does not depend on edits to the imports cell.
    from urllib.parse import urljoin

    # Search the form in the page
    soup = BeautifulSoup(html_page.text, 'html.parser')
    search_form = soup.find('form', {'class':'main'})
    if search_form is None:
        print("Error: search form not found in the WalletExplorer main page")
        return None

    # urljoin resolves both site-relative ('/...') and absolute actions;
    # a missing action falls back to BASE_URL itself.
    target_url = urljoin(BASE_URL, search_form.get('action') or '')

    # Open search page regarding 'service_name' and open the wallet addresses page
    try:
        search_page = requests.get(target_url, headers=HEADERS, params={'wallet' :service_name},timeout=60)
        search_page.raise_for_status()
        print(f'Search page for "{service_name}" has been successfully accessed')
    except requests.exceptions.RequestException as e:
        print("Error while accessing the search page:", e)
        return None

    ## Scrape the search results and extract the wallet addresses of 'service_name'
    soup = BeautifulSoup(search_page.text, 'html.parser')

    # Find the url of the wallet addresses page
    span = soup.find('span', {'class': 'showother'})
    wallet_link = span.find('a', href=True) if span is not None else None
    if wallet_link is None:
        # Without this guard a missing element raised AttributeError.
        print(f'Error: wallet addresses link not found for "{service_name}"')
        return None

    wallets_url = urljoin(BASE_URL, wallet_link['href'])  # full URL of the wallet addresses page
    try:
        wallet_addr_page = requests.get(wallets_url, headers=HEADERS,timeout=60)
        wallet_addr_page.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error while accessing the wallets page: {e}")
        return None

    # Scrape the wallet addresses page extracting the information from the table
    soup = BeautifulSoup(wallet_addr_page.text, 'html.parser')

    # Find the table containing the wallet addresses
    wallet_table = soup.find('table')
    if wallet_table is None:
        print(f'Error: wallet addresses table not found for "{service_name}"')
        return None

    # Save the wallet address of 'service_name'
    wallet_addresses = []
    for row in wallet_table.find_all('tr'):
        col = row.find('td')  # rows without a <td> are skipped
        if col is None:
            continue
        # Single lookup: the anchor that is tested is the one whose text is stored.
        addr = col.find('a', href=True)
        if addr is not None:
            wallet_addresses.append(addr.text.strip())
    return wallet_addresses

In [None]:
deepbit_service = "DeepBit.net"
diceoncrack_service = "DiceOnCrack.com"

# Defined up front so the final prints (and later cells) do not hit a
# NameError when the main page cannot be retrieved.
deepbit_wallet_addresses = None
diceoncrack_wallet_addresses = None

# Open the main page of WalletExplorer

main_walletexplore_page = get_walletexplorer_page()
if main_walletexplore_page is None:
    print("Failed to retrieve the main WalletExplorer page.")
else:
    # Get the wallet addresses for DeepBit.net
    print(f"Searching for wallet addresses of {deepbit_service}...")
    deepbit_wallet_addresses = get_wallet_address(main_walletexplore_page, deepbit_service)
    if deepbit_wallet_addresses is None:
        print(f"Failed to retrieve wallet addresses for {deepbit_service}.")

    # Get the wallet addresses for DiceOnCrack.com
    print(f"Searching for wallet addresses of {diceoncrack_service}...")
    diceoncrack_wallet_addresses = get_wallet_address(main_walletexplore_page, diceoncrack_service)
    if diceoncrack_wallet_addresses is None:
        print(f"Failed to retrieve wallet addresses for {diceoncrack_service}.")

# Print the results (None means the lookup for that service failed)
print(f"DeepBit.net wallet addresses: {deepbit_wallet_addresses}")
print(f"DiceOnCrack.com wallet addresses: {diceoncrack_wallet_addresses}")


WalletExplorer has been successfully accessed
Searching for wallet addresses of DeepBit.net...
Search page for "DeepBit.net" has been successfully accessed
Searching for wallet addresses of DiceOnCrack.com...
Error while accessing the search page: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))
DeepBit.net wallet addresses: ['1VayNert3x1KzbpzMGt2qdqrAThiRovi8', '13NGmRF2SVRg3aKdGNVhXLmhA1JT9p87a8']
DiceOnCrack.com wallet addresses: None


## Deepbit.net's mining pool analysis


## 1. Deepbit.net's mined block distribution


In [None]:

#1) Identify the mapping between wallet addresses on the dataset
# The file is read without a header row. The column named 'hash' is the one
# compared with the scraped address strings; 'address' holds the value that
# is later matched against outputs' addressId.
mapping = pd.read_csv('mapping.csv', engine='pyarrow', header=None)
mapping.columns = ['hash', 'address']

# pd.read_csv raises on failure rather than returning None, so the input worth
# guarding is the scraping result, which is None when the lookup failed.
if not deepbit_wallet_addresses:
    raise RuntimeError(
        "No DeepBit.net wallet addresses available: re-run the scraping cell before this one."
    )

# One vectorised lookup instead of scanning the whole frame once per address.
# drop_duplicates keeps the first row per hash.
deepbit_mapping_rows = (
    mapping[mapping['hash'].isin(deepbit_wallet_addresses)]
    .drop_duplicates(subset='hash')
)

# Report the scraped addresses that have no entry in the mapping
found_hashes = set(deepbit_mapping_rows['hash'])
for address in deepbit_wallet_addresses:
    if address not in found_hashes:
        print(f"No mapping found for address: {address}")

deepbit_mapped_addresses = set(deepbit_mapping_rows['address'].to_numpy())

# Print the mapped addresses
print(f"Mapped addresses for DeepBit.net: {deepbit_mapped_addresses}")



Mapped addresses for DeepBit.net: {np.int64(2536929), np.int64(2537090)}


In [None]:
#2) Find the transaction patterns for the wallet addresses

# Load datasets
# NOTE(review): mapping.csv is read with header=None, but these three files are
# not, so their first line is consumed as a header before the columns are
# renamed. Confirm the files really start with a header row; otherwise one
# record per file is silently dropped.
transactions = pd.read_csv('transactions.csv', engine='pyarrow')
transactions.columns = ['timestamp', 'blockId', 'txId', 'isCoinbase', 'fee']

outputs = pd.read_csv('outputs.csv', engine='pyarrow')
outputs.columns = ['txId', 'position', 'addressId', 'amount', 'scripttype']

inputs = pd.read_csv('inputs.csv', engine='pyarrow')
inputs.columns = ['txId', 'prevTxId', 'prevTxpos']

# 1. Deepbit addresses: `deepbit_mapped_addresses` comes from the mapping cell.

# 2. Find all transactions that have AT LEAST ONE output to a Deepbit address
deepbit_txs = outputs.loc[outputs['addressId'].isin(deepbit_mapped_addresses), 'txId'].unique()

# 3. For these transactions, find INPUTS originating from Coinbase transactions
# 3a. Retrieve all inputs of Deepbit transactions
inputs_deepbit = inputs[inputs['txId'].isin(deepbit_txs)]

# 3b. Filter only inputs coming from Coinbase transactions
coinbase_txids = transactions[transactions['isCoinbase'] == 1]['txId']
inputs_deepbit = inputs_deepbit[inputs_deepbit['prevTxId'].isin(coinbase_txids)]

# 4. Verify that Deepbit transactions spend EXCLUSIVELY Coinbase outputs
# 4a. Count all inputs of Deepbit transactions.
# reindex (not .loc) so a transaction with no rows in `inputs` gets a count of 0
# instead of raising KeyError.
all_inputs_counts = (
    inputs.groupby('txId').size()
    .reindex(deepbit_txs, fill_value=0)
    .rename('total_inputs')
)

# 4b. Count Coinbase inputs (already filtered in step 3)
coinbase_inputs_counts = (
    inputs_deepbit.groupby('txId').size()
    .reindex(all_inputs_counts.index, fill_value=0)
    .rename('coinbase_inputs')
)

# 4c. Select only transactions where all inputs are from Coinbase.
# Transactions with zero inputs are excluded: 0 == 0 would otherwise mark them valid.
valid_spend = all_inputs_counts.index[
    (all_inputs_counts > 0) & (all_inputs_counts == coinbase_inputs_counts)
]

# 5. Build final result: one row per distinct (coinbase tx, spending tx) pair
result = (
    inputs_deepbit[inputs_deepbit['txId'].isin(valid_spend)]
    .rename(columns={'prevTxId': 'coinbaseTx', 'txId': 'deepbitSpendTx'})
    [['coinbaseTx', 'deepbitSpendTx']]
    .drop_duplicates()
)

# len(result) counts (coinbaseTx, deepbitSpendTx) pairs
print("Number of identified Deepbit transactions:", len(result))
result.head(20)

Numero di transazioni Deepbit individuate: 15648


Unnamed: 0,coinbaseTx,deepbitSpendTx
3171177,1878755,1882617
3171330,1878900,1882704
3171817,1879037,1882913
3171931,1879137,1882982
3172084,1879201,1883074
3172188,1879256,1883123
3172251,1879372,1883160
3173128,1879674,1883599
3173170,1879778,1883633
3173335,1879896,1883718
