## Setting up the environment for the project


In [None]:
# Required libraries
import requests
from bs4 import BeautifulSoup
import urllib.robotparser
import pandas as pd
import matplotlib.pyplot as plt


## Configuration


In [None]:
# Root URL of the WalletExplorer site; request URLs are built from it.
BASE_URL = "https://www.walletexplorer.com"
# HTTP headers meant to accompany the scraping requests.
HEADERS = {
    # Browser-like User-Agent string.
    'User-Agent': 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    # Prefer Italian content, falling back to English.
    'Accept-Language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
    # NOTE(review): Content-Type describes a request body; it is probably
    # unnecessary if these headers are only sent with GET requests -- confirm.
    'Content-Type': 'application/x-www-form-urlencoded',
    'Referer': BASE_URL
}

### Check the robots.txt file


In [None]:

def is_allowed(url):
    """Return whether robots.txt lets the generic user agent '*' fetch `url`.

    The decision is delegated to the module-level robots.txt parser `rp`.
    """
    permitted = rp.can_fetch('*', url)
    return permitted
# Function to scrape a URL if allowed by robots.txt
def scrape_url(url):
    """Fetch `url` and print its status code and body, if robots.txt allows it.

    Args:
        url: Absolute URL to download.

    Returns:
        None. The outcome is reported on stdout: the status code and response
        text on success, or a message when the URL is blocked by robots.txt
        or the request fails.
    """
    if not is_allowed(url):
        print(f"Scraping blocked by robots.txt: {url}")
        return

    try:
        # Send the shared HEADERS like every other request in this file, and
        # bound the wait so an unresponsive server cannot hang the notebook.
        response = requests.get(url, headers=HEADERS, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Error while accessing {url}: {e}")
        return

    # Process the response
    print(response.status_code)
    print(response.text)




# Parse the robots.txt file
rp = urllib.robotparser.RobotFileParser()
rp.set_url(BASE_URL + '/robots.txt')
try:
    rp.read()
except OSError as e:
    # read() only deals with HTTP error statuses itself; network-level
    # failures (DNS, refused connection, ...) surface as URLError, which is an
    # OSError subclass. Report them instead of aborting the cell.
    print(f"robots.txt could not be fetched: {e}")

# mtime() stays falsy until a robots.txt file has actually been parsed.
if not rp.mtime():
    print("robots.txt could not be read or is not present.")


## Extracting DeepBit.net and DiceOnCrack.com wallet addresses

I create a small pipeline that scrapes the WalletExplorer website and then extracts the wallet addresses of the two services (DeepBit.net and DiceOnCrack.com) from its pages, using two functions.


In [None]:
def get_walletexplorer_page():
    """Download the WalletExplorer main page, which contains the search form.

    Returns:
        The `requests` response for BASE_URL, or None when the request fails,
        times out, or comes back with an HTTP error status.
    """
    try:
        # The timeout bounds the wait; a timeout is a RequestException too,
        # so it is reported by the handler below.
        main_walletexplore_page = requests.get(BASE_URL, headers=HEADERS, timeout=30)
        main_walletexplore_page.raise_for_status()
        print("WalletExplorer has been successfully accessed")
    except requests.exceptions.RequestException as e:
        print("Error while accessing WalletExplorer:", e)
        return None
    return main_walletexplore_page

In [None]:
def get_wallet_address(html_page, service_name):
    """Return the wallet addresses that WalletExplorer lists for a service.

    The function locates the search form on `html_page`, submits a search for
    `service_name`, follows the link inside the `showother` span of the result
    page, and reads the first table of the page it lands on.

    Args:
        html_page: Response of the WalletExplorer main page (its `.text` is
            parsed as HTML).
        service_name: Name of the service to search for, e.g. "DeepBit.net".

    Returns:
        A list with the stripped text of the first-column link of every table
        row, or None when a request fails or an expected page element is
        missing.
    """
    from urllib.parse import urljoin

    request_timeout = 30  # seconds; keeps a stalled server from hanging the run

    # Search the form in the page
    soup = BeautifulSoup(html_page.text, 'html.parser')
    search_form = soup.find('form', {'class': 'main'})
    if search_form is None:
        print("Search form not found in the WalletExplorer main page")
        return None

    # A form without an action submits to the page it lives on (the site root).
    action_form = search_form.get('action') or '/'
    # urljoin resolves relative paths against the site and leaves absolute
    # URLs untouched.
    target_url = urljoin(BASE_URL + '/', action_form)

    # Open search page regarding 'service_name' and open the wallet addresses page
    try:
        search_page = requests.get(
            target_url,
            headers=HEADERS,
            params={'wallet': service_name},
            timeout=request_timeout,
        )
        search_page.raise_for_status()
        print(f'Search page for "{service_name}" has been successfully accessed')
    except requests.exceptions.RequestException as e:
        print("Error while accessing the search page:", e)
        return None

    ## Scrape the search results and extract the wallet addresses of 'service_name'
    soup = BeautifulSoup(search_page.text, 'html.parser')

    # Find the url of the wallet addresses page
    span = soup.find('span', {'class': 'showother'})
    link = span.find('a', href=True) if span is not None else None
    if link is None:
        print(f'Link to the wallet addresses page not found for "{service_name}"')
        return None
    wallets_url = urljoin(BASE_URL + '/', link['href'])  # full URL of the wallet addresses page

    try:
        wallet_addr_page = requests.get(wallets_url, headers=HEADERS, timeout=request_timeout)
        wallet_addr_page.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error while accessing the wallets page: {e}")
        return None

    # Scrape the wallet addresses page extracting the information from the table
    soup = BeautifulSoup(wallet_addr_page.text, 'html.parser')

    # Find the table containing the wallet addresses
    wallet_table = soup.find('table')
    if wallet_table is None:
        print(f'Wallet addresses table not found for "{service_name}"')
        return None

    # NOTE(review): only the page behind `wallets_url` is read; if the site
    # paginates long address lists, later pages are not visited -- confirm.
    wallet_addresses = []
    for row in wallet_table.find_all('tr'):
        col = row.find('td')
        if col is None:
            # Rows without a <td> (e.g. header rows) carry no address
            continue
        addr = col.find('a', href=True)
        if addr is not None:
            wallet_addresses.append(addr.text.strip())
    return wallet_addresses

In [None]:
deepbit_service = "DeepBit.net"
diceoncrack_service = "DiceOnCrack.com"

# Defined up front so the summary prints at the end still work when the main
# page cannot be retrieved and the searches are never run.
deepbit_wallet_addresses = None
diceoncrack_wallet_addresses = None

# Open the main page of WalletExplorer
main_walletexplore_page = get_walletexplorer_page()
if main_walletexplore_page is None:
    print("Failed to retrieve the main WalletExplorer page.")
else:
    # Get the wallet addresses for DeepBit.net
    print(f"Searching for wallet addresses of {deepbit_service}...")
    deepbit_wallet_addresses = get_wallet_address(main_walletexplore_page, deepbit_service)
    if deepbit_wallet_addresses is None:
        print(f"Failed to retrieve wallet addresses for {deepbit_service}.")

    # Get the wallet addresses for DiceOnCrack.com
    print(f"Searching for wallet addresses of {diceoncrack_service}...")
    diceoncrack_wallet_addresses = get_wallet_address(main_walletexplore_page, diceoncrack_service)
    if diceoncrack_wallet_addresses is None:
        print(f"Failed to retrieve wallet addresses for {diceoncrack_service}.")

# Print the results (None means the addresses could not be retrieved)
print(f"DeepBit.net wallet addresses: {deepbit_wallet_addresses}")
print(f"DiceOnCrack.com wallet addresses: {diceoncrack_wallet_addresses}")


## Deepbit.net's mining pool analysis


## 1. Deepbit.net's mined block distribution


In [None]:

def map_wallet_addresses(wallet_addresses, mapping_df):
    """Map wallet address hashes to the ids stored in the mapping dataset.

    Args:
        wallet_addresses: Iterable of address hashes to look up.
        mapping_df: DataFrame with a 'hash' column (address hash) and an
            'address' column (the id the hash maps to).

    Returns:
        The set of 'address' values found for the given hashes. When a hash
        occurs on several rows, the first row wins. Hashes without a match
        are reported on stdout, and the resulting set is printed as well.
    """
    # Materialise once: the addresses are traversed twice below.
    wallet_addresses = list(wallet_addresses)

    # One vectorised pass over the mapping instead of a full scan per address.
    matches = mapping_df.loc[mapping_df['hash'].isin(wallet_addresses), ['hash', 'address']]
    matches = matches.drop_duplicates(subset='hash', keep='first')
    address_id_by_hash = dict(zip(matches['hash'].to_numpy(), matches['address'].to_numpy()))

    mapped_addresses = set()
    for address in wallet_addresses:
        if address in address_id_by_hash:
            mapped_addresses.add(address_id_by_hash[address])
        else:
            print(f"No mapping found for address: {address}")
    print(f"Mapped addresses : {mapped_addresses}")
    return mapped_addresses

In [None]:
#1) Identify the mapping between wallet addresses on the dataset
try:
    mapping = pd.read_csv('mapping.csv', engine='pyarrow', header=None)
except OSError as e:
    # read_csv never returns None: a missing or unreadable file raises
    # instead, so the failure has to be reported here.
    print("Failed to retrieve the mapping dataset.")
    raise SystemExit(1) from e
mapping.columns = ['hash', 'address']

# Hard-coded Deepbit.net addresses used for the analysis; this assignment
# replaces any value the name held before.
deepbit_wallet_addresses = ["1VayNert3x1KzbpzMGt2qdqrAThiRovi8","13NGmRF2SVRg3aKdGNVhXLmhA1JT9p87a8"]

deepbit_mapped_addresses = map_wallet_addresses(deepbit_wallet_addresses, mapping)


In [None]:
#2) Find the transaction patterns for the wallet addresses

# Load datasets
# NOTE(review): unlike mapping.csv these files are read without header=None,
# so their first line is consumed as a header before the column names are
# overwritten. If the files have no header row, one data row is lost -- confirm.
transactions = pd.read_csv('transactions.csv', engine='pyarrow')
transactions.columns = ['timestamp', 'blockId', 'txId', 'isCoinbase', 'fee']

outputs = pd.read_csv('outputs.csv', engine='pyarrow')
outputs.columns = ['txId', 'position', 'addressId', 'amount', 'scripttype']

inputs = pd.read_csv('inputs.csv', engine='pyarrow')
inputs.columns = ['txId', 'prevTxId', 'prevTxpos']

# 1. Deepbit addresses: `deepbit_mapped_addresses` must already be defined.

# 2. Find all transactions that have AT LEAST ONE output to a Deepbit address
deepbit_txs = outputs.loc[outputs['addressId'].isin(deepbit_mapped_addresses), 'txId'].unique()

# 3. For these transactions, find INPUTS originating from Coinbase transactions
# 3a. Retrieve all inputs of Deepbit transactions
inputs_deepbit = inputs[inputs['txId'].isin(deepbit_txs)]

# 3b. Filter only inputs coming from Coinbase transactions
coinbase_txids = transactions[transactions['isCoinbase'] == 1]['txId']
inputs_deepbit = inputs_deepbit[inputs_deepbit['prevTxId'].isin(coinbase_txids)]

# 4. Verify that Deepbit transactions spend EXCLUSIVELY Coinbase outputs
# 4a. Count all inputs of Deepbit transactions.
# reindex (rather than .loc) is required here: .loc raises KeyError for a
# transaction id that has no row in `inputs`, whereas reindex records a count
# of 0 for it.
all_inputs_counts = (
    inputs.groupby('txId').size()
    .reindex(deepbit_txs, fill_value=0)
    .rename('total_inputs')
)

# 4b. Count Coinbase inputs (already filtered in step 3)
coinbase_inputs_counts = inputs_deepbit.groupby('txId').size().rename('coinbase_inputs')

# 4c. Select only transactions where all inputs are from Coinbase.
# Transactions without any input also compare equal (0 == 0), but they have no
# rows in `inputs_deepbit` and therefore add nothing to the result below.
valid_spend = all_inputs_counts.index[
    all_inputs_counts == coinbase_inputs_counts.reindex(all_inputs_counts.index, fill_value=0)
]

# 5. Build final result: one row per (coinbase transaction, spending transaction) pair
result = (
    inputs_deepbit[inputs_deepbit['txId'].isin(valid_spend)]
    .rename(columns={'prevTxId': 'coinbaseTx', 'txId': 'deepbitSpendTx'})
    [['coinbaseTx', 'deepbitSpendTx']]
    .drop_duplicates()
)

print("Number of identified Deepbit transactions:", len(result))
result.head(20)

In [None]:
# Prepare the data for the Deepbit.net block distribution:
# keep the transactions whose txId appears among the coinbase transactions in `result`.
is_deepbit_coinbase = transactions['txId'].isin(result['coinbaseTx'])
deepbit_mined_blocks_df = transactions[is_deepbit_coinbase].copy()

# Convert the epoch-second timestamps to datetimes and index the frame by them
# so that it can be resampled by calendar period.
deepbit_mined_blocks_df['timestamp'] = pd.to_datetime(deepbit_mined_blocks_df['timestamp'], unit='s')
deepbit_mined_blocks_df = deepbit_mined_blocks_df.set_index('timestamp')

# Row counts per day, per week and per month end
daily, weekly, monthly = (
    deepbit_mined_blocks_df.resample(rule).size() for rule in ('D', 'W', 'ME')
)

# Visualisation: one stacked panel per granularity
fig, axs = plt.subplots(3, 1, figsize=(14, 12), sharex=False)
fig.suptitle('Distribuzione dei blocchi minati da Deepbit.net', fontsize=16)

panels = [
    (daily, 'Giornaliero', 'Blocchi minati - Giornaliero', 'blue'),
    (weekly, 'Settimanale', 'Blocchi minati - Settimanale', 'green'),
    (monthly, 'Mensile', 'Blocchi minati - Mensile', 'orange'),
]
for ax, (counts, series_label, panel_title, line_color) in zip(axs, panels):
    ax.plot(counts.index, counts.values, label=series_label, color=line_color)
    ax.set_title(panel_title)
    ax.set_ylabel('Blocchi')
    ax.grid(True, alpha=0.3)

# Only the bottom panel carries the x-axis label
axs[2].set_xlabel('Data')

# Leave room at the top for the figure-level title
plt.tight_layout(rect=[0, 0, 1, 0.97])
plt.show()



In [None]:
# Expects `result` to hold the Deepbit.net coinbase transaction ids (column 'coinbaseTx')
# and `transactions` to be the DataFrame with columns ['timestamp', 'blockId', 'txId', 'isCoinbase', 'fee'].

# Prepare the data for the Deepbit.net fee distribution
# 1. Extract the Deepbit.net coinbase transactions
deepbit_coinbase_txs = transactions[transactions['txId'].isin(result['coinbaseTx'])].copy()
# Report the row count (the column index was being printed here by mistake)
print(f"Number of Deepbit.net coinbase transactions: {len(deepbit_coinbase_txs)}")
# 2. Convert the epoch-second timestamp to datetime
deepbit_coinbase_txs['datetime'] = pd.to_datetime(deepbit_coinbase_txs['timestamp'], unit='s')
print(deepbit_coinbase_txs.head(20))

# NOTE(review): these sums use the 'fee' value of the coinbase transactions
# themselves. If the dataset records a block's fees on its ordinary
# transactions instead, the totals should be taken over every transaction of
# the Deepbit blocks -- confirm against the dataset description.
deepbit_coinbase_with_fee = deepbit_coinbase_txs[deepbit_coinbase_txs['fee'] > 0]
a = deepbit_coinbase_with_fee  # original short name, kept in case other cells use it

print(f"Number of Deepbit.net coinbase transactions with fee > 0: {len(deepbit_coinbase_with_fee)}")

total_fee = deepbit_coinbase_txs['fee'].sum()
print(f"Total fee collected by Deepbit.net: {total_fee} satoshis")

# Fee summed per block
fee_per_block = deepbit_coinbase_txs.groupby('blockId')['fee'].sum()

fee_per_block.head(20)


## DiceOnCrack.com gambling service analysis


In [None]:

# Hard-coded list of DiceOnCrack.com wallet address hashes; this assignment
# replaces any value the name held before.
# NOTE(review): presumably copied from the WalletExplorer listing -- confirm.
diceoncrack_wallet_addresses = [
    "12TaAbLWBNKB1NLYH92CPnC1DizQoNK6FN",
    "1CRACkbiJSxfDaLNEoaNsHjNtU4KttwHyo",
    "1CRACKafkXsQzUYmu2fUM3j9c2y4yDhvfh",
    "1CRACKLiwFrZbAQz1yb9w22onHCMLbiMTY",
    "12tAabLFLxvUzC5KuX7VKMM8bYRncbQ84E",
    "1CrAcKt3HE8LNsx4KKDvjqLvcr373wg5ke",
    "1AVFypuG2jUrYzjZa69C7hK59XkWUwvK1m",
    "1CRACK25QvpVdcEmPZVD5ixtf99cMF9stg",
    "1CracksLRtQMcTF4HXNrvPzRgvz7Qr6wNd",
    "13TAabLHjNzwg8Mj7XYn76FuVAqj32s8EM",
    "1CrAckQppdcfiiw4XzpsKrZrf9eDvUok9C",
    "19TAABLQTLxgWHTdm7yNJNstgeQFgxTP4f",
    "14TAAbLiw2QLuRJCGQ3iETYg3vcpweZkTE",
    "15TaABLmhxiRQ9DTX6ZcZ9S9RknVZmP5jX",
    "1tAabLBcZLVL7md9nAnvGMCYdbvq4UVZV",
    "1PipEaL8yRS8n93mUS16wT5SNDiMrMutv5",
    "1PipemCUjxq9LKww7CaLWUMeGVZL3bD3VM",
    "1LQXotaEjfmerkwrGB3dHnheujo7sng6vA",
    "1PipeBMryPGnN3Ms3HfnNjetCS4THmkpkS",
    "1PipeZHgQXcjAYsUQ4WRXyKZn1X3sJNrpk",
    "1PipePezjvE7vBukPyDUkhHEF54qK1nkeu",
    "1Q44t4knYY3PsQZUFAejhd7Wot79ecHe8e",
    "1F4VXTQRzVQfLaGEWcf697xj1g2cKqPire",
    "1Pipeb5iNYmURifrxPZfvwHsTiw9rEb2iu",
    "1PipeZofhJv1hxsxCadEeG1vHAK87f23LE",
    "17ZmFwCULT44K25kWDeYbHiGaJCrWtytjx",
    "13encD1Yagh8M6a9Wgb3YJxKHrHqXnYi8y",
    "1GD2EiVa1rbbXcmFceyM47YN16fzVwn9j"
]


# Translate the address hashes into the ids used by the mapping dataset
diceoncrack_mapped_addresses = map_wallet_addresses(diceoncrack_wallet_addresses, mapping)



In [None]:
# Find the transactions of DiceOnCrack.com,
# i.e. the transactions that have at least one input or output address belonging to DiceOnCrack.com
def find_diceoncrack_transactions(diceoncrack_mapped_addresses, transactions, inputs, outputs):
    """Find all transactions that pay to or spend from DiceOnCrack.com addresses.

    Args:
        diceoncrack_mapped_addresses: Collection of address ids belonging to
            the service.
        transactions: DataFrame with at least 'txId' and 'blockId' columns.
        inputs: DataFrame of transaction inputs. Either it has an 'addressId'
            column, or it identifies the spent output through 'prevTxId' and
            'prevTxpos'.
        outputs: DataFrame of transaction outputs with 'txId', 'position' and
            'addressId' columns.

    Returns:
        The rows of `transactions` whose txId has at least one output to, or
        one input from, the given addresses, sorted by ascending 'blockId'.
    """
    # Find all outputs related to DiceOnCrack addresses
    diceoncrack_outputs = outputs[outputs['addressId'].isin(diceoncrack_mapped_addresses)]

    # Find all inputs related to DiceOnCrack addresses
    if 'addressId' in inputs.columns:
        diceoncrack_inputs = inputs[inputs['addressId'].isin(diceoncrack_mapped_addresses)]
    else:
        # Inputs carry no address of their own: an input spends from a
        # DiceOnCrack address exactly when the output it references
        # (prevTxId, prevTxpos) is one of the DiceOnCrack outputs found above.
        spent_outputs = diceoncrack_outputs[['txId', 'position']].rename(
            columns={'txId': 'prevTxId', 'position': 'prevTxpos'}
        )
        diceoncrack_inputs = inputs.merge(
            spent_outputs, on=['prevTxId', 'prevTxpos'], how='inner'
        )

    # Combine both outputs and inputs to get unique transaction IDs
    diceoncrack_tx_ids = set(diceoncrack_outputs['txId']).union(set(diceoncrack_inputs['txId']))

    # Filter transactions based on these IDs
    diceoncrack_transactions = transactions[transactions['txId'].isin(diceoncrack_tx_ids)]
    # Order by blockId in ascending manner
    diceoncrack_transactions = diceoncrack_transactions.sort_values(by='blockId', ascending=True)

    return diceoncrack_transactions

# Collect the transactions related to DiceOnCrack.com and report how many were found
diceoncrack_transactions = find_diceoncrack_transactions(diceoncrack_mapped_addresses, transactions, inputs, outputs)
print(f"Number of transactions related to DiceOnCrack.com: {len(diceoncrack_transactions)}")


In [None]:
# Consider the DiceOnCrack.com transactions made from 1/09/2012 onwards and identify the groups of transactions that share the same blockId
def group_diceoncrack_transactions_by_block(diceoncrack_transactions):
    """Summarise DiceOnCrack.com transactions per block.

    Returns a DataFrame with one row per 'blockId' and three columns:
    'blockId', 'transaction_count' (number of txId values in the block) and
    'timestamp' (the first timestamp encountered in the block).
    """
    per_block = diceoncrack_transactions.groupby('blockId', as_index=False)
    return per_block.agg(
        transaction_count=('txId', 'count'),
        timestamp=('timestamp', 'first'),
    )
