In [1]:
import requests
import pandas as pd

Functions

In [2]:
def get_blocks_data(n_blocks):
    blocks_data = []
    api_url = "https://blockchain.info/latestblock"
    response = requests.get(api_url)
    if response.status_code == 200:
        block_data = response.json()
        last_block_hash = block_data["hash"]
    else:
        print("Error:", response.status_code)
    for _ in range(n_blocks):
        api_url = f"https://blockchain.info/rawblock/{last_block_hash}"
        response = requests.get(api_url)
        if response.status_code == 200:
            block_data = response.json()
            blocks_data.append(block_data)
            last_block_hash = block_data['prev_block']
        else:
            print("Error:", response.status_code)
    return blocks_data

def get_response(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        print("Error:", response.status_code)

def get_tickers():
    api_url = f"https://api.blockchain.com/v3/exchange/tickers"
    return get_response(api_url)

def get_currencies():
    api_url = 'https://blockchain.info/ticker'
    return get_response(api_url)

def get_orders(symbol1, symbol2):
    api_url = f"https://api.blockchain.com/v3/exchange/l2/{symbol1}-{symbol2}"
    return get_response(api_url)
        
def get_transaction(chart_type):
    url = f'https://blockchain.info/charts/{chart_type}?format=json'
    return get_response(url)

def get_address(bitcoin_address):
    url = f'https://blockchain.info/rawaddr/{bitcoin_address}'
    
def get_multi_address(address_list):
    url = 'https://blockchain.info/multiaddr?active='#{}|{}'
    for i, address in enumerate(address_list):
        if (i != 0):
            url += "|"
        url += address
    return get_response(url)
    

In [3]:
tickers = get_tickers()

df_tickers = pd.DataFrame(tickers)
df_tickers[['symbol1', 'symbol2']] = df_tickers['symbol'].str.split('-', expand=True)
df_tickers = df_tickers.drop('symbol', axis=1)
df_tickers.to_csv("dataset/ticker.csv", sep=";")
df_tickers.head(5)

Unnamed: 0,price_24h,volume_24h,last_trade_price,symbol1,symbol2
0,0.05244,0.0,0.05244,TFUEL,USDC
1,0.10308,0.0,0.1029,XLM,EUR
2,1.5e-05,0.0,1.5e-05,ALGO,BTC
3,0.137,0.0,0.137,EFI,USDT
4,0.96,0.0,0.95,XTZ,USDT


In [4]:
currencies = get_currencies()
symbols_list = df_tickers['symbol1'].unique().tolist() + df_tickers['symbol2'].unique().tolist()
symbols_set = set(symbols_list)
cryptos_list = symbols_set.difference(currencies)

df_cryptos = pd.DataFrame(cryptos_list, columns=['symbol'])
df_cryptos.to_csv("dataset/crypto.csv", sep=';')
df_cryptos.head(5)

Unnamed: 0,symbol
0,CUSD
1,XLM
2,BAT
3,GALA
4,SNX


In [5]:
df_currencies = pd.DataFrame.from_dict(currencies, orient='index')
df_currencies = df_currencies.reset_index()
df_currencies = df_currencies.drop('index', axis=1)
df_currencies.to_csv("dataset/currency.csv", sep=';')
df_currencies.head(5)

Unnamed: 0,15m,last,buy,sell,symbol
0,55490315.29,55490315.29,55490315.29,55490315.29,ARS
1,99398.66,99398.66,99398.66,99398.66,AUD
2,331848.84,331848.84,331848.84,331848.84,BRL
3,87718.17,87718.17,87718.17,87718.17,CAD
4,58066.5,58066.5,58066.5,58066.5,CHF


In [6]:
orders_btc_usd = get_orders("BTC", "USD")
for order in orders_btc_usd['bids']:
    order['symbol1'] = "BTC"
    order['symbol2'] = "USD"
for order in orders_btc_usd['asks']:
    order['symbol1'] = "BTC"
    order['symbol2'] = "USD"


In [7]:
df_orders_sell = pd.DataFrame(orders_btc_usd["bids"])
df_orders_sell.to_csv("dataset/sell_order.csv")
df_orders_sell.head(5)

Unnamed: 0,px,qty,num,symbol1,symbol2
0,63409.5,0.878,1,BTC,USD
1,63409.43,0.996817,1,BTC,USD
2,63146.89,0.010828,1,BTC,USD
3,63001.0,0.11288,1,BTC,USD
4,61432.0,0.010021,1,BTC,USD


In [8]:
df_orders_buy = pd.DataFrame(orders_btc_usd["asks"])
df_orders_buy.to_csv("dataset/buy_order.csv")
df_orders_buy.head(5)

Unnamed: 0,px,qty,num,symbol1,symbol2
0,64280.0,0.000881,1,BTC,USD
1,64290.0,0.000881,1,BTC,USD
2,64988.99,0.001,1,BTC,USD
3,64989.0,0.0005,1,BTC,USD
4,64991.0,0.0005,1,BTC,USD


In [9]:
blocks = get_blocks_data(2)

In [10]:
transactions = []
inputs = []
outs = []
spending_outpoints = []
addresses = []
i_out = 0
i_input = 0

for block in blocks:
    for tx in block['tx']:
        transactions.append({
            'hash': tx['hash'], 
            'tx_index': tx['tx_index'],
            'time': tx['time'], 
            'fee': tx['fee'], 
            'weight': tx['weight'], 
            'vin_sz': tx['vin_sz'], 
            'vout_sz': tx['vout_sz']
        })
        for input in tx['inputs']:
            inputs.append({
                'tx_index': tx['tx_index'],
                'input_id': i_input, 
                'prev_out_tx_index': input['prev_out']['tx_index'], 
                'prev_out_n': input['prev_out']['n']
            })
            i_input += 1
        for out in tx['out']:
            outs.append({
                'out_id': i_out,
                'tx_index': out['tx_index'], 
                'n': out['n'], 
                'type': out['type'], 
                'spent': out['spent'], 
                'value': out['value'], 
                'addr': out.get('addr')
            })
            i_out += 1
            for outpoint in out['spending_outpoints']:
                spending_outpoints.append({
                    'out_id': i_out - 1,
                    'outpoint_tx_index': outpoint['tx_index']
                })
for out in outs:
    if out['addr'] is not None:
        addresses.append(out['addr'])
    

In [11]:
addresses_data = []
for i in range(0, len(addresses), 250):
    parte = addresses[i:i + 250]
    address = get_multi_address(parte)
    if address is not None:
        addresses_data.extend(address['addresses'])

In [12]:
df_transaction = pd.DataFrame(transactions)
df_transaction.to_csv("dataset/transactions.csv", sep=";")
df_transaction.head(5)

Unnamed: 0,hash,tx_index,time,fee,weight,vin_sz,vout_sz
0,9c1fb2beeffb56f38917867fc6726aaed2eafac7d5cc51...,2550502119100203,1713571854,0,1072,1,3
1,95bfa45afda983f3d6b9893a7e9200c12c4015bcdaffc3...,4625162177552666,1713571854,210100000,762,1,2
2,369bb55fe04fa333a53ab2d4071f255568f46899cb6d21...,944252477644004,1713518038,12874,1369,1,8
3,1535c7f76f92829d1add6058c131ac336da9700db168a5...,6026810733955871,1713517980,64593,6881,1,51
4,d90840c91c3639b7e414570df40dd6eb9060ad153d14a7...,3188892917292006,1713517979,64743,6897,1,51


In [13]:
df_out = pd.DataFrame(outs)
df_out.to_csv('dataset/out.csv', sep=";")
df_out.head(5)

Unnamed: 0,out_id,tx_index,n,type,spent,value,addr
0,0,2550502119100203,0,0,False,761084464,1HeXKmczG6MdYi6jfs3RYSyBayRBETcFh1
1,1,2550502119100203,1,0,False,0,
2,2,2550502119100203,2,0,False,0,
3,3,4625162177552666,0,0,False,1092,bc1phhvqk9mra3kvzfx90ryu60uny8m9rpsn2y3mjg0zqc...
4,4,4625162177552666,1,0,False,0,


In [19]:
df_input = pd.DataFrame(inputs)
df_input.to_csv('dataset/input.csv', sep=";")
df_input.head(5)

Unnamed: 0,tx_index,input_id,prev_out_tx_index,prev_out_n
0,2550502119100203,0,0,4294967295
1,4625162177552666,1,8294435896496132,0
2,944252477644004,2,7632255024439860,6
3,6026810733955871,3,4055999421930782,23
4,3188892917292006,4,7204602106021350,5


In [15]:
df_spending_outpoints = pd.DataFrame(spending_outpoints)
df_spending_outpoints.to_csv('dataset/spending_outpoints.csv', sep=';')
df_spending_outpoints

Unnamed: 0,out_id,outpoint_tx_index
0,156,7222372395294160
1,185,3232223852078352
2,301,8545641281170684
3,303,5605729482204423
4,305,3469760751891672
...,...,...
1867,19376,2462573382182806
1868,19377,5607187660665536
1869,19378,968837337358195
1870,19379,4823940106056562


In [16]:
df_address = pd.DataFrame(addresses_data)
df_address.to_csv('dataset/address.csv', sep=";")
df_address.head(5)

Unnamed: 0,address,final_balance,n_tx,total_received,total_sent
0,bc1q3huuvtw03pnn6d3ecp6ujfanfugcyr243uxjgf,15158249,114,23518166,8359917
1,3QpNsnamCopc4fTWEE9gWUyNYV7GxiGH8n,323414,5,1533236,1209822
2,bc1qpu3wqxdeaadhmj6lvqm86cfg530hqtqwx08c4n,43652579,131,122392956,78740377
3,bc1qkjag7v0xtu74sp50mz0a0c39wn3g5ncyuezjm6,509441,11,2426925,1917484
4,bc1qgxvt5smy0vfd0kwyvelkdyf5k9cclrm78pzhs3,519998,1,519998,0
