In [1]:
import pandas as pd
import os
import json
import hashlib
import base58
import bech32
import bech32m
import hashlib
import struct
from ecdsa import VerifyingKey, SECP256k1
from ecdsa.util import sigdecode_der

In [2]:
opcodes = {
    # Arithmetic
    "OP_0": "00",
    "OP_1": "51",
    "OP_2": "52",
    "OP_3": "53",
    "OP_4": "54",
    "OP_5": "55",
    "OP_6": "56",
    "OP_7": "57",
    "OP_8": "58",
    "OP_9": "59",
    "OP_10": "5a",
    "OP_11": "5b",
    "OP_12": "5c",
    "OP_13": "5d",
    "OP_14": "5e",
    "OP_15": "5f",
    "OP_16": "60",

    # Flow control
    "OP_IF": "63",
    "OP_NOTIF": "64",
    "OP_ELSE": "67",
    "OP_ENDIF": "68",
    "OP_VERIFY": "69",
    "OP_RETURN": "6a",

    # Stack
    "OP_TOALTSTACK": "6b",
    "OP_FROMALTSTACK": "6c",
    "OP_IFDUP": "73",
    "OP_DEPTH": "74",
    "OP_DROP": "75",
    "OP_DUP": "76",
    "OP_NIP": "77",
    "OP_OVER": "78",
    "OP_PICK": "79",
    "OP_ROLL": "7a",
    "OP_ROT": "7b",
    "OP_SWAP": "7c",
    "OP_TUCK": "7d",
    "OP_2DROP": "6d",
    "OP_2DUP": "6e",
    "OP_3DUP": "6f",
    "OP_2OVER": "70",
    "OP_2ROT": "71",
    "OP_2SWAP": "72",

    # Splice
    "OP_CAT": "7e",
    "OP_SUBSTR": "7f",
    "OP_LEFT": "80",
    "OP_RIGHT": "81",
    "OP_SIZE": "82",

    # Bitwise logic
    "OP_INVERT": "83",
    "OP_AND": "84",
    "OP_OR": "85",
    "OP_XOR": "86",
    "OP_EQUAL": "87",
    "OP_EQUALVERIFY": "88",

    # Numeric
    "OP_1ADD": "8b",
    "OP_1SUB": "8c",
    "OP_NEGATE": "8f",
    "OP_ABS": "90",
    "OP_NOT": "91",
    "OP_0NOTEQUAL": "92",
    "OP_ADD": "93",
    "OP_SUB": "94",
    "OP_MUL": "95",
    "OP_DIV": "96",
    "OP_MOD": "97",
    "OP_LSHIFT": "98",
    "OP_RSHIFT": "99",
    "OP_BOOLAND": "9a",
    "OP_BOOLOR": "9b",
    "OP_NUMEQUAL": "9c",
    "OP_NUMEQUALVERIFY": "9d",
    "OP_NUMNOTEQUAL": "9e",
    "OP_LESSTHAN": "9f",
    "OP_GREATERTHAN": "a0",
    "OP_LESSTHANOREQUAL": "a1",
    "OP_GREATERTHANOREQUAL": "a2",
    "OP_MIN": "a3",
    "OP_MAX": "a4",
    "OP_WITHIN": "a5",

    # Crypto
    "OP_RIPEMD160": "a6",
    "OP_SHA1": "a7",
    "OP_SHA256": "a8",
    "OP_HASH160": "a9",
    "OP_HASH256": "aa",
    "OP_CODESEPARATOR": "ab",
    "OP_CHECKSIG": "ac",
    "OP_CHECKSIGVERIFY": "ad",
    "OP_CHECKMULTISIG": "ae",
    "OP_CHECKMULTISIGVERIFY": "af",

    # Expansion
    "OP_NOP1": "b0",
    "OP_CHECKLOCKTIMEVERIFY": "b1",
    "OP_CHECKSEQUENCEVERIFY": "b2",
    "OP_NOP4": "b3",
    "OP_NOP5": "b4",
    "OP_NOP6": "b5",
    "OP_NOP7": "b6",
    "OP_NOP8": "b7",
    "OP_NOP9": "b8",
    "OP_NOP10": "b9"
}

# Add the ones commonly used in transaction types
transaction_opcodes = {
    "OP_DUP": opcodes["OP_DUP"],
    "OP_HASH160": opcodes["OP_HASH160"],
    "OP_EQUALVERIFY": opcodes["OP_EQUALVERIFY"],
    "OP_CHECKSIG": opcodes["OP_CHECKSIG"],
    "OP_EQUAL": opcodes["OP_EQUAL"],
    "OP_0": opcodes["OP_0"],
    "OP_PUSHBYTES_20": "14",  # Length of the following data
    "OP_PUSHBYTES_32": "20",
    "OP_PUSHBYTES_3": "03",
    "OP_PUSHBYTES_33":"21",
    "OP_PUSHBYTES_65":"65",
    "OP_PUSHBYTES_11" : '0b',
    "OP_PUSHNUM_1":'51',
     "OP_PUSHNUM_3":"53" # Length of the following data
}

opcodes.update(transaction_opcodes)

# print(opcodes)


In [3]:
def read_transactions():
    transactions = []
    for filename in os.listdir('mempool'):
        with open(os.path.join('mempool', filename), 'r') as file:
            transaction = json.load(file)
            transactions.append(transaction)
    return transactions

In [4]:
def hash_public_key(public_key_str,type = None):
    # Convert the public key string to bytes
    public_key_bytes = bytes.fromhex(public_key_str)
    
    # First, perform SHA256 hash on the public key
    sha256_hash = hashlib.sha256(public_key_bytes).digest()
    if type:
        return sha256_hash.hex()
    # Then, perform RIPEMD160 hash on the SHA256 hash
    else:
        ripemd160_hash = hashlib.new('ripemd160', sha256_hash).digest() 
        return ripemd160_hash.hex()

In [5]:
def convert_to_hex(data):
    # Convert the list of numbers to a bytes object
    bytes_data = bytes(data)
    
    # Convert the bytes to a hexadecimal string
    hex_string = bytes_data.hex()
    
    return hex_string

In [6]:
def decode_bech32(address,address_type):
    if address_type != 'v1_p2tr':     
        hrp, data = bech32.decode('bc',address)
    else:
        hrp, data = bech32m.decode('bc',address)
    return hrp, convert_to_hex(data)

In [7]:
def remove_base58check(address):
    # Decode the Base58 encoded address
    decoded_address = base58.b58decode(address)
    
    # Remove the last 4 bytes (checksum)
    address_without_checksum = decoded_address[:-4]
    
    # Convert the result back to bytes
    address_bytes = bytes(address_without_checksum)
    
    # Convert bytes to hexadecimal string
    hex_string = address_bytes.hex()
    
    return hex_string


In [8]:
def compute_script_length(script_hex):
    # Remove any leading "OP_" prefixes if present
    script_hex = script_hex.replace("OP_", "")
    # Convert the hexadecimal script to bytes
    script_bytes = bytes.fromhex(script_hex)
    # The length of the script is the length of the bytes
    script_length = len(script_bytes)
    return hex(script_length)


In [9]:
def compute_transaction_hash(version, input_count, previous_transaction_hash, previous_transaction_index, script_sig_length, script_sig, sequence, output_count, output, locktime):
    # Convert version, input_count, output_count, and locktime to little-endian hexadecimal strings
    version_hex = version.to_bytes(4, byteorder='little').hex()
    input_count_hex = input_count.to_bytes(1, byteorder='little').hex()
    output_count_hex = output_count.to_bytes(1, byteorder='little').hex()
    locktime_hex = locktime.to_bytes(4, byteorder='little').hex()
    sequence = sequence.to_bytes(4, byteorder='little').hex()

    # Reverse the byte order of the previous transaction hash
    previous_transaction_hash_reversed = bytes.fromhex(previous_transaction_hash)[::-1].hex()

    # Convert previous_transaction_index, script_sig_length, output1_value, output1_script_length, output2_value, output2_script_length to little-endian hexadecimal strings
    previous_transaction_index_hex = previous_transaction_index.to_bytes(4, byteorder='little').hex()
    # script_sig_length_hex = script_sig_length.to_bytes(1, byteorder='little').hex()
    
    tx_hex = (
        version_hex +
        input_count_hex +
        previous_transaction_hash_reversed +
        previous_transaction_index_hex +
        script_sig_length +
        script_sig +
        sequence +
        output_count_hex +
        output+
        locktime_hex
    )
    sighash_hex = "{:08x}".format(int('1000000', 16))
    tx_hex = tx_hex + sighash_hex
    # Double SHA256 hash the concatenated transaction
    tx_hash = hashlib.sha256(bytes.fromhex(tx_hex)).digest()

    # Reverse the byte order to get the transaction hash
    # transaction_hash = tx_hash[::-1].hex()
    # transaction_hash += '01000000'

    return tx_hash.hex()


In [10]:
def parse_element(hex_str, offset, element_size):
    """
    :param hex_str: string to parse the element from.
    :type hex_str: hex str
    :param offset: initial position of the object inside the hex_str.
    :type offset: int
    :param element_size: size of the element to extract.
    :type element_size: int
    :return: The extracted element from the provided string, and the updated offset after extracting it.
    :rtype tuple(str, int)
    """

    return hex_str[offset:offset+element_size], offset+element_size

def dissect_signature(hex_sig):
    """
    Extracts the r, s and ht components from a Bitcoin ECDSA signature.
    :param hex_sig: Signature in  hex format.
    :type hex_sig: hex str
    :return: r, s, t as a tuple.
    :rtype: tuple(str, str, str)
    """

    offset = 0
    # Check the sig contains at least the size and sequence marker
    assert len(hex_sig) > 4, "Wrong signature format."
    sequence, offset = parse_element(hex_sig, offset, 2)
    # Check sequence marker is correct
    assert sequence == '30', "Wrong sequence marker."
    signature_length, offset = parse_element(hex_sig, offset, 2)
    # Check the length of the remaining part matches the length of the signature + the length of the hashflag (1 byte)
    assert len(hex_sig[offset:])/2 == int(signature_length, 16) + 1, "Wrong length."
    # Get r
    marker, offset = parse_element(hex_sig, offset, 2)
    assert marker == '02', "Wrong r marker."
    len_r, offset = parse_element(hex_sig, offset, 2)
    len_r_int = int(len_r, 16) * 2   # Each byte represents 2 characters
    r, offset = parse_element(hex_sig, offset, len_r_int)
    # Get s
    marker, offset = parse_element(hex_sig, offset, 2)
    assert marker == '02', "Wrong s marker."
    len_s, offset = parse_element(hex_sig, offset, 2)
    len_s_int = int(len_s, 16) * 2  # Each byte represents 2 characters
    s, offset = parse_element(hex_sig, offset, len_s_int)
    # Get ht
    ht, offset = parse_element(hex_sig, offset, 2)
    assert offset == len(hex_sig), "Wrong parsing."

    return r, s, ht


In [11]:
def verify_signature(public_key_hex, r,s,message):
    # Convert public key and signature from hexadecimal strings
    # public_key_bytes = bytes.fromhex(public_key_hex)
    vk = VerifyingKey.from_string(bytes.fromhex(public_key_hex), curve=SECP256k1)
    # signature_bytes = bytes.fromhex(signature_der)
    # order = SECP256k1.order

    # # Decode the DER-encoded signature
    # r,s = sigdecode_der(signature_bytes[1:],order=order)  # Remove first byte (signature type)

    # Create a verifying key from the public key
    # verifying_key = VerifyingKey.from_string(public_key_bytes, curve=SECP256k1)
    # Concatenate r and s into a single byte string
    signature = r+s
    # vk.verify(bytes.fromhex(signature), bytes.fromhex(message), hashlib.sha256)

# Create a verifying key from the public key
    # verifying_key = VerifyingKey.from_string(public_key_bytes, curve=SECP256k1)
    # message = bytes.fromhex('4eac7bd28c274c5bed25333cbef3a221c975cbff8f5979c7ca7dc4fef8ad2ab4')

# Verify the signature
    return vk.verify(bytes.fromhex(signature), bytes.fromhex(message), hashlib.sha256)
    # return signature

    # Verify the signature
    # return verifying_key.verify(signature, b'')


In [12]:
transactions = read_transactions()

In [13]:
len(transactions)

8131

In [14]:
STXO = pd.DataFrame(columns=['block_id','version','locktime', 'input_count','txid', 'input index','scriptpubkey', 'scriptpubkey_asm', 'scriptpubkey_type',
                           'scriptpubkey_address', 'value', 'scriptsig', 'scriptsig_asm', 'witness',
                           'is_coinbase', 'sequence'])


UTXO = pd.DataFrame(columns=['scriptpubkey','scriptpubkey_asm','scriptpubkey_type','scriptpubkey_address','value'])
dfs = []
dfs_ = []
# Iterate through blocks
for block_id, block in enumerate(transactions):
    version = block['version']
    locktime = block['locktime']
    # Iterate through transactions in the block
    for transaction in block['vin']:
        try:
        # Access input transaction information
            input_count = len(block['vin'])
            txid = transaction['txid']
            input_index = transaction['vout']
            scriptpubkey = transaction['prevout']['scriptpubkey']
            scriptpubkey_asm = transaction['prevout']['scriptpubkey_asm']
            scriptpubkey_type = transaction['prevout']['scriptpubkey_type']
            scriptpubkey_address = transaction['prevout']['scriptpubkey_address']
            value = transaction['prevout']['value']
            scriptsig = transaction['scriptsig']
            scriptsig_asm = transaction['scriptsig_asm']
            witness = transaction['witness']
            is_coinbase = transaction['is_coinbase']
            sequence = transaction['sequence']
        except KeyError:
            input_count = len(block['vin'])
            txid = transaction['txid']
            input_index = transaction['vout']
            scriptpubkey = transaction['prevout']['scriptpubkey']
            scriptpubkey_asm = transaction['prevout']['scriptpubkey_asm']
            scriptpubkey_type = transaction['prevout']['scriptpubkey_type']
            scriptpubkey_address = transaction['prevout']['scriptpubkey_address']
            value = transaction['prevout']['value']
            scriptsig = transaction['scriptsig']
            scriptsig_asm = transaction['scriptsig_asm']
#             witness = transaction['witness']
            is_coinbase = transaction['is_coinbase']
            sequence = transaction['sequence']
        # Create DataFrame for current transaction
        df = pd.DataFrame({
            'block_id': [block_id],
            'input_count' : input_count,
            'txid': [txid],
            'version':[version],
            'locktime':[locktime],
            'input index': [input_index],
            'scriptpubkey': [scriptpubkey],
            'scriptpubkey_asm': [scriptpubkey_asm],
            'scriptpubkey_type': [scriptpubkey_type],
            'scriptpubkey_address': [scriptpubkey_address],
            'value': [value],
            'scriptsig': [scriptsig],
            'scriptsig_asm': [scriptsig_asm],
            'witness': [witness],
            'is_coinbase': [is_coinbase],
            'sequence': [sequence]
        })
        
        # Append DataFrame to list
        dfs.append(df)
    for transaction in block['vout']:
        try:
        # Access input transaction information
            scriptpubkey = transaction['scriptpubkey']
            scriptpubkey_asm = transaction['scriptpubkey_asm']
            scriptpubkey_type = transaction['scriptpubkey_type']
            scriptpubkey_address = transaction['scriptpubkey_address']
            value = transaction['value']
        except:
            scriptpubkey_asm = transaction['scriptpubkey_asm']
            scriptpubkey_type = transaction['scriptpubkey_type']
            scriptpubkey_address = ''
            value = transaction['value']
        df_ = pd.DataFrame({
            'block_id': [block_id],
            'scriptpubkey': [scriptpubkey],
            'scriptpubkey_asm': [scriptpubkey_asm],
            'scriptpubkey_type': [scriptpubkey_type],
            'scriptpubkey_address': [scriptpubkey_address],
            'value': [value]
        })
        dfs_.append(df_)

# Concatenate DataFrames
STXO = pd.concat(dfs, ignore_index=True)
UTXO = pd.concat(dfs_, ignore_index=True)

In [15]:
STXO['scriptpubkey_type'].unique()

array(['v0_p2wpkh', 'v1_p2tr', 'p2pkh', 'v0_p2wsh', 'p2sh'], dtype=object)

In [16]:
UTXO['scriptpubkey_type'].unique()

array(['p2sh', 'v0_p2wpkh', 'v1_p2tr', 'p2pkh', 'op_return', 'v0_p2wsh',
       'unknown'], dtype=object)

In [17]:
UTXO[UTXO['scriptpubkey_type'] == 'unknown'] 

Unnamed: 0,block_id,scriptpubkey,scriptpubkey_asm,scriptpubkey_type,scriptpubkey_address,value
1065,371,512102d51fcf29e1d910875dc00e66442c3051be637720...,OP_PUSHNUM_1 OP_PUSHBYTES_33 02d51fcf29e1d9108...,unknown,,796
1066,371,51210236d6529cc88f251f55539077761b3e2f491a6ab5...,OP_PUSHNUM_1 OP_PUSHBYTES_33 0236d6529cc88f251...,unknown,,796
1804,697,51210254de7a5999477d61249bf62e5e628b868508a32a...,OP_PUSHNUM_1 OP_PUSHBYTES_33 0254de7a5999477d6...,unknown,,796
1805,697,51210271cf3589a4ff65dd3daa10e1c16bb573eafa4687...,OP_PUSHNUM_1 OP_PUSHBYTES_33 0271cf3589a4ff65d...,unknown,,796
1988,803,512102061dcf3fe609ebbff2e1da31db1f5532d3543022...,OP_PUSHNUM_1 OP_PUSHBYTES_33 02061dcf3fe609ebb...,unknown,,796
...,...,...,...,...,...,...
19401,7908,51210293f7a6930a7fbdff0b1c801d2bd5304d8bbc3c26...,OP_PUSHNUM_1 OP_PUSHBYTES_33 0293f7a6930a7fbdf...,unknown,,801
19535,7977,512103a97463b69dd7dd30709527bbe720ae98a841f83b...,OP_PUSHNUM_1 OP_PUSHBYTES_33 03a97463b69dd7dd3...,unknown,,790
19536,7977,51210317718e52f86974af770a62ed0a9f127cce93c0c5...,OP_PUSHNUM_1 OP_PUSHBYTES_33 0317718e52f86974a...,unknown,,790
19685,8038,512102c35542d3fd76a8aa9c8dbe145c4b0126785446fe...,OP_PUSHNUM_1 OP_PUSHBYTES_33 02c35542d3fd76a8a...,unknown,,801


In [18]:
op_return_indices = UTXO[UTXO['scriptpubkey_type'] == 'op_return'].index.to_list()
unknown_indices = UTXO[UTXO['scriptpubkey_type'] == 'unknown'].index.to_list()

indices_to_drop = op_return_indices + unknown_indices
UTXO = UTXO.drop(indices_to_drop).reset_index(drop=True)

In [19]:
STXO = STXO.drop_duplicates(subset='txid',keep='first').reset_index().drop('index',axis=1)

In [20]:
input_output = pd.merge(STXO.groupby(by='block_id')['value'].sum(),UTXO.groupby(by='block_id')['value'].sum(),on='block_id',how='inner').rename(columns={'value_x':'input','value_y':'output'}).reset_index()

In [21]:
input_output['difference'] = input_output['input'] - input_output['output']

In [22]:
input_output[input_output['difference']<0]

Unnamed: 0,block_id,input,output,difference
83,90,245041368,620001244,-374959876
91,99,1146,482350,-481204
127,140,1229712,1529720,-300008
142,162,10600,669110,-658510
204,250,285783098,287363535,-1580437
...,...,...,...,...
5492,8012,88883574,99963000,-11079426
5495,8015,32276,5306606,-5274330
5496,8017,53526,171435,-117909
5552,8098,27558760,36568813,-9010053


In [100]:
STXO = pd.merge(input_output[['block_id','difference']],STXO,on='block_id',how='right')

In [23]:
index_to_remove = []
for i,v in STXO.iterrows():
    if v['block_id'] in input_output[input_output['difference']<=0]['block_id'].to_list():
        index_to_remove.append(i)
STXO = STXO.drop(index_to_remove).reset_index().drop('index',axis=1)

In [25]:
index_to_remove = []
for i,v in UTXO.iterrows():
    if v['block_id'] in input_output[input_output['difference']<=0]['block_id'].to_list():
        index_to_remove.append(i)
UTXO = UTXO.drop(index_to_remove).reset_index().drop('index',axis=1)

In [26]:
value_by_add = UTXO.groupby(by='scriptpubkey_address')['value'].agg(list). reset_index()

In [27]:
for i, v in STXO.iterrows():
    value_series = value_by_add[value_by_add['scriptpubkey_address'] == v['scriptpubkey_address']]['value']
    # print(v['scriptpubkey_address'])
    try:
        value_index = value_by_add[value_by_add['scriptpubkey_address'] == v['scriptpubkey_address']].index[0]
    except:
        pass
    if not value_series.empty:  # Check if the series is not empty
        value_list = value_series.iloc[0]
        if v['value'] in value_list:
            STXO.loc[i, 'First check'] = 1
                # Remove the value from the list
            value_list.remove(v['value'])
                # Update the DataFrame with the modified list
            value_by_add.at[value_index, 'value'] = value_list
        else:
            STXO.loc[i, 'First check'] = 0
    else:
        STXO.loc[i, 'First check'] = 0

In [28]:
invalid_block = list(set(STXO[STXO['First check']==0]['block_id']))

In [30]:
STXO = STXO.drop(STXO[STXO['First check'] == 0].index)

In [31]:
STXO = STXO.reset_index().drop('index',axis=1)

In [32]:
index_to_remove = []
for i,v in UTXO.iterrows():
    if v['block_id'] in invalid_block:
        index_to_remove.append(i)
UTXO = UTXO.drop(index_to_remove).reset_index().drop('index',axis=1)    

In [33]:
for i,v in STXO.iterrows():
    commands = v['scriptpubkey_asm'].split(' ')
    script_ = ''
    for cmd in commands:
        try:
            script_ += opcodes[cmd]
        except KeyError:
            script_ += cmd
    # print(script_)
    script_ = compute_script_length(script_)+script_
    STXO.loc[i,'script_'] = script_
    if v['scriptpubkey_type'] == 'p2pkh':
        STXO.loc[i,'hashed key'] = remove_base58check(v['scriptpubkey_address'])
        STXO.loc[i, 'compressed public key'] = v['scriptsig'][-66:]
        STXO.loc[i, 'signature'] = v['scriptsig'][:-66]
        try:
            # print(v['scriptpubkey_asm'])
            hashed_add = v['scriptpubkey_asm'].split(' ')[3]
            # print(hashed_add,v['hashed key'][2:])
            if hashed_add == v['hashed key'][2:]:
                STXO.loc[i, 'Second check'] = 1
            else:
                STXO.loc[i, 'Second check'] = 0
        except:
            pass
    elif v['scriptpubkey_type'] == 'p2sh':
        # print(len(v['witness']))
        STXO.loc[i,'hashed key'] = remove_base58check(v['scriptpubkey_address'])
        for j in v['witness']:
            if len(j) == 66:
                # print('seen')
                STXO.loc[i, 'compressed public key'] = j
            elif len(j)==142 or len(j)==144:
                # print('seen sig')
                STXO.loc[i, 'signature'] = j
        try:
            # print(v['scriptpubkey_asm'])
            hashed_add = v['scriptpubkey_asm'].split(' ')[2]
            # print(hashed_add,v['hashed key'])
            if hashed_add == v['hashed key'][2:]:
                STXO.loc[i, 'Second check'] = 1
            else:
                STXO.loc[i, 'Second check'] = 0
        except:
            pass
    elif v['scriptpubkey_type'] == 'v0_p2wpkh':
        if len(v['scriptpubkey'][4:]) == 40:
            STXO.loc[i,'hashed key'] = decode_bech32(v['scriptpubkey_address'],'other')[1]
            if len(v['witness']) == 2:
                for j in v['witness']:
                    if len(j) == 66:
                        STXO.loc[i,'compressed public key'] = j
                    elif len(j) == 142 or len(j)==144:
                        STXO.loc[i,'signature'] = j
            else:
                STXO.loc[i,'Third check'] = 'Unvalidated'
            try:
                # print(v['scriptpubkey_asm'])
                hashed_add = v['scriptpubkey_asm'].split(' ')[2]
                # print(hashed_add,v['hashed key'])
                if hashed_add == v['hashed key']:
                    STXO.loc[i, 'Second check'] = 1
                else:
                    STXO.loc[i, 'Second check'] = 0
            except:
                pass
        else:
            STXO.loc[i,'Third check'] = 'Unvalidated'
    elif v['scriptpubkey_type'] =='v0_p2wsh' :
        if len(v['scriptpubkey'][4:]) == 64:
            STXO.loc[i,'hashed key'] = decode_bech32(v['scriptpubkey_address'],'other')[1]
            # print(len(v['witness']))
            STXO.loc[i, 'compressed public key'] = v['witness'][-1] #last item in the witness is the redeem script (or compressed public key)
            STXO.at[i, 'signature'] = v['witness'][1:3] #first items are witness items which will be taken as signatures
            # print(v['signature'])
            # del v['signature'][-1]
            try:
                # print(v['scriptpubkey_asm'])
                hashed_add = v['scriptpubkey_asm'].split(' ')[2]
                # print(hashed_add,v['hashed key'])
                if hashed_add == v['hashed key']:
                    STXO.loc[i, 'Second check'] = 1
                else:
                    STXO.loc[i, 'Second check'] = 0
            except:
                pass
        else:
            STXO.loc[i,'Second check'] = 'Unvalidated'
    elif v['scriptpubkey_type'] == 'v1_p2tr':
        STXO.loc[i,'hashed key'] = decode_bech32(v['scriptpubkey_address'],'v1_p2tr')[1]
        try:
            # print(v['scriptpubkey_asm'])
            hashed_add = v['scriptpubkey_asm'].split(' ')[2]
            # print(hashed_add,v['hashed key'])
            if hashed_add == v['hashed key']:
                STXO.loc[i, 'Second check'] = 1
            else:
                STXO.loc[i, 'Second check'] = 0
        except:
            pass
    

In [42]:
invalid_blocks = list((STXO[(STXO['locktime'] > 850000) & (STXO['sequence'] == 4294967295)]['block_id']))
STXO.drop(STXO[(STXO['locktime'] > 850000) & (STXO['sequence'] == 4294967295)].index, inplace=True)
for i in invalid_blocks:
    UTXO.drop(UTXO[UTXO['block_id'] == i].index, inplace=True)

STXO = STXO.reset_index(drop=True)
UTXO = UTXO.reset_index(drop=True)


In [67]:
# op_codes = {'OP_0':'00','0P_1':'51','OP_2-OP_16':'52-60',''}
tx_hash = pd.DataFrame(columns= ['blockid','version','input_count','sequence','locktime','prev_trans_hash','prev_trans_index','script_sig_length','script_sig',
                                          'output_count','output_value','output_scriptlen','outputscriptpubkey'])

dfs = []

for i, v in STXO.iterrows():
    block_id = v['block_id']
    version = v['version']
    input_count = v['input_count']
    sequence = v['sequence']
    locktime = v['locktime']
    prev_trans_index = v['input index']
    prev_trans_hash = v['txid']
    script_sig_length = v['script_'][2:4]
    script_sig = v['script_'][4:]
    output_count = len(UTXO[UTXO['block_id'] == v['block_id']])
    output_values = []
    output_script_len = []
    output_script_pubkey = []
    for m, n in UTXO[UTXO['block_id'] == v['block_id']].iterrows():
        output_values.append(n['value'])
        commands = n['scriptpubkey_asm'].split(' ')
        script_ = ''
        for cmd in commands:
            try:
                script_ += opcodes[cmd]
            except KeyError:
                script_ += cmd
        # print(script_)
        output_script_len.append(compute_script_length(script_))
        output_script_pubkey.append(script_)
    # Create a DataFrame for the current transaction
    temp = pd.DataFrame({
        'block_id': [block_id],
        'version': [version],
        'input_count': [input_count],
        'sequence': [sequence],
        'locktime': [locktime],
        'prev_trans_hash': [prev_trans_hash],
        'prev_trans_index': [prev_trans_index],
        'script_sig_length': [script_sig_length],
        'script_sig': [script_sig],
        'output_count': [output_count],
        'output_values': [output_values],  # Store the list of output values
        'output_script_length': [output_script_len],  # Store the list of output script lengths
        'output_script_pubkey': [output_script_pubkey]  # Store the list of output script public keys
    })
    dfs.append(temp)

# Concatenate all DataFrames
tx_hash = pd.concat(dfs, ignore_index=True)


In [140]:
int('0x16',16).to_bytes(1, byteorder='little').hex()

'16'

In [141]:
for i, v in tx_hash.iterrows():
    version = int(v['version'])
    sequence = int(v['sequence'])
    input_count = int(v['input_count'])
    previous_transaction_hash = v['prev_trans_hash']
    previous_transaction_index = int(v['prev_trans_index'])
    script_sig_length = v['script_sig_length']
    script_sig = v['script_sig']
    output_count = int(v['output_count'])
    output_ = ''

    if output_count > 1:
        for value, script_length, output_script_pubkey in zip(v['output_values'],
                                                              v['output_script_length'],
                                                              v['output_script_pubkey']):
            value_hex = value.to_bytes(8, byteorder='little').hex()
            script_length = int(script_length, 16)  # Assuming script length is in hexadecimal format
            output_ += value_hex + script_length.to_bytes(2, byteorder='little').hex() + output_script_pubkey

    elif output_count == 1:
        value_hex = v['output_values'][0].to_bytes(8, byteorder='little').hex()
        script_length = int(v['output_script_length'][0], 16)
        output_script_pubkey = v['output_script_pubkey'][0]
        output_ += value_hex + script_length.to_bytes(2, byteorder='little').hex() + output_script_pubkey

    tx_hash.at[i, 'message'] = compute_transaction_hash(version, input_count, previous_transaction_hash,
                                                         previous_transaction_index, script_sig_length,
                                                         script_sig, sequence, output_count, output_, locktime)



In [142]:
tx_hash[tx_hash['block_id']==458]

Unnamed: 0,block_id,version,input_count,sequence,locktime,prev_trans_hash,prev_trans_index,script_sig_length,script_sig,output_count,output_values,output_script_length,output_script_pubkey,message
97,458,2,1,4294967295,0,6ce03f19092ded9a1832c68ce71e95bf15dc2c731975b9...,1,17,a914a3fc332779fb1a75d21da87b8cb6bc4fd383156987,2,"[7704, 797447]","[0x22, 0x17]",[5120717ac9d5fa00c7480ece7587cf2ae579eccbd3228...,03db3bc733dfffe05473db3fef20c9a969fcb375d8af32...


In [154]:
message = tx_hash[tx_hash['block_id']==458]['message'].iloc[0]

In [151]:
public_key = STXO[STXO['block_id']==458]['compressed public key'].iloc[0]

In [158]:
sig = STXO[STXO['block_id']==458]['signature'].iloc[0]

In [146]:
r,s,ht = dissect_signature('30450221008b64dc10d4a1770642cc4c30c1fdceca3f820ddadfcfbf0310cffc9b9c8d554b0220562a9d9b1ac7a250b33afab4301f764ffc18cf6813b43f3e677e50632aeebf9801')

In [132]:
r=r[1:]

In [147]:
verify_signature('020dae73cd67ca6318de16e3f51e1713519d4e0ceee0544b11cb25932486f7d5b3',r,s,'03db3bc733dfffe05473db3fef20c9a969fcb375d8af32c4b32b16cbb74950dd')

BadSignatureError: ('Malformed formatting of signature', MalformedSignature('Invalid length of signature, expected 64 bytes long, provided string is 65 bytes long'))

In [160]:
from ecdsa import VerifyingKey, SECP256k1
from ecdsa.util import sigdecode_der
import binascii

# Assuming signature is in hexadecimal format
signature_hex = "30450221008b64dc10d4a1770642cc4c30c1fdceca3f820ddadfcfbf0310cffc9b9c8d554b0220562a9d9b1ac7a250b33afab4301f764ffc18cf6813b43f3e677e50632aeebf98"

# Convert hexadecimal signature to bytes
signature_bytes = binascii.unhexlify(signature_hex)

# Decode DER signature into (r, s) components
r, s = sigdecode_der(signature_bytes, SECP256k1.order)

# Perform signature verification with (r, s) components
vk = VerifyingKey.from_string(binascii.unhexlify(public_key), curve=SECP256k1)
vk.verify_digest(signature=binascii.unhexlify(sig[:-2]), digest=binascii.unhexlify(message), sigdecode=sigdecode_der)


BadSignatureError: Signature verification failed

In [157]:
binascii.unhexlify(message)

b'\x03\xdb;\xc73\xdf\xff\xe0Ts\xdb?\xef \xc9\xa9i\xfc\xb3u\xd8\xaf2\xc4\xb3+\x16\xcb\xb7IP\xdd'

In [46]:
STXO.

Unnamed: 0,index,block_id,input_count,txid,version,locktime,input index,scriptpubkey,scriptpubkey_asm,scriptpubkey_type,...,scriptsig,scriptsig_asm,witness,is_coinbase,sequence,First check,script_,hashed key,signature,compressed public key
0,0,2,1,4bc8a6bbd9f01b7c20fb59adc81352ff087ff7dbc52efb...,2,834637,1,00145017ec1e0f2e6fd64561aa072931bcd261797559,OP_0 OP_PUSHBYTES_20 5017ec1e0f2e6fd64561aa072...,v0_p2wpkh,...,,,[304402206cb268614ab72910e5a975893e7cd2cb84aa5...,False,4294967294,1.0,0x1600145017ec1e0f2e6fd64561aa072931bcd261797559,5017ec1e0f2e6fd64561aa072931bcd261797559,304402206cb268614ab72910e5a975893e7cd2cb84aa58...,030b512819670cf864aa7605c7bfb32e37002db4f30c9d...
1,1,4,2,e3acb27a9fb6593822a4d8bbb8be9f9516e33123f196a4...,2,0,0,5120d7b0161160dc8ce46dfbf55c602ffeef5ed4ebcade...,OP_PUSHNUM_1 OP_PUSHBYTES_32 d7b0161160dc8ce46...,v1_p2tr,...,,,[265f6c3128efad948b8e1d11e1cd165c457e9e5da32f0...,False,2147483649,1.0,0x225120d7b0161160dc8ce46dfbf55c602ffeef5ed4eb...,d7b0161160dc8ce46dfbf55c602ffeef5ed4ebcadebbe0...,,
2,2,5,1,a74026e81488bc8ed65781b5904edb3fd401d40e8892fa...,2,0,7,51202691567e31e951fc72a28ebace6fd5ab716dd455e7...,OP_PUSHNUM_1 OP_PUSHBYTES_32 2691567e31e951fc7...,v1_p2tr,...,,,[937d2bbbde729857003c7c48bec4613141e42b8a12c8e...,False,4294967293,1.0,0x2251202691567e31e951fc72a28ebace6fd5ab716dd4...,2691567e31e951fc72a28ebace6fd5ab716dd455e7b150...,,
3,3,12,2,1a516bec4ff0b77e462dbb992276c2ba4e659c0d2a7960...,2,0,3,51202d071a3b480cd9b47db94593e4592074b81d5edaec...,OP_PUSHNUM_1 OP_PUSHBYTES_32 2d071a3b480cd9b47...,v1_p2tr,...,,,[e89cc97fe80467ecb83e99d686fd151acd2c3cdc512d4...,False,4294967295,1.0,0x2251202d071a3b480cd9b47db94593e4592074b81d5e...,2d071a3b480cd9b47db94593e4592074b81d5edaec2e00...,,
4,4,13,1,061b62a9ef7e4c42385f0de28dd4a3eb00be4754db440a...,2,0,18,5120d4456f43aa466cab7004b3c799078f1b5d1bba79c1...,OP_PUSHNUM_1 OP_PUSHBYTES_32 d4456f43aa466cab7...,v1_p2tr,...,,,[00c2dd07d56c3991f5b091308d7a512ad8bab1f00984a...,False,4294967293,1.0,0x225120d4456f43aa466cab7004b3c799078f1b5d1bba...,d4456f43aa466cab7004b3c799078f1b5d1bba79c154f9...,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1058,1059,8077,1,b99984400edbfa07ec94e18c151a03c3a727371a7be544...,1,0,0,512093ccdef9aa2328c4f9c5a20b590c97b9907af53f85...,OP_PUSHNUM_1 OP_PUSHBYTES_32 93ccdef9aa2328c4f...,v1_p2tr,...,,,[52bcc9ab208f84b29f90478446211028de8cfd9103861...,False,4294967293,1.0,0x22512093ccdef9aa2328c4f9c5a20b590c97b9907af5...,93ccdef9aa2328c4f9c5a20b590c97b9907af53f852e45...,,
1059,1060,8078,2,5481b0ab2e98b5864656863ea85f495466f17ae3e99221...,2,0,1,5120e845492d26e9d4515d59b57ee41402b7f536b7ea5e...,OP_PUSHNUM_1 OP_PUSHBYTES_32 e845492d26e9d4515...,v1_p2tr,...,,,[75e370cceeef73f044e0ac229b60848a7b395d282c2f7...,False,2147483649,1.0,0x225120e845492d26e9d4515d59b57ee41402b7f536b7...,e845492d26e9d4515d59b57ee41402b7f536b7ea5ebabc...,,
1060,1061,8084,1,897967a478729fd11f19fe5d8301dfb66eb32bb8687c13...,1,0,1,76a914b5be080fa5deb3238348ac6aa0240d574b564144...,OP_DUP OP_HASH160 OP_PUSHBYTES_20 b5be080fa5de...,p2pkh,...,483045022100bd718ba3d17788bd065c844a1d90b247f3...,OP_PUSHBYTES_72 3045022100bd718ba3d17788bd065c...,[72bbfc1b951cbdba98c1c0fde99fca1f122ebfc897cd1...,False,4294967293,1.0,0x1976a914b5be080fa5deb3238348ac6aa0240d574b56...,00b5be080fa5deb3238348ac6aa0240d574b564144,483045022100bd718ba3d17788bd065c844a1d90b247f3...,03b5fa04d793ed42510aa5e5a8583d11c54490cbba6aa7...
1061,1062,8102,1,a8f76e26aa307bbbff3c70927cee68fa7e9311a28a52c0...,2,0,0,5120372e205b858f2a752723cedda5426a54eb129f67a9...,OP_PUSHNUM_1 OP_PUSHBYTES_32 372e205b858f2a752...,v1_p2tr,...,,,[d9bfee29d548cf6b3d21a5d7dd18fdd14cbbf3dbc7b1d...,False,4294967293,1.0,0x225120372e205b858f2a752723cedda5426a54eb129f...,372e205b858f2a752723cedda5426a54eb129f67a9ea05...,,


In [101]:
STXO[STXO['scriptpubkey_type']=='p2sh']

Unnamed: 0,block_id,input_count,txid,version,locktime,input index,scriptpubkey,scriptpubkey_asm,scriptpubkey_type,scriptpubkey_address,...,scriptsig,scriptsig_asm,witness,is_coinbase,sequence,First check,script_,hashed key,signature,compressed public key
40,185,9,19e05e98b13855cd42eb19f67731b24fc28d793a4a36a9...,1,0,0,a9142a3fe0d35b3a2ee1a3cb38dd732cd3e1ac484fa587,OP_HASH160 OP_PUSHBYTES_20 2a3fe0d35b3a2ee1a3c...,p2sh,35YQpwL4QGsr9oGueDGhhV7MuNrcU1oBHF,...,16001425b6e5f856ec32e9a6d8eb9e9445f33b2e6567e9,OP_PUSHBYTES_22 001425b6e5f856ec32e9a6d8eb9e94...,[304402202e15fd425fe1667231afe0c4c2c6f1a4dd531...,False,4294967293,1.0,0x17a9142a3fe0d35b3a2ee1a3cb38dd732cd3e1ac484f...,052a3fe0d35b3a2ee1a3cb38dd732cd3e1ac484fa5,304402202e15fd425fe1667231afe0c4c2c6f1a4dd531a...,032d913cb626ba372fdd28354c63d7886a481db5c51c2c...
81,378,1,26add75d9ce9fc37214345e3239dc2cbd5bfa249b2848a...,2,834458,0,a914b6bb3d1ec1f6610ba14865e73436b5e139fb385187,OP_HASH160 OP_PUSHBYTES_20 b6bb3d1ec1f6610ba14...,p2sh,3JMDD2tTHi8PSnqZ4Py9pQ89WzJMWrCz5r,...,160014f62420cb38636c38450ec0b3525f54e21f040c3f,OP_PUSHBYTES_22 0014f62420cb38636c38450ec0b352...,[304402200145213cacd9de8335be935688cc7e4e5003d...,False,4294967293,1.0,0x17a914b6bb3d1ec1f6610ba14865e73436b5e139fb38...,05b6bb3d1ec1f6610ba14865e73436b5e139fb3851,304402200145213cacd9de8335be935688cc7e4e5003d4...,02959aad959e4d4a101ff592d85fba749cd0c5dd5f8bf5...
97,458,1,6ce03f19092ded9a1832c68ce71e95bf15dc2c731975b9...,2,0,1,a914a3fc332779fb1a75d21da87b8cb6bc4fd383156987,OP_HASH160 OP_PUSHBYTES_20 a3fc332779fb1a75d21...,p2sh,3Ge6BxoQ1FM4mP9LBddjGRLzWDuYWUArPu,...,1600149739ce7cc7adb754d8bb6822bd791c348919af04,OP_PUSHBYTES_22 00149739ce7cc7adb754d8bb6822bd...,[30450221008b64dc10d4a1770642cc4c30c1fdceca3f8...,False,4294967295,1.0,0x17a914a3fc332779fb1a75d21da87b8cb6bc4fd38315...,05a3fc332779fb1a75d21da87b8cb6bc4fd3831569,30450221008b64dc10d4a1770642cc4c30c1fdceca3f82...,020dae73cd67ca6318de16e3f51e1713519d4e0ceee054...
119,616,4,2a91a4c02433af6c9d3732d34f6a15ac81914e239b1486...,1,0,0,a914b7d9b72282ff96d0d6c63f5d3854975a9618350387,OP_HASH160 OP_PUSHBYTES_20 b7d9b72282ff96d0d6c...,p2sh,3JT8Pn4wQcahWYgnpgW1Sm7GgFy7gwxoh9,...,1600140e3ef8100ca01ef28d98ebad780a10b7137305b0,OP_PUSHBYTES_22 00140e3ef8100ca01ef28d98ebad78...,[3045022100dfad96761e61e9a64d13998723b91a18a5b...,False,4294967293,1.0,0x17a914b7d9b72282ff96d0d6c63f5d3854975a961835...,05b7d9b72282ff96d0d6c63f5d3854975a96183503,3045022100dfad96761e61e9a64d13998723b91a18a5ba...,02451c99c8e702807dff9b3b7a463871b13f79ea0bb837...
120,616,4,be8a6513863d30f5d889dfd149846de59363620005d9a8...,1,0,0,a914bbb72a93d55756dbf332b67a15858a7e649752e387,OP_HASH160 OP_PUSHBYTES_20 bbb72a93d55756dbf33...,p2sh,3JoZi2ypL29VjtAuZNfjT9gjGC6Qur899s,...,1600144208d148b8b4b1cf1323a723e9f5684bb614e0e8,OP_PUSHBYTES_22 00144208d148b8b4b1cf1323a723e9...,[3044022100a192f55315af58c59ab38b932a7e6b61ddd...,False,4294967293,1.0,0x17a914bbb72a93d55756dbf332b67a15858a7e649752...,05bbb72a93d55756dbf332b67a15858a7e649752e3,3044022100a192f55315af58c59ab38b932a7e6b61ddd3...,0235ee1dcfb616de8d00bebdc45966702d973c7f3ee1a6...
121,616,4,f062a7f5c9b7488a22933254a5e0a54d3f994a658987c8...,1,0,1,a9146dea20e377765312e0ad8e476ad83ddd06bae84d87,OP_HASH160 OP_PUSHBYTES_20 6dea20e377765312e0a...,p2sh,3BiC3z4VibRkjRDoCWAJPchXgPLHaTtZkf,...,1600140de16742b2ce90184fcdc4082c20a9b75581eb85,OP_PUSHBYTES_22 00140de16742b2ce90184fcdc4082c...,[30450221009185c93be07238837e40b32ecbc0e45838a...,False,4294967293,1.0,0x17a9146dea20e377765312e0ad8e476ad83ddd06bae8...,056dea20e377765312e0ad8e476ad83ddd06bae84d,30450221009185c93be07238837e40b32ecbc0e45838a8...,0253c2ac31943f87f3921d82075cb3edf82864fedd397d...
195,1063,4,9fb0eaedb382c8c8c897a240e5a9ae30633dabd9e2b6ba...,2,0,14,a914c7f6f3f6c1d38a77697e36b78fa5eba097d980f987,OP_HASH160 OP_PUSHBYTES_20 c7f6f3f6c1d38a77697...,p2sh,3KvLEK4JcZovZKSpYV3ztPM21mH7vSRGVJ,...,16001493bba9e044263999d2b0e52f6012bf78ced73c19,OP_PUSHBYTES_22 001493bba9e044263999d2b0e52f60...,[3045022100c915c5d223c4e2bb106e2de060bd4d29151...,False,4294967295,1.0,0x17a914c7f6f3f6c1d38a77697e36b78fa5eba097d980...,05c7f6f3f6c1d38a77697e36b78fa5eba097d980f9,3045022100c915c5d223c4e2bb106e2de060bd4d291515...,02cc7b48a74bdc8b8b4b36576b0e30a4c81c5115960061...
204,1113,9,e722a999a5953da17bd6aaedbaa5959bccfbfdfcdc6738...,2,0,14,a91405599d37fc1891c2c09b6aa29893a3112114552687,OP_HASH160 OP_PUSHBYTES_20 05599d37fc1891c2c09...,p2sh,32BJf15ehkb9t2BWD17DEe52CnTarRTHeP,...,1600145bb4fa703546d63a9ded1d32ae67d36b2996fd1b,OP_PUSHBYTES_22 00145bb4fa703546d63a9ded1d32ae...,[304402205b660562c61a6b416329aede318ac5cd2e0ea...,False,4294967295,1.0,0x17a91405599d37fc1891c2c09b6aa29893a311211455...,0505599d37fc1891c2c09b6aa29893a31121145526,304402205b660562c61a6b416329aede318ac5cd2e0ea7...,022bc38fb4879407feb7e42773e291606eda1ea51a73d0...
235,1309,19,bf8afa2b5b50c570c7a805eaf97814ebde98f7fce338b8...,2,0,13,a9142c0e6867ec03a073598aaf3e1fdf45d102954ba087,OP_HASH160 OP_PUSHBYTES_20 2c0e6867ec03a073598...,p2sh,35hxv4LGW5akMbcpWTiYZUB9B9YcR7WesZ,...,1600149b4e2665aa7741630bc00a8329c8a52a1ca5befd,OP_PUSHBYTES_22 00149b4e2665aa7741630bc00a8329...,[3045022100f8e6b942fe0e4e9301e2d037612308ebe2b...,False,4294967295,1.0,0x17a9142c0e6867ec03a073598aaf3e1fdf45d102954b...,052c0e6867ec03a073598aaf3e1fdf45d102954ba0,3045022100f8e6b942fe0e4e9301e2d037612308ebe2ba...,03490feb3b0b55498e94a43788ebdb66826e13b624efb3...
236,1309,19,1d6d6c58dc6a6aa84e0a6ddd7bb3a68d4df08dcfebe527...,2,0,5,a9142c0e6867ec03a073598aaf3e1fdf45d102954ba087,OP_HASH160 OP_PUSHBYTES_20 2c0e6867ec03a073598...,p2sh,35hxv4LGW5akMbcpWTiYZUB9B9YcR7WesZ,...,1600149b4e2665aa7741630bc00a8329c8a52a1ca5befd,OP_PUSHBYTES_22 00149b4e2665aa7741630bc00a8329...,[3045022100f3c9a24902e82ccc8d05e7928fbcf50aeb9...,False,4294967295,1.0,0x17a9142c0e6867ec03a073598aaf3e1fdf45d102954b...,052c0e6867ec03a073598aaf3e1fdf45d102954ba0,3045022100f3c9a24902e82ccc8d05e7928fbcf50aeb9f...,03490feb3b0b55498e94a43788ebdb66826e13b624efb3...


In [81]:
tx_hash[tx_hash['block_id'] == 33]['message'].iloc[0]

'05a3d907e8eedcf22762a9983fcb0659a9efb63515c633e8a235d128e99fd7dc'

In [196]:
import json

def generate_json(df):
    output = {
        "version": 1,
        "locktime": 0,
        "vin": [],
        "vout": []
    }

    for index, row in STXO.iterrows():
        vin_entry = {
            "txid": row['txid'],
            "vout": row['vout'],
            "prevout": {
                "scriptpubkey": row['scriptpubkey'],
                "scriptpubkey_asm": row['scriptpubkey_asm'],
                "scriptpubkey_type": row['scriptpubkey_type'],
                "scriptpubkey_address": row['scriptpubkey_address'],
                "value": row['value']
            },
            "scriptsig": row['scriptsig'],
            "scriptsig_asm": row['scriptsig_asm'],
            "witness": row['witness'],
            "is_coinbase": row['is_coinbase'],
            "sequence": row['sequence']
        }
        output['vin'].append(vin_entry)

        # Check UTXO for corresponding block number
        corresponding_output = df.loc[(df['block_id'] == row['block_id']) & (df['block_number'] == row['block_number']), ['scriptpubkey', 'scriptpubkey_asm', 'scriptpubkey_type', 'scriptpubkey_address', 'value']]
        for _, out_row in corresponding_output.iterrows():
            vout_entry = {
                "scriptpubkey": out_row['scriptpubkey'],
                "scriptpubkey_asm": out_row['scriptpubkey_asm'],
                "scriptpubkey_type": out_row['scriptpubkey_type'],
                "scriptpubkey_address": out_row['scriptpubkey_address'],
                "value": out_row['value']
            }
            output['vout'].append(vout_entry)

    return json.dumps(output, indent=2)

# Example usage:
# Assuming STXO is your DataFrame containing the required data
# json_output = generate_json(STXO)


'267644431e43677ceb3a7d92243d0deebfc915be370615b05803f0a99fbb310f'

In [47]:
import hashlib

# Hash function used in the merkle root function (and in bitcoin in general)
def hash256(hex_str):
    binary = bytes.fromhex(hex_str)
    hash1 = hashlib.sha256(binary).digest()
    hash2 = hashlib.sha256(hash1).digest()
    result = hash2.hex()
    return result

def merkleroot(txids):
    # Exit Condition: Stop recursion when we have one hash result left
    if len(txids) == 1:
        # Convert the result to a string and return it
        return txids[0]

    # Keep an array of results
    result = []

    # 1. Split up array of hashes into pairs
    for i in range(0, len(txids), 2):
        one = txids[i]
        two = txids[i + 1] if i + 1 < len(txids) else one

        # 2a. Concatenate each pair
        concat = one + two

        # 3. Hash the concatenated pair and add to results array
        result.append(hash256(concat))

    # Recursion: Do the same thing again for these results
    return merkleroot(result)


# Test (e.g. block 000000000003ba27aa200b1cecaad478d2b00432346c3f1f3986da1afd33e506)
txids = [
    "8c14f0db3df150123e6f3dbbf30f8b955a8249b62ac1d1ff16284aefa3d06d87",
    "fff2525b8931402dd09222c50775608f75787bd2b87e56995a7bdd30f79702c4",
    "6359f0868171b1d194cbee1af2f16ea598ae8fad666d9b012c8ed2b79a236ec4",
    "e9a66845e05d5abc0ad04ec80f774a7e585c6e8db975962d069a522137b80c1d"
]

# TXIDs must be in natural byte order when creating the merkle root
txids = ["".join(reversed([x[i:i+2] for i in range(0, len(x), 2)])) for x in txids]

# Create the merkle root
result = merkleroot(txids)

# Display the result in reverse byte order
print("".join(reversed([result[i:i+2] for i in range(0, len(result), 2)])))


f3e94742aca4b5ef85488dc37c06c3282295ffec960994b2c0d5ac2a25a95766


In [51]:
merkle_df = STXO.groupby('block_id')['txid'].agg(list).reset_index()

In [54]:
for i,v in merkle_df.iterrows():
    txids = ["".join(reversed([x[i:i+2] for i in range(0, len(x), 2)])) for x in v['txid']]
    merkle_df.loc[i,'merkle root'] = merkleroot(txids)

In [60]:
STXO = pd.merge(STXO,merkle_df,on='block_id',how='left').drop('txid_y',axis=1)

In [56]:
def calculate_bits_from_target(target_difficulty_hex):
    """
    Calculates the bits value from the target difficulty.

    Args:
        target_difficulty_hex (str): The target difficulty in hexadecimal format.

    Returns:
        str: The bits value as a hexadecimal string.
    """
    # Convert the target difficulty to an integer
    target_difficulty = int(target_difficulty_hex, 16)

    # Calculate the exponent
    exponent = 3 + (len(target_difficulty_hex) - 2) // 2

    # Calculate the coefficient
    coefficient = target_difficulty // (2 ** (8 * (exponent - 3)))

    # Combine the exponent and coefficient to create the bits value
    bits_value = (exponent << 24) + coefficient

    return hex(bits_value)  # Output the bits value as a hexadecimal string


In [57]:
calculate_bits_from_target('0000ffff00000000000000000000000000000000000000000000000000000000')

'0x22000000'

In [73]:
import pandas as pd
import hashlib
import time

def construct_block_header_df(STXO, difficulty_target_hex):
    """
    Constructs a DataFrame containing information necessary in a block header.

    Args:
        STXO (DataFrame): DataFrame containing transaction information.
        difficulty_target_hex (str): Difficulty target in hexadecimal format.

    Returns:
        DataFrame: DataFrame containing block header information.
    """
    # Convert the difficulty target from hexadecimal to an integer
    difficulty_target = int(difficulty_target_hex, 16)

    # Initialize an empty list to store block header information
    block_header_info = []

    # Get the current time
    current_time = int(time.time())

    # Iterate through each row in STXO DataFrame
    for i, row in STXO.iterrows():
        # Initialize the previous block hash as all zeros for the first row
        previous_block_hash = '0' * 64 if i == 0 else block_header_info[-1]['block_hash']

        # Calculate the merkle root (dummy calculation for demonstration)
        # merkle_root = 'dummy_merkle_root'

        # Initialize nonce
        nonce = 0

        # Mine the block by incrementing nonce until the hash is below the difficulty target
        while True:
            # Construct the block header dictionary
            block_header = {
                'block_id': row['block_id'],
                'version': row['version'],
                'previous_block_hash': previous_block_hash,
                'merkle_root': row['merkle root'],
                'current_time': current_time,
                'bits': '0x22000000',  # Assuming a fixed difficulty for demonstration
                'nonce': nonce
            }

            # Calculate the block hash by double hashing all block header info
            block_header_str = f"{block_header['version']}{block_header['previous_block_hash']}" \
                               f"{block_header['merkle_root']}{block_header['current_time']}" \
                               f"{block_header['bits']}{block_header['nonce']}"
            block_hash = hashlib.sha256(hashlib.sha256(block_header_str.encode()).digest()).hexdigest()

            # Check if the block hash is below the difficulty target
            if int(block_hash, 16) < difficulty_target:
                break  # Exit the loop if the hash is below the target

            # Increment nonce for the next iteration
            nonce += 1

        # Add the block hash to the block header dictionary
        block_header['block_hash'] = block_hash

        # Append the block header dictionary to the list
        block_header_info.append(block_header)

    # Convert the list of dictionaries to a DataFrame
    block_header_df = pd.DataFrame(block_header_info)

    return block_header_df

In [74]:
block_header_df = construct_block_header_df(STXO,'0000ffff00000000000000000000000000000000000000000000000000000000')

In [84]:
block_header_df['nonce'].iloc[0]

135286

In [114]:
def construct_coinbase_tx(height, block_reward,version):
    """
    Construct a coinbase transaction for a given block height and block reward.

    Args:
        height (int): The height of the block.
        block_reward (int): The reward for mining the block.

    Returns:
        dict: A dictionary representing the coinbase transaction.
    """
    coinbase_tx = {
        "version": version,
        "inputcount": int("01")+1,
        "inputs": [
            {
                "txid": "0" * 64,
                "vout": "ffffffff",
                "scriptsigsize": "08",
                "scriptsig": field(height, 4),
                "block_reward": block_reward,
                "sequence": "ffffffff",
                "coinbase": True
            }
        ]
    }
    return coinbase_tx

In [115]:
def mine_blocks(STXO, UTXO, block_header_df, subsidy, output_file):
    """
    Mine blocks based on transaction and block header information and write to an output file.

    Args:
        STXO (DataFrame): DataFrame containing spent transaction outputs.
        UTXO (DataFrame): DataFrame containing unspent transaction outputs.
        block_header_df (DataFrame): DataFrame containing block header information.
        subsidy (float): Block subsidy.
        output_file (str): Path to the output file.

    Returns:
        None
    """
    with open(output_file, 'w') as f:
        for _, block_header_info in block_header_df.iterrows():
            block_id = block_header_info['block_id']
            height = 840565  # Current block height
            block_header = construct_block_header(block_header_info)
            
            # Construct the transaction list from STXO and UTXO DataFrames
            transaction_list = []
            
            # Construct coinbase, input and output transactions
            for _, stxo_row in STXO[STXO['block_id'] == block_id].iterrows():
                version = stxo_row['version']
                block_fee = stxo_row['difference']
                block_reward = subsidy + block_fee
                coinbase_tx = construct_coinbase_tx(height, block_reward,version)
                vin = {"vin": {
                    "txid": stxo_row['txid'],
                    "vout": stxo_row['input index'],
                    "prevout": {'scriptpubkey': stxo_row['scriptpubkey'],
                                'scriptpubkey_address': stxo_row['scriptpubkey_address'],
                                'value': stxo_row['value']},
                    "scriptsig": stxo_row['scriptsig'],
                    "sequence": stxo_row['sequence'],
                    "witness": stxo_row['witness']
                }}
                transaction_list.append(vin)
            
            # Add UTXO transactions to the transaction list
            for _, utxo_row in UTXO[UTXO['block_id'] == block_id].iterrows():
                vout = {"vout": {
                    "scriptpubkey": utxo_row['scriptpubkey'],
                    "scriptpubkey_address": utxo_row['scriptpubkey_address'],
                    "value": utxo_row['value']
                }}
                transaction_list.append(vout)

            # Construct the block
            block = {
                "block_header": block_header,
                "locktime": 0,
                "coinbase_transaction": coinbase_tx,
                "transaction_list": transaction_list,
            }

            # Write the block to output file
            f.write(json.dumps(block, indent=2))
            f.write('\n\n')

In [80]:
STXO = STXO.rename(columns = {'txid_x':'txid'})

In [116]:
mine_blocks(STXO, UTXO, block_header_df, block_reward, output_file)

In [117]:
block_reward

304967308