In [None]:
import os
import re
import json
import pickle
from datetime import datetime
from bitcoinrpc.authproxy import AuthServiceProxy, JSONRPCException
from tqdm import tqdm

def create_directory_if_not_exists(directory):
    """Create *directory* (including missing parents) unless it already exists.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first: another process may create the directory
    # between the check and the makedirs() call.
    os.makedirs(directory, exist_ok=True)

def extract_index(filename):
    """Extract the numeric index from a file name of the form ``index<N>.json``.

    Args:
        filename: File name to inspect.

    Returns:
        The integer ``N`` when the name ends with ``index<N>.json``,
        otherwise ``-1`` so that unrelated files sort before real index files.
    """
    # The dot is escaped and the pattern is anchored at the end so that names
    # such as "index7xjson" or "index3.json.bak" are not mistaken for index
    # files.
    match = re.search(r'index(\d+)\.json$', filename)
    if match:
        return int(match.group(1))
    return -1

def _write_json_atomically(path, payload):
    """Dump *payload* as indented JSON to *path* via a temp file and os.replace()."""
    # Writing to a sibling temp file first means an interrupted run cannot
    # leave a half-written JSON file at *path*.
    tmp_path = f'{path}.tmp'
    with open(tmp_path, 'w') as handle:
        json.dump(payload, handle, indent=4, default=str)
    os.replace(tmp_path, path)


def _load_resume_point(index_directory):
    """Read the highest-numbered index file and derive the state to resume from.

    Returns:
        A tuple ``(last_txid, directory_count, transaction_count, index_data)``
        or ``None`` when there is no usable index entry to resume from.
    """
    index_files = os.listdir(index_directory)
    print("index_files: ", index_files)
    index_files.sort(key=extract_index)

    if not index_files:
        print(f"文件夹 '{index_directory}' 为空")
        return None

    last_file = index_files[-1]
    if not last_file.endswith('.json'):
        print(f"最后一个文件 '{last_file}' 不是JSON文件")
        return None

    with open(os.path.join(index_directory, last_file), 'r') as json_file:
        index_data = json.load(json_file)
    if not index_data:
        print(f"Index file '{last_file}' contains no entries.")
        return None

    # Index files map txid -> bucket directory name, so the last key is the
    # most recently indexed transaction.
    last_txid = list(index_data.keys())[-1]
    directory_count = int(index_data[last_txid])
    transaction_count = len(index_data) % 10000
    print(f"成功读取最后一个文件: {last_file}")

    if transaction_count == 0:
        # The last index file is exactly full: the previous run had already
        # rolled over to a new bucket in memory. Restore that state instead of
        # appending another 10000 entries to the full file.
        index_data = {}
        directory_count += 1

    return last_txid, directory_count, transaction_count, index_data


def _record_address_txref(address_path, txref_dict, first_balance):
    """Create or update the JSON file of one address with a new transaction reference."""
    if os.path.exists(address_path):
        with open(address_path, 'r') as address_file:
            address_data = json.load(address_file)
        if 'balance' in address_data:
            # NOTE(review): the balance is overwritten with the first output's
            # value rather than accumulated; kept as in the original.
            if first_balance > 0:
                address_data['balance'] = first_balance
            txrefs_list = address_data.get('txrefs', [])
            # Cap the stored references at 5000; once the cap is reached new
            # references are dropped.
            if len(txrefs_list) < 5000:
                txrefs_list.append(txref_dict)
            else:
                txrefs_list = txrefs_list[:5000]
            address_data['txrefs'] = txrefs_list
    else:
        address_data = {'balance': first_balance, 'txrefs': [txref_dict]}
    _write_json_atomically(address_path, address_data)


def save_transaction_data_to_json():
    """Export transactions from a Bitcoin Core node to JSON files, resuming from the index.

    The last txid recorded in the highest-numbered file of
    ``../transactions/index`` is looked up over RPC to find the block it was
    confirmed in. Every block after that one, up to height 310000, is then
    walked and for each transaction the function:

    * writes ``<bucket>/<txid>.json`` (bucket = zero-padded directory counter,
      relative to the current working directory),
    * creates or updates ``../address/<first output address>.json``,
    * rewrites ``index<N>.json`` and ``../coinbase.pkl``.

    A new bucket / index file is started every 10000 transactions.

    RPC errors and any other exception raised while processing are caught and
    printed; nothing is returned.
    """
    # Bitcoin Core RPC settings
    rpc_user = 'your_rpc_username'
    rpc_password = 'your_rpc_password'
    rpc_host = 'localhost'
    rpc_port = 8332

    address_directory = '../address'
    os.makedirs(address_directory, exist_ok=True)

    index_directory = '../transactions/index'
    resume_point = _load_resume_point(index_directory)
    if resume_point is None:
        # Previously this path crashed with an unbound-variable error because
        # there was no transaction id to look up.
        print("No indexed transaction to resume from; nothing to do.")
        return
    last_txid, directory_count, transaction_count, index_data = resume_point

    print(f"Last indexed txid: {last_txid}")
    print(f"Last directory_count: {directory_count}")
    print(f"Last transaction_count: {transaction_count}")

    end_block_height = 310000

    try:
        rpc_connection = AuthServiceProxy(f"http://{rpc_user}:{rpc_password}@{rpc_host}:{rpc_port}")

        # Locate the block that contains the last indexed transaction.
        raw_transaction = rpc_connection.getrawtransaction(last_txid, True)
        if "blockhash" not in raw_transaction:
            # Nothing changes between retries, so looping here would never
            # terminate; report and stop instead.
            print(f"Transaction with txid '{last_txid}' is not yet confirmed in a block.")
            return

        start_block_height = rpc_connection.getblock(raw_transaction["blockhash"])["height"]
        print("start_block_height: ", start_block_height)
        print(f"Transaction with txid '{last_txid}' is in block height {start_block_height}")

        # NOTE(review): as in the original, the block containing the last
        # indexed transaction is itself skipped, so any transactions of that
        # block that were not indexed before an interruption are not revisited.
        for block_height in tqdm(range(start_block_height + 1, end_block_height + 1)):
            block_hash = rpc_connection.getblockhash(block_height)
            print("block height: ",block_height)
            block = rpc_connection.getblock(block_hash)

            # The block time is shared by every transaction in the block.
            utc_datetime = datetime.utcfromtimestamp(block['time'])
            formatted_date = utc_datetime.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
            confirmed_at = utc_datetime.strftime('%Y-%m-%d %H:%M:%S')

            for tx_id in block['tx']:
                transaction = rpc_connection.getrawtransaction(tx_id, True)
                transaction_hash = transaction['txid']
                print("TXID: ", transaction_hash)
                directory = str(directory_count).zfill(8)
                print("File Numbers:", directory)
                os.makedirs(directory, exist_ok=True)

                # Collect per-output value, owning txid and (when present) address.
                addresses = []
                balancees = []
                txrefs = []
                vout_list = transaction['vout']
                vout_sz = len(vout_list)
                for vout in vout_list:
                    balance = vout['value']
                    if balance is not None and balance > 0:
                        balancees.append(balance)
                        print("Balance:", balance)
                    else:
                        balancees.append(0)
                        print("No or empty output balance found for TXID:", transaction_hash)

                    txrefs.append(transaction_hash)

                    if 'scriptPubKey' in vout and 'address' in vout['scriptPubKey']:
                        addresses.append(vout['scriptPubKey']['address'])

                if addresses:
                    print("Output Addresses:", addresses)
                else:
                    print("No output addresses found for TXID:", transaction_hash)

                print("txrefs", txrefs)

                # NOTE(review): only the txid of the last non-coinbase input is
                # kept (an empty list when there is none); kept as in the
                # original so the stored schema does not change.
                vin_list = transaction['vin']
                vin_sz = len(vin_list)
                input_tx_id = []
                for vin in vin_list:
                    if 'coinbase' not in vin and 'txid' in vin:
                        input_tx_id = vin['txid']
                        print("Input TXID:", input_tx_id)

                # Guarded so a transaction without outputs cannot raise IndexError.
                first_balance = float(balancees[0]) if balancees else 0.0

                if addresses:
                    # The address file is keyed by the first output address only.
                    address_path = os.path.join(address_directory, f'{addresses[0]}.json')
                    txref_dict = {
                        'tx_hash': transaction_hash,
                        'tx_input_n': vin_sz,
                        'block_height': block_height,
                        'tx_output_n': vout_sz,
                        'ref_balance': first_balance,
                        'confirmed': formatted_date
                    }
                    _record_address_txref(address_path, txref_dict, first_balance)
                else:
                    print("No output addresses found for TXID:", transaction_hash)

                # Save transaction data to a JSON file inside the current bucket.
                file_path = os.path.join(directory, f'{transaction_hash}.json')
                with open(file_path, 'w') as file:
                    json.dump({
                        'tx_hash': transaction_hash,
                        'tx_input_n': input_tx_id,
                        'vin_sz': vin_sz,
                        'tx_output_n': txrefs,
                        'vout_sz': vout_sz,
                        'block_height': block_height,
                        'ref_balance': first_balance,
                        'confirmed': confirmed_at
                    }, file, indent=4, default=str)

                # Persist the txid -> bucket mapping after every transaction so
                # the next run can resume from it.
                index_filepath = os.path.join(index_directory, f'index{directory_count}.json')
                index_data[transaction_hash] = directory
                with open(index_filepath, 'w') as json_file:
                    json.dump(index_data, json_file)

                # Start a new bucket and index file every 10000 transactions.
                transaction_count += 1
                if transaction_count % 10000 == 0:
                    transaction_count = 0
                    index_data = {}
                    directory_count += 1

                # Mirror the current index mapping into coinbase.pkl.
                coinbase_data = {"tx2blk": index_data}
                with open('../coinbase.pkl', 'wb') as coinbase_file:
                    pickle.dump(coinbase_data, coinbase_file)

    except JSONRPCException as e:
        print("RPC request error:", e.error)

    except Exception as e:
        print("Error:", str(e))

# Run the export as soon as this cell is executed.
save_transaction_data_to_json()

In [None]:
import os
import json

# Directory that holds the per-address JSON files.
directory = '../address'

# Walk every JSON file in the directory and delete the ones that cannot be
# parsed any more.
for filename in os.listdir(directory):
    if not filename.endswith('.json'):
        continue
    file_path = os.path.join(directory, filename)

    try:
        # Parsing is the validity check; the parsed value itself is not needed.
        with open(file_path, 'r') as file:
            json.load(file)
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Undecodable bytes are as much a sign of a damaged file as malformed
        # JSON; previously a UnicodeDecodeError aborted the whole sweep.
        os.remove(file_path)
        print(f"Corrupted file '{filename}' has been deleted.")


In [None]:
import os

# Target: one specific address file inside the address directory.
directory = '../address'
file_to_delete = '1H6YbozjMSaARR2AHTSkNyC6S63S4LD2JB.json'
file_path = os.path.join(directory, file_to_delete)

# Report a missing file first; otherwise remove it and confirm.
if not os.path.exists(file_path):
    print(f"File '{file_to_delete}' does not exist in the directory.")
else:
    os.remove(file_path)
    print(f"File '{file_to_delete}' has been deleted successfully.")


In [None]:
import pandas as pd

# Input dataset and the destination for the extracted column.
csv_file_path = '../dataset_allmymerge.csv'
output_csv_path = '../dataset_mymerge_address.csv'

# Load only the 'address' column of the input CSV.
dataset_df = pd.read_csv(csv_file_path, usecols=['address'])

# Show the addresses as a plain Python list.
addresses_list = dataset_df['address'].tolist()
print(addresses_list)

# Write the single-column frame back out without the row index.
dataset_df.to_csv(output_csv_path, header=['address'], index=False)

print(f"資料已儲存至 {output_csv_path}")

In [None]:
import os
import json

def _is_valid_json(text):
    """Return True when *text* parses as JSON."""
    try:
        json.loads(text)
    except ValueError:
        return False
    return True


def _close_truncated_json(text):
    """Return *text* with open strings and containers closed in nesting order.

    Brackets inside string values are ignored, closers that do not match the
    innermost open container are dropped, and a trailing comma left by the
    truncation is removed.
    """
    kept = []
    pending_closers = []  # closers still owed, innermost last
    in_string = False
    escaped = False
    for char in text:
        if in_string:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char == '{':
            pending_closers.append('}')
        elif char == '[':
            pending_closers.append(']')
        elif char in '}]':
            if not pending_closers or pending_closers[-1] != char:
                continue  # stray closer: drop it
            pending_closers.pop()
        kept.append(char)

    repaired = ''.join(kept)
    if in_string:
        if escaped:
            # A dangling backslash would escape the quote added below.
            repaired = repaired[:-1]
        repaired += '"'
    else:
        repaired = repaired.rstrip().rstrip(',').rstrip()
    return repaired + ''.join(reversed(pending_closers))


def fix_incomplete_json(file_path):
    """Try to repair a truncated JSON file in place.

    The file is rewritten only when the repaired text actually parses as JSON;
    otherwise it is left untouched and a message is printed.

    Args:
        file_path: Path of the JSON file to repair.

    Raises:
        OSError: If the file cannot be opened for reading and writing.
    """
    with open(file_path, 'r+') as file:
        try:
            content = file.read()
            if _is_valid_json(content):
                print(f"File '{file_path}' is already valid JSON; nothing to fix.")
                return

            repaired = _close_truncated_json(content)
            if not _is_valid_json(repaired):
                # Never overwrite the file with text that still does not parse.
                print(f"Error fixing file '{file_path}': unable to produce valid JSON; file left unchanged.")
                return

            file.seek(0)  # rewind before overwriting
            file.write(repaired)
            file.truncate()  # drop leftover bytes if the new text is shorter
            print(f"File '{file_path}' has been fixed successfully.")
        except (OSError, ValueError) as e:
            print(f"Error fixing file '{file_path}': {e}")

# Address file that should be repaired.
directory = '../address'
file_to_fix = '1MPxhNkSzeTNTHSZAibMaS8HS1esmUL1ne.json'
file_path = os.path.join(directory, file_to_fix)

# Only attempt the repair when the file is actually present.
if not os.path.exists(file_path):
    print(f"File '{file_to_fix}' does not exist in the directory.")
else:
    fix_incomplete_json(file_path)
