In [1]:
from dotenv import load_dotenv
import pandas as pd
import requests
import json
import csv
import os

load_dotenv()

def get_api_key():
    return os.getenv('ETHEREUM_API_KEY')

def get_connection_url(contract_address):
    return f'https://svc.blockdaemon.com/universal/v1/ethereum/mainnet/account/{contract_address}/txs'


OPTIONS = {
    "headers": {
        "accept": "application/json",
        "X-API-Key": get_api_key()
    }
}

def extract_transaction_hashes(logs):
    return set(log['id'] for log in logs)

def save_hashes_to_csv(hashes, filename):
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['hash'])
        for tx_hash in hashes:
            writer.writerow([tx_hash])

def main(ts_start, ts_end, folder_name, contract_address):
    pages = 0
    total_logs_count = 0
    pageToken = None
    merged_logs = []
    
    try:
        if not os.path.exists(f'./data/{folder_name}/'):
            os.makedirs(f'./data/{folder_name}/')

        while True:
            payload = {
                "from": ts_start,
                "to": ts_end,
                "order": 'desc',
                "page_token": pageToken,
                "page_size": 100
            }
        
            response = requests.get(get_connection_url(contract_address), headers=OPTIONS['headers'], params=payload)
            response_json = response.json()

            response_data = response_json['data']
            
            pages += 1
            print(pages)

            if (response_json["total"] != 0):
                merged_logs.extend(response_data)

                if 'meta' not in response_json:
                    break

                pageToken = response_json["meta"]["paging"]["next_page_token"]

                if pageToken == None:
                    break
            
                total_logs_count += response_json["total"]
            else:
                break

        with open(f'./data/{folder_name}/merged_logs.json', 'w') as f:
            json.dump(merged_logs, f)

        print(f"Merged all logs into ./data/{folder_name}/merged_logs.json")
        print(f"Total logs fetched: {total_logs_count}")

        # Extract transaction hashes and save to CSV
        transaction_hashes = extract_transaction_hashes(merged_logs)
        save_hashes_to_csv(transaction_hashes, f'./data/{folder_name}/transaction_hashes.csv')

        print(f"Transaction hashes saved to ./data/{folder_name}/transaction_hashes.csv")
        print(f"Total transaction hashes: {len(transaction_hashes)}")
    except Exception as error:
        print('Error fetching transactions:', error)


**Relevant Timestamps**

1641899472 # 11 jan 2022 11:11:12 (deployment of contract 0xd3df in Moonbeam)

1671062400 # 15 Dec 2022

1722446626 # 31 Jul 2024

**Relevant Topics**
Deposits: Send(address,address,uint32,bytes32,uint256,bool): 0xa3d219cf126a12be40d7ad1ceef46231c987988dd4e686457b610e1b6b80a4bf

Withdrawals: Receive(uint64,address,address,address,uint256): 0x9f9a97db84f39202ca3b409b63f7ccf7d3fd810e176573c7483088b6f181bbbb

In [6]:
contract_address = "0x88a69b4e698a4b090df6cf5bd7b2d47325ad30a3"

main(1641899472, 1671062400, "logs-11Jan-14Dec", contract_address)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


In [34]:
contract_address = "0x88a69b4e698a4b090df6cf5bd7b2d47325ad30a3"    # DOES NOT WORK!!!!

main(1671062401, 1722446626, "logs-14Dec-today", contract_address)

1
Merged all logs into ./data/logs-14Dec-today/merged_logs.json
Total logs fetched: 0
Transaction hashes saved to ./data/logs-14Dec-today/transaction_hashes.csv
Total transaction hashes: 4


In [37]:
import pandas as pd

FOLDER_NAME = "logs-14Dec-today"

df1 = pd.read_csv(f'./data/{FOLDER_NAME}/etherscan-0x5d94309e5a0090b165fa4181519701637b6daeba.csv')
hashes_df1 = df1['Transaction Hash'].tolist()
df2 = pd.read_csv(f'./data/{FOLDER_NAME}/etherscan-0x049b51e531fd8f90da6d92ea83dc4125002f20ef.csv')
hashes_df2 = df2['Transaction Hash'].tolist()

merged = hashes_df1 + hashes_df2

save_hashes_to_csv(merged, f'./data/{FOLDER_NAME}/transaction_hashes.csv')

print(f"Transaction hashes saved to ./data/{FOLDER_NAME}/transaction_hashes.csv")
print(f"Total transaction hashes: {len(merged)}")

Transaction hashes saved to ./data/logs-14Dec-today/transaction_hashes.csv
Total transaction hashes: 1779


## Retrieve Transaction Receipts

In [2]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

RATE_LIMIT = 20  # requests per second
REQUEST_INTERVAL = 1 / RATE_LIMIT

CONNECTION_URL = "https://svc.blockdaemon.com/ethereum/mainnet/native"

def fetch_receipt(tx_hash, folder_name):
    data = {
        "id": 1,
        "jsonrpc": "2.0",
        "method": "eth_getTransactionReceipt",
        "params": [tx_hash]
    }

    try:
        response = requests.post(CONNECTION_URL, headers=OPTIONS['headers'], json=data)
        response_json = response.json()
        if 'result' in response_json:
            return response_json['result']
        else:
            print(f"Error fetching receipt: {response_json}, {tx_hash}")
            return []
    except Exception as e:
        with open(f"./data/{folder_name}/errors.txt", "a") as error_file:
            error_file.write(f"Error retrieving transaction: {tx_hash}, {e}\n")

def process_hashes(hashes, folder_name):
    receipts = []
    total_hashes = len(hashes)
    progress_interval = max(1, total_hashes // 100)  # Update progress every 1%

    with ThreadPoolExecutor(max_workers=RATE_LIMIT) as executor:
        futures = {executor.submit(fetch_receipt, tx_hash, folder_name): tx_hash for tx_hash in hashes}
        
        for idx, future in enumerate(as_completed(futures), 1):
            try:
                result = future.result()
                receipts.append(result)

                if idx % progress_interval == 0 or idx == total_hashes:
                    progress_percentage = (idx / total_hashes) * 100
                    print(f"Progress: {progress_percentage:.2f}% ({idx}/{total_hashes})")
        
                time.sleep(REQUEST_INTERVAL)
            except Exception as e:
                with open(f"./data/{folder_name}/errors.txt", "a") as error_file:
                    error_file.write(f"Error retrieving transaction: {futures[future]}\n")
    return receipts

def retrieve_receipts(folder_name):
    # Read hashes from CSV
    hashes_df = pd.read_csv(f'./data/{folder_name}/transaction_hashes.csv')
    hashes = hashes_df['hash'].drop_duplicates().tolist()

    # Fetch receipts
    receipts = process_hashes(hashes, folder_name)

    # Save receipts to JSON
    with open(f'./data/{folder_name}/tx_receipts.json', 'w') as f:
        json.dump(receipts, f)

    # Print the count of processed receipts
    print(f"Total receipts fetched: {len(receipts)}")


In [5]:
retrieve_receipts("logs-11Jan-14Dec/missing-data-wglmr/0xba8d75baccc4d5c4bd814fde69267213052ea663")

Progress: 0.65% (1/154)
Progress: 1.30% (2/154)
Progress: 1.95% (3/154)
Progress: 2.60% (4/154)
Progress: 3.25% (5/154)
Progress: 3.90% (6/154)
Progress: 4.55% (7/154)
Progress: 5.19% (8/154)
Progress: 5.84% (9/154)
Progress: 6.49% (10/154)
Progress: 7.14% (11/154)
Progress: 7.79% (12/154)
Progress: 8.44% (13/154)
Progress: 9.09% (14/154)
Progress: 9.74% (15/154)
Progress: 10.39% (16/154)
Progress: 11.04% (17/154)
Progress: 11.69% (18/154)
Progress: 12.34% (19/154)
Progress: 12.99% (20/154)
Progress: 13.64% (21/154)
Progress: 14.29% (22/154)
Progress: 14.94% (23/154)
Progress: 15.58% (24/154)
Progress: 16.23% (25/154)
Progress: 16.88% (26/154)
Progress: 17.53% (27/154)
Progress: 18.18% (28/154)
Progress: 18.83% (29/154)
Progress: 19.48% (30/154)
Progress: 20.13% (31/154)
Progress: 20.78% (32/154)
Progress: 21.43% (33/154)
Progress: 22.08% (34/154)
Progress: 22.73% (35/154)
Progress: 23.38% (36/154)
Progress: 24.03% (37/154)
Progress: 24.68% (38/154)
Progress: 25.32% (39/154)
Progress: 

In [9]:
retrieve_receipts("logs-11Jan-14Dec")

Progress: 1.00% (359/35967)
Progress: 2.00% (718/35967)
Progress: 2.99% (1077/35967)
Progress: 3.99% (1436/35967)
Progress: 4.99% (1795/35967)
Progress: 5.99% (2154/35967)
Progress: 6.99% (2513/35967)
Progress: 7.99% (2872/35967)
Progress: 8.98% (3231/35967)
Progress: 9.98% (3590/35967)
Progress: 10.98% (3949/35967)
Progress: 11.98% (4308/35967)
Progress: 12.98% (4667/35967)
Progress: 13.97% (5026/35967)
Progress: 14.97% (5385/35967)
Progress: 15.97% (5744/35967)
Progress: 16.97% (6103/35967)
Progress: 17.97% (6462/35967)
Progress: 18.96% (6821/35967)
Progress: 19.96% (7180/35967)
Progress: 20.96% (7539/35967)
Progress: 21.96% (7898/35967)
Progress: 22.96% (8257/35967)
Progress: 23.96% (8616/35967)
Progress: 24.95% (8975/35967)
Progress: 25.95% (9334/35967)
Progress: 26.95% (9693/35967)
Progress: 27.95% (10052/35967)
Progress: 28.95% (10411/35967)
Progress: 29.94% (10770/35967)
Progress: 30.94% (11129/35967)
Progress: 31.94% (11488/35967)
Progress: 32.94% (11847/35967)
Progress: 33.94%

In [9]:
retrieve_receipts('logs-28Apr-today')

Progress: 1.00% (1109/110930)
Progress: 2.00% (2218/110930)
Progress: 3.00% (3327/110930)
Progress: 4.00% (4436/110930)
Progress: 5.00% (5545/110930)
Progress: 6.00% (6654/110930)
Progress: 7.00% (7763/110930)
Progress: 8.00% (8872/110930)
Progress: 9.00% (9981/110930)
Progress: 10.00% (11090/110930)
Progress: 11.00% (12199/110930)
Progress: 12.00% (13308/110930)
Progress: 13.00% (14417/110930)
Progress: 14.00% (15526/110930)
Progress: 15.00% (16635/110930)
Progress: 16.00% (17744/110930)
Progress: 17.00% (18853/110930)
Progress: 18.00% (19962/110930)
Progress: 18.99% (21071/110930)
Progress: 19.99% (22180/110930)
Progress: 20.99% (23289/110930)
Progress: 21.99% (24398/110930)
Progress: 22.99% (25507/110930)
Progress: 23.99% (26616/110930)
Progress: 24.99% (27725/110930)
Progress: 25.99% (28834/110930)
Progress: 26.99% (29943/110930)
Progress: 27.99% (31052/110930)
Progress: 28.99% (32161/110930)
Progress: 29.99% (33270/110930)
Progress: 30.99% (34379/110930)
Progress: 31.99% (35488/11

In [38]:
retrieve_receipts("logs-14Dec-today")

Progress: 0.96% (17/1779)
Progress: 1.91% (34/1779)
Progress: 2.87% (51/1779)
Progress: 3.82% (68/1779)
Progress: 4.78% (85/1779)
Progress: 5.73% (102/1779)
Progress: 6.69% (119/1779)
Progress: 7.64% (136/1779)
Progress: 8.60% (153/1779)
Progress: 9.56% (170/1779)
Progress: 10.51% (187/1779)
Progress: 11.47% (204/1779)
Progress: 12.42% (221/1779)
Progress: 13.38% (238/1779)
Progress: 14.33% (255/1779)
Progress: 15.29% (272/1779)
Progress: 16.25% (289/1779)
Progress: 17.20% (306/1779)
Progress: 18.16% (323/1779)
Progress: 19.11% (340/1779)
Progress: 20.07% (357/1779)
Progress: 21.02% (374/1779)
Progress: 21.98% (391/1779)
Progress: 22.93% (408/1779)
Progress: 23.89% (425/1779)
Progress: 24.85% (442/1779)
Progress: 25.80% (459/1779)
Progress: 26.76% (476/1779)
Progress: 27.71% (493/1779)
Progress: 28.67% (510/1779)
Progress: 29.62% (527/1779)
Progress: 30.58% (544/1779)
Progress: 31.53% (561/1779)
Progress: 32.49% (578/1779)
Progress: 33.45% (595/1779)
Progress: 34.40% (612/1779)
Progres

In [11]:
import os
import re
import json

def load_failed_hashes(filename):
    with open(filename, 'r') as f:
        hashes = [re.findall(pattern='0x[a-fA-F0-9]{64}', string=line)[0] for line in f]
        print(hashes)
    return hashes

def retrieve_failed_receipts(folder_name):
    errors_file = f'./data/{folder_name}/errors.txt'
    if not os.path.exists(errors_file):
        print(f"Error file {errors_file} does not exist.")
        return

    # Load failed transaction hashes from errors.txt
    failed_hashes = load_failed_hashes(f'./data/{folder_name}/errors.txt')

    # Fetch receipts for failed hashes
    failed_receipts = process_hashes(failed_hashes, folder_name)

    # Save failed receipts to JSON
    with open(f'./data/{folder_name}/tx_receipts_2.json', 'w') as f:
        json.dump(failed_receipts, f)

    # Print the count of processed failed receipts
    print(f"Total failed receipts fetched: {len(failed_receipts)}")

In [12]:
retrieve_failed_receipts('logs-11Jan-14Dec')

['0x02ea3831a14936046472fdc74a8377e2d31951ff39dea146edb59c80750b7561']
Progress: 100.00% (1/1)
Total failed receipts fetched: 1


For the second interval, we need to merge the json files with transaction receipts

In [13]:
import json

def merge_json_files(folder_name):
    file1 = f'./data/{folder_name}/tx_receipts.json'
    file2 = f'./data/{folder_name}/tx_receipts_2.json'
    output_file = f'./data/{folder_name}/merged_tx_receipts.json'

    with open(file1, 'r') as f1, open(file2, 'r') as f2:
        json1 = json.load(f1)
        json2 = json.load(f2)

    merged_json = json1 + json2

    with open(output_file, 'w') as f:
        json.dump(merged_json, f)

    print(f"Number of transaction receipts in {file1}: {len(json1)}")
    print(f"Number of transaction receipts in {file2}: {len(json2)}")
    print(f"Number of transaction receipts in the merged file: {len(merged_json)}")


In [14]:
merge_json_files("logs-11Jan-14Dec")

Number of transaction receipts in ./data/logs-11Jan-14Dec/tx_receipts.json: 35967
Number of transaction receipts in ./data/logs-11Jan-14Dec/tx_receipts_2.json: 1
Number of transaction receipts in the merged file: 35968


In [9]:
# remove duplicates from json file with result field of receipts
def remove_duplicates(file, folder_name):
    with open(file, 'r') as f:
        receipts = json.load(f)

    unique_receipts = []
    unique_hashes = set()

    for receipt in receipts:
        try:
            tx_hash = receipt.get('transactionHash')
            if tx_hash not in unique_hashes:
                unique_hashes.add(tx_hash)
                unique_receipts.append(receipt)
        except Exception as e:
            print(f"Error processing receipt: {receipt}, {e}")

    # Save unique receipts to a new file
    unique_output_file = f'./data/{folder_name}/unique_tx_receipts-new-2.json'
    with open(unique_output_file, 'w') as f:
        json.dump(unique_receipts, f)

    print(f"Unique receipts saved to {unique_output_file}")

    print(f"Removed {len(receipts) - len(unique_receipts)} duplicate receipts")
    print(f"Total receipts: {len(receipts)}")
    print(f"Total unique receipts: {len(unique_receipts)}")


In [16]:
tx_receipts = './data/logs-11Jan-14Dec/merged_tx_receipts.json'

remove_duplicates(tx_receipts, "logs-11Jan-14Dec")

Error processing receipt: None, 'NoneType' object has no attribute 'get'
Unique receipts saved to ./data/logs-11Jan-14Dec/unique_tx_receipts.json
Removed 1 duplicate receipts
Total receipts: 35968
Total unique receipts: 35967


In [41]:
tx_receipts = './data/logs-14Dec-today/tx_receipts.json'

remove_duplicates(tx_receipts, "logs-14Dec-today")

Unique receipts saved to ./data/logs-14Dec-today/unique_tx_receipts.json
Removed 0 duplicate receipts
Total receipts: 1779
Total unique receipts: 1779


In [12]:
from dotenv import load_dotenv
import pandas as pd
import concurrent.futures
import json
import requests
import os

load_dotenv()

def get_api_key():
    return os.getenv('ETHEREUM_API_KEY')

CONNECTION_URL = "https://svc.blockdaemon.com/ethereum/mainnet/native"


OPTIONS = {
    "headers": {
        "accept": "application/json",
        "X-API-Key": get_api_key()
    }
}

def get_block_data(block_number, errors_file):
    payload = {
        "id": 1,
        "jsonrpc": "2.0",
        "params": [],
        "method": "eth_getBlockByNumber"
    }

    try:
        payload['params'] = [block_number, False]
        response = requests.post(CONNECTION_URL, headers=OPTIONS['headers'], json=payload)

        if response.status_code == 200:
            block = response.json()["result"]
            block_no = int(block["number"], 16)
            timestamp = int(block["timestamp"], 16)
            transactions = len(block["transactions"])
            return f"{block_no},{transactions},{timestamp}\n"
        else:
            with open(errors_file, "a") as error_file:
                error_file.write(f"Error code: {block_number}\n")
            return "null,null,null\n"
    except Exception as e:
        with open(errors_file, "a") as error_file:
            error_file.write(f"Error retrieving block: {block_number}, {e}\n")
        return "null,null,null\n"

def get_blocks_data(folder_name):
    input_file = f'./data/{folder_name}/tx_receipts.json'
    output_file = f'./data/{folder_name}/blocks.csv'
    errors_file = f'./data/{folder_name}/errors.txt'

    with open(input_file, 'r') as file:
        tx_receipts = json.load(file)

    print(f"Extracting block number and Unix timestamp from {len(tx_receipts)} transaction receipts...")

    block_numbers = [tx["blockNumber"] for tx in tx_receipts]

    print(f"Extracted {len(block_numbers)} block numbers...")

    with open(output_file, "a") as blocks_file:
        blocks_file.write("block_number,transactions,timestamp\n")
        
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            # Submit tasks for each block in the range
            futures = {executor.submit(get_block_data, block_number, errors_file): block_number for block_number in block_numbers if block_number is not None}

            # Process the completed tasks and write to the file
            for future in concurrent.futures.as_completed(futures):
                block_number = futures[future]
                try:
                    blocks_data = future.result()
                    blocks_file.write(blocks_data)
                except Exception as e:
                    print(f"Error processing block {block_number}: {e}")

    print(f'Extracted block number and Unix timestamp to {output_file}')

In [22]:
get_blocks_data('logs-11Jan-14Dec')

Extracting block number and Unix timestamp from 35967 transaction receipts...
Extracted 35967 block numbers...
Extracted block number and Unix timestamp to ./data/logs-11Jan-14Dec/blocks_2.csv


In [42]:
get_blocks_data('logs-14Dec-today')

Extracting block number and Unix timestamp from 1779 transaction receipts...
Extracted 1779 block numbers...
Extracted block number and Unix timestamp to ./data/logs-14Dec-today/blocks.csv


In [32]:
def retrieve_missing_blocks(folder_name):
    blocks_file = f'./data/{folder_name}/blocks.csv'
    receipts_file = f'./data/{folder_name}/tx_receipts.json'
    errors_file = f'./data/{folder_name}/errors_2.txt'

    blocks_data = pd.read_csv(blocks_file)

    with open(receipts_file, 'r') as file:
        tx_receipts = json.load(file)

    block_numbers = [tx["blockNumber"] for tx in tx_receipts if tx is not None]
    
    print("Loaded block numbers and block data...")

    print(len(block_numbers))
    print(len(blocks_data['block_number']))
    
    missing_blocks = set(block_numbers) - set(hex(block) for block in blocks_data['block_number'] if block is not None)

    print(f"Total missing blocks: {len(missing_blocks)}")

    with open(blocks_file, 'a') as blocks_file:
        for missing_block in missing_blocks:
            line = get_block_data(missing_block, errors_file)
            blocks_file.write(line)

In [33]:
retrieve_missing_blocks('logs-11Jan-14Dec')

Loaded block numbers and block data...
35966
35965
Total missing blocks: 1


For some reason, there was data being left out when using the Blockdaemon's Universal API. So we extracted transactions manually 
from Etherscan

In [22]:
import pandas as pd

txs = pd.read_csv('./data/logs-11Jan-14Dec/missing-data-with-native-eth/0x049b51e531fd8f90da6d92ea83dc4125002f20ef/aggregated.csv', sep=',')

txs = txs[txs['Status'] == 'Success']

txs.to_csv('./data/logs-11Jan-14Dec/missing-data-with-native-eth/0x049b51e531fd8f90da6d92ea83dc4125002f20ef/successful_txs.csv', index=False)

In [23]:
import json

with open("./data/logs-11Jan-14Dec/missing-data-with-native-eth/0x049b51e531fd8f90da6d92ea83dc4125002f20ef/tx_receipts.json", 'r') as f1:
    json1 = json.load(f1)

new_receipts = pd.read_csv("./data/logs-11Jan-14Dec/missing-data-with-native-eth/0x049b51e531fd8f90da6d92ea83dc4125002f20ef/successful_txs.csv")

new_json = []

for tx_receipt in json1:
    if tx_receipt['transactionHash'] in new_receipts['Transaction Hash'].tolist():
        new_json.append(tx_receipt)

print(len(new_json))

with open("./data/logs-11Jan-14Dec/missing-data-with-native-eth/0x049b51e531fd8f90da6d92ea83dc4125002f20ef/successful_txs_receipts.json", "w") as f:
    json.dump(new_json, f)
    

8922


In [25]:
# merge json files

file1 = './data/logs-11Jan-14Dec/unique_tx_receipts.json'
file2 = './data/logs-11Jan-14Dec/missing-data-with-native-eth/0x049b51e531fd8f90da6d92ea83dc4125002f20ef/successful_txs_receipts.json'
output_file = './data/logs-11Jan-14Dec/unique_tx_receipts-2.json'

with open(file1, 'r') as f1, open(file2, 'r') as f2:
    json1 = json.load(f1)
    json2 = json.load(f2)

merged_json = json1 + json2

with open(output_file, 'w') as f:
    json.dump(merged_json, f)

print(f"Number of transaction receipts in {file1}: {len(json1)}")
print(f"Number of transaction receipts in {file2}: {len(json2)}")
print(f"Number of transaction receipts in the merged file: {len(merged_json)}")

Number of transaction receipts in ./data/logs-11Jan-14Dec/unique_tx_receipts.json: 35967
Number of transaction receipts in ./data/logs-11Jan-14Dec/missing-data-with-native-eth/0x049b51e531fd8f90da6d92ea83dc4125002f20ef/successful_txs_receipts.json: 8922
Number of transaction receipts in the merged file: 44889


In [27]:
input_file = './data/logs-11Jan-14Dec/unique_tx_receipts-2.json'
remove_duplicates(input_file, "logs-11Jan-14Dec")

Unique receipts saved to ./data/logs-11Jan-14Dec/unique_tx_receipts-new.json
Removed 4229 duplicate receipts
Total receipts: 44889
Total unique receipts: 40660


In [32]:
with open("./data/logs-11Jan-14Dec/unique_tx_receipts-new.json", 'r') as f1, open(file1, 'r') as f2:
    json1 = json.load(f1)
    json2 = json.load(f2)


print(len(json1))
print(len(json2))

newly_added_tx_receipts = [tx_receipt for tx_receipt in json1 if tx_receipt not in json2]
print(len(newly_added_tx_receipts))

with open("./data/logs-11Jan-14Dec/missing-txs.json", 'w') as f:
    json.dump(newly_added_tx_receipts, f)

40660
35967
4693


In [58]:
blocks_file = f'./data/logs-11Jan-14Dec/blocks.csv'
receipts_file = f'./data/logs-11Jan-14Dec/missing-txs.json'
errors_file = f'./data/logs-11Jan-14Dec/errors_2.txt'

blocks_data = pd.read_csv(blocks_file)

with open(receipts_file, 'r') as file:
    tx_receipts = json.load(file)

block_numbers = [tx["blockNumber"] for tx in tx_receipts if tx is not None]

print("Loaded block numbers and block data...")

print(len(block_numbers))
print(len(blocks_data['block_number']))

missing_blocks = set(block_numbers) - set(hex(block) for block in blocks_data['block_number'] if block is not None)

print(f"Total missing blocks: {len(missing_blocks)}")

with open(blocks_file, 'a') as blocks_file:
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Submit tasks for each block in the range
        futures = {executor.submit(get_block_data, missing_block, errors_file): missing_block for missing_block in missing_blocks}

        # Process the completed tasks and write to the file
        for future in concurrent.futures.as_completed(futures):
            block_number = futures[future]
            try:
                blocks_data = future.result()
                blocks_file.write(blocks_data)
            except Exception as e:
                print(f"Error processing block {block_number}: {e}")

Loaded block numbers and block data...
4693
35966
Total missing blocks: 4376


There is still missing data for deposits of WGLMR. So we extracted transactions manually from Etherscan as well

In [None]:
import pandas as pd

txs = pd.read_csv('./data/logs-11Jan-14Dec/missing-data-wglmr/0xba8d75baccc4d5c4bd814fde69267213052ea663/aggregated.csv', sep=',')

txs = txs[txs['Status'] == 'Success']

txs.to_csv('./data/logs-11Jan-14Dec/missing-data-wglmr/0xba8d75baccc4d5c4bd814fde69267213052ea663/successful_txs.csv', index=False)

In [7]:
# merge json files

file1 = './data/logs-11Jan-14Dec/unique_tx_receipts-new.json'
file2 = './data/logs-11Jan-14Dec/missing-data-wglmr/0xba8d75baccc4d5c4bd814fde69267213052ea663/tx_receipts.json'
output_file = './data/logs-11Jan-14Dec/unique_tx_receipts-3.json'

with open(file1, 'r') as f1, open(file2, 'r') as f2:
    json1 = json.load(f1)
    json2 = json.load(f2)

merged_json = json1 + json2

with open(output_file, 'w') as f:
    json.dump(merged_json, f)

print(f"Number of transaction receipts in {file1}: {len(json1)}")
print(f"Number of transaction receipts in {file2}: {len(json2)}")
print(f"Number of transaction receipts in the merged file: {len(merged_json)}")

Number of transaction receipts in ./data/logs-11Jan-14Dec/unique_tx_receipts-new.json: 40660
Number of transaction receipts in ./data/logs-11Jan-14Dec/missing-data-wglmr/0xba8d75baccc4d5c4bd814fde69267213052ea663/tx_receipts.json: 154
Number of transaction receipts in the merged file: 40814


In [11]:
input = './data/logs-11Jan-14Dec/unique_tx_receipts-3.json'
remove_duplicates(input, "logs-11Jan-14Dec")

Unique receipts saved to ./data/logs-11Jan-14Dec/unique_tx_receipts-new-2.json
Removed 0 duplicate receipts
Total receipts: 40814
Total unique receipts: 40814


In [13]:
get_blocks_data('logs-11Jan-14Dec/missing-data-wglmr/0xba8d75baccc4d5c4bd814fde69267213052ea663')

Extracting block number and Unix timestamp from 154 transaction receipts...
Extracted 154 block numbers...
Extracted block number and Unix timestamp to ./data/logs-11Jan-14Dec/missing-data-wglmr/0xba8d75baccc4d5c4bd814fde69267213052ea663/blocks.csv
