In [None]:
import sys
import pymongo
import psycopg2
import json

# Print the interpreter version and the versions of the libraries imported above.
print(f'Python Version: {sys.version}')
print(f'Pymongo Version: {pymongo.__version__}')
print(f'Psycopg2 Version: {psycopg2.__version__}')
# NOTE(review): json is part of the standard library, so this reports the
# module's own version attribute rather than an installed package version.
print(f'Json Version: {json.__version__}')

In [None]:
# MongoDB client that is later indexed with the database names listed in
# SETTINGS['BLOCKCHAINS'] to read their `transactions` collections.
# NOTE(review): the username and password are embedded in this connection
# string and are therefore stored with the notebook; consider loading the URI
# from an environment variable or a secrets store instead.
# NOTE: `url` is a notebook-level name that the next connection cell reassigns.
url = "mongodb://etlReaderAnalysis:etl_reader_analysis__Gr2rEVBXyPWzIrP@34.126.84.83:27017,34.142.204.61:27017,34.142.219.60:27017"
blocks_and_transaction_mongo_db_client = pymongo.MongoClient(url)

In [None]:
# MongoDB client used below to access the `knowledge_graph` database
# (its `smart_contracts` and `wallets` collections).
# NOTE(review): the username and password are embedded in this connection
# string and are therefore stored with the notebook; consider loading the URI
# from an environment variable or a secrets store instead.
# NOTE: this reassigns the notebook-level name `url` set by the previous cell.
url = "mongodb://klgReaderAnalysis:klgReaderAnalysis_4Lc4kjBs5yykHHbZ@35.198.222.97:27017,34.124.133.164:27017,34.124.205.24:27017"
knowledge_graph_mongo_db_client = pymongo.MongoClient(url)

In [None]:
# PostgreSQL connection used below to read the per-chain `token_transfer` tables.
# The connection is opened here and is not closed anywhere in the visible cells.
# NOTE(review): the user name and password are hardcoded in this call and are
# therefore stored with the notebook; consider reading them from environment
# variables or a secrets store instead.
transferring_events_postgresql_connection = psycopg2.connect(dbname="postgres", user="student_token_transfer", password="svbk_2023", host="34.126.75.56", port="5432")

General settings being used throughout the notebook

In [None]:
SETTINGS = {
    # Database names looked up on the blocks/transactions MongoDB client;
    # each one is queried through its `transactions` collection.
    'BLOCKCHAINS': [
        'ethereum_blockchain_etl',
        'blockchain_etl',
    ],
    # For every database name above: the PostgreSQL schema whose
    # `token_transfer` table holds the matching transferring events.
    'BLOCKCHAIN_TO_CHAIN_MAP': {
        'ethereum_blockchain_etl': 'chain_0x1',
        'blockchain_etl': 'chain_0x38',
    },
    # Only wallets whose `balanceInUSD` is strictly greater than this are crawled.
    'MIN_BALANCE_IN_USD': 250_000,
    # Upper bound on the number of wallets fetched by the wallet crawl.
    'WALLET_LIMIT': 10,
}

Get native token price change log and store it in a json file

In [None]:
# Look up a single `smart_contracts` document by its `_id` and export its
# `priceChangeLogs` field to ./data/native_token_price_change_logs.json.
query = {'_id': '0x1_0x0000000000000000000000000000000000000000'}

knowledge_graph_db = knowledge_graph_mongo_db_client['knowledge_graph']

# find_one returns None when nothing matches, so a missing document is reported
# with an explicit message. IndexError is kept as the exception type because
# indexing the cursor (the previous approach) raised it as well.
native_token_document = knowledge_graph_db.smart_contracts.find_one(query)
if native_token_document is None:
    raise IndexError(f'No smart contract document found for query: {query}')

price_change_logs_native_ethereum = native_token_document['priceChangeLogs']

# Mode 'w' overwrites the file, so re-running the cell leaves one valid JSON document.
with open('./data/native_token_price_change_logs.json', 'w') as file:
    json.dump(price_change_logs_native_ethereum, file)

Get all the incoming transactions for a specified wallet_address. 

Only transactions from the blockchains defined in SETTINGS['BLOCKCHAINS'] are queried.

In [None]:
def getIncomingTransactionsForWalletAddress(wallet_address: str) -> dict:
    """Collect the transactions that were sent to ``wallet_address``.

    For every database name in ``SETTINGS['BLOCKCHAINS']`` the ``transactions``
    collection of that database is queried for documents whose ``to_address``
    equals ``wallet_address``.

    Args:
        wallet_address: Value matched against the ``to_address`` field.

    Returns:
        A dict mapping each blockchain database name to the list of matching
        transaction documents (an empty list when nothing matches).
    """
    query = {'to_address': wallet_address}
    # The cursor is materialised with list() so the result does not depend on
    # a live database cursor after the function returns.
    return {
        blockchain: list(
            blocks_and_transaction_mongo_db_client[blockchain].transactions.find(query)
        )
        for blockchain in SETTINGS['BLOCKCHAINS']
    }

Get all the outgoing transactions for a specified wallet_address. 

Only transactions from the blockchains defined in SETTINGS['BLOCKCHAINS'] are queried.

In [None]:
def getOutgoingTransactionsForWalletAddress(wallet_address: str) -> dict:
    """Collect the transactions that were sent from ``wallet_address``.

    For every database name in ``SETTINGS['BLOCKCHAINS']`` the ``transactions``
    collection of that database is queried for documents whose ``from_address``
    equals ``wallet_address``.

    Args:
        wallet_address: Value matched against the ``from_address`` field.

    Returns:
        A dict mapping each blockchain database name to the list of matching
        transaction documents (an empty list when nothing matches).
    """
    query = {'from_address': wallet_address}
    # The cursor is materialised with list() so the result does not depend on
    # a live database cursor after the function returns.
    return {
        blockchain: list(
            blocks_and_transaction_mongo_db_client[blockchain].transactions.find(query)
        )
        for blockchain in SETTINGS['BLOCKCHAINS']
    }

Main wallet crawling

The number of wallets crawled is defined in SETTINGS['WALLET_LIMIT'].

The minimum balance threshold in USD is defined in SETTINGS['MIN_BALANCE_IN_USD'].

All crawled wallets are being stored in a json file with their corresponding outgoing and incoming transactions 

In [None]:
# Accumulators filled by the crawl below and read by later cells. They are
# rebuilt from scratch here, so re-running this cell does not duplicate entries.
# The hash lists are keyed by SETTINGS['BLOCKCHAINS'] so they always match the
# keys returned by the two transaction helper functions.
ALL_TRANSACTION_HASHES = {blockchain: [] for blockchain in SETTINGS['BLOCKCHAINS']}
WALLET_ADDRESSES_QUERIES = []
ALL_WALLETS = {}

# Select wallets above the balance threshold that carry both the
# `dailyAllTransactions` and the `balanceChangeLogs` field.
# `$exists` takes a boolean; the string "true" only worked because any string
# is truthy (the string "false" would have matched the same documents).
query = {
    'balanceInUSD': {'$gt': SETTINGS['MIN_BALANCE_IN_USD']},
    'dailyAllTransactions': {'$exists': True},
    'balanceChangeLogs': {'$exists': True},
}

knowledge_graph_db = knowledge_graph_mongo_db_client['knowledge_graph']

print('Crawling wallets with transaction information')

for wallet in knowledge_graph_db.wallets.find(query).limit(SETTINGS['WALLET_LIMIT']):
    wallet_address = wallet['address']
    WALLET_ADDRESSES_QUERIES.append(wallet_address)

    # Attach the per-blockchain transaction lists to the wallet document itself
    # so they are exported together with it.
    wallet['incoming_transactions'] = getIncomingTransactionsForWalletAddress(wallet_address)
    wallet['outgoing_transactions'] = getOutgoingTransactionsForWalletAddress(wallet_address)

    # Record every transaction hash per blockchain (incoming first, then
    # outgoing) for the transferring-events lookup in a later cell.
    for direction in ('incoming_transactions', 'outgoing_transactions'):
        for blockchain, transactions in wallet[direction].items():
            ALL_TRANSACTION_HASHES[blockchain].extend(ts['hash'] for ts in transactions)

    ALL_WALLETS[wallet_address] = wallet

# default=str: values the json module cannot encode natively (for example
# database-specific id or date objects, if the documents contain any) are
# written as their string form. Without it the dump would abort with a
# TypeError and leave a truncated file behind.
with open('./data/wallets.json', 'w') as file:
    json.dump(ALL_WALLETS, file, default=str)

print(f'Queried wallets: {WALLET_ADDRESSES_QUERIES}')
print('Finished crawling wallets with transaction information')

For all crawled transactions, get the corresponding transferring events and store them in a JSON file.

In [None]:
print(f'Crawling transferring events for transactions in: {ALL_TRANSACTION_HASHES}')

# Result containers, rebuilt on every run of this cell. Keyed by
# SETTINGS['BLOCKCHAINS'] so the keys always match the loop below.
ALL_TRANSFERRING_EVENTS = {blockchain: [] for blockchain in SETTINGS['BLOCKCHAINS']}
ALL_SMART_CONTRACTS_ADDRESSES = []

for blockchain in SETTINGS['BLOCKCHAINS']:
    transaction_hashes = ALL_TRANSACTION_HASHES[blockchain]
    if not transaction_hashes:
        # Nothing to look up; an empty tuple would also produce an invalid
        # "IN ()" clause.
        continue
    # The schema name comes from the notebook's own SETTINGS (not from external
    # input), which is why it is interpolated into the statement directly. The
    # hashes themselves are passed as a bound parameter.
    chainID = SETTINGS['BLOCKCHAIN_TO_CHAIN_MAP'][blockchain]
    transferring_events_query = f"SELECT * FROM {chainID}.token_transfer WHERE transaction_hash IN %s"
    # The context manager closes the cursor even when the query fails.
    with transferring_events_postgresql_connection.cursor() as cursor:
        cursor.execute(transferring_events_query, (tuple(transaction_hashes),))
        result = cursor.fetchall()
    # NOTE(review): assumes the first column of token_transfer is the smart
    # contract address — confirm against the table definition.
    ALL_SMART_CONTRACTS_ADDRESSES.extend(item[0] for item in result)
    ALL_TRANSFERRING_EVENTS[blockchain] = result

# Mode 'w' (not 'a'): appending on a re-run would write a second JSON document
# into the same file and make it unparsable.
# default=str: column values the json module cannot encode natively are written
# as their string form instead of aborting the dump with a TypeError.
with open('./data/transferring_events.json', 'w') as file:
    json.dump(ALL_TRANSFERRING_EVENTS, file, default=str)

print(f'Queried transferring events: {ALL_TRANSFERRING_EVENTS}')
print('Finished crawling transferring events')

For each crawled transferring event, get the corresponding smart contract and store all of them in a JSON file.

In [None]:
print(f'Crawling smart contracts for smart contract addresses in: {ALL_SMART_CONTRACTS_ADDRESSES}')

knowledge_graph_db = knowledge_graph_mongo_db_client['knowledge_graph']

# The address list holds one entry per transferring event, so the same address
# can occur many times. dict.fromkeys removes the repeats while keeping the
# original order; the set of matching documents is unchanged.
query = {
    'address': {'$in': list(dict.fromkeys(ALL_SMART_CONTRACTS_ADDRESSES))}
}

# Wrap the matching documents in a single-key dict so the exported file has a
# JSON object at the top level.
smart_contracts = {
    'smart_contracts': list(knowledge_graph_db.smart_contracts.find(query))
}

# Mode 'w' (not 'a'): appending on a re-run would write a second JSON document
# into the same file and make it unparsable.
# default=str: values the json module cannot encode natively are written as
# their string form instead of aborting the dump with a TypeError.
with open('./data/smart_contracts.json', 'w') as file:
    json.dump(smart_contracts, file, default=str)

print(f'Queried smart contracts: {smart_contracts}')
print('Finished crawling smart contracts')