In [1]:
# Imports
from google.cloud import storage
import os
import json
import subprocess 
from pathlib import Path
from glob import glob 
import time
import sys
import pyrebase

In [2]:
# Synchronize local block collection with Google Cloud bucket
bucket_name = "mina_mainnet_blocks"
output_dir = "mina_mainnet_blocks"
def download_blocks():
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    command = f"gsutil -m rsync -r gs://{bucket_name} {output_dir}"
    print(f"Running command: {command}")
    sync_blocks = subprocess.run(command.split(), stdout=subprocess.PIPE, text=True)

    return glob(output_dir + "/*")


start = time.time()
files = download_blocks()
end = time.time()
print(f"{len(files)} Blocks Synchronized in {end-start} seconds")

Running command: gsutil -m rsync -r gs://mina_mainnet_blocks mina_mainnet_blocks
22937 Blocks Synchronized in 159.5471749305725 seconds


In [3]:
# Load into memory blocks
# This will also trim down block content
# If you're looking for something in a block and can't find it, this is (probably) why
def load_blocks(block_file_list):
    blocks = {}
    
    for file in block_file_list:
        state_hash = file.split("-")[1].split(".")[0]
        with open(file, "r", encoding = "ISO-8859-1") as json_file:
            contents = json_file.read()
            block = json.loads(contents)
            
            # Data is trimmed here, key structure left intact 
            # Kinda messy, but saves a LOT of memory and a fair bit of process time
            timestamp = block["scheduled_time"]
            previous_state_hash = block["protocol_state"]["previous_state_hash"]
            creator = block["protocol_state"]["body"]["consensus_state"]["block_creator"]
            height = block["protocol_state"]["body"]["consensus_state"]["global_slot_since_genesis"]
            charged = block["protocol_state"]["body"]["consensus_state"]["supercharge_coinbase"]
            block.clear()
            block["protocol_state"] = {}
            block["protocol_state"]["body"] = {}
            block["protocol_state"]["body"]["consensus_state"] = {}
            block["scheduled_time"] = timestamp
            block["protocol_state"]["previous_state_hash"] = previous_state_hash
            block["protocol_state"]["body"]["consensus_state"]["block_creator"] = creator
            block["protocol_state"]["body"]["consensus_state"]["global_slot_since_genesis"] = height
            block["protocol_state"]["body"]["consensus_state"]["supercharge_coinbase"] = charged

            blocks[state_hash] = block
            
    return blocks

start = time.time()
blocks = load_blocks(files)
end = time.time()
print(f"Parsed {len(blocks.keys())} Blocks in {end-start} seconds")

Parsed 22647 Blocks in 127.98336863517761 seconds


In [4]:
# Generate node mapping
from graph_tool import Graph
from graph_tool.draw import graph_draw, graphviz_draw, arf_layout, fruchterman_reingold_layout, sfdp_layout
g = Graph()
vertices = {}
# Retain hash data in vertex for later lookup
vertex_hash = g.new_vertex_property('string')

for state_hash in __builtins__.list(blocks.keys()): 
    block = blocks[state_hash]
    previous_state_hash = block["protocol_state"]["previous_state_hash"]
    # Add a node for this block
    if state_hash not in vertices:
        vertices[state_hash] = g.add_vertex()
        vertex_hash[vertices[state_hash]] = state_hash
    if previous_state_hash not in vertices: 
        vertices[previous_state_hash] = g.add_vertex()
        vertex_hash[vertices[previous_state_hash]] = previous_state_hash
    g.add_edge(vertices[state_hash], vertices[previous_state_hash])

In [5]:
# Determine node coordinates for visual display
# Force-Directed Layout
pos = sfdp_layout(g, p=4)

In [6]:
# Determine where the "endpoint" vertices are in our node network (forks + canonical endpoint)
in_degrees = g.get_in_degrees(g.get_vertices())
endpoints = set()
for vertex, in_degree in enumerate(in_degrees):
    if in_degree == 0:
        endpoints.add(vertex)

In [7]:
# For each endpoint, do checks to determine which is canonical
canonical = ('',0)

# Check #1: blockHeight comparison via block data
def heightCompare(blockList):
    longest = 0
    for index, block in enumerate(blockList):
        length = int(blocks[vertex_hash[block]]["protocol_state"]["body"]["consensus_state"]["global_slot_since_genesis"])
        if (length > longest):
            longest = length
            chainResult = (vertex_hash[block], block)
    return chainResult
        
# Check #2: Manually counting back each endpoint through the blockchain to find longest chain :V
# Essentually manually verifying the data stored in the blockchain - guaranteed to work, but SLOW
def crawl(block, count):
    count += 1
    node = g.vertex(block)
    if(sum(1 for neighbor in node.out_neighbors()) == 0):
        return count
    for neighbor in node.out_neighbors():
        return crawl(neighbor, count)

def manualCompare(blockList):
    longest = 0
    for block in blockList:
        length = crawl(block, 0)
        if(length > longest):
            longest = length
            manualResult = (vertex_hash[block], block)
    return manualResult

# Check #3: 

start = time.time()
canonical = heightCompare(endpoints)
end = time.time()
print(f"Found canonical chain using blockchain data at Node {canonical[1]} in {end-start} seconds")

Found canonical chain using blockchain data at Node 7578 in 0.028016328811645508 seconds


In [8]:
# Color our graph in a visually pleasing manner
# This is also used to paint nodes as "canonical" for use in sorting
blue = (0,0,1,1)
red = (1,0,0,1)
green = (0,1,0,1)
vertex_canon = g.new_vertex_property('bool')
vertex_color = g.new_vertex_property('vector<double>')
g.vertex_properties['vertex_canon'] = vertex_canon
g.vertex_properties['vertex_color'] = vertex_color

# Starts at the canonical node and paints anything in the canonical chain green
def colorCrawl(index):
    node = g.vertex(index)
    vertex_color[node] = green
    vertex_canon[node] = True
    if(sum(1 for neighbor in node.out_neighbors()) == 0):
        vertex_color[node] = blue # mark genesis node as blue
        return 0
    for neighbor in node.out_neighbors():
        return colorCrawl(neighbor)

# Paint every node red, then paint canonical nodes green
for v in g.vertices():
    vertex_color[v] = red
    vertex_canon[v] = False
    
# This is a recursive crawl - it's a BIG recursive crawl
# ENTERING THE DANGER ZONE
sys.setrecursionlimit(1000000) # Number must be higher than the canonical chain at all times

start = time.time()
colorCrawl(canonical[1])
end = time.time()
print(f"Recursively painted canonical chain in {end-start} seconds")

# Reset recursion limit
sys.setrecursionlimit(1000)

Recursively painted canonical chain in 0.27088212966918945 seconds


In [9]:
# Sort non-canonical chains into an array of forks for analysis
# 'forks' contains ALL forks
# 'shame' contains only forks longer than 5 blocks within the past seven days
# Remember, all data can be retrieved for a given block in a fork by calling on the list 'blocks'
forks = []
shame = []
last_week = (int(time.time())-604800)*1000
       
def forkCrawl(block, container):
    node = g.vertex(block)
    if vertex_canon[node]:
        forks.append(container)
        return 0
    else:
        container.append(vertex_hash[node])
        for neighbor in node.out_neighbors():
            return forkCrawl(neighbor, container)

for vertex in endpoints:
    container = []
    forkCrawl(vertex, container)
    
for fork in forks:
    for block in fork:
        if (int(blocks[block]["scheduled_time"]) > last_week and len(fork) > 5):
            shame.append(fork)

print(f"Processed {len(forks)} forks total, {len(shame)} of which are from the past week.")

Processed 9972 forks total, 69 of which are from the past week.


In [23]:
# Metadata calculations/analysis
# We now have our forks all nicely organized, and can gather metadata properly now
# Shove it in an array to send to database in next cell
staging = []

# Sort every single fork for database update
for fork in forks:
    forkdata = {}
    forkdata["length"] = len(fork)
    forkdata["blocks"] = []
    forkdata["creators"] = []
    forkdata["rewards"] = 0
    forkdata["latest"] = ['',0]
    for block in fork:
        # Blocks and Creators share an index. Block at index 0 was created by Creator at index 0, etc.
        forkdata["blocks"].append(block)
        forkdata["creators"].append(blocks[block]["protocol_state"]["body"]["consensus_state"]["block_creator"])
        if blocks[block]["protocol_state"]["body"]["consensus_state"]["supercharge_coinbase"]:
            forkdata["rewards"] += 1440
        else:
            forkdata["rewards"] += 720
        # Get most recent fork and use its hash + timestamp for the "ID"
        if int(blocks[block]["scheduled_time"]) > int(forkdata["latest"][1]):
            forkdata["latest"] = [block, blocks[block]["scheduled_time"]]
                
    staging.append(forkdata)
    

In [24]:
# Store all Metadata to Firebase DB:
fireconfig = {
  "apiKey": APIKEY,
  "authDomain": AUTHDOMAIN,
  "databaseURL": DATABASEURL,
  "storageBucket": BUCKET
}
firebase = pyrebase.initialize_app(fireconfig)
auth = firebase.auth()
user = auth.sign_in_with_email_and_password(USERNAME, PASSWORD)
user = auth.refresh(user['refreshToken'])
db = firebase.database()
db_json = db.child("forks").get(user["idToken"]).val()

# Only applies if the database is uninitialized/doesn't exist
# Directly populate the database with all data in the chain
if db_json is None:
    for fork in staging:
        db.child("forks").push(fork, user["idToken"])
    db_json = db.child("forks").get(user["idToken"]).val()
    
start = time.time()
initdb = len(db_json)

# Iterate through database, pruning deprecated content as we go
for unique in db_json:
    prune = True
    for fork in staging:
        if db_json[unique]["latest"] == fork["latest"]:
            prune = False
            break
    if prune:
        db.child("forks").child(unique).remove(user["idToken"])

# Fluff for a nice printout, optional
db_json = db.child("forks").get(user["idToken"]).val()
middb = len(db_json)
end = time.time()
print(f"Pruned {initdb-middb} deprecated forks from the database in {end-start} seconds.")

start = time.time()

# Iterate through forks, adding new content to the database as we go
for fork in staging:
    new_fork = True
    for unique in db_json:
        if fork["latest"] == db_json[unique]["latest"]:
            new_fork = False
            break
    if new_fork:
        db.child("forks").push(fork, user["idToken"])
        
# More fluff
db_json = db.child("forks").get(user["idToken"]).val()
finaldb = len(db_json)
end = time.time()
print(f"Added {(finaldb-middb)-(initdb-middb)} new forks to the database in {end-start} seconds.")

Pruned 0 deprecated forks from the database in 8.08246922492981 seconds.
Added 0 new forks to the database in 9.898189783096313 seconds.
