# Zcash Observatory data

Mitchell Krawiec-Thayer and Pranav Thirunavukkarasu

April 2020

Observatory R & D at [Insight](https://www.insightconsensus.com) supported by the [Zcash Foundation](https://grants.zfnd.org/proposals/21786689)

## Analysis Parameters

In [1]:
# Location of the block-data CSV analysed in this notebook.
path_to_block_data = "blocks_20200408.csv"

## Import libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Import data

In [3]:
# Read the block observations from the file named in the "Analysis Parameters"
# cell (path_to_block_data) instead of repeating the filename here, so that
# pointing the notebook at a different export is a one-line change.
df = pd.read_csv(path_to_block_data)
df.describe()  # High-level summary of the numeric columns

Unnamed: 0,block_height,miner_time,valid_time
count,10281.0,10281.0,10281.0
mean,781900.051649,1585770000.0,1585770000000.0
std,3434.295589,258722.9,258750000.0
min,776372.0,1585354000.0,1585354000000.0
25%,778941.0,1585547000.0,1585547000000.0
50%,781506.0,1585741000.0,1585741000000.0
75%,785262.0,1586023000.0,1586024000000.0
max,787835.0,1586218000.0,1586219000000.0


In [4]:
df.head() # Take a peek to make sure it imported correctly

Unnamed: 0,block_height,block_hash,prev_hash,miner_target,miner_time,valid_time,transaction_hashes,inv_timestamps
0,777350.0,00000000014a976f53c53c63a2d52c0246e4c9ecf260b9...,0000000001afacebda04eb4ef20b46b34715d042473409...,0000000002ffff00000000000000000000000000000000...,1585428000.0,1585428000000.0,269090c8fa46f72c3090803a7ec5231d3e3f73c0035f3d...,1585427780187;1585427780195;1585427780309;1585...
1,776540.0,00000000016dbb649d01642fb8603d9cfb3337a38c3c36...,00000000008855524979caa571b3fe7031d3e769eb6190...,0000000002fc4d00000000000000000000000000000000...,1585366000.0,1585366000000.0,bd093b58b615184b82c4d7d4c1395cf0e6477470830fa3...,1585366448977;1585366448944;1585366449144;1585...
2,777066.0,00000000007826f5d75ea2268aa6703ff3087a1419daa7...,00000000007b4d4a898c18c291243e39998e52bcf67795...,0000000002e0d600000000000000000000000000000000...,1585406000.0,1585406000000.0,09821279cf3b13cac66fe4b6e57f3778f4621cee083a02...,1585406005599;1585406005554;1585406005524;1585...
3,777264.0,00000000031fac7a7d902a87dc887375b83fd64466304a...,00000000002fc315a0c67eb4b1c26e7c8b6f5055e3f1cf...,00000000033fdd00000000000000000000000000000000...,1585422000.0,1585422000000.0,0e61aaff225ead8a7d175853a2a9c161e1765bae525bbf...,1585421660806;1585421660865;1585421660793;1585...
4,777150.0,0000000002a6698519dc8abf12e82d2d74e4d4518243d4...,0000000000bb617c3c9603ba991dce9feb5df9265697ed...,0000000002c29f00000000000000000000000000000000...,1585412000.0,1585412000000.0,a08776d8cdeafbc4f675fcaf5a9dd73bc509249fff6d25...,1585412301058;1585412301031;1585412301126;1585...


## Feature Engineering

In [5]:
def _summarize_inv_timestamps(raw_timestamps):
    """Summarise the `inv_timestamps` cell of a single block row.

    Parameters
    ----------
    raw_timestamps : str, number, NaN or None
        Either a ';'-separated string of integer timestamps, a bare number
        (when the cell was parsed as numeric), or NaN/None for an empty cell.

    Returns
    -------
    tuple
        ``(peer_count, prop_envelope_ms)``:

        * string cell  -> (number of timestamps, max - min of the timestamps)
        * numeric cell -> (1, NaN)   -- a single copy has no spread to measure
        * empty cell   -> (NaN, NaN) -- block was not caught live from any peer

    Raises
    ------
    ValueError
        If a piece of a string cell cannot be converted with ``int``.
    """
    if isinstance(raw_timestamps, str):
        peer_timestamps = [int(stamp) for stamp in raw_timestamps.split(';')]
        # Spread between the first and the last timestamp in the cell.
        return len(peer_timestamps), max(peer_timestamps) - min(peer_timestamps)
    if pd.isna(raw_timestamps):
        return np.nan, np.nan
    # NOTE(review): a lone timestamp stored as a *string* takes the first branch
    # and gets an envelope of 0, whereas a lone numeric value gets NaN here.
    # Kept as-is to preserve existing results -- confirm which is intended.
    return 1, np.nan


# --- Rows per block height ---------------------------------------------------
# Count every height once and look the result up per row, rather than
# re-filtering the whole frame for each row (which is quadratic in len(df)).
block_heights = df['block_height']
dupe_counts = (
    block_heights.map(block_heights.value_counts())
    .fillna(0)  # a NaN height equals nothing, so it counts as 0 rows
    .astype('int64')
)
dupe_count_buffer = dupe_counts.tolist()

# Heights that appear on more than one row. This is a set, so each height is
# stored only once no matter how many rows share it.
fork_heights = {
    int(height) for height in block_heights[(dupe_counts > 1).to_numpy()]
}

# --- Per-row peer statistics and apparent latency ----------------------------
apparent_latency_buffer = []
peer_count_buffer = []
prop_envelope_buffer = []

for raw_timestamps, miner_time, valid_time in zip(
    df['inv_timestamps'], df['miner_time'], df['valid_time']
):
    peer_count, prop_envelope = _summarize_inv_timestamps(raw_timestamps)
    peer_count_buffer.append(peer_count)
    prop_envelope_buffer.append(prop_envelope)

    # Difference between miner-reported (spoofable) timestamp in the block, and
    # when the observatory node received it. miner_time is scaled by 1000 to
    # match valid_time before subtracting.
    missed_block = pd.isna(peer_count)
    latency = valid_time - 1000 * miner_time
    if missed_block or pd.isna(latency):
        # No live observation, or a missing time value: record NaN instead of
        # letting int(NaN) raise ValueError.
        apparent_latency_buffer.append(np.nan)
    else:
        apparent_latency_buffer.append(int(latency))

# Assign plain lists so values are attached by position, as before.
df['dupe_count'] = dupe_count_buffer
df['apparent_latency_ms'] = apparent_latency_buffer
df['peer_count'] = peer_count_buffer
df['prop_envelope_ms'] = prop_envelope_buffer
df.head()

Unnamed: 0,block_height,block_hash,prev_hash,miner_target,miner_time,valid_time,transaction_hashes,inv_timestamps,dupe_count,apparent_latency_ms,peer_count,prop_envelope_ms
0,777350.0,00000000014a976f53c53c63a2d52c0246e4c9ecf260b9...,0000000001afacebda04eb4ef20b46b34715d042473409...,0000000002ffff00000000000000000000000000000000...,1585428000.0,1585428000000.0,269090c8fa46f72c3090803a7ec5231d3e3f73c0035f3d...,1585427780187;1585427780195;1585427780309;1585...,1,48276.0,9.0,122.0
1,776540.0,00000000016dbb649d01642fb8603d9cfb3337a38c3c36...,00000000008855524979caa571b3fe7031d3e769eb6190...,0000000002fc4d00000000000000000000000000000000...,1585366000.0,1585366000000.0,bd093b58b615184b82c4d7d4c1395cf0e6477470830fa3...,1585366448977;1585366448944;1585366449144;1585...,1,28106.0,6.0,200.0
2,777066.0,00000000007826f5d75ea2268aa6703ff3087a1419daa7...,00000000007b4d4a898c18c291243e39998e52bcf67795...,0000000002e0d600000000000000000000000000000000...,1585406000.0,1585406000000.0,09821279cf3b13cac66fe4b6e57f3778f4621cee083a02...,1585406005599;1585406005554;1585406005524;1585...,1,11679.0,9.0,194.0
3,777264.0,00000000031fac7a7d902a87dc887375b83fd64466304a...,00000000002fc315a0c67eb4b1c26e7c8b6f5055e3f1cf...,00000000033fdd00000000000000000000000000000000...,1585422000.0,1585422000000.0,0e61aaff225ead8a7d175853a2a9c161e1765bae525bbf...,1585421660806;1585421660865;1585421660793;1585...,1,29854.0,10.0,143.0
4,777150.0,0000000002a6698519dc8abf12e82d2d74e4d4518243d4...,0000000000bb617c3c9603ba991dce9feb5df9265697ed...,0000000002c29f00000000000000000000000000000000...,1585412000.0,1585412000000.0,a08776d8cdeafbc4f675fcaf5a9dd73bc509249fff6d25...,1585412301058;1585412301031;1585412301126;1585...,1,16196.0,9.0,231.0


## Visualizations