
# Zcash Observatory data

Mitchell Krawiec-Thayer and Pranav Thirunavukkarasu

June 2020

Observatory R & D at Insight supported by the Zcash Foundation


## Parameters

In [1]:
# Root directory that is walked (os.walk) to discover the per-node data folders
path_to_files = '.'

## Import libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os;

## Import data

In [3]:
# Path of every directory reachable from path_to_files (the root itself is included)
folders = [dirpath for dirpath, _subdirs, _files in os.walk(path_to_files)]

In [4]:
# Build one per-block timing table per node folder, then align all nodes on block hash.
node_names = list()
node_frames = list()
for this_folder_raw in folders:
    # Node name = folder path relative to the search root (e.g. './mumbai' -> 'mumbai'),
    # so this keeps working when path_to_files is something other than '.'.
    node_name = os.path.relpath(this_folder_raw, path_to_files)
    # Skip the root itself ('.') and hidden folders such as '.git' or '.ipynb_checkpoints'.
    if node_name.startswith('.'):
        continue
    file_name = os.path.join(this_folder_raw, 'inv_v1.csv')
    # os.walk also yields nested sub-folders; only folders that actually hold a log are nodes.
    if not os.path.isfile(file_name):
        continue
    this_df = pd.read_csv(file_name)

    # Smallest and largest Validated_Time recorded by this node for each block hash.
    # Only the time column is aggregated; Peer_IP was never kept in the result.
    times_by_hash = this_df.groupby('Hash')['Validated_Time']
    single_node_combined = pd.DataFrame({
        node_name + '_min': times_by_hash.min(),
        node_name + '_max': times_by_hash.max(),
    })
    # Spread between the first and last record of the block at this node
    # (same units as Validated_Time -- presumably seconds; TODO confirm).
    single_node_combined[node_name + '_prop_time'] = (
        single_node_combined[node_name + '_max'] - single_node_combined[node_name + '_min']
    )

    node_names.append(node_name)
    node_frames.append(single_node_combined)

if not node_frames:
    # Fail loudly instead of leaving global_df undefined (or stale from an earlier run).
    raise FileNotFoundError(
        "No node folders containing 'inv_v1.csv' were found under " + repr(path_to_files)
    )

# Single outer-joined concat on the block-hash index (sorted); blocks a node never
# saw become NaN in that node's columns.
global_df = pd.concat(node_frames, axis=1, sort=True)
        

In [10]:
# Preview the combined table with the index axis labelled 'block_hash'.
# The earlier rename(index={'': 'block_hash'}) discarded its result and only tried to
# relabel a row called '', so it had no effect. rename_axis labels the axis itself and
# returns a new frame, leaving global_df untouched for later cells.
global_df.head().rename_axis('block_hash')

Unnamed: 0,mumbai_min,mumbai_max,mumbai_prop_time,virginia_min,virginia_max,virginia_prop_time,london_min,london_max,london_prop_time,africa_min,africa_max,africa_prop_time,global_min,global_max,global_prop_time
000000000000b86df2619ffbde1d3de76bdf1cc9c73b4acc91dccad6f234ec71,1590722000.0,1590722000.0,27.129,1590722000.0,1590722000.0,119.911,1590722000.0,1590722000.0,26.983,1590722000.0,1590722000.0,27.092,1590722000.0,1590722000.0,119.964
000000000000bc65600076a474767b0201c02fac53adb217dcafb8f44a3de6e4,1590754000.0,1590754000.0,12.173,1590754000.0,1590754000.0,12.125,1590754000.0,1590754000.0,12.103,1590754000.0,1590754000.0,8.697,1590754000.0,1590754000.0,12.576
000000000000c32bff2dcb5497f93438548bf10ef1e7ac541b0b0e81a100e1f1,1590774000.0,1590774000.0,61.857,1590774000.0,1590774000.0,187.656,1590774000.0,1590774000.0,61.846,1590774000.0,1590774000.0,162.4,1590774000.0,1590774000.0,187.729
0000000000016608d92021f3056e25742fc063ff7409fbd80f9cf0d290516a16,1590711000.0,1590711000.0,5.704,1590711000.0,1590711000.0,3.02,1590711000.0,1590711000.0,5.696,1590711000.0,1590711000.0,4.58,1590711000.0,1590711000.0,5.745
000000000002dd1be009ee3ee605f3c109ed3cc5514772f1169a7766e94bf77b,1590719000.0,1590720000.0,269.665,1590719000.0,1590719000.0,3.149,1590719000.0,1590720000.0,269.792,1590719000.0,1590719000.0,4.153,1590719000.0,1590720000.0,269.836


## Feature engineering

In [6]:
# Network-wide first/last sighting of every block, taken across all nodes.
min_cols = [name + "_min" for name in node_names]
max_cols = [name + "_max" for name in node_names]

# Row-wise min/max skip NaN, so a block that some node never saw still gets a
# well-defined value. The built-in min()/max() used previously gave results that
# depended on where the NaN sat in the list, and Series[int] on a string-labelled
# index relied on a deprecated positional fallback.
global_df['global_min'] = global_df[min_cols].min(axis=1)
global_df['global_max'] = global_df[max_cols].max(axis=1)
global_df['global_prop_time'] = global_df['global_max'] - global_df['global_min']

# Kept as plain lists in case a later cell still reads these names.
min_list = global_df['global_min'].tolist()
max_list = global_df['global_max'].tolist()
prop_list = global_df['global_prop_time'].tolist()

In [7]:
# Preview the first five rows, now including the global_* columns
global_df.head(n=5)

Unnamed: 0,mumbai_min,mumbai_max,mumbai_prop_time,virginia_min,virginia_max,virginia_prop_time,london_min,london_max,london_prop_time,africa_min,africa_max,africa_prop_time,global_min,global_max,global_prop_time
000000000000b86df2619ffbde1d3de76bdf1cc9c73b4acc91dccad6f234ec71,1590722000.0,1590722000.0,27.129,1590722000.0,1590722000.0,119.911,1590722000.0,1590722000.0,26.983,1590722000.0,1590722000.0,27.092,1590722000.0,1590722000.0,119.964
000000000000bc65600076a474767b0201c02fac53adb217dcafb8f44a3de6e4,1590754000.0,1590754000.0,12.173,1590754000.0,1590754000.0,12.125,1590754000.0,1590754000.0,12.103,1590754000.0,1590754000.0,8.697,1590754000.0,1590754000.0,12.576
000000000000c32bff2dcb5497f93438548bf10ef1e7ac541b0b0e81a100e1f1,1590774000.0,1590774000.0,61.857,1590774000.0,1590774000.0,187.656,1590774000.0,1590774000.0,61.846,1590774000.0,1590774000.0,162.4,1590774000.0,1590774000.0,187.729
0000000000016608d92021f3056e25742fc063ff7409fbd80f9cf0d290516a16,1590711000.0,1590711000.0,5.704,1590711000.0,1590711000.0,3.02,1590711000.0,1590711000.0,5.696,1590711000.0,1590711000.0,4.58,1590711000.0,1590711000.0,5.745
000000000002dd1be009ee3ee605f3c109ed3cc5514772f1169a7766e94bf77b,1590719000.0,1590720000.0,269.665,1590719000.0,1590719000.0,3.149,1590719000.0,1590720000.0,269.792,1590719000.0,1590719000.0,4.153,1590719000.0,1590720000.0,269.836
