In [None]:
from dateutil import parser
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker

def format_seconds_to_hms(x, pos):
    """Render a duration given in seconds as a zero-padded ``HH:MM`` label.

    The ``(x, pos)`` signature is the one ``ticker.FuncFormatter`` calls with;
    ``pos`` is accepted but never used.

    Despite the name, seconds are not part of the output: they are dropped
    (truncated, not rounded). Hours are not wrapped at 24, so a long duration
    renders as e.g. ``"27:30"``.
    """
    whole_hours, leftover_seconds = divmod(x, 3600)
    whole_minutes = leftover_seconds // 60
    return f"{int(whole_hours):02}:{int(whole_minutes):02}"

# Two size samples, each laid out as [db disk size, live data disk size].
# NOTE(review): presumably taken before/after the db format upgrade timed
# below, and presumably in GB — neither is stated here, confirm.
# Neither list is referenced again in the visible cells.
db_size_before = [269.3, 268.4]
db_size_after = [269, 268.9]

# timestamps = [ 
#     parser.isoparse(t) for t in [
#         "2025-04-12T17:42:26.839077Z", 
#         "2025-04-12T19:14:42.767906Z", 
#         "2025-04-12T21:14:50.559037Z", 
#         "2025-04-12T23:45:39.764118Z", 
#         "2025-04-14T18:47:04.191614Z", 
#         "2025-04-15T04:43:33.471736Z",
#         "2025-04-15T08:05:19.101426Z"
#     ]
# ]

def _parse_iso_stamps(*raw_stamps):
    """Parse ISO-8601 strings with ``parser.isoparse``, preserving order."""
    return [parser.isoparse(raw) for raw in raw_stamps]


# Checkpoints of the run plotted below as "Full sync (old format)".
timestamps = _parse_iso_stamps(
    "2025-05-29T07:02:58.364888Z",
    "2025-05-29T08:41:46.010495Z",
    "2025-05-29T10:47:09.806158Z",
    "2025-05-29T13:22:47.045034Z",
    "2025-05-30T16:24:04.835977Z",
    "2025-05-30T20:15:31.180192Z",
    "2025-05-30T22:49:32.019318Z",
)

# The next two lists are combined index by index further down: their elapsed
# times are added together to form the "db format upgrade" series.
db_timestamps = _parse_iso_stamps(
    "2025-05-31T09:42:35.597944Z",
    "2025-05-31T09:43:13.493703Z",
    "2025-05-31T09:47:58.264654Z",
    "2025-05-31T13:08:09.070503Z",
    "2025-05-31T13:26:11.908590Z",
    "2025-05-31T13:30:53.319793Z",
)

verify_timestamps = _parse_iso_stamps(
    "2025-05-31T13:30:53.319822Z",
    "2025-05-31T13:32:02.187191Z",
    "2025-05-31T13:37:06.205567Z",
    "2025-05-31T19:47:07.857120Z",
    "2025-05-31T19:51:54.342540Z",
    "2025-05-31T19:53:13.971202Z",
)

# Checkpoints of the run plotted below as "Full sync (new format)".
new_timestamps = _parse_iso_stamps(
    "2025-05-25T13:40:53.824740Z",
    "2025-05-25T15:18:10.188536Z",
    "2025-05-25T17:25:17.486057Z",
    "2025-05-25T20:18:57.448569Z",
    "2025-05-26T23:13:08.831277Z",
    "2025-05-27T03:02:16.977734Z",
    "2025-05-27T05:33:27.581280Z",
)

def _seconds_since(origin, stamps):
    """Return the elapsed seconds from ``origin`` to each entry of ``stamps``."""
    return [(stamp - origin).total_seconds() for stamp in stamps]


# Every series is measured from its own first checkpoint.
start, db_start, verify_start, new_start = (
    timestamps[0],
    db_timestamps[0],
    verify_timestamps[0],
    new_timestamps[0],
)

deltas = _seconds_since(start, timestamps)
new_deltas = _seconds_since(new_start, new_timestamps)
# Per checkpoint index: elapsed time of the db list plus elapsed time of the
# verify list (zip stops at the shorter of the two).
db_deltas = [
    db_elapsed + verify_elapsed
    for db_elapsed, verify_elapsed in zip(
        _seconds_since(db_start, db_timestamps),
        _seconds_since(verify_start, verify_timestamps),
    )
]

# Block heights matching the checkpoints above, one entry per timestamp.
_half_million_marks = list(range(500_000, 2_500_001, 500_000))
block_count = [0, *_half_million_marks, 2_942_449]
db_block_count = [903_000, *_half_million_marks[1:], 2_942_861]
new_block_count = [0, *_half_million_marks, 2_938_181]

fig, ax = plt.subplots(figsize=(8, 6))

# (x values, y values, legend label) for each timing series, in draw order.
for heights, elapsed, series_label in (
    (block_count, deltas, "Full sync (old format)"),
    (new_block_count, new_deltas, "Full sync (new format)"),
    (db_block_count, db_deltas, "db format upgrade"),
):
    ax.plot(heights, elapsed, marker='o', linestyle='-', label=series_label)

# x axis: block heights with thousands separators.
ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
# y axis: seconds shown as HH:MM, one major tick every 10800 s (3 hours).
ax.yaxis.set_major_locator(ticker.MultipleLocator(10800))
ax.yaxis.set_major_formatter(ticker.FuncFormatter(format_seconds_to_hms))

ax.grid(True)
ax.legend()
ax.set_xlabel("Block height")
ax.set_ylabel("Time elapsed")
fig.tight_layout()
plt.show()

In [None]:
from zcash_client import ZcashClient
from flyclient import FlyclientProof
import os

client = ZcashClient.from_conf('zcash.conf')
# ACTIVATION_HEIGHT = 903000
START_HEIGHT = 904000
CHAINTIP = 3000000

# Zcash hourly number of blocks mined
Dh = 48
# Estimated cost of a 51% attack on Zcash per hour
C51h = 2624 # USD

bandwidth_cols = ['height', 'cache_nodes', 'derive_parents', 'size']
attack_cols = ['height', 'c', 'L', 'size', 'attack_cost']

# Bandwidth experiment: for each chain tip (every 50000 blocks) and each
# combination of the two download optimizations, build 30 proofs and record
# their total download size. Results are cached as CSV, so the experiment only
# runs when the file is missing.
BANDWIDTH_CSV = 'experiments/bandwidth_cost.csv'

if not os.path.isfile(BANDWIDTH_CSV):
    # Create the output directory up front so the final write cannot fail
    # after the whole experiment has already run.
    os.makedirs(os.path.dirname(BANDWIDTH_CSV), exist_ok=True)

    # Collect plain records and build the frame once. Growing a frame row by
    # row is quadratic, and starting from an empty frame leaves object-typed
    # columns, so a fresh run may not match the dtypes of the CSV reload path.
    bandwidth_records = []
    for h in range(START_HEIGHT, CHAINTIP, 50000):
        for cache_nodes in [True, False]:
            for derive_parents in [True, False]:
                # NOTE(review): the 30 repetitions presumably exist because
                # proof construction is randomized — confirm in FlyclientProof.
                for _trial in range(30):
                    proof = FlyclientProof(client, override_chain_tip=h, enable_logging=False)
                    bandwidth_records.append({
                        'height': h,
                        'cache_nodes': cache_nodes,
                        'derive_parents': derive_parents,
                        'size': proof.calculate_total_download_size_bytes(cache_nodes=cache_nodes, derive_parents=derive_parents),
                    })
    bandwidth_df = pd.DataFrame(bandwidth_records, columns=bandwidth_cols)
    bandwidth_df.to_csv(BANDWIDTH_CSV, index=False)
else:
    bandwidth_df = pd.read_csv(BANDWIDTH_CSV)

bandwidth_df

In [None]:
# Attack-cost experiment: for each chain tip (every 50000 blocks) and each
# (c, L) parameter pair, build 30 proofs and record their download size, then
# derive an attack cost from c, L, Dh and C51h. Results are cached as CSV, so
# the experiment only runs when the file is missing.
ATTACK_CSV = 'experiments/attack_cost.csv'

if not os.path.isfile(ATTACK_CSV):
    # Create the output directory up front so the final write cannot fail
    # after the whole experiment has already run.
    os.makedirs(os.path.dirname(ATTACK_CSV), exist_ok=True)

    # Collect plain records and build the frame once, so the columns get
    # numeric dtypes and the rounding below operates on real numbers.
    attack_records = []
    for h in range(START_HEIGHT, CHAINTIP, 50000):
        for c in [0.2, 0.35, 0.5, 0.65, 0.8]:
            # L values: 100, then 3, 4 and 5 times the hourly block count Dh.
            for L in [100, Dh * 3, Dh * 4, Dh * 5]:
                for _trial in range(30):
                    proof = FlyclientProof(client, override_chain_tip=h, enable_logging=False, c = c, L = L)
                    attack_records.append({
                        'height': h,
                        'c': c,
                        'L': L,
                        'size': proof.calculate_total_download_size_bytes(),
                    })
    # attack_cols fixes the column order; 'attack_cost' starts empty and is
    # filled in on the next line.
    attack_df = pd.DataFrame(attack_records, columns=attack_cols)
    # Hourly attack cost times c * L / Dh, rounded to whole USD.
    attack_df['attack_cost'] = np.around(C51h * (attack_df['c'] * attack_df['L'] / Dh)).astype(int)
    attack_df.to_csv(ATTACK_CSV, index=False)
else:
    attack_df = pd.read_csv(ATTACK_CSV)

attack_df

In [None]:
import seaborn as sns

# Divisor turning byte counts into MiB (2**20 bytes).
SCALING_FACTOR = 1 << 20
# Size in bytes of one block header, written as the sum of its field widths.
# NOTE(review): the breakdown presumably follows the Zcash block header
# layout — confirm against the protocol specification.
HEADER_SIZE = sum((4, 32, 32, 32, 4, 4, 32, 3, 1344))

# Work on a copy so the byte-valued bandwidth_df stays untouched.
scaled_df = bandwidth_df.copy()
scaled_df['size'] = scaled_df['size'] / SCALING_FACTOR

def apply_optimization(row):
    """Name the combination of optimizations enabled for one result row.

    ``row`` must support ``row['cache_nodes']`` and ``row['derive_parents']``.
    A flag counts as enabled only when it compares equal to ``True``.

    Returns one of ``'Both'``, ``'Cache nodes'``, ``'Derive parents'`` or
    ``'None'`` (the string, not the ``None`` singleton).
    """
    caching = bool(row['cache_nodes'] == True)
    deriving = bool(row['derive_parents'] == True)
    labels = {
        (True, True): 'Both',
        (True, False): 'Cache nodes',
        (False, True): 'Derive parents',
        (False, False): 'None',
    }
    return labels[(caching, deriving)]

# Tag every row with the name of its optimization combination.
scaled_df['optimization'] = scaled_df.apply(apply_optimization, axis=1)

plt.figure(figsize=(10, 6))

# One line per optimization combination, with 95% confidence bars.
ax = sns.lineplot(
    data=scaled_df,
    x='height',
    y='size',
    hue='optimization',
    style='optimization',
    markers=True,
    errorbar=('ci', 95),
    err_style='bars',
)

# Labelled vertical markers at fixed heights.
# NOTE(review): presumably network-upgrade activation heights — confirm.
for marker_height, marker_name, dash in (
    (903000, 'Heartwood', '-'),
    (1046400, 'Canopy', '--'),
    (1687104, 'NU5', '-.'),
    (2726400, 'NU6', ':'),
):
    ax.axvline(marker_height, color='green', label=marker_name, linestyle=dash)

ax.set_xlabel("Chain length (million blocks)")
ax.set_ylabel("Proof size (MiB)")
ax.set_title("Proof size by chain length (c = 0.5, L = 100)")

ax.set_xlim(0, CHAINTIP)
ax.locator_params(nbins=50, axis='x')

ax.figure.tight_layout()
ax.legend()
ax.grid()
plt.show()

In [None]:
plt.figure(figsize=(10, 6))

# Reference line labelled 'SPV client': HEADER_SIZE bytes per block of chain.
# The range follows CHAINTIP so it always matches the x-axis limit set below.
# It starts at 1 because a zero value cannot be drawn on the log-scaled y axis.
# A few thousand samples already give a smooth curve, so there is no need for
# one vertex per block.
x_vals = np.linspace(1, CHAINTIP, 2048)
y_vals = HEADER_SIZE * x_vals

plt.plot(x_vals, y_vals, color='red', label='SPV client')
# Flyclient series: only the runs with both optimizations enabled, in bytes
# (bandwidth_df is not scaled to MiB).
sns.lineplot(data=bandwidth_df.query("cache_nodes==True and derive_parents==True"), x='height', y='size', errorbar=('ci', 95), err_style='bars', markers=True, label='Flyclient')
plt.axvline(903000, color='green', label='Heartwood', linestyle='-')
plt.axvline(1046400, color='green', label='Canopy', linestyle='--')
plt.axvline(1687104, color='green', label='NU5', linestyle='-.')
plt.axvline(2726400, color='green', label='NU6', linestyle = ':')

plt.xlabel("Chain length (million blocks)")
plt.ylabel("Proof size (bytes)")
plt.title("Proof size by chain length (c = 0.5, L = 100)")

ax = plt.gca()
# Base-2 log scale keeps the linear reference line and the much smaller
# Flyclient sizes readable on the same axes.
ax.set_yscale('log', base=2)
ax.set_xlim(0, CHAINTIP)
ax.locator_params(nbins=50, axis='x')

plt.tight_layout()
plt.legend()
plt.grid()
plt.show()

In [None]:
# From here on scaled_df holds the attack-cost results (sizes in MiB); the
# name was previously bound to the scaled bandwidth results.
scaled_df = attack_df.copy()
scaled_df['size'] = scaled_df['size'] / SCALING_FACTOR

plt.figure(figsize=(10, 6))

# L selects colour and line width, c selects the dash style.
ax = sns.lineplot(
    data=scaled_df,
    x='height',
    y='size',
    hue='L',
    size='L',
    style='c',
    palette='crest',
    markers=False,
    errorbar=('ci', 95),
    err_style='bars',
)
ax.axvline(903000, color='green', linestyle='-')

ax.set_xlabel("Chain length (million blocks)")
ax.set_ylabel("Proof size (MiB)")
ax.set_title("Proof size by chain length and (c, L) values")

ax.set_xlim(0, CHAINTIP)
ax.locator_params(nbins=50, axis='x')

ax.figure.tight_layout()
ax.legend()
ax.grid()
plt.show()

In [None]:
# The style has to be selected before the figure is created; setting it after
# catplot() only affects figures drawn later.
sns.set_style('whitegrid')

# One bar panel per L, one bar per c. scaled_df is the attack-cost frame built
# in the preceding cell, so that cell must have been run most recently.
g = sns.catplot(data=scaled_df, x='c', col='L', hue='L', y='attack_cost', kind='bar', palette='flare', estimator='average', height=4, aspect=.6)

g.set_axis_labels("c", "Attack cost (USD)")

plt.show()

In [None]:
# The style has to be selected before the figure is created; setting it after
# catplot() only affects figures drawn later.
sns.set_style('whitegrid')

# Average every column per (c, L) pair. as_index=False keeps 'c' and 'L' as
# ordinary columns, so the plot below can reference them by name instead of
# relying on seaborn resolving index levels. groupby already returns a new
# frame, so scaled_df itself is not modified.
df = scaled_df.groupby(['c', 'L'], as_index=False).mean()
# Mean attack cost (USD) per mean proof size (MiB).
df['cost_ratio'] = df['attack_cost'] / df['size']

g = sns.catplot(data=df, x='c', col='L', hue='L', y='cost_ratio', kind='bar', palette='flare', estimator='average', height=4, aspect=.6)

g.set_axis_labels("c", "Attack cost/proof size ratio (USD/MiB)")

plt.show()