In [1]:
import clickhouse_connect
import pandas as pd
import polars as pl

# Display options: show up to 200 characters per string cell and print floats
# in full. Each setter returns the Config class, so the calls can be chained
# and the cell still ends on the same value.
pl.Config.set_fmt_str_lengths(200).set_fmt_float("full")

polars.config.Config

### Direct connection to Cryptohouse
Connect to Cryptohouse with the `clickhouse_connect` client to do some initial querying and result aggregation.


### Limitations
- Can only query 1000 blocks; the direct access limits are still being figured out.


In [2]:
# Open a TLS-secured client session against the Cryptohouse ClickHouse host.
# The `crypto` user is used with an empty password.
client = clickhouse_connect.get_client(
    host="crypto-clickhouse.clickhouse.com",
    secure=True,
    username="crypto",
    password="",
)

In [3]:
# Query text only -- it is executed further down via `client.query_df`.
# Selects recent transactions whose `accounts` array contains the Zeta DEX or
# Pyth price-feed address, ARRAY JOINed so that each result row carries one
# element of `accounts` as `account`.
# All in-SQL comments use `--`; `//` is not a ClickHouse comment marker.
query: str = """
SELECT
  *
FROM
  solana.transactions
  ARRAY JOIN accounts AS account
WHERE
  arrayExists(
    x -> x.1 IN [
        'Fc8bpeCMifWYv97pQ3k5xDvd98nuVg6yAaZrwmy4RRp6', -- zeta dex address
        'pythWSnswVUd12oZpeFP8e9CVaEqJg25g1Vtc2biRsT' -- pyth price feed
    ],
    accounts
  )
  -- The block_slot range 282,300,000 to 282,900,000 is about 3 days and is a
  -- known range where problems were being experienced.
  -- AND block_slot > 282899000
  -- AND block_slot < 282900000
  AND block_timestamp >= now() - INTERVAL 5 MINUTE
  AND account.2 = true -- Filter for signer accounts only
"""

In [4]:
# Run the query on the server and collect the result set as a pandas DataFrame.
results: pd.DataFrame = client.query_df(query=query)

In [5]:
# Convert the pandas result into a polars DataFrame; the cells below use polars.
df = pl.from_pandas(data=results)

In [6]:
# Number of rows per (status, log_messages) combination, most frequent first.
(
    df.group_by(['status', 'log_messages'])
    .agg(count=pl.len())
    .sort('count', descending=True)
)

status,log_messages,count
str,list[str],u32
"""0""","[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD failed: custom program error: 0x1819""]",816
"""1""","[""Program 11111111111111111111111111111111 invoke [1]"", ""Program 11111111111111111111111111111111 success"", … ""Program 11111111111111111111111111111111 success""]",58
"""1""","[""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ invoke [1]"", ""Program log: Instruction: WriteEncodedVaa"", … ""Program ComputeBudget111111111111111111111111111111 success""]",11
"""0""","[""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ invoke [1]"", ""Program log: Instruction: WriteEncodedVaa"", … ""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ failed: custom program error: 0xbbb""]",10
"""1""","[""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ invoke [1]"", ""Program log: Instruction: WriteEncodedVaa"", … ""Program ComputeBudget111111111111111111111111111111 success""]",8
…,…,…
"""1""","[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program pythWSnswVUd12oZpeFP8e9CVaEqJg25g1Vtc2biRsT success""]",1
"""1""","[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success""]",1
"""1""","[""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD invoke [1]"", ""Program log: Instruction: UpdatePricingV2"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success""]",1
"""1""","[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success""]",1


In [7]:
# Ten blocks with the highest number of status "0" rows. This ranks by raw
# count per (block_slot, status) group, not by a failure rate.
(
    df.group_by(['block_slot', 'status'])
    .agg(count=pl.len())
    .sort('count', descending=True)
    .filter(pl.col('status').eq("0"))
    .head(10)
)

block_slot,status,count
i64,str,u32
284194723,"""0""",111
284194840,"""0""",63
284194878,"""0""",55
284195190,"""0""",51
284194727,"""0""",51
284195182,"""0""",43
284194774,"""0""",40
284195049,"""0""",39
284195089,"""0""",32
284195094,"""0""",28


In [8]:
# Ten blocks with the highest number of status "1" rows, ranked by raw count
# per (block_slot, status) group.
(
    df.group_by(['block_slot', 'status'])
    .agg(count=pl.len())
    .sort('count', descending=True)
    .filter(pl.col('status').eq("1"))
    .head(10)
)

block_slot,status,count
i64,str,u32
284194723,"""1""",31
284194771,"""1""",23
284195190,"""1""",20
284195089,"""1""",19
284195137,"""1""",19
284195106,"""1""",18
284194877,"""1""",18
284194798,"""1""",17
284194681,"""1""",17
284195006,"""1""",16


### What do the log messages of failed transactions look like?

In [15]:
# Per-status row counts for block 284194723, largest count first.
high_fail_block = (
    df.filter(pl.col('block_slot').eq(284194723))
    .group_by(['block_slot', 'status'])
    .agg(count=pl.len())
    .sort('count', descending=True)
)
high_fail_block

block_slot,status,count
i64,str,u32
284194723,"""0""",111
284194723,"""1""",31


In [19]:
# All rows of block 284194723. Despite the name, this keeps every status;
# the status filter is applied by the cells that consume it.
fail_df = df.filter(pl.col('block_slot').eq(284194723))

In [20]:
# Distinct log_messages lists among the status "0" rows of the selected block,
# with how often each occurs (most frequent first).
(
    fail_df.filter(pl.col('status').eq("0"))
    .group_by('log_messages')
    .agg(count=pl.len())
    .sort('count', descending=True)
)

log_messages,count
list[str],u32
"[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD failed: custom program error: 0x1819""]",111


In [21]:
# Print every distinct failure log in full (the table display truncates the
# list). The aggregation is evaluated once and then iterated: rebuilding it per
# iteration repeats the whole filter/group/sort for every row, and because
# group_by/sort give no ordering guarantee among ties, indexing into separately
# computed results could repeat or skip rows.
failed_log_rows = (
    fail_df.filter(pl.col('status') == "0")
    .group_by('log_messages')
    .agg(pl.len().alias('count'))
    .sort(by='count', descending=True)
    .to_dicts()
)
for failed_log_row in failed_log_rows:
    print(failed_log_row)

{'log_messages': ['Program ComputeBudget111111111111111111111111111111 invoke [1]', 'Program ComputeBudget111111111111111111111111111111 success', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD invoke [1]', 'Program log: Instruction: UpdatePricingV3', 'Program log: AnchorError thrown in programs/zeta/src/lib.rs:1206. Error Code: InvalidOracleUpdate. Error Number: 6169. Error Message: Invalid oracle update.', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD consumed 8687 of 199850 compute units', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD failed: custom program error: 0x1819'], 'count': 111}


In [32]:
# Add a `default_match` column: for every entry of `log_messages`, whether that
# log line contains the literal, case-sensitive text "Error".
# No rows are removed here despite the `filtered_df` name; the actual filtering
# happens after the lists are exploded.
filtered_df = fail_df.with_columns(
    default_match=pl.col("log_messages").list.eval(
        pl.element().str.contains("Error", literal=True, strict=True))
)

In [33]:
# Explode both list columns in lockstep to one row per log line, then keep only
# the lines flagged as matches. The boolean column is used directly as the
# predicate, which selects the same rows as comparing it to True.
explode_filter = (
    filtered_df.select('index', 'status', 'log_messages', 'default_match')
    .explode('log_messages', 'default_match')
    .filter(pl.col('default_match'))
)

In [23]:
# Occurrences of each matching log line.
# Observation: we just get a generic invalid oracle update value.
(
    explode_filter.group_by(['log_messages', 'default_match'])
    .agg(count=pl.len())
)

log_messages,default_match,count
str,bool,u32
