In [1]:
import clickhouse_connect
import pandas as pd
import polars as pl

# Display settings for printed Polars frames: allow long string values
# (up to 200 characters) and request the "full" float format.
pl.Config.set_fmt_str_lengths(200)
pl.Config.set_fmt_float("full")

polars.config.Config

### Direct connect to Cryptohouse
Connect to Cryptohouse with the ClickHouse client to run some initial queries and aggregate the results.


### Limitations
- Can only query 1000 blocks; the direct access limits are still being figured out.


```
[10:08]KemarTiti 🔮: Some of these transactions need to land sequentially

when we use Jito we put them all in the same bundle, since Jito bundles enforce the ordering, so they land in the right order

but when i looked at your data, they had were all landing (even in the same slot) but in the wrong order

you need to make sure
init vaa -> write vaa -> post price update -> close vaa lands in this order

The naive way of accomplishing this is sending the first, then wait for confirmation, then send the second etc..

Another way is Jito bundle
```

In [2]:
# Open a TLS connection to the Cryptohouse ClickHouse host as the `crypto`
# user (the password is an empty string in this notebook).
client = clickhouse_connect.get_client(
    host="crypto-clickhouse.clickhouse.com",
    secure=True,
    username="crypto",
    password="",
)

In [3]:
# Build the SQL text only; the query is executed in a later cell.
#
# The statement selects every column of solana.transactions, ARRAY JOINed on
# `accounts`, for transactions whose account list contains one of the two
# addresses below, restricted to a fixed block_slot window and to rows where
# `account.2` is true.
#
# SQL comments use `--`, which is part of ClickHouse's documented comment
# syntax; `//` is not, and would make the statement fail to parse.
query: str = """
SELECT
  *
FROM
  solana.transactions ARRAY
  JOIN accounts AS account
WHERE
  arrayExists(
    x -> x.1 IN [
        'Fc8bpeCMifWYv97pQ3k5xDvd98nuVg6yAaZrwmy4RRp6', -- zeta dex address
        'pythWSnswVUd12oZpeFP8e9CVaEqJg25g1Vtc2biRsT' -- pyth price feed
    ],
    accounts
  )
  -- The block_slot range 282,300,000 to 282,900,000 is about 3 days and is a known range where problems were being experienced
  AND block_slot > 282899000
  AND block_slot < 282900000
  -- AND block_timestamp >= now() - INTERVAL 5 MINUTE
  AND account.2 = true -- Filter for signer accounts only
"""

In [4]:
# Run the query through the ClickHouse client and load the result as a pandas DataFrame.
results: pd.DataFrame = client.query_df(query)

In [5]:
# Convert the pandas result into a Polars DataFrame; the rest of the notebook works on `df`.
df = pl.from_pandas(results)

In [6]:
# Row count per (status, log_messages) combination, most frequent first.
# NOTE(review): the query ARRAY JOINs `accounts`, so a transaction may appear
# once per matching account row; these are row counts, not necessarily
# distinct-transaction counts — confirm this is intended.
(
    df.group_by('status', 'log_messages')
    .agg(pl.len().alias('count'))
    .sort(by='count', descending=True)
)

status,log_messages,count
str,list[str],u32
"""0""","[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD failed: custom program error: 0x1819""]",702
"""1""","[""Program 11111111111111111111111111111111 invoke [1]"", ""Program 11111111111111111111111111111111 success"", … ""Program ComputeBudget111111111111111111111111111111 success""]",410
"""0""","[""Program pythWSnswVUd12oZpeFP8e9CVaEqJg25g1Vtc2biRsT invoke [1]"", ""Program log: Instruction: UpdatePriceFeed"", … ""Program pythWSnswVUd12oZpeFP8e9CVaEqJg25g1Vtc2biRsT failed: custom program error: 0x1777""]",134
"""1""","[""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ invoke [1]"", ""Program log: Instruction: CloseEncodedVaa"", … ""Program ComputeBudget111111111111111111111111111111 success""]",82
"""0""","[""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ invoke [1]"", ""Program log: Instruction: WriteEncodedVaa"", … ""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ failed: custom program error: 0xbbb""]",82
…,…,…
"""1""","[""Program pythWSnswVUd12oZpeFP8e9CVaEqJg25g1Vtc2biRsT invoke [1]"", ""Program log: Instruction: UpdatePriceFeed"", … ""Program ComputeBudget111111111111111111111111111111 success""]",1
"""1""","[""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ invoke [1]"", ""Program log: Instruction: WriteEncodedVaa"", … ""Program ComputeBudget111111111111111111111111111111 success""]",1
"""1""","[""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ invoke [1]"", ""Program log: Instruction: WriteEncodedVaa"", … ""Program ComputeBudget111111111111111111111111111111 success""]",1
"""1""","[""Program pythWSnswVUd12oZpeFP8e9CVaEqJg25g1Vtc2biRsT invoke [1]"", ""Program log: Instruction: UpdatePriceFeed"", … ""Program ComputeBudget111111111111111111111111111111 success""]",1


In [7]:
# Ten block slots with the most status "0" rows (absolute counts, not rates).
(
    df.group_by('block_slot', 'status')
    .agg(pl.len().alias('count'))
    .sort(by='count', descending=True)
    .filter(pl.col('status') == "0")
    .head(10)
)

block_slot,status,count
i64,str,u32
282899126,"""0""",127
282899186,"""0""",93
282899763,"""0""",71
282899519,"""0""",62
282899762,"""0""",44
282899963,"""0""",41
282899527,"""0""",37
282899038,"""0""",32
282899967,"""0""",31
282899127,"""0""",28


In [8]:
# Ten block slots with the most status "1" rows.
(
    df.group_by('block_slot', 'status')
    .agg(pl.len().alias('count'))
    .sort(by='count', descending=True)
    .filter(pl.col('status') == "1")
    .head(10)
)

block_slot,status,count
i64,str,u32
282899246,"""1""",36
282899082,"""1""",27
282899583,"""1""",27
282899186,"""1""",24
282899123,"""1""",24
282899967,"""1""",21
282899886,"""1""",20
282899219,"""1""",20
282899417,"""1""",20
282899435,"""1""",20


### What do example log messages look like?

In [9]:
# Per-status row counts for slot 282899246, the slot with the most status "1"
# rows in the ranking above; it only has successes.
high_sucess_block = (
    df.filter(pl.col('block_slot') == 282899246)
    .group_by('block_slot', 'status')
    .agg(pl.len().alias('count'))
    .sort(by='count', descending=True)
)

In [10]:
# Display the per-status counts computed for the high-success slot.
high_sucess_block

block_slot,status,count
i64,str,u32
282899246,"""1""",36


In [11]:
# All rows of slot 282899246 (the high-success slot), kept for log inspection below.
success_df = df.filter(pl.col('block_slot') == 282899246)

In [12]:
# How often each distinct log_messages list occurs in the high-success slot.
(
    success_df.group_by('log_messages')
    .agg(pl.len().alias('count'))
    .sort(by='count', descending=True)
)

log_messages,count
list[str],u32
"[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success""]",1
"[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success""]",1
"[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success""]",1
"[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success""]",1
"[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success""]",1
…,…
"[""Program pythWSnswVUd12oZpeFP8e9CVaEqJg25g1Vtc2biRsT invoke [1]"", ""Program log: Instruction: UpdatePriceFeed"", … ""Program ComputeBudget111111111111111111111111111111 success""]",1
"[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success""]",1
"[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success""]",1
"[""Program pythWSnswVUd12oZpeFP8e9CVaEqJg25g1Vtc2biRsT invoke [1]"", ""Program log: Instruction: UpdatePriceFeed"", … ""Program ComputeBudget111111111111111111111111111111 success""]",1


In [13]:
# Dump `index` and the full `log_messages` list of every row in the slot as a
# list of dicts, so the untruncated log lines can be read (verbose output).
success_df.select('index', 'log_messages').to_dicts()

[{'index': 209,
  'log_messages': ['Program ComputeBudget111111111111111111111111111111 invoke [1]',
   'Program ComputeBudget111111111111111111111111111111 success',
   'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD invoke [1]',
   'Program log: Instruction: UpdatePricingV2',
   'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD consumed 20398 of 199850 compute units',
   'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success']},
 {'index': 810,
  'log_messages': ['Program pythWSnswVUd12oZpeFP8e9CVaEqJg25g1Vtc2biRsT invoke [1]',
   'Program log: Instruction: UpdatePriceFeed',
   'Program rec5EKMGg6MxZYaMdyBfgwp4d5rB9T1VQH5pJv5LtFJ invoke [2]',
   'Program log: Instruction: PostUpdate',
   'Program 11111111111111111111111111111111 invoke [3]',
   'Program 11111111111111111111111111111111 success',
   'Program rec5EKMGg6MxZYaMdyBfgwp4d5rB9T1VQH5pJv5LtFJ consumed 26779 of 68950 compute units',
   'Program rec5EKMGg6MxZYaMdyBfgwp4d5rB9T1VQH5pJv5LtFJ success',
   'Program pyt

In [14]:
# Add a `default_match` column: for every row, a list of booleans marking
# which of its log messages contain the case-sensitive literal "Vaa".
# No rows are removed here; the filtering happens after the explode.
filtered_df = success_df.with_columns(
    pl.col("log_messages")
    .list.eval(pl.element().str.contains("Vaa", literal=True, strict=True))
    .alias("default_match")
)

In [15]:
# One row per individual log message (messages and their match flags are
# exploded together), keeping only the messages flagged as matching.
explode_filter = (
    filtered_df.select('index', 'status', 'log_messages', 'default_match')
    .explode('log_messages', 'default_match')
    .filter(pl.col('default_match'))
)

In [16]:
# Count the matching log messages per (index, message) pair.
(
    explode_filter
    .group_by('index', 'log_messages', 'default_match')
    .agg(pl.len().alias('count'))
)

index,log_messages,default_match,count
i64,str,bool,u32
954,"""Program log: Instruction: CloseEncodedVaa""",true,1
810,"""Program log: Instruction: CloseEncodedVaa""",true,1
951,"""Program log: Instruction: WriteEncodedVaa""",true,1
926,"""Program log: Instruction: VerifyEncodedVaaV1""",true,1
407,"""Program log: Instruction: CloseEncodedVaa""",true,1
…,…,…,…
264,"""Program log: Instruction: WriteEncodedVaa""",true,1
807,"""Program log: Instruction: VerifyEncodedVaaV1""",true,1
951,"""Program log: Instruction: VerifyEncodedVaaV1""",true,1
404,"""Program log: Instruction: WriteEncodedVaa""",true,1


### What do failed messages look like?

In [28]:
# Per-status row counts for slot 282899126, the slot with the most status "0"
# rows in the ranking above.
high_fail_block = (
    df.filter(pl.col('block_slot') == 282899126)
    .group_by('block_slot', 'status')
    .agg(pl.len().alias('count'))
    .sort(by='count', descending=True)
)
high_fail_block

block_slot,status,count
i64,str,u32
282899126,"""0""",127
282899126,"""1""",15


In [29]:
# All rows of slot 282899126 (the high-failure slot), both status "0" and status "1".
fail_df = df.filter(pl.col('block_slot') == 282899126)

In [30]:
# How often each distinct log_messages list occurs among the status "0" rows
# of the high-failure slot.
(
    fail_df.filter(pl.col('status') == "0")
    .group_by('log_messages')
    .agg(pl.len().alias('count'))
    .sort(by='count', descending=True)
)

log_messages,count
list[str],u32
"[""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD invoke [1]"", ""Program log: Instruction: UpdatePricingV3"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD failed: custom program error: 0x1819""]",70
"[""Program ComputeBudget111111111111111111111111111111 invoke [1]"", ""Program ComputeBudget111111111111111111111111111111 success"", … ""Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD failed: custom program error: 0x1819""]",57


In [31]:
# Print every (log_messages, count) row for the status "0" rows in full, so the
# complete log lines are visible instead of the truncated table rendering.
#
# The aggregation is materialised exactly once: group_by does not promise a
# stable group order, so recomputing it per printed row could reorder tied
# rows between iterations (printing some twice and others never), besides
# repeating the whole pipeline for every row.
failed_log_counts = (
    fail_df.filter(pl.col('status') == "0")
    .group_by('log_messages')
    .agg(pl.len().alias('count'))
    .sort(by='count', descending=True)
)
for row in failed_log_counts.to_dicts():
    print(row)

{'log_messages': ['Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD invoke [1]', 'Program log: Instruction: UpdatePricingV3', 'Program log: AnchorError thrown in programs/zeta/src/lib.rs:1206. Error Code: InvalidOracleUpdate. Error Number: 6169. Error Message: Invalid oracle update.', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD consumed 8687 of 200000 compute units', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD failed: custom program error: 0x1819'], 'count': 70}
{'log_messages': ['Program ComputeBudget111111111111111111111111111111 invoke [1]', 'Program ComputeBudget111111111111111111111111111111 success', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD invoke [1]', 'Program log: Instruction: UpdatePricingV3', 'Program log: AnchorError thrown in programs/zeta/src/lib.rs:1206. Error Code: InvalidOracleUpdate. Error Number: 6169. Error Message: Invalid oracle update.', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD consumed 8687 of 199850 compute units', '

In [32]:
# Add a `default_match` column: for every row of the high-failure slot, a list
# of booleans marking which of its log messages contain the case-sensitive
# literal "Error". No rows are removed here.
filtered_df = fail_df.with_columns(
    pl.col("log_messages")
    .list.eval(pl.element().str.contains("Error", literal=True, strict=True))
    .alias("default_match")
)

In [33]:
# One row per individual log message (messages and their match flags are
# exploded together), keeping only the messages flagged as matching.
explode_filter = (
    filtered_df.select('index', 'status', 'log_messages', 'default_match')
    .explode('log_messages', 'default_match')
    .filter(pl.col('default_match'))
)

In [23]:
# Count each distinct matching log line.
# Author's observation: we just get a generic invalid oracle update value.
(
    explode_filter
    .group_by('log_messages', 'default_match')
    .agg(pl.len().alias('count'))
)

log_messages,default_match,count
str,bool,u32


### Successful txs

In [24]:
# Log-message patterns of the status "1" (successful) rows inside the
# high-failure slot, most frequent first.
(
    fail_df.filter(pl.col('status') == "1")
    .group_by('log_messages')
    .agg(pl.len().alias('count'))
    .sort(by='count', descending=True)
)

log_messages,count
list[str],u32
"[""Program 11111111111111111111111111111111 invoke [1]"", ""Program 11111111111111111111111111111111 success"", … ""Program ComputeBudget111111111111111111111111111111 success""]",2
"[""Program 11111111111111111111111111111111 invoke [1]"", ""Program 11111111111111111111111111111111 success"", … ""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ success""]",2


In [25]:
# Same successful rows, additionally keyed by `index` and listed in ascending
# index order.
(
    fail_df.filter(pl.col('status') == "1")
    .group_by('index', 'log_messages')
    .agg(pl.len().alias('count'))
    .sort(by='index', descending=False)
)

index,log_messages,count
i64,list[str],u32
1778,"[""Program 11111111111111111111111111111111 invoke [1]"", ""Program 11111111111111111111111111111111 success"", … ""Program ComputeBudget111111111111111111111111111111 success""]",2
1779,"[""Program 11111111111111111111111111111111 invoke [1]"", ""Program 11111111111111111111111111111111 success"", … ""Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ success""]",2


In [26]:
# Print every (index, log_messages, count) row for the status "1" rows in full,
# in ascending index order, so the complete log lines are visible.
#
# The aggregation is materialised exactly once instead of being recomputed for
# len() and again for each printed row; every printed row therefore comes from
# the same result and the pipeline runs a single time.
successful_rows_by_index = (
    fail_df.filter(pl.col('status') == "1")
    .group_by('index', 'log_messages')
    .agg(pl.len().alias('count'))
    .sort(by='index', descending=False)
)
for row in successful_rows_by_index.to_dicts():
    print(row)

{'index': 1778, 'log_messages': ['Program 11111111111111111111111111111111 invoke [1]', 'Program 11111111111111111111111111111111 success', 'Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ invoke [1]', 'Program log: Instruction: InitEncodedVaa', 'Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ consumed 2445 of 599850 compute units', 'Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ success', 'Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ invoke [1]', 'Program log: Instruction: WriteEncodedVaa', 'Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ consumed 2692 of 597405 compute units', 'Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuLCUYoz3zr8SWWaQ success', 'Program ComputeBudget111111111111111111111111111111 invoke [1]', 'Program ComputeBudget111111111111111111111111111111 success'], 'count': 2}
{'index': 1779, 'log_messages': ['Program 11111111111111111111111111111111 invoke [1]', 'Program 11111111111111111111111111111111 success', 'Program HDwcJBJXjL9FpJ7UBsYBtaDjsBUhuL

In [27]:
# NOTE(review): `STOP` is not defined anywhere in the visible notebook, so this
# cell raises NameError (as its recorded output shows). Presumably a deliberate
# tripwire to halt "Run All" before the cell below — confirm, then remove it so
# the notebook can run top to bottom without an exception.
STOP

NameError: name 'STOP' is not defined

In [None]:
# Print every (index, log_messages, count) row for the status "1" rows of
# `fail_df` in full, in ascending index order.
# NOTE(review): this cell has no execution count and repeats an earlier cell's
# logic; the output recorded under it may come from a different `fail_df` —
# confirm whether it is still needed.
#
# The aggregation is materialised exactly once instead of being recomputed for
# len() and again for each printed row.
successful_rows_by_index = (
    fail_df.filter(pl.col('status') == "1")
    .group_by('index', 'log_messages')
    .agg(pl.len().alias('count'))
    .sort(by='index', descending=False)
)
for row in successful_rows_by_index.to_dicts():
    print(row)

{'index': 265, 'log_messages': ['Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD invoke [1]', 'Program log: Instruction: UpdatePricingV3', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD consumed 18846 of 200000 compute units', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success'], 'count': 1}
{'index': 267, 'log_messages': ['Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD invoke [1]', 'Program log: Instruction: UpdatePricingV3', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD consumed 18990 of 200000 compute units', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success'], 'count': 1}
{'index': 312, 'log_messages': ['Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD invoke [1]', 'Program log: Instruction: UpdatePricingV3', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD consumed 24504 of 200000 compute units', 'Program ZETAxsqBRek56DhiGXrn75yj2NHU3aYUnxvHXpkf3aD success'], 'count': 1}
{'index': 314, 'log_messages': ['Program ZETAxsqBRek56DhiGXrn