In [21]:
from pyspark.sql.types import StructType, StructField, StringType, LongType, DecimalType, DoubleType
from pyspark.sql import SparkSession

In [3]:
# Obtain the SparkSession used by every cell below; getOrCreate() hands back an
# already-running session when one exists instead of starting a new one.
spark = (
    SparkSession.builder
    .appName("VolumeCalculation")
    .getOrCreate()
)

23/06/06 09:12:33 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [60]:
# Explicit schema for the token-transfer CSV files. Every column is nullable;
# "value" is held as a 38-digit decimal with no fractional part.
token_transfers_schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("token_address", StringType()),
        ("from_address", StringType()),
        ("to_address", StringType()),
        ("value", DecimalType(38, 0)),
        ("transaction_hash", StringType()),
        ("log_index", LongType()),
        ("block_number", LongType()),
    )
])

In [56]:
# Explicit schema for the transaction CSV files. Every column is nullable;
# "value" is held as a 38-digit decimal, all other numeric columns as 64-bit integers.
transactions_schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("hash", StringType()),
        ("nonce", LongType()),
        ("block_hash", StringType()),
        ("block_number", LongType()),
        ("transaction_index", LongType()),
        ("from_address", StringType()),
        ("to_address", StringType()),
        ("value", DecimalType(38, 0)),
        ("gas", LongType()),
        ("gas_price", LongType()),
        ("input", StringType()),
        ("block_timestamp", LongType()),
        ("max_fee_per_gas", LongType()),
        ("max_priority_fee_per_gas", LongType()),
        ("transaction_type", LongType()),
    )
])

In [32]:
# Explicit schema for the token metadata CSV. Every column is nullable.
#
# total_supply is declared as a 38-digit decimal, matching the "value" columns of
# the transfer and transaction schemas, rather than a 64-bit integer: a CSV field
# that does not fit the declared type is read back as null, and a supply expressed
# in base units can exceed the signed 64-bit range.
# NOTE(review): presumably total_supply is in base units like "value" (the volume
# query divides value by 10^decimals) -- confirm against the export.
tokens_schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("address", StringType()),
        ("symbol", StringType()),
        ("name", StringType()),
        ("decimals", LongType()),
        ("total_supply", DecimalType(38, 0)),
        ("block_number", LongType()),
    )
])

In [33]:
# Explicit schema for the historical price CSV (tokens/cmc.csv). Every column is
# nullable; the price/volume/market-cap columns are doubles, the rest integers or strings.
# NOTE(review): the column name "as" is kept exactly as originally written --
# confirm it is the intended name for that CSV column.
cmc_historical_schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("id", LongType()),
        ("rank", LongType()),
        ("name", StringType()),
        ("symbol", StringType()),
        ("open", DoubleType()),
        ("high", DoubleType()),
        ("low", DoubleType()),
        ("close", DoubleType()),
        ("volume", DoubleType()),
        ("marketCap", DoubleType()),
        ("as", LongType()),
        ("timestamp", LongType()),
    )
])

In [34]:
# Root directory of the CSV datasets; the load cells below append
# "/<dataset>/..." sub-paths to this value.
basePath = "./"

In [61]:
# Load every token-transfer CSV matched by the three-level glob. The first line of
# each file is treated as a header and column types come from the declared schema.
token_transfers_df = (
    spark.read.format("csv")
    .option("header", True)
    .schema(token_transfers_schema)
    .load(basePath + "/token_transfers/*/*/*.csv")
)

In [36]:
# Load every transaction CSV matched by the three-level glob. The first line of
# each file is treated as a header and column types come from the declared schema.
transactions_df = (
    spark.read.format("csv")
    .option("header", True)
    .schema(transactions_schema)
    .load(basePath + "/transactions/*/*/*.csv")
)

In [58]:
# Load the single token-metadata CSV. The first line is treated as a header and
# column types come from the declared schema.
tokens_df = (
    spark.read.format("csv")
    .option("header", True)
    .schema(tokens_schema)
    .load(basePath + "/tokens/tokens.csv")
)

In [38]:
# Load the single historical-price CSV. The first line is treated as a header and
# column types come from the declared schema.
cmc_historicals_df = (
    spark.read.format("csv")
    .option("header", True)
    .schema(cmc_historical_schema)
    .load(basePath + "/tokens/cmc.csv")
)

In [39]:
%%html
<style>
/* Render <pre> text inside notebook output areas without line wrapping,
   so wide output stays on one line per row. */
div.output_area pre {
    white-space: pre;
}
</style>

In [45]:
# from pyspark.sql.functions import expr


# tx_volume_df = token_transfers_df \
#     .join( \
#         transactions_df, \
#         transactions_df["hash"] == token_transfers_df["transaction_hash"], \
#         "inner" \
#     ) \
#     .join( \
#         tokens_df, \
#         tokens_df["address"] == token_transfers_df["token_address"], \
#         "inner" \
#     ) \
#     .join( \
#         cmc_historicals_df, \
#         (expr("'symbol' RLIKE 'symbol'")) & \
#           (transactions_df["block_timestamp"] * 1000 < cmc_historicals_df["timestamp"]) & \
#           (transactions_df["block_timestamp"] * 1000 > cmc_historicals_df["timestamp"] - 86400 * 1000), \
#         "inner" \
#     ) 

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [46]:
# tx_volume_df.show(10)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

An error was encountered:
Invalid status code '404' from https://spark-lighter.octan.network/lighter/api/sessions/a726a2ca-becc-4131-89fe-c0dfdbfefde4/statements/c5639e28-3470-4b49-ac79-b8ba608bb78d with error payload: {"message":"Not Found","logref":null,"path":null,"_links":{"self":{"href":"/lighter/api/sessions/a726a2ca-becc-4131-89fe-c0dfdbfefde4/statements/c5639e28-3470-4b49-ac79-b8ba608bb78d","templated":false,"profile":null,"deprecation":null,"title":null,"hreflang":null,"type":null,"name":null}},"_embedded":{"errors":[{"message":"Page Not Found","logref":null,"path":null,"_links":{},"_embedded":{}}]}}


In [62]:
# Register each DataFrame as a Spark SQL temp view named after its Python
# variable, so the SQL cell can refer to them by name.
for _view_name, _frame in (
    ("token_transfers_df", token_transfers_df),
    ("transactions_df", transactions_df),
    ("tokens_df", tokens_df),
    ("cmc_historicals_df", cmc_historicals_df),
):
    _frame.createOrReplaceTempView(_view_name)

In [89]:
# Price-weighted transfer volume per (token, price-row timestamp).
#
# For each transfer: value is scaled down by 10^decimals of its token, multiplied
# by cmc.open of the price row matched on symbol, and summed per group.
# NOTE(review): cmc.open is presumably the opening price for the period -- confirm.
#
# Time window: a transfer is attributed to the price row whose timestamp closes
# the 86400-second window containing the transaction, i.e.
#     cmc.timestamp - 86400 < block_timestamp <= cmc.timestamp
# The upper bound is inclusive on purpose. With strict inequalities on both ends,
# a transaction stamped exactly at a price row's timestamp matched neither that
# row nor the following one (assuming rows are spaced 86400 s apart) and was
# silently dropped from the totals.
result_df = spark.sql("""
SELECT  tt.token_address, cmc.timestamp, sum((tt.value / POWER(10, c.decimals)) * cmc.open) as tx_volume
FROM token_transfers_df tt
JOIN transactions_df txn ON tt.transaction_hash = txn.hash
JOIN tokens_df c ON tt.token_address = c.address
JOIN cmc_historicals_df cmc ON c.symbol = cmc.symbol
WHERE txn.block_timestamp > cmc.timestamp - 86400 AND txn.block_timestamp <= cmc.timestamp
GROUP BY tt.token_address, cmc.timestamp
""")

# Display only: cast the volume to a whole-number decimal and print the first
# ten rows without truncating the address column.
result_df.withColumn('tx_volume', result_df['tx_volume'].cast(DecimalType(38, 0))).show(10, truncate=False)



+------------------------------------------+----------+-----------+
|token_address                             |timestamp |tx_volume  |
+------------------------------------------+----------+-----------+
|0x2170ed0880ac9a755fd29b2688956bd959f933f8|1685318399|1054769    |
|0x8c4f591f1701f0d510c7c3c27a0d20e43c855ee0|1685318399|74104796376|
|0xbf5140a22578168fd562dccf235e5d43a02ce9b1|1685318399|2285       |
|0x4338665cbb7b2485a8855a139b75d5e34ab0db94|1685318399|14300      |
|0x128aedc7f41ffb82131215e1722d8366faad0cd4|1685318399|72084      |
|0x72cfefd170c58bf6cfb860c67cabd1f74600186a|1685318399|6327704    |
|0xba2ae424d960c26247dd6c32edc70b295c744c43|1685318399|101832     |
|0x14016e85a25aeb13065688cafb43044c2ef86784|1685318399|5707       |
|0x17eec92a6b4bbfa2f716d4a0da0f1965ba9ae84a|1685318399|982145     |
|0x1401ebb4ba084f0f4144e558a7e0af77dd6a3d73|1685318399|1751819    |
+------------------------------------------+----------+-----------+
only showing top 10 rows

