In [1]:
# Create the Spark Session
from pyspark.sql import SparkSession

spark = SparkSession \
    .builder \
    .appName("btc_streamer") \
    .config("spark.streaming.stopGracefullyOnShutdown", True) \
    .config('spark.jars.packages', 'org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.0') \
    .config("spark.sql.shuffle.partitions", 1) \
    .master("local[*]") \
    .getOrCreate()

:: loading settings :: url = jar:file:/Users/michieldekoninck/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /Users/michieldekoninck/.ivy2/cache
The jars for the packages stored in: /Users/michieldekoninck/.ivy2/jars
org.apache.spark#spark-sql-kafka-0-10_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-f2af2ac9-86ad-47ac-830c-16573cc25835;1.0
	confs: [default]
	found org.apache.spark#spark-sql-kafka-0-10_2.12;3.3.0 in central
	found org.apache.spark#spark-token-provider-kafka-0-10_2.12;3.3.0 in central
	found org.apache.kafka#kafka-clients;2.8.1 in central
	found org.lz4#lz4-java;1.8.0 in central
	found org.xerial.snappy#snappy-java;1.1.8.4 in central
	found org.slf4j#slf4j-api;1.7.32 in central
	found org.apache.hadoop#hadoop-client-runtime;3.3.2 in central
	found org.spark-project.spark#unused;1.0.0 in central
	found org.apache.hadoop#hadoop-client-api;3.3.2 in central
	found commons-logging#commons-logging;1.1.3 in central
	found com.google.code.findbugs#jsr305;3.0.0 in central
	found org.apache.commons#commons-pool2;2.1

In [2]:
spark

In [4]:
streaming_df = spark.readStream\
    .format("kafka") \
    .option("kafka.bootstrap.servers", "localhost:9092") \
    .option("subscribe", "bitcoin") \
    .option("startingOffsets", "earliest") \
    .load()

In [6]:
from pyspark.sql.types import StringType, StructField, StructType, ArrayType, DoubleType, FloatType, IntegerType, LongType
from pyspark.sql.functions import from_json
from pyspark.sql.functions import explode, col


schema = StructType([
    StructField("price", DoubleType(), True),
    StructField("volume_24h", DoubleType(), True),
    StructField("volume_24h_change_24h", DoubleType(), True),
    StructField("market_cap", LongType(), True),
    StructField("market_cap_change_24h", DoubleType(), True),
    StructField("percent_change_15m", DoubleType(), True),
    StructField("percent_change_30m", DoubleType(), True),
    StructField("percent_change_1h", DoubleType(), True),
    StructField("percent_change_6h", DoubleType(), True),
    StructField("percent_change_12h", DoubleType(), True),
    StructField("percent_change_24h", DoubleType(), True),
    StructField("percent_change_7d", DoubleType(), True),
    StructField("percent_change_30d", DoubleType(), True),
    StructField("percent_change_1y", DoubleType(), True),
    StructField("ath_price", DoubleType(), True),
    StructField("ath_date", StringType(), True),
    StructField("percent_from_price_ath", DoubleType(), True),
    StructField("symbol", StringType(), True),
    StructField("beta_value", DoubleType(), True)
])

# Cast the value from binary to string, since Kafka sends messages as bytes
kafka_df =streaming_df.selectExpr("CAST(value AS STRING) as json_string")

# Parse the JSON string in the 'value' column using the defined schema
json_df = kafka_df.withColumn("json_data", from_json(col("json_string"), schema))

json_df

DataFrame[json_string: string, json_data: struct<price:double,volume_24h:double,volume_24h_change_24h:double,market_cap:bigint,market_cap_change_24h:double,percent_change_15m:double,percent_change_30m:double,percent_change_1h:double,percent_change_6h:double,percent_change_12h:double,percent_change_24h:double,percent_change_7d:double,percent_change_30d:double,percent_change_1y:double,ath_price:double,ath_date:string,percent_from_price_ath:double,symbol:string,beta_value:double>]

In [None]:
# Parse the JSON string in the 'value' column using the defined schema
json_df = kafka_df.withColumn("json_data", from_json(col("json_string"), schema))

# Extract specific fields from the JSON
extracted_df = json_df.select(
    col("json_data.price"),
    col("json_data.volume_24h"),
    col("json_data.market_cap"),
    col("json_data.symbol"),
    col("json_data.percent_change_24h")
)

# Write the output to console (for debugging) or further processing
query = extracted_df.writeStream \
    .outputMode("append") \
    .format("console") \
    .start()

# Await termination of the streaming query
query.awaitTermination()

In [76]:
streaming_df.printSchema()

root
 |-- key: binary (nullable = true)
 |-- value: binary (nullable = true)
 |-- topic: string (nullable = true)
 |-- partition: integer (nullable = true)
 |-- offset: long (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- timestampType: integer (nullable = true)



In [23]:
import os, requests

url = f"https://api.coinpaprika.com/v1/tickers/btc-bitcoin"
    
headers = {
    'Accept-Encoding': 'gzip',
    'Authorization': f'Bearer {os.getenv("COINCAP_API_KEY")}',
}
    
response = requests.get(url, headers=headers)
data = response.json()

In [24]:
data

{'id': 'btc-bitcoin',
 'name': 'Bitcoin',
 'symbol': 'BTC',
 'rank': 1,
 'total_supply': 19758244,
 'max_supply': 21000000,
 'beta_value': 0.979908,
 'first_data_at': '2010-07-17T00:00:00Z',
 'last_updated': '2024-09-24T08:36:35Z',
 'quotes': {'USD': {'price': 63669.604706256774,
   'volume_24h': 27957015512.798916,
   'volume_24h_change_24h': -4.82,
   'market_cap': 1258000158196,
   'market_cap_change_24h': 0.07,
   'percent_change_15m': -0.07,
   'percent_change_30m': 0.08,
   'percent_change_1h': 0.38,
   'percent_change_6h': 0.94,
   'percent_change_12h': 0.4,
   'percent_change_24h': 0.07,
   'percent_change_7d': 8.3,
   'percent_change_30d': -0.23,
   'percent_change_1y': 141.8,
   'ath_price': 73686.92856165291,
   'ath_date': '2024-03-14T07:07:09Z',
   'percent_from_price_ath': -13.59}}}

24/09/24 10:42:09 ERROR Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.SparkThreadUtils$.awaitResult(SparkThreadUtils.scala:56)
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:310)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:124)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:123)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:688)
	at org.apache.spark.storage.BlockManagerMasterE

In [22]:
diction = data['quotes']['USD'] 
diction['symbol'] = data['symbol']
diction['beta_value'] = data['beta_value']
diction['timestamp'] = data['last_updated']
del diction['ath_price']
del diction['ath_date']
del diction['percent_from_price_ath']
diction

{'price': 63723.96699995236,
 'volume_24h': 33554610549.493538,
 'volume_24h_change_24h': 114.41,
 'market_cap': 1259052978344,
 'market_cap_change_24h': 1.42,
 'percent_change_15m': 0.19,
 'percent_change_30m': 0.37,
 'percent_change_1h': 0.78,
 'percent_change_6h': 0.22,
 'percent_change_12h': -1,
 'percent_change_24h': 1.41,
 'percent_change_7d': 10.2,
 'percent_change_30d': -1.26,
 'percent_change_1y': 142.44,
 'symbol': 'BTC',
 'beta_value': 0.980327,
 'timestamp': '2024-09-23T15:55:36Z'}

24/09/24 07:02:52 WARN HeartbeatReceiver: Removing executor driver with no recent heartbeats: 2352345 ms exceeds timeout 120000 ms
24/09/24 07:02:52 WARN SparkContext: Killing executors is not supported by current scheduler.
24/09/24 07:51:12 ERROR Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.SparkThreadUtils$.awaitResult(SparkThreadUtils.scala:56)
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:310)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:124)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$

In [91]:
from pyspark.sql.types import StringType, StructField, StructType, ArrayType, DoubleType, FloatType, IntegerType, LongType
json_schema = StructType([StructField('data', ArrayType(StructType([ \
StructField('price', FloatType(), nullable=False), \
StructField('volume_24h', DoubleType(), nullable=False), \
StructField('volume_24h_change_24h', FloatType(), nullable=False), \
StructField('market_cap', IntegerType(), nullable=False),\
StructField('market_cap_change_24h', FloatType(), nullable=False), \
StructField('percent_change_15m', FloatType(), nullable=False), \
StructField('percent_change_30m', FloatType(), nullable=False), \
StructField('percent_change_1h', FloatType(), nullable=False), \
StructField('percent_change_6h', FloatType(), nullable=False), \
StructField('percent_change_12h', IntegerType(), nullable=False),\
StructField('percent_change_24h', FloatType(), nullable=False), \
StructField('percent_change_7d', FloatType(), nullable=False), \
StructField('percent_change_30d', FloatType(), nullable=False), \
StructField('percent_change_1y', FloatType(), nullable=False), \
StructField('BTC', StringType(), nullable=False), \
StructField('beta_value', FloatType(), nullable=False)])),nullable=False)])

In [151]:
# Parse value from binay to string
json_df = streaming_df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")

json_df.printSchema()


root
 |-- key: string (nullable = true)
 |-- value: string (nullable = true)



In [140]:
from pyspark.sql.functions import from_json

json_expanded_df = json_df.withColumn("value", from_json("value", json_schema)).select("value.*") 

json_expanded_df.printSchema()

root
 |-- data: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- price: float (nullable = true)
 |    |    |-- volume_24h: double (nullable = true)
 |    |    |-- volume_24h_change_24h: float (nullable = true)
 |    |    |-- market_cap: integer (nullable = true)
 |    |    |-- market_cap_change_24h: float (nullable = true)
 |    |    |-- percent_change_15m: float (nullable = true)
 |    |    |-- percent_change_30m: float (nullable = true)
 |    |    |-- percent_change_1h: float (nullable = true)
 |    |    |-- percent_change_6h: float (nullable = true)
 |    |    |-- percent_change_12h: integer (nullable = true)
 |    |    |-- percent_change_24h: float (nullable = true)
 |    |    |-- percent_change_7d: float (nullable = true)
 |    |    |-- percent_change_30d: float (nullable = true)
 |    |    |-- percent_change_1y: float (nullable = true)
 |    |    |-- BTC: string (nullable = true)
 |    |    |-- beta_value: float (nullable = true)



AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `price` cannot be resolved. Did you mean one of the following? [`data`].;
'Project ['price]
+- Project [value#561.data AS data#564]
   +- Project [key#539, from_json(StructField(data,ArrayType(StructType(StructField(price,FloatType,false),StructField(volume_24h,DoubleType,false),StructField(volume_24h_change_24h,FloatType,false),StructField(market_cap,IntegerType,false),StructField(market_cap_change_24h,FloatType,false),StructField(percent_change_15m,FloatType,false),StructField(percent_change_30m,FloatType,false),StructField(percent_change_1h,FloatType,false),StructField(percent_change_6h,FloatType,false),StructField(percent_change_12h,IntegerType,false),StructField(percent_change_24h,FloatType,false),StructField(percent_change_7d,FloatType,false),StructField(percent_change_30d,FloatType,false),StructField(percent_change_1y,FloatType,false),StructField(BTC,StringType,false),StructField(beta_value,FloatType,false)),true),false), value#540, Some(Europe/Brussels)) AS value#561]
      +- Project [cast(key#311 as string) AS key#539, cast(value#312 as string) AS value#540]
         +- StreamingRelationV2 org.apache.spark.sql.kafka010.KafkaSourceProvider@5dc0e718, kafka, org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaTable@530702d0, [startingOffsets=earliest, kafka.bootstrap.servers=localhost:9092, subscribe=bitcoin], [key#311, value#312, topic#313, partition#314, offset#315L, timestamp#316, timestampType#317], StreamingRelation DataSource(org.apache.spark.sql.SparkSession@329c251,kafka,List(),None,List(),None,Map(kafka.bootstrap.servers -> localhost:9092, subscribe -> bitcoin, startingOffsets -> earliest),None), kafka, [key#304, value#305, topic#306, partition#307, offset#308L, timestamp#309, timestampType#310]


In [142]:

# Apply Schema to JSON value column and expand the value
from pyspark.sql.functions import from_json

json_expanded_df = json_df.withColumn("value", from_json("value", json_schema)).select("value.*") 

json_expanded_df.printSchema()

root
 |-- data: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- price: float (nullable = true)
 |    |    |-- volume_24h: double (nullable = true)
 |    |    |-- volume_24h_change_24h: float (nullable = true)
 |    |    |-- market_cap: integer (nullable = true)
 |    |    |-- market_cap_change_24h: float (nullable = true)
 |    |    |-- percent_change_15m: float (nullable = true)
 |    |    |-- percent_change_30m: float (nullable = true)
 |    |    |-- percent_change_1h: float (nullable = true)
 |    |    |-- percent_change_6h: float (nullable = true)
 |    |    |-- percent_change_12h: integer (nullable = true)
 |    |    |-- percent_change_24h: float (nullable = true)
 |    |    |-- percent_change_7d: float (nullable = true)
 |    |    |-- percent_change_30d: float (nullable = true)
 |    |    |-- percent_change_1y: float (nullable = true)
 |    |    |-- BTC: string (nullable = true)
 |    |    |-- beta_value: float (nullable = true)



In [112]:
query = json_expanded_df \
    .writeStream \
    .format("console") \
    .option("checkpointLocation", "checkpoint_dir") \
    .start()

query.awaitTermination()

24/09/23 16:22:06 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.
24/09/23 16:22:06 WARN StreamingQueryManager: Stopping existing streaming query [id=2c25dccb-fd05-4488-8951-8cf7ef297d58, runId=1656591d-74e3-4452-9b92-7afcf724396e], as a new run is being started.
24/09/23 16:22:06 WARN OffsetSeqMetadata: Updating the value of conf 'spark.sql.shuffle.partitions' in current session from '1' to '4'.
24/09/23 16:22:06 WARN AdminClientConfig: The configuration 'key.deserializer' was supplied but isn't a known config.
24/09/23 16:22:06 WARN AdminClientConfig: The configuration 'value.deserializer' was supplied but isn't a known config.
24/09/23 16:22:06 WARN AdminClientConfig: The configuration 'enable.auto.commit' was supplied but isn't a known config.
24/09/23 16:22:06 WARN AdminClientConfig: The configuration 'max.poll.records' was supplied but isn't a known config.
24/09/23 16:22:06 WARN AdminClientConfig: The 

-------------------------------------------
Batch: 16
-------------------------------------------
+----+
|data|
+----+
|NULL|
+----+



ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/Users/michieldekoninck/.pyenv/versions/3.10.6/envs/kafka_streamer/lib/python3.10/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "/Users/michieldekoninck/.pyenv/versions/3.10.6/envs/kafka_streamer/lib/python3.10/site-packages/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/Users/michieldekoninck/.pyenv/versions/3.10.6/lib/python3.10/socket.py", line 705, in readinto
    return self._sock.recv_into(b)
KeyboardInterrupt


KeyboardInterrupt: 

In [66]:
from pyspark.sql.functions import explode, col


exploded_df = json_expanded_df.select('data').withColumn("data", explode("data"))
exploded_df.printSchema()

root
 |-- data: struct (nullable = true)
 |    |-- price: float (nullable = true)
 |    |-- volume_24h: double (nullable = true)
 |    |-- volume_24h_change_24h: float (nullable = true)
 |    |-- market_cap: integer (nullable = true)
 |    |-- market_cap_change_24h: float (nullable = true)
 |    |-- percent_change_15m: float (nullable = true)
 |    |-- percent_change_30m: float (nullable = true)
 |    |-- percent_change_1h: float (nullable = true)
 |    |-- percent_change_6h: float (nullable = true)
 |    |-- percent_change_12h: integer (nullable = true)
 |    |-- percent_change_24h: float (nullable = true)
 |    |-- percent_change_7d: float (nullable = true)
 |    |-- percent_change_30d: float (nullable = true)
 |    |-- percent_change_1y: float (nullable = true)
 |    |-- BTC: string (nullable = true)
 |    |-- beta_value: float (nullable = true)



In [67]:
flattened_df = exploded_df \
    .selectExpr("data.price as price", "data.volume_24h as volume_24h", 
                "data.volume_24h_change_24h as volume_24h_change_24h", "data.market_cap as market_cap",
                "data.market_cap_change_24h as market_cap_change_24h", "data.percent_change_15m as percent_change_15m",
                "data.percent_change_30m as percent_change_30m", "data.percent_change_1h as percent_change_1h",
                "data.percent_change_6h as percent_change_6h", "data.percent_change_12h as percent_change_12h",
                "data.percent_change_24h as percent_change_24h", "data.percent_change_7d as percent_change_7d",
                "data.percent_change_30d as percent_change_30d", "data.percent_change_1y as percent_change_1y",
                "data.BTC as BTC", "data.beta_value as beta_value") 
    
flattened_df.printSchema()

root
 |-- price: float (nullable = true)
 |-- volume_24h: double (nullable = true)
 |-- volume_24h_change_24h: float (nullable = true)
 |-- market_cap: integer (nullable = true)
 |-- market_cap_change_24h: float (nullable = true)
 |-- percent_change_15m: float (nullable = true)
 |-- percent_change_30m: float (nullable = true)
 |-- percent_change_1h: float (nullable = true)
 |-- percent_change_6h: float (nullable = true)
 |-- percent_change_12h: integer (nullable = true)
 |-- percent_change_24h: float (nullable = true)
 |-- percent_change_7d: float (nullable = true)
 |-- percent_change_30d: float (nullable = true)
 |-- percent_change_1y: float (nullable = true)
 |-- BTC: string (nullable = true)
 |-- beta_value: float (nullable = true)



-------------------------------------------
Batch: 10
-------------------------------------------
+----+
|data|
+----+
|NULL|
+----+



In [59]:
# df = spark\
#       .readStream \
#       .format("kafka") \
#       .option("kafka.bootstrap.servers", "localhost:9092") \
#       .option("subscribe", "bitcoin") \
#       .option("startingOffsets", "earliest") \
#       .load()
      
query = flattened_df \
    .writeStream \
    .format("console") \
    .option("checkpointLocation", "checkpoint_dir") \
    .outputMode("complete") \
    .start()

query.awaitTermination()

24/09/23 15:51:15 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.


AnalysisException: Complete output mode not supported when there are no streaming aggregations on streaming DataFrames/Datasets;
Project [data#691.price AS price#693, data#691.volume_24h AS volume_24h#694, data#691.volume_24h_change_24h AS volume_24h_change_24h#695, data#691.market_cap AS market_cap#696, data#691.market_cap_change_24h AS market_cap_change_24h#697, data#691.percent_change_15m AS percent_change_15m#698, data#691.percent_change_30m AS percent_change_30m#699, data#691.percent_change_1h AS percent_change_1h#700, data#691.percent_change_6h AS percent_change_6h#701, data#691.percent_change_12h AS percent_change_12h#702, data#691.percent_change_24h AS percent_change_24h#703, data#691.percent_change_7d AS percent_change_7d#704, data#691.percent_change_30d AS percent_change_30d#705, data#691.percent_change_1y AS percent_change_1y#706, data#691.BTC AS BTC#707, data#691.beta_value AS beta_value#708]
+- Project [data#691]
   +- Generate explode(data#663), false, [data#691]
      +- Project [data#663]
         +- Project [value#661.data AS data#663]
            +- Project [from_json(StructField(data,ArrayType(StructType(StructField(price,FloatType,true),StructField(volume_24h,DoubleType,true),StructField(volume_24h_change_24h,FloatType,true),StructField(market_cap,IntegerType,true),StructField(market_cap_change_24h,FloatType,true),StructField(percent_change_15m,FloatType,true),StructField(percent_change_30m,FloatType,true),StructField(percent_change_1h,FloatType,true),StructField(percent_change_6h,FloatType,true),StructField(percent_change_12h,IntegerType,true),StructField(percent_change_24h,FloatType,true),StructField(percent_change_7d,FloatType,true),StructField(percent_change_30d,FloatType,true),StructField(percent_change_1y,FloatType,true),StructField(BTC,StringType,true),StructField(beta_value,FloatType,true)),true),true), value#613, Some(Europe/Brussels)) AS value#661]
               +- Project [cast(value#544 as string) AS value#613]
                  +- StreamingRelationV2 org.apache.spark.sql.kafka010.KafkaSourceProvider@3f78339c, kafka, org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaTable@31bfda03, [startingOffsets=earliest, kafka.bootstrap.servers=localhost:9092, subscribe=bitcoin], [key#543, value#544, topic#545, partition#546, offset#547L, timestamp#548, timestampType#549], StreamingRelation DataSource(org.apache.spark.sql.SparkSession@16eed49e,kafka,List(),None,List(),None,Map(kafka.bootstrap.servers -> localhost:9092, subscribe -> bitcoin, startingOffsets -> earliest),None), kafka, [key#536, value#537, topic#538, partition#539, offset#540L, timestamp#541, timestampType#542]


-------------------------------------------
Batch: 9
-------------------------------------------
+-----+----------+---------------------+----------+---------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+-----------------+------------------+-----------------+---+----------+
|price|volume_24h|volume_24h_change_24h|market_cap|market_cap_change_24h|percent_change_15m|percent_change_30m|percent_change_1h|percent_change_6h|percent_change_12h|percent_change_24h|percent_change_7d|percent_change_30d|percent_change_1y|BTC|beta_value|
+-----+----------+---------------------+----------+---------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+-----------------+------------------+-----------------+---+----------+
+-----+----------+---------------------+----------+---------------------+------------------+------------------+-----------------+------

In [37]:
df = spark\
      .readStream \
      .format("kafka") \
      .option("kafka.bootstrap.servers", "localhost:9092") \
      .option("subscribe", "bitcoin") \
      .option("startingOffsets", "earliest") \
      .load()
      
query = flattened_df \
    .writeStream \
    .format("console") \
    .option("checkpointLocation", "path/to/HDFS/dir") \
    .start()

query.awaitTermination()

24/09/23 15:34:40 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.
24/09/23 15:34:40 WARN StreamingQueryManager: Stopping existing streaming query [id=fea4c1ab-b277-430a-a538-0f5cc54a7dca, runId=d6df1f01-e184-4963-b051-c190422d2429], as a new run is being started.
24/09/23 15:34:40 WARN AdminClientConfig: The configuration 'key.deserializer' was supplied but isn't a known config.
24/09/23 15:34:40 WARN AdminClientConfig: The configuration 'value.deserializer' was supplied but isn't a known config.
24/09/23 15:34:40 WARN AdminClientConfig: The configuration 'enable.auto.commit' was supplied but isn't a known config.
24/09/23 15:34:40 WARN AdminClientConfig: The configuration 'max.poll.records' was supplied but isn't a known config.
24/09/23 15:34:40 WARN AdminClientConfig: The configuration 'auto.offset.reset' was supplied but isn't a known config.


-------------------------------------------
Batch: 1
-------------------------------------------
+-----+----------+---------------------+----------+---------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+-----------------+------------------+-----------------+---+----------+
|price|volume_24h|volume_24h_change_24h|market_cap|market_cap_change_24h|percent_change_15m|percent_change_30m|percent_change_1h|percent_change_6h|percent_change_12h|percent_change_24h|percent_change_7d|percent_change_30d|percent_change_1y|BTC|beta_value|
+-----+----------+---------------------+----------+---------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+-----------------+------------------+-----------------+---+----------+
+-----+----------+---------------------+----------+---------------------+------------------+------------------+-----------------+------

ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/Users/michieldekoninck/.pyenv/versions/3.10.6/envs/kafka_streamer/lib/python3.10/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "/Users/michieldekoninck/.pyenv/versions/3.10.6/envs/kafka_streamer/lib/python3.10/site-packages/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/Users/michieldekoninck/.pyenv/versions/3.10.6/lib/python3.10/socket.py", line 705, in readinto
    return self._sock.recv_into(b)
KeyboardInterrupt


KeyboardInterrupt: 

In [1]:
import datetime

ls = [[0.0, 0.4920446276664734, 16831.85,
        datetime.datetime(2023, 1, 6, 1, 0)],
       [0.0, 0.46967583894729614, 16828.06,
        datetime.datetime(2023, 1, 6, 1, 5)],
       [1.0, 0.5353881120681763, 16834.23,
        datetime.datetime(2023, 1, 6, 1, 10)],
       [0.0, 0.4920446276664734, 16830.32,
        datetime.datetime(2023, 1, 6, 1, 15)],
       [0.0, 0.45143428444862366, 16833.99,
        datetime.datetime(2023, 1, 6, 1, 20)],
       [1.0, 0.6198055148124695, 16836.33,
        datetime.datetime(2023, 1, 6, 1, 25)],
       [1.0, 0.6198055148124695, 16841.52,
        datetime.datetime(2023, 1, 6, 1, 30)],
       [1.0, 0.7495512962341309, 16852.44,
        datetime.datetime(2023, 1, 6, 1, 35)],
       [1.0, 0.7137175798416138, 16853.92,
        datetime.datetime(2023, 1, 6, 1, 40)],
       [1.0, 0.7495512962341309, 16851.03,
        datetime.datetime(2023, 1, 6, 1, 45)],
       [1.0, 0.803597629070282, 16868.16,
        datetime.datetime(2023, 1, 6, 1, 50)],
       [1.0, 0.7495512962341309, 16860.37,
        datetime.datetime(2023, 1, 6, 1, 55)],
       [1.0, 0.7137175798416138, 16859.78,
        datetime.datetime(2023, 1, 6, 2, 0)],
       [1.0, 0.5580217242240906, 16856.49,
        datetime.datetime(2023, 1, 6, 2, 5)],
       [1.0, 0.5622053742408752, 16854.33,
        datetime.datetime(2023, 1, 6, 2, 10)],
       [1.0, 0.6544626951217651, 16862.45,
        datetime.datetime(2023, 1, 6, 2, 15)],
       [0.0, 0.3762359917163849, 16857.6,
        datetime.datetime(2023, 1, 6, 2, 20)],
       [0.0, 0.40706783533096313, 16853.4,
        datetime.datetime(2023, 1, 6, 2, 25)],
       [0.0, 0.3835005462169647, 16853.36,
        datetime.datetime(2023, 1, 6, 2, 30)],
       [0.0, 0.377326637506485, 16849.8,
        datetime.datetime(2023, 1, 6, 2, 35)],
       [0.0, 0.41008448600769043, 16850.31,
        datetime.datetime(2023, 1, 6, 2, 40)],
       [0.0, 0.34698569774627686, 16852.75,
        datetime.datetime(2023, 1, 6, 2, 45)],
       [0.0, 0.32150697708129883, 16845.65,
        datetime.datetime(2023, 1, 6, 2, 50)],
       [0.0, 0.2563808262348175, 16833.06,
        datetime.datetime(2023, 1, 6, 2, 55)],
       [0.0, 0.2178836166858673, 16827.8,
        datetime.datetime(2023, 1, 6, 3, 0)],
       [0.0, 0.19080787897109985, 16808.78,
        datetime.datetime(2023, 1, 6, 3, 5)],
       [0.0, 0.2178836166858673, 16823.51,
        datetime.datetime(2023, 1, 6, 3, 10)],
       [0.0, 0.2038448303937912, 16821.32,
        datetime.datetime(2023, 1, 6, 3, 15)],
       [0.0, 0.31175002455711365, 16831.37,
        datetime.datetime(2023, 1, 6, 3, 20)],
       [0.0, 0.43129268288612366, 16830.39,
        datetime.datetime(2023, 1, 6, 3, 25)],
       [0.0, 0.4398581087589264, 16825.78,
        datetime.datetime(2023, 1, 6, 3, 30)],
       [1.0, 0.6708728075027466, 16823.66,
        datetime.datetime(2023, 1, 6, 3, 35)],
       [0.0, 0.45802244544029236, 16824.26,
        datetime.datetime(2023, 1, 6, 3, 40)],
       [1.0, 0.6054764986038208, 16830.93,
        datetime.datetime(2023, 1, 6, 3, 45)],
       [0.0, 0.46967583894729614, 16832.81,
        datetime.datetime(2023, 1, 6, 3, 50)],
       [1.0, 0.5353881120681763, 16833.71,
        datetime.datetime(2023, 1, 6, 3, 55)],
       [1.0, 0.6377415657043457, 16837.06,
        datetime.datetime(2023, 1, 6, 4, 0)],
       [1.0, 0.6377415657043457, 16833.52,
        datetime.datetime(2023, 1, 6, 4, 5)],
       [1.0, 0.5191695094108582, 16827.61,
        datetime.datetime(2023, 1, 6, 4, 10)],
       [0.0, 0.4579290449619293, 16828.81,
        datetime.datetime(2023, 1, 6, 4, 15)],
       [1.0, 0.5130391120910645, 16833.54,
        datetime.datetime(2023, 1, 6, 4, 20)],
       [1.0, 0.591326892375946, 16838.31,
        datetime.datetime(2023, 1, 6, 4, 25)],
       [1.0, 0.5457282066345215, 16838.76,
        datetime.datetime(2023, 1, 6, 4, 30)],
       [1.0, 0.5457282066345215, 16836.61,
        datetime.datetime(2023, 1, 6, 4, 35)],
       [1.0, 0.6057042479515076, 16834.04,
        datetime.datetime(2023, 1, 6, 4, 40)],
       [1.0, 0.6377415657043457, 16839.37,
        datetime.datetime(2023, 1, 6, 4, 45)],
       [1.0, 0.591326892375946, 16838.43,
        datetime.datetime(2023, 1, 6, 4, 50)],
       [1.0, 0.5130391120910645, 16839.1,
        datetime.datetime(2023, 1, 6, 4, 55)],
       [0.0, 0.3999825417995453, 16833.56,
        datetime.datetime(2023, 1, 6, 5, 0)],
       [1.0, 0.5130391120910645, 16836.29,
        datetime.datetime(2023, 1, 6, 5, 5)],
       [0.0, 0.45143428444862366, 16832.16,
        datetime.datetime(2023, 1, 6, 5, 10)],
       [0.0, 0.35073554515838623, 16831.46,
        datetime.datetime(2023, 1, 6, 5, 15)],
       [0.0, 0.2178836166858673, 16813.98,
        datetime.datetime(2023, 1, 6, 5, 20)],
       [0.0, 0.26647377014160156, 16820.91,
        datetime.datetime(2023, 1, 6, 5, 25)],
       [0.0, 0.24132995307445526, 16810.94,
        datetime.datetime(2023, 1, 6, 5, 30)],
       [0.0, 0.26647377014160156, 16817.34,
        datetime.datetime(2023, 1, 6, 5, 35)],
       [0.0, 0.31175002455711365, 16818.77,
        datetime.datetime(2023, 1, 6, 5, 40)],
       [0.0, 0.31175002455711365, 16816.99,
        datetime.datetime(2023, 1, 6, 5, 45)],
       [1.0, 0.5848487019538879, 16820.57,
        datetime.datetime(2023, 1, 6, 5, 50)],
       [0.0, 0.4920446276664734, 16823.65,
        datetime.datetime(2023, 1, 6, 5, 55)],
       [1.0, 0.6377415657043457, 16822.99,
        datetime.datetime(2023, 1, 6, 6, 0)],
       [0.0, 0.4920446276664734, 16820.77,
        datetime.datetime(2023, 1, 6, 6, 5)],
       [0.0, 0.4920446276664734, 16820.39,
        datetime.datetime(2023, 1, 6, 6, 10)],
       [0.0, 0.45143428444862366, 16816.3,
        datetime.datetime(2023, 1, 6, 6, 15)],
       [0.0, 0.3999825417995453, 16815.59,
        datetime.datetime(2023, 1, 6, 6, 20)],
       [0.0, 0.35073554515838623, 16816.7,
        datetime.datetime(2023, 1, 6, 6, 25)],
       [0.0, 0.31175002455711365, 16810.0,
        datetime.datetime(2023, 1, 6, 6, 30)],
       [0.0, 0.35073554515838623, 16813.52,
        datetime.datetime(2023, 1, 6, 6, 35)],
       [0.0, 0.377326637506485, 16814.18,
        datetime.datetime(2023, 1, 6, 6, 40)],
       [0.0, 0.32150697708129883, 16804.15,
        datetime.datetime(2023, 1, 6, 6, 45)],
       [0.0, 0.32150697708129883, 16805.01,
        datetime.datetime(2023, 1, 6, 6, 50)],
       [0.0, 0.32150697708129883, 16805.95,
        datetime.datetime(2023, 1, 6, 6, 55)],
       [0.0, 0.46967583894729614, 16810.18,
        datetime.datetime(2023, 1, 6, 7, 0)],
       [0.0, 0.46967583894729614, 16813.94,
        datetime.datetime(2023, 1, 6, 7, 5)],
       [0.0, 0.46967583894729614, 16814.17,
        datetime.datetime(2023, 1, 6, 7, 10)],
       [1.0, 0.570250391960144, 16810.09,
        datetime.datetime(2023, 1, 6, 7, 15)],
       [0.0, 0.4920446276664734, 16808.76,
        datetime.datetime(2023, 1, 6, 7, 20)],
       [0.0, 0.3999825417995453, 16801.19,
        datetime.datetime(2023, 1, 6, 7, 25)],
       [0.0, 0.32150697708129883, 16799.28,
        datetime.datetime(2023, 1, 6, 7, 30)],
       [0.0, 0.2178836166858673, 16789.48,
        datetime.datetime(2023, 1, 6, 7, 35)],
       [0.0, 0.26647377014160156, 16797.43,
        datetime.datetime(2023, 1, 6, 7, 40)],
       [0.0, 0.3375486433506012, 16801.07,
        datetime.datetime(2023, 1, 6, 7, 45)],
       [0.0, 0.3246016204357147, 16796.35,
        datetime.datetime(2023, 1, 6, 7, 50)],
       [0.0, 0.43129268288612366, 16798.68,
        datetime.datetime(2023, 1, 6, 7, 55)],
       [0.0, 0.3246016204357147, 16786.86,
        datetime.datetime(2023, 1, 6, 8, 0)],
       [0.0, 0.4398581087589264, 16787.38,
        datetime.datetime(2023, 1, 6, 8, 5)],
       [0.0, 0.36434343457221985, 16789.35,
        datetime.datetime(2023, 1, 6, 8, 10)],
       [0.0, 0.27821335196495056, 16784.18,
        datetime.datetime(2023, 1, 6, 8, 15)],
       [0.0, 0.36434343457221985, 16789.48,
        datetime.datetime(2023, 1, 6, 8, 20)],
       [0.0, 0.3999825417995453, 16795.73,
        datetime.datetime(2023, 1, 6, 8, 25)],
       [1.0, 0.6620784997940063, 16799.85,
        datetime.datetime(2023, 1, 6, 8, 30)],
       [1.0, 0.6377415657043457, 16798.44,
        datetime.datetime(2023, 1, 6, 8, 35)],
       [1.0, 0.6198055148124695, 16796.25,
        datetime.datetime(2023, 1, 6, 8, 40)],
       [1.0, 0.6377415657043457, 16793.91,
        datetime.datetime(2023, 1, 6, 8, 45)],
       [0.0, 0.46967583894729614, 16789.7,
        datetime.datetime(2023, 1, 6, 8, 50)],
       [0.0, 0.45143428444862366, 16793.67,
        datetime.datetime(2023, 1, 6, 8, 55)],
       [0.0, 0.39956384897232056, 16794.06,
        datetime.datetime(2023, 1, 6, 9, 0)],
       [0.0, 0.40628525614738464, 16794.32,
        datetime.datetime(2023, 1, 6, 9, 5)],
       [1.0, 0.5257048606872559, 16798.63,
        datetime.datetime(2023, 1, 6, 9, 10)],
       [1.0, 0.6198055148124695, 16800.79,
        datetime.datetime(2023, 1, 6, 9, 15)]]

In [2]:
import numpy as np

data = np.array(ls)

In [3]:
# Extract columns for plotting
x_data = data[:, 3]  # datetime for x-axis
y_data = data[:, 2]  # index 2 for y-axis
color_data = data[:, 0]  # index 0 for color

In [21]:
!pip install nbformat

Collecting nbformat
  Using cached nbformat-5.10.4-py3-none-any.whl (78 kB)
Collecting fastjsonschema>=2.15
  Using cached fastjsonschema-2.20.0-py3-none-any.whl (23 kB)
Collecting jsonschema>=2.6
  Using cached jsonschema-4.23.0-py3-none-any.whl (88 kB)
Collecting referencing>=0.28.4
  Using cached referencing-0.35.1-py3-none-any.whl (26 kB)
Collecting jsonschema-specifications>=2023.03.6
  Using cached jsonschema_specifications-2023.12.1-py3-none-any.whl (18 kB)
Collecting rpds-py>=0.7.1
  Using cached rpds_py-0.20.0-cp310-cp310-macosx_11_0_arm64.whl (311 kB)
Installing collected packages: fastjsonschema, rpds-py, referencing, jsonschema-specifications, jsonschema, nbformat
Successfully installed fastjsonschema-2.20.0 jsonschema-4.23.0 jsonschema-specifications-2023.12.1 nbformat-5.10.4 referencing-0.35.1 rpds-py-0.20.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m

In [7]:
import plotly.graph_objs as go
# Create the scatter plot using Plotly

# Map colors: 0 -> red, 1 -> green
colors = ['red' if val == 0 else 'green' for val in color_data]

# Create the scatter plot using Plotly
fig = go.Figure()

# Add trace with specific colors
fig.add_trace(go.Scatter(
    x=x_data, 
    y=y_data, 
    mode='markers',
    marker=dict(color=colors, size=10),  # Specify colors and marker size
    text=color_data
))

# Update layout for better visualization
fig.update_layout(
    title='BTC Price predictions',
    yaxis_title='BTC Price',
    xaxis=dict(tickformat='%Y-%m-%d %H:%M', tickangle=45)
)

# Show the plot
fig.show()

In [8]:
!pip install bokeh


Collecting bokeh
  Downloading bokeh-3.5.2-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting xyzservices>=2021.09.1
  Downloading xyzservices-2024.9.0-py3-none-any.whl (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.1/85.1 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting Jinja2>=2.9
  Using cached jinja2-3.1.4-py3-none-any.whl (133 kB)
Collecting MarkupSafe>=2.0
  Using cached MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl (18 kB)
Installing collected packages: xyzservices, MarkupSafe, Jinja2, bokeh
Successfully installed Jinja2-3.1.4 MarkupSafe-2.1.5 bokeh-3.5.2 xyzservices-2024.9.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgr

In [10]:
from bokeh.io import output_file, save
from bokeh.models import ColumnDataSource, DateRangeSlider, CustomJS
from bokeh.plotting import figure
from bokeh.layouts import column
import numpy as np
import datetime

# Convert datetime objects to numpy datetime64 for compatibility with Bokeh
data[:, 3] = np.array([np.datetime64(item) for item in data[:, 3]])

# Create a ColumnDataSource with the data
source = ColumnDataSource(data=dict(
    x=data[:, 3],
    y=data[:, 2],
    color=['red' if val == 0 else 'green' for val in data[:, 0]]
))

# Create a Bokeh figure
p = figure(x_axis_type="datetime", title="Scatter plot with color (0: Red, 1: Green)",
           x_axis_label='DateTime (index 3)', y_axis_label='Value (index 2)', width=800)

# Add circle glyphs to the plot
p.circle('x', 'y', size=10, color='color', source=source)

# Create a DateRangeSlider
date_range_slider = DateRangeSlider(title="Date Range: ", 
                                    start=min(data[:, 3]), 
                                    end=max(data[:, 3]), 
                                    value=(min(data[:, 3]), max(data[:, 3])), 
                                    step=1)

# JavaScript callback to update the plot based on the date range selected
callback = CustomJS(args=dict(source=source, slider=date_range_slider, data=data), code="""
    const start = new Date(slider.value[0]);
    const end = new Date(slider.value[1]);
    const filtered_data = {x: [], y: [], color: []};
    
    for (let i = 0; i < data.length; i++) {
        const date = new Date(data[i][3]);
        if (date >= start && date <= end) {
            filtered_data['x'].push(new Date(data[i][3]));
            filtered_data['y'].push(data[i][2]);
            filtered_data['color'].push(data[i][0] === 0 ? 'red' : 'green');
        }
    }
    
    source.data = filtered_data;
    source.change.emit();
""")

# Attach the callback to the DateRangeSlider
date_range_slider.js_on_change('value', callback)

# Layout for the plot and slider
layout = column(p, date_range_slider)

# Output the plot to an HTML file
output_file("interactive_bokeh_plot.html")

# Save the layout as an HTML file
save(layout)

print("The plot has been saved as 'interactive_bokeh_plot.html'.")

The plot has been saved as 'interactive_bokeh_plot.html'.




In [60]:
import pandas as pd

# Read the CSV file
plot_set = pd.read_csv('../preds.csv', index_col=0).sort_values('3')
plot_set.columns = ['label', 'prob', 'price', 'time']
plot_set['time'] = pd.to_datetime(plot_set['time'])
plot_set.set_index('time', inplace=True)

plot_set

Unnamed: 0_level_0,label,prob,price
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01 00:00:00,0.0,0.217884,16520.81
2023-01-01 00:05:00,0.0,0.203845,16506.14
2023-01-01 00:10:00,0.0,0.217337,16495.24
2023-01-01 00:15:00,0.0,0.217337,16502.67
2023-01-01 00:20:00,1.0,0.570250,16523.86
...,...,...,...
2023-08-01 15:00:00,1.0,0.773187,28878.57
2023-08-01 15:05:00,1.0,0.512515,28882.89
2023-08-01 15:10:00,0.0,0.336891,28872.48
2023-08-01 15:15:00,0.0,0.380326,28895.99


In [70]:
resampled = plot_set.resample('d').agg({'label': 'sum', 'prob':'count', 'price':'last'}).reset_index()
resampled['buy_percentage'] = resampled['label'] / resampled['prob']
resampled = np.array(resampled)

In [74]:
time = resampled[:, 0]  # datetime for x-axis
probabilities = resampled[:, -1]  # index 2 for y-axis
probabilities = np.where(probabilities < 0.5, 0, probabilities)
prices = resampled[:, -2]  # index 0 for color

In [75]:
probabilities

array([0, 0, 0, 0.5798611111111112, 0.9166666666666666, 0, 0,
       0.5659722222222222, 0.6180555555555556, 0.5729166666666666,
       0.5277777777777778, 0.8333333333333334, 0.6413043478260869,
       0.5694444444444444, 0.5451388888888888, 0.5381944444444444,
       0.5243055555555556, 0, 0, 0.5833333333333334, 0.625, 0,
       0.5451388888888888, 0, 0.5381944444444444, 0.5833333333333334, 0,
       0, 0.59375, 0, 0.5694444444444444, 0, 0, 0, 0, 0, 0,
       0.5520833333333334, 0, 0, 0, 0.5034722222222222,
       0.5034722222222222, 0, 0.5451388888888888, 0.5972222222222222,
       0.5833333333333334, 0.5, 0, 0.5173611111111112, 0,
       0.5138888888888888, 0.5, 0.75, 0, 0, 0.5104166666666666,
       0.5208333333333334, 0, 0, 1.0, 0, 0, 0.5034722222222222,
       0.5277777777777778, 0, 0, 0, 0, 0.5625, 0.5729166666666666,
       0.5868055555555556, 0.5347222222222222, 0, 0, 0.6268115942028986,
       0, 0.5729166666666666, 0, 0, 0.53125, 0.6666666666666666, 0, 0,
       0.536231884

In [76]:
import plotly.graph_objects as go

# Create figure
fig = go.Figure()

# Add probabilities line on the left y-axis
fig.add_trace(
    go.Bar(x=time, y=probabilities, name='Probabilities', yaxis='y1', marker=dict(color='green'))
)

# Add prices line on the right y-axis
fig.add_trace(
    go.Scatter(x=time, y=prices, mode='lines', name='Prices', yaxis='y2', marker=dict(color='red'))
)

# Update layout for two y-axes
fig.update_layout(
    title="Probabilities and Prices Over Time",
    xaxis_title="Time",
    yaxis_title="Probabilities",
    yaxis=dict(
        title="Probabilities",
        range=[0, 1],
        showgrid=False
    ),
    yaxis2=dict(
        title="Prices",
        overlaying='y',
        side='right',
       # range=[10000, 20000]
    ),
    barmode='group',
    legend=dict(x=0.01, y=0.99)
)

# Show the plot
fig.show()
