In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, rand, current_timestamp, concat, lit
import os


# Reuse the active Spark session if one exists, otherwise create one.
spark = SparkSession.builder.getOrCreate()

print("Generating random data...")
# Build 10,000 synthetic sensor rows on top of the `id` column from spark.range.
# NOTE: current_timestamp() is evaluated once per query, so every row written
# by a single run carries the same timestamp value.
df = (
    spark.range(0, 10000)
    .withColumn("sensor_id", (rand() * 100).cast("int"))
    .withColumn("reading_value", rand() * 50.0)
    .withColumn("timestamp", current_timestamp())
    .withColumn("status", concat(lit("Status_"), (rand() * 5).cast("int")))
)

# Credentials are kept out of the JDBC URL (connection strings are commonly
# echoed in logs and error messages) and passed as connection properties.
# They can be overridden through the environment; the fallbacks preserve the
# values this notebook originally used.
ch_user = os.environ.get("CLICKHOUSE_USER", "spark_admin")
ch_password = os.environ.get("CLICKHOUSE_PASSWORD", "spark_123")

ch_url = "jdbc:ch://analytics-clickhouse:8123/default"

ch_properties = {
    "driver": "com.clickhouse.jdbc.ClickHouseDriver",
    "user": ch_user,
    "password": ch_password,
    # Appended to the CREATE TABLE statement Spark issues for the target table.
    "createTableOptions": "ENGINE = MergeTree() ORDER BY (timestamp, sensor_id)",
}

print(f"Attempting write with {ch_user} user...")
try:
    # mode="overwrite" replaces any existing contents of the target table.
    df.write.jdbc(
        url=ch_url,
        table="random_sensor_data",
        mode="overwrite",
        properties=ch_properties,
    )
    print("✅ Data loaded into ClickHouse.")
except Exception as e:
    # Best-effort notebook cell: report the failure instead of aborting.
    print(f"❌ Error: {e}")

Generating random data...
Attempting write with spark_admin user...
✅ Data loaded into ClickHouse.
