In [None]:
# Import every dependency used by this script. Any exception raised while
# importing is printed and swallowed, so execution continues even when a
# module is missing.
try:
    import sys, random, uuid
    from pyspark.context import SparkContext
    from pyspark.sql.session import SparkSession
    from pyspark.sql.functions import col, to_timestamp, monotonically_increasing_id, to_date, when, lit
    # NOTE(review): the two wildcard imports below pull the public names of
    # these modules into the namespace and may make the explicit import above
    # redundant — confirm which names are actually needed.
    from pyspark.sql.functions import *
    from pyspark.sql.types import *
    from datetime import datetime, date
    import boto3
    from functools import reduce
    from pyspark.sql import Row
    from faker import Faker
except Exception as e:
    # Report the failure but do not re-raise it.
    print("Modules are missing : {} ".format(e))

# ==================== CONFIGURATION ====================
# Timestamp layout string and the (naive, local) moment this job started.
ts_format = '%Y-%m-%d %H:%M:%S'
job_start_ts = datetime.now()

# Spark Master URL (inside docker network)
spark_master_url = "spark://spark-master:7077"

# MinIO connection: endpoint URL plus the flag that drives the s3a SSL setting.
minio_endpoint, minio_secure = 'http://minio:9000', False
# NOTE(review): credentials are hard-coded here — confirm these are
# development-only values.
minio_access_key, minio_secret_key = 'admin', 'password'

# ==================== SPARK SESSION ====================
# Build (or reuse, via getOrCreate) the SparkSession for this script.
# The master URL is taken from `spark_master_url` rather than repeated as a
# literal, so the value handed to the builder and the value reported in the
# message below cannot drift apart.
spark = (
    SparkSession.builder
    .appName("Jupyter-Spark-MinIO")
    .master(spark_master_url)  # <--- CONNECT TO SPARK CLUSTER HERE
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension")
    # NOTE(review): "className" is not a `spark.`-prefixed property — confirm
    # that anything actually reads it.
    .config("className", "org.apache.hudi")
    .config("spark.sql.hive.convertMetastoreParquet", "false")
    .getOrCreate()
)

print(f"✅ Connected to Spark Master at: {spark_master_url}")
print(f"Spark Version: {spark.version}")

# ==================== MINIO / S3 CONFIG ====================
# Point the s3a filesystem at MinIO by writing each property below into the
# Hadoop configuration of the running Spark context.
hadoop_conf = spark._jsc.hadoopConfiguration()

# Property name -> value, applied in the order listed.
_s3a_settings = {
    "fs.s3a.endpoint": minio_endpoint,
    "fs.s3a.access.key": minio_access_key,
    "fs.s3a.secret.key": minio_secret_key,
    "fs.s3a.path.style.access": "true",
    # Hadoop expects the lowercase strings "true"/"false", not a Python bool.
    "fs.s3a.connection.ssl.enabled": str(minio_secure).lower(),
    "fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
    "fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider",
}
for _prop_name, _prop_value in _s3a_settings.items():
    hadoop_conf.set(_prop_name, _prop_value)

print("✅ Spark and MinIO configuration completed successfully.")
