In [1]:
%%configure -f
{
    "conf": {
        "spark.sql.extensions": "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
        "spark.sql.catalog.demo": "org.apache.iceberg.spark.SparkCatalog",
        "spark.sql.catalog.demo.catalog-impl": "org.apache.iceberg.aws.glue.GlueCatalog",
        "spark.sql.catalog.demo.warehouse": "s3://elasticmapreduce-uraeusdev/ICEBERG/"
    }
}

In [2]:
# Session settings that wire the `demo` catalog to Iceberg; these are the same
# keys supplied in the %%configure cell.
configs = [
    "spark.sql.catalog.demo",
    "spark.sql.catalog.demo.catalog-impl",
    "spark.sql.catalog.demo.warehouse",
    "spark.sql.extensions"
]

print("Current Iceberg Configurations:")
for config in configs:
    # Supply a default: without one, spark.conf.get raises for a key that is
    # not set, which would abort this listing instead of reporting the gap.
    value = spark.conf.get(config, "<not set>")
    print(f"{config}: {value}")

# SHOW TABLES is issued without an explicit catalog/namespace, so it lists
# whatever the session's current namespace contains.
print("\nTrying alternate table listing syntax:")
tables_df = spark.sql("SHOW TABLES")
tables_df.show()

VBox()

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log,User,Current session?
5,application_1732781624275_0034,pyspark,idle,Link,Link,,✔


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

SparkSession available as 'spark'.


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Current Iceberg Configurations:
spark.sql.catalog.demo: org.apache.iceberg.spark.SparkCatalog
spark.sql.catalog.demo.catalog-impl: org.apache.iceberg.aws.glue.GlueCatalog
spark.sql.catalog.demo.warehouse: s3://elasticmapreduce-uraeusdev/ICEBERG/
spark.sql.extensions: org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions

Trying alternate table listing syntax:
+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
+---------+---------+-----------+

In [5]:
def create_tables():
    """Create the uraeus_db database and its four Iceberg tables in the demo catalog.

    Issues ``CREATE ... IF NOT EXISTS`` statements through the session-level
    ``spark`` object for:

    * ``demo.uraeus_db.event_logs``
    * ``demo.uraeus_db.security_events``
    * ``demo.uraeus_db.event_logs_aggs``
    * ``demo.uraeus_db.security_events_aggs``

    and then prints the table listing and the extended description of every
    table so the result can be checked in the cell output.

    Returns:
        None. All results are printed.

    Raises:
        Exception: any error raised by ``spark.sql`` is printed and re-raised
            unchanged so the caller can react to it.
    """
    try:
        # Create database if not exists
        print("\nCreating database...")
        spark.sql("CREATE DATABASE IF NOT EXISTS demo.uraeus_db")

        # Create event_logs table
        print("\nCreating event_logs table...")
        spark.sql("""
        CREATE TABLE IF NOT EXISTS demo.uraeus_db.event_logs (
            Log_ID string,
            Timestamp timestamp,
            Signal_Name string,
            Signal_Value string,
            Signal_Unit string,
            VIN string,
            Vehicle_Model string,
            Fleet_Name string,
            Campaign_ID string,
            Component string,
            Datatype string,
            Min double,
            Max double,
            Signal_Fully_Qualified_Name string,
            year int,
            month int, 
            day int,
            _inserted_timestamp timestamp
        )
        USING iceberg
        PARTITIONED BY (year, month, day)
        LOCATION 's3://elasticmapreduce-uraeusdev/ICEBERG/uraeus_db/event_logs'
        TBLPROPERTIES (
            'write.format.default' = 'parquet',
            'write.parquet.compression-codec' = 'zstd',
            'write.target-file-size-bytes' = '536870912',
            'format-version' = '2'
        )
        """)

        # Create security_events table
        print("\nCreating security_events table...")
        spark.sql("""
        CREATE TABLE IF NOT EXISTS demo.uraeus_db.security_events (
            Alert_ID string,
            ID string,
            Timestamp timestamp,
            VIN string,
            Severity string,
            SEV_Msg string,
            Origin string,
            NetworkType string,
            NetworkID string,
            Vehicle_Model string,
            Fleet_Name string,
            SEV_Name string,
            Rule_ID string,
            Description string,
            SEV_Status string,
            year int,
            month int,
            day int,
            _inserted_timestamp timestamp
        )
        USING iceberg
        PARTITIONED BY (year, month, day, Severity)
        LOCATION 's3://elasticmapreduce-uraeusdev/ICEBERG/uraeus_db/security_events'
        TBLPROPERTIES (
            'write.format.default' = 'parquet',
            'write.parquet.compression-codec' = 'zstd',
            'write.target-file-size-bytes' = '536870912',
            'format-version' = '2'
        )
        """)

        # Create event logs aggregations table
        # (announced like the base tables so the cell output shows every step)
        print("\nCreating event_logs_aggs table...")
        spark.sql("""
        CREATE TABLE IF NOT EXISTS demo.uraeus_db.event_logs_aggs (
            start timestamp,
            end timestamp,
            VIN string,
            Signal_ID string,
            Signal_Name string,
            count long,
            Fleet_Name string,
            year int,
            month int,
            day int,
            _inserted_timestamp timestamp
        )
        USING iceberg
        PARTITIONED BY (year, month, day)
        LOCATION 's3://elasticmapreduce-uraeusdev/ICEBERG/uraeus_db/event_logs_aggs'
        TBLPROPERTIES (
            'write.format.default' = 'parquet',
            'write.parquet.compression-codec' = 'zstd',
            'format-version' = '2'
        )
        """)

        # Create security events aggregations table
        print("\nCreating security_events_aggs table...")
        spark.sql("""
        CREATE TABLE IF NOT EXISTS demo.uraeus_db.security_events_aggs (
            start timestamp,
            end timestamp,
            ID string,
            SEV_Name string,
            VIN string,
            Severity string,
            Fleet string,
            count long,
            year int,
            month int,
            day int,
            _inserted_timestamp timestamp
        )
        USING iceberg
        PARTITIONED BY (year, month, day, Severity)
        LOCATION 's3://elasticmapreduce-uraeusdev/ICEBERG/uraeus_db/security_events_aggs'
        TBLPROPERTIES (
            'write.format.default' = 'parquet',
            'write.parquet.compression-codec' = 'zstd',
            'format-version' = '2'
        )
        """)

        # Verify creation
        print("\nVerifying tables:")
        spark.sql("SHOW TABLES IN demo.uraeus_db").show()

        # Describe every table created above, base tables first so the start
        # of the output matches what earlier runs printed.
        print("\nVerifying table locations:")
        for table_name in (
            "event_logs",
            "security_events",
            "event_logs_aggs",
            "security_events_aggs",
        ):
            spark.sql(
                f"DESCRIBE TABLE EXTENDED demo.uraeus_db.{table_name}"
            ).show(truncate=False)

    except Exception as e:
        # Report the failure in the cell output, then propagate it untouched.
        print(f"Error creating tables: {str(e)}")
        raise

# Execute the table setup; report the outcome and let any failure propagate.
try:
    create_tables()
except Exception as exc:
    print(f"\nError during table creation: {exc}")
    raise
else:
    print("\nTables created successfully!")

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Creating database...

Creating event_logs table...

Creating security_events table...

Verifying tables:
+---------+--------------------+-----------+
|namespace|           tableName|isTemporary|
+---------+--------------------+-----------+
|uraeus_db|          event_logs|      false|
|uraeus_db|     event_logs_aggs|      false|
|uraeus_db|     security_events|      false|
|uraeus_db|security_events_aggs|      false|
+---------+--------------------+-----------+


Verifying table locations:
+---------------------------+---------+-------+
|col_name                   |data_type|comment|
+---------------------------+---------+-------+
|Log_ID                     |string   |null   |
|Timestamp                  |timestamp|null   |
|Signal_Name                |string   |null   |
|Signal_Value               |string   |null   |
|Signal_Unit                |string   |null   |
|VIN                        |string   |null   |
|Vehicle_Model              |string   |null   |
|Fleet_Name              

In [4]:
# Preview the first rows of each base table (show() applies its default row limit).
for table_name in ("event_logs", "security_events"):
    spark.sql(f"SELECT * FROM demo.uraeus_db.{table_name}").show()

VBox()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------+-------------------+------------------+------------+-----------+-----------------+-------------+-----------+-------------+---------------+------------------+-----+-----+---------------------------+----+-----+---+--------------------+
|Log_ID|          Timestamp|       Signal_Name|Signal_Value|Signal_Unit|              VIN|Vehicle_Model| Fleet_Name|  Campaign_ID|      Component|          Datatype|  Min|  Max|Signal_Fully_Qualified_Name|year|month|day| _inserted_timestamp|
+------+-------------------+------------------+------------+-----------+-----------------+-------------+-----------+-------------+---------------+------------------+-----+-----+---------------------------+----+-----+---+--------------------+
|  null|2024-11-28 13:19:55|          CPU_Load|          33|          %|2EFSWQ2O7JR521017|      DEFAULT|Fleet Bravo|Campaign_ID_3|            ECU|Unsigned Short Int|  0.0|100.0|       Vehicle.Cabin.Inf...|2024|   11| 28|2024-11-28 13:20:...|
|  null|2024-11-28 13:19:55|    