In [0]:
import sys
sys.path.insert(0, "../utils")
from logger import log_bronze_ingestion
import uuid
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql.functions import col, current_timestamp, lit

In [0]:
# --- Run configuration -------------------------------------------------------
# A fresh random UUID identifies this notebook execution; it is stamped on the
# written rows and passed to the ingestion logger below.
run_id = str(uuid.uuid4())

# Logical name of the dataset handled by this notebook.
dataset_name = "license_mapping"

# S3 locations: where the source CSV files are read from, and where the bronze
# Delta files are written.
landing_path = "s3://nyc-tlc-data-398563707364/landing/license_mapping/"
bronze_path = "s3://nyc-tlc-data-398563707364/bronze/license_mapping/"

# Catalog identifiers: the bronze table registered over `bronze_path`, and the
# table name handed to the ingestion logger.
catalog_table = "nyc_taxi.bronze.license_mapping"
log_table = "nyc_taxi.logs.bronze_ingestion_logs"

In [0]:
# Bronze ingestion for the license-mapping dataset:
#   1. read the landing CSV files with an explicit schema,
#   2. normalise column names and add audit columns,
#   3. overwrite the bronze Delta location and register it in the catalog,
#   4. record the outcome through `log_bronze_ingestion`.
# On any failure the error is printed and logged, then re-raised so the
# notebook run (and any job task wrapping it) is reported as failed instead of
# silently succeeding.
try:
    # Every source column is read as a nullable string; no type coercion is
    # applied at this stage.
    csv_schema = StructType(
        [
            StructField("High Volume", StringType(), True),
            StructField("License Number", StringType(), True),
            StructField("Base Name", StringType(), True),
            StructField("App Company Affiliation", StringType(), True),
        ]
    )

    license_raw = spark.read.csv(
        path=landing_path, header=True, schema=csv_schema, sep=","
    )

    # "License Number" -> "license_number", etc.
    renamed_columns = [
        col(c).alias(c.replace(" ", "_").lower()) for c in license_raw.columns
    ]

    # The hidden `_metadata` column is exposed by the file-source DataFrame.
    # It is resolved here, in the same projection as the renames, rather than
    # after a `select` that no longer carries it. Output column order is
    # unchanged: renamed columns, _ingest_ts, _source_file, run_id.
    license_bronze = license_raw.select(
        *renamed_columns,
        current_timestamp().alias("_ingest_ts"),
        col("_metadata.file_name").alias("_source_file"),
        lit(run_id).alias("run_id"),
    )

    # `.save()` returns None, so its result is deliberately not assigned.
    license_bronze.write.format("delta").mode("overwrite").save(bronze_path)

    # Register the Delta location as an external table on first run; a no-op
    # when the table already exists.
    spark.sql(
        f"CREATE TABLE IF NOT EXISTS {catalog_table} USING DELTA LOCATION '{bronze_path}'"
    )

    log_bronze_ingestion(
        spark=spark,
        run_id=run_id,
        dataset_name=dataset_name,
        catalog_table=catalog_table,
        log_table=log_table,
        success=True,
    )
except Exception as e:
    print(e)
    log_bronze_ingestion(
        spark=spark,
        run_id=run_id,
        dataset_name=dataset_name,
        catalog_table=catalog_table,
        success=False,
        log_table=log_table,
        error_msg=str(e),
    )
    # Propagate the failure after it has been logged; swallowing it here would
    # let downstream steps run against a missing or stale bronze table.
    raise