In [0]:
# Databricks notebook source
# ============================================
# 00. Parameters
# ============================================

# Location of the CSV file this notebook ingests (a path under /Volumes).
SOURCE_CSV = "/Volumes/skills_intelligence/00_livrea_data/soft_skill/livrea_soft_skill.csv"

# Coordinates of the target table: catalog, schema and table name.
CATALOG = "skills_intelligence"
BRONZE_SCHEMA = "01_bronze"
TABLE_NAME = "livrea_soft_skill"

# Fully qualified three-part name: <catalog>.<schema>.<table>.
FULL_TABLE_NAME = ".".join((CATALOG, BRONZE_SCHEMA, TABLE_NAME))

# Echo the resolved source and target so the run output records them.
print("SOURCE_CSV:", SOURCE_CSV)
print("TARGET_TABLE:", FULL_TABLE_NAME)

In [0]:
# Databricks notebook source
# ============================================
# 01. Set catalog & schema, create schema if needed
# ============================================

# Run the session setup statements in order: select the catalog, create the
# Bronze schema when it does not exist yet, then make that schema current.
for statement in (
    f"USE CATALOG {CATALOG}",
    f"CREATE SCHEMA IF NOT EXISTS {BRONZE_SCHEMA}",
    f"USE SCHEMA {BRONZE_SCHEMA}",
):
    spark.sql(statement)

# Show which catalog and schema the session ended up in.
session_context = spark.sql("SELECT current_catalog(), current_schema()")
session_context.show(truncate=False)

In [0]:
# Databricks notebook source
# ============================================
# 02. Helpers
# ============================================

from pyspark.sql import functions as F

def file_exists(path: str) -> bool:
    """Report whether ``path`` appears in the listing of its parent directory.

    The parent directory is listed with ``dbutils.fs.ls`` and the last path
    component is compared against each entry's ``name``. A trailing ``/`` is
    ignored on both sides, so ``/a/b`` and ``/a/b/`` give the same answer.

    Args:
        path: Path to look for. Must contain at least one ``/`` and a
            non-empty final component.

    Returns:
        True when an entry with the same name is listed in the parent
        directory. False when it is not listed, when the path has no usable
        final component, or when the lookup fails for any reason.
    """
    try:
        # Drop trailing slashes first; otherwise "/a/b/" would split into an
        # empty file name that can never match a listing entry.
        parent, sep, fname = path.rstrip("/").rpartition("/")
        if not sep or not fname:
            return False
        # A top-level path such as "/vol" has an empty parent; list the root.
        entries = dbutils.fs.ls(parent or "/")
        return any(entry.name.rstrip("/") == fname for entry in entries)
    except Exception:
        # Deliberate best-effort: any failure (missing parent, listing error,
        # non-string input) is reported as "does not exist".
        return False

In [0]:
# Databricks notebook source
# ============================================
# 03. Read CSV (semicolon separated)
# ============================================

# Load the source CSV; the header row supplies the column names and Spark
# infers the column types from the data.
df_raw = (
    spark.read
    .format("csv")
    .options(
        header="true",
        sep=";",            # IMPORTANT: fields are separated by semicolons
        inferSchema="true",
        multiLine="true",   # a quoted value may span several lines
        quote="\"",
        escape="\"",        # a doubled quote inside a quoted value is literal
    )
    .load(SOURCE_CSV)
)

# Preview the data, then report its size.
display(df_raw)
row_total = df_raw.count()
column_total = len(df_raw.columns)
print("Rows:", row_total)
print("Columns:", column_total)

In [0]:
# Databricks notebook source
# ============================================
# 04. Add Bronze metadata
# ============================================

# Column expressions derived from the source path: the full path as a literal,
# and its last "/"-separated component.
source_path_col = F.lit(SOURCE_CSV)
source_name_col = F.element_at(F.split(source_path_col, "/"), -1)

# Append the metadata columns (ingest timestamp, source path, source file
# name, source system) to the raw data.
df_bronze = (
    df_raw
    .withColumn("_ingest_ts", F.current_timestamp())
    .withColumn("_source_file", source_path_col)
    .withColumn("_source_filename", source_name_col)
    .withColumn("_source_system", F.lit("livrea"))
)

display(df_bronze)


In [0]:
# Databricks notebook source
# ============================================
# 05. Write to Delta (Bronze)
# ============================================

# Write the DataFrame as a Delta table, replacing any previous contents.
(
    df_bronze.write
        .format("delta")
        .mode("overwrite")                 # master data → overwrite is safe
        .option("overwriteSchema", "true") # the overwrite may also replace the table schema
        .saveAsTable(FULL_TABLE_NAME)
)

# Status line; the check mark was previously stored mis-encoded and printed as garbage.
print("✅ Successfully written to:", FULL_TABLE_NAME)

In [0]:
# Databricks notebook source
# ============================================
# 06. Validation
# ============================================

# Query 1: total number of rows in the target table.
row_count_query = f"SELECT COUNT(*) AS cnt FROM {FULL_TABLE_NAME}"

# Query 2: up to 50 rows, most recent ingest timestamp first.
latest_rows_query = f"""
SELECT *
FROM {FULL_TABLE_NAME}
ORDER BY _ingest_ts DESC
LIMIT 50
"""

spark.sql(row_count_query).show()
spark.sql(latest_rows_query).show(truncate=False)