In [0]:
from pyspark.sql.functions import lit, current_date, udf
from pyspark.sql.types import StringType
import re


# Load a CSV file into a DataFrame through the notebook's Spark session
def read_csv(path, header=True, inferSchema=True):
    """Read the CSV at ``path`` with the ambient ``spark`` session.

    Args:
        path: Location passed straight to ``spark.read.csv``.
        header: Forwarded as the reader's ``header`` argument.
        inferSchema: Forwarded as the reader's ``inferSchema`` argument.

    Returns:
        The object returned by ``spark.read.csv`` for these arguments.
    """
    reader_options = {"header": header, "inferSchema": inferSchema}
    return spark.read.csv(path, **reader_options)

# Persist a DataFrame as CSV with a header row
def write_csv(df, path, mode="overwrite"):
    """Write ``df`` to ``path`` as CSV after coalescing it to one partition.

    Args:
        df: DataFrame to write.
        path: Destination passed to the writer's ``csv`` call.
        mode: Save mode handed to the writer (``"overwrite"`` by default).
    """
    # Coalesce to a single partition before handing the data to the writer.
    single_partition = df.coalesce(1)
    csv_writer = single_partition.write.mode(mode).option("header", True)
    csv_writer.csv(path)

# Function to convert CamelCase to snake_case
def camel_to_snake(name):
    """Convert a CamelCase / mixedCase identifier to snake_case.

    Two passes insert underscores at word boundaries, then the result is
    lower-cased:

    1. before a capitalised word (an upper-case letter followed by one or
       more lower-case letters), so ``HTTPResponse`` -> ``HTTP_Response``;
    2. between a lower-case letter or digit and an upper-case letter, so
       ``getHTTP`` -> ``get_HTTP``.

    An underscore that is already present is never doubled, so
    ``First_Name`` becomes ``first_name`` rather than ``first__name``.

    Args:
        name: The identifier to convert.

    Returns:
        The snake_case form of ``name``.
    """
    # ``[^_\n]`` instead of ``.``: skip boundaries that already carry an
    # underscore, while keeping ``.``'s "any character but newline" meaning.
    with_word_breaks = re.sub(r'([^_\n])([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', with_word_breaks).lower()

# Wrap camel_to_snake as a Spark UDF that returns strings
camel_to_snake_udf = udf(f=camel_to_snake, returnType=StringType())

# Function to rename all columns to snake_case
def rename_columns_to_snake_case(df):
    """Return ``df`` with every column name converted by ``camel_to_snake``.

    All columns are renamed positionally in one ``toDF`` call instead of
    chaining one ``withColumnRenamed`` per column. This adds a single
    projection to the plan, and the outcome does not depend on looking up
    old names in a partially renamed frame.

    Args:
        df: DataFrame whose columns should be renamed.

    Returns:
        A DataFrame with the same columns in the same order, each named
        ``camel_to_snake(original_name)``.
    """
    snake_names = [camel_to_snake(col_name) for col_name in df.columns]
    return df.toDF(*snake_names)

# Stamp a DataFrame with a load_date column
def add_load_date(df):
    """Return ``df`` with a ``load_date`` column set to ``current_date()``.

    Args:
        df: DataFrame to extend.

    Returns:
        The result of ``df.withColumn("load_date", current_date())``.
    """
    load_date_value = current_date()
    stamped = df.withColumn("load_date", load_date_value)
    return stamped

# Confirmation message printed once the statements above have run.
print("✅ Utils Notebook Loaded")
