# Read Data

In [0]:
# Load the Parquet files under Raw_data/ in the bronze container.
# Parquet files embed their own schema, so the CSV-style 'header' and
# 'inferSchema' options are not needed here and have been dropped.
df = (
    spark.read.format('parquet')
    .load('abfss://bronze@contosoprojectstorage.dfs.core.windows.net/Raw_data/')
)

In [0]:
%sql
-- Count the rows of the Parquet files under Raw_data/ by querying the storage path directly.
select count(*) from parquet.`abfss://bronze@contosoprojectstorage.dfs.core.windows.net/Raw_data/`

# Fixing column names

In [0]:
def _to_snake_case(name):
    """Return *name* converted from PascalCase/camelCase to snake_case.

    An underscore is inserted before an uppercase character only when it is
    not the first character and the preceding character is not uppercase, so
    a run of capitals stays together ("ProductID" -> "product_id"). Any
    leading underscores in the result are stripped.
    """
    pieces = []
    for idx, char in enumerate(name):
        starts_new_word = (
            char.isupper()
            and idx > 0
            and not name[idx - 1].isupper()
        )
        pieces.append("_" + char.lower() if starts_new_word else char.lower())
    return "".join(pieces).lstrip("_")


def rename_columns_to_snake_case(df):
    """
    Convert column names from PascalCase or camelCase to snake_case in a PySpark DataFrame.

    Args:
        df (DataFrame): The input DataFrame with columns to be renamed.

    Returns:
        DataFrame: A new DataFrame with the same columns, in the same order,
        with every column name converted to snake_case.

    Raises:
        ValueError: If two columns would end up with the same snake_case name.
    """
    new_names = []
    seen = set()  # O(1) duplicate detection instead of rescanning all previous names

    for old_col_name in df.columns:
        new_col_name = _to_snake_case(old_col_name)

        # Refuse to produce an ambiguous schema.
        if new_col_name in seen:
            raise ValueError(f"Duplicate column name found after renaming: '{new_col_name}'")

        seen.add(new_col_name)
        new_names.append(new_col_name)

    # Rename every column positionally in one step; a loop of per-column
    # renames would stack one projection per column onto the query plan.
    return df.toDF(*new_names)

# Replace df with the snake_case-renamed DataFrame; raises ValueError if two names collide.
df = rename_columns_to_snake_case(df)

# Data Writing

In [0]:
# Write the renamed DataFrame as Parquet to contoso_sales in the silver
# container, replacing whatever is already stored at that path.
(
    df.write
    .format('parquet')
    .mode('overwrite')
    .option('path', 'abfss://silver@contosoprojectstorage.dfs.core.windows.net/contoso_sales')
    .save()
)