# In [None]:
# Step 1: Import Required Libraries
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit
from pyspark.sql.types import StringType

# Step 2: Initialize Spark Session
# Reuses an already-running session if one exists, otherwise starts a new one.
spark = (
    SparkSession.builder
    .appName("SchemaMismatchHandler")
    .getOrCreate()
)

# Step 3: Read the Input CSV Files with Header Option
# The first row of each file supplies the column names.
df1 = spark.read.csv("path/to/file1.csv", header=True)
df2 = spark.read.csv("path/to/file2.csv", header=True)

# Step 4: Get Union of All Columns from Both DataFrames
# Sets drop duplicate names; the resulting list has no guaranteed order.
columns_df1 = set(df1.columns)
columns_df2 = set(df2.columns)
all_columns = list(columns_df1 | columns_df2)

# Step 5: Add Missing Columns with Null Values
def add_missing_columns(df, all_cols):
    """Project ``df`` onto ``all_cols``, filling absent columns with nulls.

    Args:
        df: DataFrame to align.
        all_cols: Iterable of column names the result must contain.

    Returns:
        A DataFrame whose columns are exactly ``sorted(all_cols)``, in that
        order. Columns already present in ``df`` are kept as-is; every other
        name is added as an untyped null literal (no cast is applied here, so
        cast afterwards if a concrete type is required).

    Names are matched against ``df.columns`` by exact string comparison.
    """
    existing = set(df.columns)  # read the schema once, O(1) membership tests

    # Build the whole projection up front and apply it in a single select():
    # chaining withColumn() in a loop adds one projection per missing column
    # and can produce very large query plans on wide schemas.
    projection = [
        name if name in existing else lit(None).alias(name)
        for name in sorted(all_cols)  # sorted => consistent column order
    ]
    return df.select(*projection)

# Bring both inputs to the same column set and order before merging.
df1_aligned, df2_aligned = (
    add_missing_columns(frame, all_columns) for frame in (df1, df2)
)

# Optional: if the same column has conflicting types in the two inputs,
# cast it to a common type in both DataFrames before the union, e.g.:
# df1_aligned = df1_aligned.withColumn("some_column", df1_aligned["some_column"].cast(StringType()))
# df2_aligned = df2_aligned.withColumn("some_column", df2_aligned["some_column"].cast(StringType()))

# Step 6: Merge Both DataFrames
# Rows are matched by column name rather than by position.
merged_df = df1_aligned.unionByName(df2_aligned)

# Step 7: Show or Save the Final Result
merged_df.show(truncate=False)
# merged_df.write.option("header", True).csv("path/to/output.csv")