In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit

# Obtain the session named "SampleUnion"; getOrCreate() reuses an already
# running session instead of starting a second one.
spark = (
    SparkSession.builder
    .appName("SampleUnion")
    .getOrCreate()
)

In [0]:
# Column names for the per-ball commentary sample, in tuple order.
commentary_columns = [
    "year",
    "match_id",
    "ball_id",
    "over_no",
    "ball_commentary",
]

# Three sample commentary rows, one tuple per ball.
commentary_data = [
    (2024, 1001, "B1", 1, "Beautiful cover drive!"),
    (2024, 1001, "B2", 1, "Quick single taken."),
    (2024, 1001, "B3", 1, "That hit the pads!"),
]

# Column types are inferred by Spark from the Python values.
commentary_df = spark.createDataFrame(commentary_data, schema=commentary_columns)

In [0]:
# Column names for the per-over summary sample, in tuple order.
over_columns = [
    "year",
    "match_id",
    "ball_id",
    "over_no",
    "over_total_runs",
    "over_summary",
    "over_batsman1_name",
    "over_bowler_name",
]

# Two sample over-summary rows.
over_data = [
    (2024, 1001, "B1", 1, 6, "Steady over, just 6 runs", "Player A", "Bowler X"),
    (2024, 1001, "B2", 2, 4, "Good over, controlled", "Player A", "Bowler Y"),
]

# Column types are inferred by Spark from the Python values.
over_df = spark.createDataFrame(over_data, schema=over_columns)

In [0]:
# Column layout shared by both frames before they are unioned. Keeping it in
# one place prevents the two select lists from drifting apart.
UNIFIED_COLUMNS = [
    "match_id", "ball_id", "comment", "runs", "wicket",
    "match_date", "match_time", "source",
]

# Columns that neither sample frame provides; they are filled with string-typed nulls.
PLACEHOLDER_COLUMNS = ["runs", "wicket", "match_date", "match_time"]


def _prepare_for_union(df, source_label, null_columns):
    """Project ``df`` onto ``UNIFIED_COLUMNS``.

    Args:
        df: Input DataFrame; must already contain every unified column that is
            not listed in ``null_columns`` (other than ``source``).
        source_label: Literal value written to the ``source`` column.
        null_columns: Names of columns to add (or overwrite) as null strings.

    Returns:
        A DataFrame with exactly ``UNIFIED_COLUMNS``, in that order.
    """
    prepared = df
    for column_name in null_columns:
        # Cast so the null column has string type in both frames.
        prepared = prepared.withColumn(column_name, lit(None).cast("string"))
    return prepared.withColumn("source", lit(source_label)).select(*UNIFIED_COLUMNS)


# Commentary rows: the commentary text becomes the unified "comment" column.
commentary_prepared = _prepare_for_union(
    commentary_df.withColumnRenamed("ball_commentary", "comment"),
    source_label="commentary",
    null_columns=PLACEHOLDER_COLUMNS,
)

# Over rows: "comment" is also a null placeholder here.
# NOTE(review): over_summary and over_total_runs are not carried into the
# unified layout, so these rows hold only match_id, ball_id and source.
# Confirm that dropping them is intended.
over_prepared = _prepare_for_union(
    over_df,
    source_label="over",
    null_columns=["comment", *PLACEHOLDER_COLUMNS],
)

In [0]:
# Stack the two prepared frames; unionByName pairs columns by name, not position.
combined_df = commentary_prepared.unionByName(other=over_prepared)

# Print up to 20 rows (the default) without truncating long cell values.
combined_df.show(n=20, truncate=False)

+--------+-------+----------------------+----+------+----------+----------+----------+
|match_id|ball_id|comment               |runs|wicket|match_date|match_time|source    |
+--------+-------+----------------------+----+------+----------+----------+----------+
|1001    |B1     |Beautiful cover drive!|null|null  |null      |null      |commentary|
|1001    |B2     |Quick single taken.   |null|null  |null      |null      |commentary|
|1001    |B3     |That hit the pads!    |null|null  |null      |null      |commentary|
|1001    |B1     |null                  |null|null  |null      |null      |over      |
|1001    |B2     |null                  |null|null  |null      |null      |over      |
+--------+-------+----------------------+----+------+----------+----------+----------+

