In [0]:
%python
df = spark.read.option("header", True).csv("/FileStore/tables/ipl_2021_matches.csv")
df.show(5)

+----+-----------+--------------------+--------+----------+--------------------+--------------------+-----------+-----------+--------------------+--------------+-----------+
|year|series_type|         series_name|match_no|match_type|          match_name|          match_href|match_team1|match_team2|match_datetime_start|match_date_end|match_venue|
+----+-----------+--------------------+--------+----------+--------------------+--------------------+-----------+-----------+--------------------+--------------+-----------+
|2021|        T20|Indian Premier Le...|    null|    League|MUMBAI INDIANS vs...|https://www.cricb...|       null|       null|                null|          null|       null|
|2021|        T20|Indian Premier Le...|    null|    League|CHENNAI SUPER KIN...|https://www.cricb...|       null|       null|                null|          null|       null|
|2021|        T20|Indian Premier Le...|    null|    League|SUNRISERS HYDERAB...|https://www.cricb...|       null|       null|     

In [0]:
from pyspark.sql.functions import expr, col
from pyspark.sql.types import StringType
dates_df = spark.range(0, 20).withColumn("match_date", expr("date_add(to_date('2021-04-09'), cast(id as int))"))
times = [f"{hour:02d}:{minute:02d}:00" for hour in range(19, 23) for minute in (0, 30)]
times_df = spark.createDataFrame(times[:20], StringType()).toDF("match_time")
from pyspark.sql.functions import monotonically_increasing_id, row_number
from pyspark.sql.window import Window
dates_df = dates_df.withColumn("row_num", row_number().over(Window.orderBy("match_date")))
times_df = times_df.withColumn("row_num", row_number().over(Window.orderBy("match_time")))
unique_date_time_df = dates_df.join(times_df, on="row_num").select("match_date", "match_time")
unique_date_time_df.show(20, truncate=False)


+----------+----------+
|match_date|match_time|
+----------+----------+
|2021-04-09|19:00:00  |
|2021-04-10|19:30:00  |
|2021-04-11|20:00:00  |
|2021-04-12|20:30:00  |
|2021-04-13|21:00:00  |
|2021-04-14|21:30:00  |
|2021-04-15|22:00:00  |
|2021-04-16|22:30:00  |
+----------+----------+



In [0]:
from pyspark.sql.functions import concat_ws
unique_date_time_df = unique_date_time_df.withColumn(
    "matchdatetime", concat_ws(" ", "match_date", "match_time")
)

In [0]:
unique_date_time_df.select("matchdatetime", "match_date", "match_time").show(20, truncate=False)


+-------------------+----------+----------+
|matchdatetime      |match_date|match_time|
+-------------------+----------+----------+
|2021-04-09 19:00:00|2021-04-09|19:00:00  |
|2021-04-10 19:30:00|2021-04-10|19:30:00  |
|2021-04-11 20:00:00|2021-04-11|20:00:00  |
|2021-04-12 20:30:00|2021-04-12|20:30:00  |
|2021-04-13 21:00:00|2021-04-13|21:00:00  |
|2021-04-14 21:30:00|2021-04-14|21:30:00  |
|2021-04-15 22:00:00|2021-04-15|22:00:00  |
|2021-04-16 22:30:00|2021-04-16|22:30:00  |
+-------------------+----------+----------+



In [0]:
unique_date_time_df.write.mode("overwrite").option("header", "true").csv("dbfs:/FileStore/tables/match_datetime_full.csv")


In [0]:
from pyspark.sql.functions import col, to_date, date_format, hour, minute, second

In [0]:
df = df.withColumn("matchdate", to_date(col("matchdatetime"))) \
       .withColumn("matchtime", date_format(col("matchdatetime"), "HH:mm:ss"))

In [0]:
df = df.withColumn("matchname", col("match_name"))

In [0]:
df.select("matchname", "matchdate", "matchtime").show(20, truncate=False)


+----------------------------------------------------------------+----------+---------+
|matchname                                                       |matchdate |matchtime|
+----------------------------------------------------------------+----------+---------+
|MUMBAI INDIANS vs ROYAL CHALLENGERS BENGALURU, 1st Match        |2021-04-09|19:30:00 |
|CHENNAI SUPER KINGS vs DELHI CAPITALS, 2nd Match                |2021-04-09|19:30:00 |
|SUNRISERS HYDERABAD vs KOLKATA KNIGHT RIDERS, 3rd Match         |2021-04-09|19:30:00 |
|RAJASTHAN ROYALS vs PUNJAB KINGS, 4th Match                     |2021-04-09|19:30:00 |
|KOLKATA KNIGHT RIDERS vs MUMBAI INDIANS, 5th Match              |2021-04-09|19:30:00 |
|SUNRISERS HYDERABAD vs ROYAL CHALLENGERS BENGALURU, 6th Match   |2021-04-09|19:30:00 |
|RAJASTHAN ROYALS vs DELHI CAPITALS, 7th Match                   |2021-04-09|19:30:00 |
|PUNJAB KINGS vs CHENNAI SUPER KINGS, 8th Match                  |2021-04-09|19:30:00 |
|MUMBAI INDIANS vs SUNRISERS HYD

In [0]:
df.write.mode("overwrite").option("header", "true").csv("dbfs:/FileStore/tables/match_details.csv")
