In [0]:

ipl_df = spark.read.parquet("dbfs:/FileStore/ipl_cleaned.parquet/")
ipl_df.printSchema()
ipl_df.show(truncate=False)


root
 |-- year: integer (nullable = true)
 |-- series_type: timestamp (nullable = true)
 |-- series_name: string (nullable = true)
 |-- match_no: string (nullable = true)
 |-- match_type: string (nullable = true)
 |-- match_name: string (nullable = true)
 |-- match_href: string (nullable = true)
 |-- match_team1: string (nullable = true)
 |-- match_team2: string (nullable = true)
 |-- match_datetime_start: timestamp (nullable = true)
 |-- match_date_end: string (nullable = true)
 |-- match_venue: string (nullable = true)

+----+-------------------+--------------------------+--------+----------+----------------------------------------------------------------+-----------------------------------------------------------------------------------------------+---------------------------+---------------------------+--------------------+--------------+-----------+
|year|series_type        |series_name               |match_no|match_type|match_name                                                  

In [0]:

tied_matches_df = ipl_df.filter(
    (ipl_df.match_type.contains("Tie")) | 
    (ipl_df.match_name.contains("Tie")) | 
    (ipl_df.match_name.contains("Super Over")) | 
    (ipl_df.match_name.contains("Eliminator"))  # Often tie-breaker situations
)

tied_matches_df.select("match_name", "match_type").distinct().show(truncate=False)


+----------------------------------------------------------------+----------+
|match_name                                                      |match_type|
+----------------------------------------------------------------+----------+
|ROYAL CHALLENGERS BENGALURU vs KOLKATA KNIGHT RIDERS, Eliminator|League    |
+----------------------------------------------------------------+----------+



In [0]:

tie_breakers_df = tied_matches_df.select("match_type").distinct()
tie_breakers_df.show(truncate=False)


+----------+
|match_type|
+----------+
|League    |
+----------+



In [0]:
tied_matches_df.select("match_name", "match_type", "match_href").show(truncate=False)


+----------------------------------------------------------------+----------+----------------------------------------------------------------------------------------------+
|match_name                                                      |match_type|match_href                                                                                    |
+----------------------------------------------------------------+----------+----------------------------------------------------------------------------------------------+
|ROYAL CHALLENGERS BENGALURU vs KOLKATA KNIGHT RIDERS, Eliminator|League    |https://www.cricbuzz.com/cricket-scores/37671/rcb-vs-kkr-eliminator-indian-premier-league-2021|
+----------------------------------------------------------------+----------+----------------------------------------------------------------------------------------------+



In [0]:

ipl_df = spark.read.parquet("dbfs:/FileStore/ipl_cleaned.parquet/")

ipl_df.printSchema()
ipl_df.select("match_name").show(5, truncate=False)


root
 |-- year: integer (nullable = true)
 |-- series_type: timestamp (nullable = true)
 |-- series_name: string (nullable = true)
 |-- match_no: string (nullable = true)
 |-- match_type: string (nullable = true)
 |-- match_name: string (nullable = true)
 |-- match_href: string (nullable = true)
 |-- match_team1: string (nullable = true)
 |-- match_team2: string (nullable = true)
 |-- match_datetime_start: timestamp (nullable = true)
 |-- match_date_end: string (nullable = true)
 |-- match_venue: string (nullable = true)

+--------------------------------------------------------+
|match_name                                              |
+--------------------------------------------------------+
|MUMBAI INDIANS vs ROYAL CHALLENGERS BENGALURU, 1st Match|
|CHENNAI SUPER KINGS vs DELHI CAPITALS, 2nd Match        |
|SUNRISERS HYDERABAD vs KOLKATA KNIGHT RIDERS, 3rd Match |
|RAJASTHAN ROYALS vs PUNJAB KINGS, 4th Match             |
|KOLKATA KNIGHT RIDERS vs MUMBAI INDIANS, 5th Match      |


In [0]:

tie_breaker_matches = ipl_df.filter(
    (ipl_df.match_name.contains("Super Over")) |
    (ipl_df.match_name.contains("Tie")) |
    (ipl_df.match_name.contains("Eliminator")) |
    (ipl_df.match_name.contains("Qualifier")) |
    (ipl_df.match_name.contains("Final"))
)

tie_breaker_matches.select("match_name").distinct().show(truncate=False)


+----------------------------------------------------------------+
|match_name                                                      |
+----------------------------------------------------------------+
|DELHI CAPITALS vs CHENNAI SUPER KINGS, Qualifier 1              |
|ROYAL CHALLENGERS BENGALURU vs KOLKATA KNIGHT RIDERS, Eliminator|
|CHENNAI SUPER KINGS vs KOLKATA KNIGHT RIDERS, Final             |
|DELHI CAPITALS vs KOLKATA KNIGHT RIDERS, Qualifier 2            |
+----------------------------------------------------------------+



In [0]:
from pyspark.sql.functions import when, lit

classified = tie_breaker_matches.withColumn(
    "tie_breaker_type",
    when(ipl_df.match_name.contains("Super Over"), lit("Super Over"))
    .when(ipl_df.match_name.contains("Eliminator"), lit("Eliminator"))
    .when(ipl_df.match_name.contains("Qualifier"), lit("Qualifier"))
    .when(ipl_df.match_name.contains("Final"), lit("Final"))
    .otherwise("Other")
)

classified.select("match_name", "tie_breaker_type").distinct().show(truncate=False)


+----------------------------------------------------------------+----------------+
|match_name                                                      |tie_breaker_type|
+----------------------------------------------------------------+----------------+
|CHENNAI SUPER KINGS vs KOLKATA KNIGHT RIDERS, Final             |Final           |
|DELHI CAPITALS vs KOLKATA KNIGHT RIDERS, Qualifier 2            |Qualifier       |
|DELHI CAPITALS vs CHENNAI SUPER KINGS, Qualifier 1              |Qualifier       |
|ROYAL CHALLENGERS BENGALURU vs KOLKATA KNIGHT RIDERS, Eliminator|Eliminator      |
+----------------------------------------------------------------+----------------+

