In [0]:
df = spark.read.option("header", True).csv("dbfs:/FileStore/tables/ipl_2021_matches.csv")


In [0]:
df.printSchema()

root
 |-- year: string (nullable = true)
 |-- series_type: string (nullable = true)
 |-- series_name: string (nullable = true)
 |-- match_no: string (nullable = true)
 |-- match_type: string (nullable = true)
 |-- match_name: string (nullable = true)
 |-- match_href: string (nullable = true)
 |-- match_team1: string (nullable = true)
 |-- match_team2: string (nullable = true)
 |-- match_datetime_start: string (nullable = true)
 |-- match_date_end: string (nullable = true)
 |-- match_venue: string (nullable = true)



In [0]:
df.show(5, truncate=False)

+----+-----------+--------------------------+--------+----------+--------------------------------------------------------+---------------------------------------------------------------------------------------------+-----------+-----------+--------------------+--------------+-----------+
|year|series_type|series_name               |match_no|match_type|match_name                                              |match_href                                                                                   |match_team1|match_team2|match_datetime_start|match_date_end|match_venue|
+----+-----------+--------------------------+--------+----------+--------------------------------------------------------+---------------------------------------------------------------------------------------------+-----------+-----------+--------------------+--------------+-----------+
|2021|T20        |Indian Premier League 2021|null    |League    |MUMBAI INDIANS vs ROYAL CHALLENGERS BENGALURU, 1st Match|https://www

In [0]:
from pyspark.sql.functions import split, trim

In [0]:
teams_df = df.withColumn("team1", trim(split(df["match_name"], "vs")[0])) \
             .withColumn("team2", trim(split(df["match_name"], "vs")[1]))


In [0]:
unique_teams = teams_df.select("team1").union(teams_df.select("team2")).distinct()


In [0]:
team_count = unique_teams.count()
print(f" Number of teams: {team_count}")

 Number of teams: 70


In [0]:
df.select("match_type").distinct().show(truncate=False)


+----------+
|match_type|
+----------+
|League    |
+----------+



In [0]:
df.select("match_name").distinct().show(50, truncate=False)

+----------------------------------------------------------------+
|match_name                                                      |
+----------------------------------------------------------------+
|DELHI CAPITALS vs SUNRISERS HYDERABAD, 33rd Match               |
|ROYAL CHALLENGERS BENGALURU vs CHENNAI SUPER KINGS, 35th Match  |
|SUNRISERS HYDERABAD vs MUMBAI INDIANS, 31st Match               |
|SUNRISERS HYDERABAD vs DELHI CAPITALS, 20th Match               |
|PUNJAB KINGS vs SUNRISERS HYDERABAD, 14th Match                 |
|MUMBAI INDIANS vs PUNJAB KINGS, 42nd Match                      |
|PUNJAB KINGS vs DELHI CAPITALS, 29th Match                      |
|DELHI CAPITALS vs MUMBAI INDIANS, 13th Match                    |
|DELHI CAPITALS vs ROYAL CHALLENGERS BENGALURU, 22nd Match       |
|SUNRISERS HYDERABAD vs ROYAL CHALLENGERS BENGALURU, 6th Match   |
|PUNJAB KINGS vs ROYAL CHALLENGERS BENGALURU, 26th Match         |
|SUNRISERS HYDERABAD vs KOLKATA KNIGHT RIDERS, 3rd Match      

In [0]:
from pyspark.sql.functions import col

completed_df = df.filter(~col("match_name").rlike("Abandoned|No result"))
display(completed_df)

year,series_type,series_name,match_no,match_type,match_name,match_href,match_team1,match_team2,match_datetime_start,match_date_end,match_venue
2021,T20,Indian Premier League 2021,,League,"MUMBAI INDIANS vs ROYAL CHALLENGERS BENGALURU, 1st Match",https://www.cricbuzz.com/cricket-scores/35612/mi-vs-rcb-1st-match-indian-premier-league-2021,,,,,
2021,T20,Indian Premier League 2021,,League,"CHENNAI SUPER KINGS vs DELHI CAPITALS, 2nd Match",https://www.cricbuzz.com/cricket-scores/35617/csk-vs-dc-2nd-match-indian-premier-league-2021,,,,,
2021,T20,Indian Premier League 2021,,League,"SUNRISERS HYDERABAD vs KOLKATA KNIGHT RIDERS, 3rd Match",https://www.cricbuzz.com/cricket-scores/35618/srh-vs-kkr-3rd-match-indian-premier-league-2021,,,,,
2021,T20,Indian Premier League 2021,,League,"RAJASTHAN ROYALS vs PUNJAB KINGS, 4th Match",https://www.cricbuzz.com/cricket-scores/35622/rr-vs-pbks-4th-match-indian-premier-league-2021,,,,,
2021,T20,Indian Premier League 2021,,League,"KOLKATA KNIGHT RIDERS vs MUMBAI INDIANS, 5th Match",https://www.cricbuzz.com/cricket-scores/35627/kkr-vs-mi-5th-match-indian-premier-league-2021,,,,,
2021,T20,Indian Premier League 2021,,League,"SUNRISERS HYDERABAD vs ROYAL CHALLENGERS BENGALURU, 6th Match",https://www.cricbuzz.com/cricket-scores/35628/srh-vs-rcb-6th-match-indian-premier-league-2021,,,,,
2021,T20,Indian Premier League 2021,,League,"RAJASTHAN ROYALS vs DELHI CAPITALS, 7th Match",https://www.cricbuzz.com/cricket-scores/35632/rr-vs-dc-7th-match-indian-premier-league-2021,,,,,
2021,T20,Indian Premier League 2021,,League,"PUNJAB KINGS vs CHENNAI SUPER KINGS, 8th Match",https://www.cricbuzz.com/cricket-scores/35637/pbks-vs-csk-8th-match-indian-premier-league-2021,,,,,
2021,T20,Indian Premier League 2021,,League,"MUMBAI INDIANS vs SUNRISERS HYDERABAD, 9th Match",https://www.cricbuzz.com/cricket-scores/35642/mi-vs-srh-9th-match-indian-premier-league-2021,,,,,
2021,T20,Indian Premier League 2021,,League,"ROYAL CHALLENGERS BENGALURU vs KOLKATA KNIGHT RIDERS, 10th Match",https://www.cricbuzz.com/cricket-scores/35643/rcb-vs-kkr-10th-match-indian-premier-league-2021,,,,,


In [0]:
cleaned_df = completed_df.drop("series_type", "series_name")  

In [0]:
display(cleaned_df)

year,match_no,match_type,match_name,match_href,match_team1,match_team2,match_datetime_start,match_date_end,match_venue
2021,,League,"MUMBAI INDIANS vs ROYAL CHALLENGERS BENGALURU, 1st Match",https://www.cricbuzz.com/cricket-scores/35612/mi-vs-rcb-1st-match-indian-premier-league-2021,,,,,
2021,,League,"CHENNAI SUPER KINGS vs DELHI CAPITALS, 2nd Match",https://www.cricbuzz.com/cricket-scores/35617/csk-vs-dc-2nd-match-indian-premier-league-2021,,,,,
2021,,League,"SUNRISERS HYDERABAD vs KOLKATA KNIGHT RIDERS, 3rd Match",https://www.cricbuzz.com/cricket-scores/35618/srh-vs-kkr-3rd-match-indian-premier-league-2021,,,,,
2021,,League,"RAJASTHAN ROYALS vs PUNJAB KINGS, 4th Match",https://www.cricbuzz.com/cricket-scores/35622/rr-vs-pbks-4th-match-indian-premier-league-2021,,,,,
2021,,League,"KOLKATA KNIGHT RIDERS vs MUMBAI INDIANS, 5th Match",https://www.cricbuzz.com/cricket-scores/35627/kkr-vs-mi-5th-match-indian-premier-league-2021,,,,,
2021,,League,"SUNRISERS HYDERABAD vs ROYAL CHALLENGERS BENGALURU, 6th Match",https://www.cricbuzz.com/cricket-scores/35628/srh-vs-rcb-6th-match-indian-premier-league-2021,,,,,
2021,,League,"RAJASTHAN ROYALS vs DELHI CAPITALS, 7th Match",https://www.cricbuzz.com/cricket-scores/35632/rr-vs-dc-7th-match-indian-premier-league-2021,,,,,
2021,,League,"PUNJAB KINGS vs CHENNAI SUPER KINGS, 8th Match",https://www.cricbuzz.com/cricket-scores/35637/pbks-vs-csk-8th-match-indian-premier-league-2021,,,,,
2021,,League,"MUMBAI INDIANS vs SUNRISERS HYDERABAD, 9th Match",https://www.cricbuzz.com/cricket-scores/35642/mi-vs-srh-9th-match-indian-premier-league-2021,,,,,
2021,,League,"ROYAL CHALLENGERS BENGALURU vs KOLKATA KNIGHT RIDERS, 10th Match",https://www.cricbuzz.com/cricket-scores/35643/rcb-vs-kkr-10th-match-indian-premier-league-2021,,,,,
