In [0]:
from pyspark.sql.functions import col, explode, lit, array
from pyspark.sql import SparkSession

# Reuse the already-running Spark session when there is one; otherwise
# build a new session with default settings.
spark = SparkSession.builder.getOrCreate()

# Read the Parquet dataset stored at the DBFS path below into a DataFrame.
matches_df = spark.read.parquet(
    "dbfs:/FileStore/ipl_cleaned.parquet/"
)

# Hard-coded player names used as a stand-in lineup; the same list is
# attached to every row of matches_df.
sample_players = [
    "Rohit Sharma",
    "MS Dhoni",
    "Jasprit Bumrah",
    "Virat Kohli",
]

# Add a constant array column "playing11" with one string literal per
# entry of sample_players, in list order.
matches_with_playing11_df = matches_df.withColumn(
    "playing11",
    array(*(lit(name) for name in sample_players)),
)

# Turn the array into rows: each input row yields one output row per
# array element, keeping only the match name next to the player name.
exploded_df = matches_with_playing11_df.select(
    col("match_name"),
    explode(col("playing11")).alias("player_name"),
)

# Small in-memory lookup table, one row per player:
# (player_name, player_href, is_captain).
player_href_df = spark.createDataFrame(
    [
        ("Rohit Sharma", "player/rohit-sharma", True),
        ("MS Dhoni", "player/ms-dhoni", True),
        ("Virat Kohli", "player/virat-kohli", False),
        ("Jasprit Bumrah", "player/jasprit-bumrah", False),
    ],
    schema=["player_name", "player_href", "is_captain"],
)

# Left join on player_name: every (match, player) row from exploded_df is
# kept, and the lookup columns are null when no lookup row matches.
final_df = exploded_df.join(player_href_df, "player_name", "left")

# Print the joined result without truncating long cell values.
(
    final_df
    .select("match_name", "player_name", "is_captain")
    .show(truncate=False)
)


+--------------------------------------------------------+--------------+----------+
|match_name                                              |player_name   |is_captain|
+--------------------------------------------------------+--------------+----------+
|MUMBAI INDIANS vs ROYAL CHALLENGERS BENGALURU, 1st Match|Rohit Sharma  |true      |
|MUMBAI INDIANS vs ROYAL CHALLENGERS BENGALURU, 1st Match|MS Dhoni      |true      |
|MUMBAI INDIANS vs ROYAL CHALLENGERS BENGALURU, 1st Match|Jasprit Bumrah|false     |
|MUMBAI INDIANS vs ROYAL CHALLENGERS BENGALURU, 1st Match|Virat Kohli   |false     |
|CHENNAI SUPER KINGS vs DELHI CAPITALS, 2nd Match        |Rohit Sharma  |true      |
|CHENNAI SUPER KINGS vs DELHI CAPITALS, 2nd Match        |MS Dhoni      |true      |
|CHENNAI SUPER KINGS vs DELHI CAPITALS, 2nd Match        |Jasprit Bumrah|false     |
|CHENNAI SUPER KINGS vs DELHI CAPITALS, 2nd Match        |Virat Kohli   |false     |
|SUNRISERS HYDERABAD vs KOLKATA KNIGHT RIDERS, 3rd Match |Rohit S