In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Launch Chrome without a visible window.
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

# Open the full-scorecard page of the match being scraped.
url = "https://www.espncricinfo.com/series/ipl-2021-1249214/mumbai-indians-vs-royal-challengers-bangalore-1st-match-1254058/full-scorecard"
driver.get(url)

# Shared explicit-wait helper (timeout argument: 20).
wait = WebDriverWait(driver, 20)

# Fields that are hard-coded for this match rather than read from the page.
match_data = {
    'year': 2021,
    'series_type': "IPL",
    'series_name': "IPL 2021",
    'match_no': 1,
    'match_type': "T20 IPL",
    'match_id': "1254058",
}

# Match status line (best effort): wait for the element to be present and
# store its stripped text, otherwise fall back to a placeholder.
try:
    status = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.ds-text-tight-m.ds-font-regular.ds-truncate.ds-text-typo')))
    match_data['match_status'] = status.text.strip()
except Exception:
    # `Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate while the wait is in progress.
    match_data['match_status'] = "Not Available"

# Toss, result and tie-breaker lines (best effort).
# Each info block's text is classified by the first keyword it contains.
try:
    info_blocks = driver.find_elements(By.CSS_SELECTOR, '.ds-text-tight-s.ds-font-regular.ds-text-ui-typo-mid')
    for block in info_blocks:
        text = block.text
        if "Toss" in text:
            match_data['match_toss'] = text.replace("Toss", "").strip()
        elif "Match" in text:
            match_data['match_winning_team'] = text.replace("Match", "").strip()
        elif "Eliminator" in text or "Super Over" in text:
            match_data['match_tie_breaker'] = text.strip()
except Exception:
    # Keep whatever was collected before the failure; defaults are applied below.
    pass

# Apply the defaults unconditionally: an empty result from find_elements does
# not raise, so defaults set only in the except branch would never be applied
# and the columns would be missing from the output altogether.
match_data.setdefault('match_toss', "Not Available")
match_data.setdefault('match_winning_team', "Not Available")
match_data.setdefault('match_tie_breaker', "None")

# Match officials (best effort). For each official, find the <span> whose text
# contains the label, read the text of its parent <div>, and strip the label
# out of it. A failed lookup stores the "Not Available" placeholder.
for field, label in (
    ('umpires', 'Umpires'),
    ('match_referee', 'Match Referee'),
    ('third_umpires', 'TV Umpire'),
):
    try:
        row_text = driver.find_element(By.XPATH, f"//span[contains(text(),'{label}')]/parent::div").text
        match_data[field] = row_text.replace(label, "").strip()
    except Exception:
        # `Exception` instead of a bare `except:` so Ctrl-C is not swallowed.
        match_data[field] = "Not Available"

# Match date/time (best effort): text of the <span> that immediately follows
# the element matched by the class selector.
try:
    match_data['match_datetime'] = driver.find_element(By.CSS_SELECTOR, '.ds-text-tight-s.ds-font-regular.ds-text-typo-mid+span').text
except Exception:
    # `Exception` instead of a bare `except:` so Ctrl-C is not swallowed.
    match_data['match_datetime'] = "Not Available"

# Team names: take the first two matching headings; if fewer than two are
# found, fall back to the hard-coded abbreviations for this match.
team_blocks = driver.find_elements(By.CSS_SELECTOR, '.ds-text-tight-l.ds-font-bold')
if len(team_blocks) < 2:
    match_data['team1_name'] = "MI"
    match_data['team2_name'] = "RCB"
else:
    first_heading, second_heading = team_blocks[0], team_blocks[1]
    match_data['team1_name'] = first_heading.text.strip()
    match_data['team2_name'] = second_heading.text.strip()

# Team scores (best effort). Each score element is expected to read
# "<runs>/<wickets>". Teams are parsed independently so that a failure on one
# team no longer resets the other team's already-parsed values to "0".
try:
    scores = driver.find_elements(By.CSS_SELECTOR, '.ds-text-compact-m.ds-text-typo-title')
except Exception:
    scores = []

for position, team in enumerate(('team1', 'team2')):
    try:
        runs, wickets = scores[position].text.strip().split('/')
    except Exception:
        # Missing element (IndexError), text without exactly one '/'
        # (ValueError) or a WebDriver error: use "0" for this team only.
        runs, wickets = "0", "0"
    match_data[f'{team}_score'] = runs
    match_data[f'{team}_wickets'] = wickets

def get_team_section(team_index):
    """Classify the text lines of one squad card into squad roles.

    The card is the ``team_index``-th element matching the
    ``.ds-rounded-lg.ds-mt-2`` selector on the page currently loaded in the
    module-level ``driver``. Each non-blank line of its text is classified by
    the first rule that matches:

    * contains ``(c)``  -> stored as ``captain`` (marker removed, stripped)
    * contains ``(wk)`` -> appended to ``players``
    * contains ``Sub``  -> appended to ``bench`` (also covers "Substitute")
    * contains ``Coach`` or ``Physio`` -> appended to ``support_staff``
    * anything else     -> appended to ``players``

    Args:
        team_index: Zero-based index of the squad card to read.

    Returns:
        A dict with keys ``captain`` (str), ``players``, ``bench`` and
        ``support_staff`` (lists of str). If the card cannot be read for any
        reason, the dict is returned with empty values.
    """
    data = {
        'captain': '',
        'players': [],
        'bench': [],
        'support_staff': []
    }
    try:
        squad_card = driver.find_elements(By.CSS_SELECTOR, '.ds-rounded-lg.ds-mt-2')[team_index]
        lines = squad_card.text.split('\n')
    except Exception:
        # Best effort: a missing card (IndexError) or any WebDriver failure
        # yields the empty structure. `Exception` rather than a bare
        # `except:` so KeyboardInterrupt/SystemExit still propagate.
        return data

    for line in lines:
        if not line.strip():
            # Blank lines would otherwise be recorded as empty player names.
            continue
        if '(c)' in line:
            data['captain'] = line.replace('(c)', '').strip()
        elif '(wk)' in line:
            # Checked before the bench/staff keywords, so a wicketkeeper line
            # is always classified as a player.
            data['players'].append(line)
        elif 'Sub' in line:
            data['bench'].append(line)
        elif 'Coach' in line or 'Physio' in line:
            data['support_staff'].append(line)
        else:
            data['players'].append(line)
    return data

# Read both squad cards, then flatten each into match_data columns; list-valued
# fields are stored as comma-separated strings.
team1_data, team2_data = get_team_section(0), get_team_section(1)

match_data.update({
    'team1_captain': team1_data['captain'],
    'team1_players': ', '.join(team1_data['players']),
    'team1_bench': ', '.join(team1_data['bench']),
    'team1_support_staff': ', '.join(team1_data['support_staff']),
    'team2_captain': team2_data['captain'],
    'team2_players': ', '.join(team2_data['players']),
    'team2_bench': ', '.join(team2_data['bench']),
    'team2_support_staff': ', '.join(team2_data['support_staff']),
})

# Ball-by-ball commentary (best effort). The browser is always closed
# afterwards, even if loading or scraping the page raises.
commentary_url = "https://www.espncricinfo.com/series/ipl-2021-1249214/mumbai-indians-vs-royal-challengers-bangalore-1st-match-1254058/ball-by-ball-commentary"
commentary_selector = '.ds-text-tight-s.ds-font-regular.ds-truncate.ds-text-typo'
commentary_data = []

try:
    driver.get(commentary_url)

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, commentary_selector)))
        commentary_elements = driver.find_elements(By.CSS_SELECTOR, commentary_selector)
    except Exception:
        # Nothing appeared within the wait timeout (or the lookup failed).
        commentary_elements = []

    for elem in commentary_elements:
        try:
            # NOTE(review): both lookups use the same selector as the outer
            # query, so they search *inside* an element that already matched it
            # and return identical text. This looks like a copy-paste
            # placeholder; confirm the real selectors for the over number and
            # the commentary text against the page markup.
            over_no = elem.find_element(By.CSS_SELECTOR, commentary_selector).text
            ball_commentary = elem.find_element(By.CSS_SELECTOR, commentary_selector).text
        except Exception:
            # Skip only this element instead of discarding every row that was
            # already collected.
            continue
        commentary_data.append({
            'ball_no': len(commentary_data) + 1,  # consecutive, 1-based
            'over_no': over_no,
            'ball_commentary': ball_commentary
        })
finally:
    driver.quit()

# Build the output table: the single match row repeated for every commentary
# row (cross join), then write it to CSV.
df_match = pd.DataFrame([match_data])
df_commentary = pd.DataFrame(commentary_data)

if df_commentary.empty:
    # A cross join against zero commentary rows produces zero rows, which would
    # drop the match record and write a header-only CSV. Keep the match row.
    df_final = df_match
else:
    df_final = df_match.merge(df_commentary, how='cross')

df_final.to_csv("ipl_2021_match1_full_data.csv", index=False)
print(" Full match data saved to 'ipl_2021_match1_full_data.csv'")
print(df_final.head())


✅ Full match data saved to 'ipl_2021_match1_full_data.csv'
Empty DataFrame
Columns: [year, series_type, series_name, match_no, match_type, match_id, match_status, umpires, match_referee, third_umpires, match_datetime, team1_name, team2_name, team1_score, team2_score, team1_wickets, team2_wickets, team1_captain, team1_players, team1_bench, team1_support_staff, team2_captain, team2_players, team2_bench, team2_support_staff]
Index: []

[0 rows x 25 columns]
