In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd

# Scrape ball-by-ball commentary for one match page with headless Chrome.
#
# Results are left in two module-level names used by the code that follows:
#   match_details   - dict of match metadata (constants plus scraped fields)
#   commentary_data - list of rows, one per successfully parsed commentary item
#
# NOTE(review): the recorded output of this cell shows the wait timing out and
# no '.live-comm' elements being found, so the CSS selectors below may not
# match the page markup as served to this browser - verify against the live DOM.

# Chrome flags: run without a visible window, GPU, or sandbox.
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")

url = "https://www.espncricinfo.com/series/ipl-2021-1249214/match-1/rcb-vs-mi-live-cricket-score"

# Hard-coded metadata for this match; the scraped fields
# ('match_winning_team', 'match_toss') are added once the page is loaded.
match_details = {
    'year': 2021,
    'series_type': "IPL",
    'series_name': "IPL 2021",
    'match_no': 1,
    'match_type': "T20 IPL",
    'match_id': "1249214",
    'match_tie_breaker': "None",
}

commentary_data = []
ball_no = 1  # running counter; advanced only for rows that parse successfully

driver = webdriver.Chrome(options=options)
try:
    driver.get(url)

    # Best-effort wait: if it fails we report it and carry on, so the lookups
    # below simply fall back to their "not found" paths.
    try:
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '.live-comm'))
        )
    except Exception:
        print("Timed out waiting for the commentary to load.")

    # Result text; falls back to a placeholder when the element is missing.
    try:
        match_details['match_winning_team'] = driver.find_element(By.CSS_SELECTOR, '.match-info.match-info-MATCH .status-text span').text.strip()
    except Exception:
        match_details['match_winning_team'] = "Not Available"

    # Toss text: the div that follows a div whose text contains 'Toss'.
    try:
        toss_info = driver.find_element(By.XPATH, "//div[contains(text(),'Toss')]/following-sibling::div").text.strip()
        match_details['match_toss'] = toss_info
    except Exception:
        match_details['match_toss'] = "Not Available"

    commentary_divs = driver.find_elements(By.CSS_SELECTOR, '.live-comm')

    if commentary_divs:
        for comm in commentary_divs:
            try:
                ball = comm.find_element(By.CSS_SELECTOR, '.ball').text.strip()
                description = comm.find_element(By.CSS_SELECTOR, '.description').text.strip()
                # First whitespace-separated token of the ball label; an empty
                # label raises IndexError, which is reported below and skipped.
                over_no = ball.split()[0]
                commentary_data.append([
                    match_details['year'],
                    match_details['series_type'],
                    match_details['series_name'],
                    match_details['match_no'],
                    match_details['match_type'],
                    match_details['match_id'],
                    match_details['match_winning_team'],
                    match_details['match_tie_breaker'],
                    match_details['match_toss'],
                    ball_no,
                    over_no,
                    description
                ])
                ball_no += 1
            except Exception as e:
                print(f"Error extracting commentary: {e}")
    else:
        print("No commentary elements found.")
finally:
    # Always shut the browser down, even if navigation or scraping raised,
    # so no Chrome process is left running.
    driver.quit()

# Turn the scraped rows into a DataFrame and write them to CSV; when nothing
# was scraped, only report that and create no file.
if not commentary_data:
    print("No commentary data scraped.")
else:
    df = pd.DataFrame(
        commentary_data,
        columns=[
            'year',
            'series_type',
            'series_name',
            'match_no',
            'match_type',
            'match_id',
            'match_winning_team',
            'match_tie_breaker',
            'match_toss',
            'ball_no',
            'over_no',
            'ball_commentary',
        ],
    )
    # Preview of the first rows before saving.
    print(df.head())
    df.to_csv("ball_by_ball_commentary_full.csv", index=False)
    print("✅ Ball-by-ball commentary with match details saved to 'ball_by_ball_commentary_full.csv'")


Timed out waiting for the commentary to load.
No commentary elements found.
No commentary data scraped.


In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd

# Scrape ball-by-ball commentary for one match page with headless Chrome,
# waiting up to 60 seconds for the commentary elements to appear.
#
# Results are left in two module-level names used by the code that follows:
#   match_details   - dict of match metadata (constants plus scraped fields)
#   commentary_data - list of rows, one per successfully parsed commentary item
#
# NOTE(review): the recorded output of this cell shows the wait timing out and
# no '.live-comm' elements being found, so the CSS selectors below may not
# match the page markup as served to this browser - verify against the live DOM.

# Chrome flags: run without a visible window, GPU, or sandbox.
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")

url = "https://www.espncricinfo.com/series/ipl-2021-1249214/match-1/rcb-vs-mi-live-cricket-score"

# Hard-coded metadata for this match; the scraped fields
# ('match_winning_team', 'match_toss') are added once the page is loaded.
match_details = {
    'year': 2021,
    'series_type': "IPL",
    'series_name': "IPL 2021",
    'match_no': 1,
    'match_type': "T20 IPL",
    'match_id': "1249214",
    'match_tie_breaker': "None",
}

commentary_data = []
ball_no = 1  # running counter; advanced only for rows that parse successfully

driver = webdriver.Chrome(options=options)
try:
    driver.get(url)

    # Best-effort wait: if it fails we report it and carry on, so the lookups
    # below simply fall back to their "not found" paths.
    try:
        WebDriverWait(driver, 60).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '.live-comm'))
        )
    except Exception:
        print("Timed out waiting for the commentary to load.")

    # Result text; falls back to a placeholder when the element is missing.
    try:
        match_details['match_winning_team'] = driver.find_element(By.CSS_SELECTOR, '.match-info.match-info-MATCH .status-text span').text.strip()
    except Exception:
        match_details['match_winning_team'] = "Not Available"

    # Toss text: the div that follows a div whose text contains 'Toss'.
    try:
        toss_info = driver.find_element(By.XPATH, "//div[contains(text(),'Toss')]/following-sibling::div").text.strip()
        match_details['match_toss'] = toss_info
    except Exception:
        match_details['match_toss'] = "Not Available"

    commentary_divs = driver.find_elements(By.CSS_SELECTOR, '.live-comm')

    if commentary_divs:
        for comm in commentary_divs:
            try:
                ball = comm.find_element(By.CSS_SELECTOR, '.ball').text.strip()
                description = comm.find_element(By.CSS_SELECTOR, '.description').text.strip()
                # First whitespace-separated token of the ball label; an empty
                # label raises IndexError, which is reported below and skipped.
                over_no = ball.split()[0]
                commentary_data.append([
                    match_details['year'],
                    match_details['series_type'],
                    match_details['series_name'],
                    match_details['match_no'],
                    match_details['match_type'],
                    match_details['match_id'],
                    match_details['match_winning_team'],
                    match_details['match_tie_breaker'],
                    match_details['match_toss'],
                    ball_no,
                    over_no,
                    description
                ])
                ball_no += 1
            except Exception as e:
                print(f"Error extracting commentary: {e}")
    else:
        print("No commentary elements found.")
finally:
    # Always shut the browser down, even if navigation or scraping raised,
    # so no Chrome process is left running.
    driver.quit()

# Turn the scraped rows into a DataFrame and write them to CSV; when nothing
# was scraped, only report that and create no file.
if not commentary_data:
    print("No commentary data scraped.")
else:
    df = pd.DataFrame(
        commentary_data,
        columns=[
            'year',
            'series_type',
            'series_name',
            'match_no',
            'match_type',
            'match_id',
            'match_winning_team',
            'match_tie_breaker',
            'match_toss',
            'ball_no',
            'over_no',
            'ball_commentary',
        ],
    )
    # Preview of the first rows before saving.
    print(df.head())
    df.to_csv("ball_by_ball_commentary_full.csv", index=False)
    print("✅ Ball-by-ball commentary with match details saved to 'ball_by_ball_commentary_full.csv'")


Timed out waiting for the commentary to load.
No commentary elements found.
No commentary data scraped.
