Scrape ball by ball commentary for the 1st Match

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import csv
import json

# Headless browser setup
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

# Match details
match_id = "35612"
match_url = f"https://www.cricbuzz.com/cricket-full-commentary/{match_id}/mi-vs-rcb-1st-match-indian-premier-league-2021"

year = "2021"
series_type = "T20"
series_name = "Indian Premier League 2021"
match_no = "1st Match"
match_type = "T20 IPL"

driver.get(match_url)
time.sleep(3)  # Let the JS load

# Scroll to load all commentary
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

# Ball-by-ball commentary
commentary_blocks = driver.find_elements(By.CSS_SELECTOR, ".cb-col.cb-col-100.cb-col.cb-com-ln")

data = []

for block in commentary_blocks:
    try:
        over_el = block.find_element(By.CSS_SELECTOR, ".cb-col.cb-col-8.text-bold")
        comm_el = block.find_element(By.CSS_SELECTOR, ".cb-col.cb-col-90.cb-com-ln")
        
        over_no = over_el.text.strip()
        ball_commentary = comm_el.text.strip()
        
        if '.' in over_no:
            innings = 1 if float(over_no) <= 20 else 2
        else:
            innings = 1

        ball_no = f"{innings}-{over_no}"

        data.append({
            "year": year,
            "series_type": series_type,
            "series_name": series_name,
            "match_no": match_no,
            "match_type": match_type,
            "match_id": match_id,
            "match_winning_team": "",  # to be filled later
            "match_tie_breaker": "",
            "match_toss": "",
            "ball_no": ball_no,
            "over_no": over_no,
            "ball_commentary": ball_commentary
        })
    except Exception as e:
        print("Error processing block:", e)

# Fetch toss and match result info (from page)
try:
    match_info_blocks = driver.find_elements(By.CSS_SELECTOR, ".cb-mtch-info-itm")

    for block in match_info_blocks:
        text = block.text.strip()
        if "won the toss" in text:
            for item in data:
                item["match_toss"] = text
        elif "won" in text:
            for item in data:
                item["match_tie_breaker"] = text
                item["match_winning_team"] = text.split("won")[0].strip()
except Exception as e:
    print("Error extracting toss/result:", e)

driver.quit()

# Save to JSON
with open("task4_commentary.json", "w", encoding="utf-8") as jf:
    json.dump(data, jf, indent=4, ensure_ascii=False)

# Save to CSV
if data:
    with open("task4_commentary.csv", "w", newline='', encoding='utf-8') as cf:
        writer = csv.DictWriter(cf, fieldnames=data[0].keys())
        writer.writeheader()
        writer.writerows(data)

print(f"\n✅ Scraped {len(data)} balls and exported to task4_commentary.json & .csv")


✅ Scraped 0 balls and exported to task4_commentary.json & .csv
