Scrape Venue details 1st Match

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv
import json

options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

match_id = "35718"
match_url = f"https://www.cricbuzz.com/cricket-match-facts/{match_id}/kkr-vs-rcb-30th-match-indian-premier-league-2021"

year = "2021"
series_type = "T20"
series_name = "Indian Premier League 2021"
match_no = "30th Match"
match_type = "T20 IPL"

driver.get(match_url)

wait = WebDriverWait(driver, 10) 

venue_details = {}

try:
    match_venue_stadium = wait.until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(),'Venue:')]//following-sibling::span"))).text.strip()
    venue_details['match_venue_stadium'] = match_venue_stadium
except Exception as e:
    venue_details['match_venue_stadium'] = "Not available"
    print("Error extracting venue stadium:", e)

try:
    match_venue_city = wait.until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(),'City:')]//following-sibling::span"))).text.strip()
    venue_details['match_venue_city'] = match_venue_city
except Exception as e:
    venue_details['match_venue_city'] = "Not available"
    print("Error extracting venue city:", e)

try:
    match_venue_capacity = wait.until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(),'Capacity:')]//following-sibling::span"))).text.strip()
    venue_details['match_venue_capacity'] = match_venue_capacity
except Exception as e:
    venue_details['match_venue_capacity'] = "Not available"
    print("Error extracting venue capacity:", e)

try:
    match_venue_host_teams = wait.until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(),'Host:')]//following-sibling::span"))).text.strip()
    venue_details['match_venue_host_teams'] = match_venue_host_teams
except Exception as e:
    venue_details['match_venue_host_teams'] = "Not available"
    print("Error extracting host teams:", e)

match_status = "Not available"
match_winning_team = "Not available"
match_toss = "Not available"

try:
    match_status = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".cb-col.cb-col-100.cb-ltst-wgt-hdr"))).text.strip()
except Exception as e:
    print("Error extracting match status:", e)

try:
    toss = driver.find_element(By.XPATH, "//span[contains(text(),'Toss')]").text.strip()
    match_toss = toss
except:
    match_toss = "Toss information not available"

commentary_blocks = driver.find_elements(By.CSS_SELECTOR, ".cb-col.cb-col-100.cb-col.cb-com-ln")

data = []

for block in commentary_blocks:
    try:
        over_el = block.find_element(By.CSS_SELECTOR, ".cb-col.cb-col-8.text-bold")
        comm_el = block.find_element(By.CSS_SELECTOR, ".cb-col.cb-col-90.cb-com-ln")
        
        over_no = over_el.text.strip()
        ball_commentary = comm_el.text.strip()
        
        innings = 1 if float(over_no) <= 20 else 2
        ball_no = f"{innings}-{over_no}"

        match_entry = {
            "year": year,
            "series_type": series_type,
            "series_name": series_name,
            "match_no": match_no,
            "match_type": match_type,
            "match_id": match_id,
            "match_status": match_status,
            "match_winning_team": match_winning_team,
            "match_toss": match_toss,
            "match_venue_stadium": venue_details['match_venue_stadium'],
            "match_venue_city": venue_details['match_venue_city'],
            "match_venue_capacity": venue_details['match_venue_capacity'],
            "match_venue_host_teams": venue_details['match_venue_host_teams'],
            "ball_no": ball_no,
            "over_no": over_no,
            "ball_commentary": ball_commentary
        }

        data.append(match_entry)

    except Exception as e:
        print("Error processing block:", e)

driver.quit()

with open("task6_commentary_with_venue_details.json", "w", encoding="utf-8") as jf:
    json.dump(data, jf, indent=4, ensure_ascii=False)

if data:
    with open("task6_commentary_with_venue_details.csv", "w", newline='', encoding='utf-8') as cf:
        writer = csv.DictWriter(cf, fieldnames=data[0].keys())
        writer.writeheader()
        writer.writerows(data)

print(f"\nScraped {len(data)} balls with venue details and exported to task6_commentary_with_venue_details.json & .csv")


Error extracting venue city: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7DF4C5335+78597]
	GetHandleVerifier [0x00007FF7DF4C5390+78688]
	(No symbol) [0x00007FF7DF2791AA]
	(No symbol) [0x00007FF7DF2CF149]
	(No symbol) [0x00007FF7DF2CF3FC]
	(No symbol) [0x00007FF7DF322467]
	(No symbol) [0x00007FF7DF2F712F]
	(No symbol) [0x00007FF7DF31F2BB]
	(No symbol) [0x00007FF7DF2F6EC3]
	(No symbol) [0x00007FF7DF2C03F8]
	(No symbol) [0x00007FF7DF2C1163]
	GetHandleVerifier [0x00007FF7DF76EEED+2870973]
	GetHandleVerifier [0x00007FF7DF769698+2848360]
	GetHandleVerifier [0x00007FF7DF786973+2967875]
	GetHandleVerifier [0x00007FF7DF4E017A+188746]
	GetHandleVerifier [0x00007FF7DF4E845F+222255]
	GetHandleVerifier [0x00007FF7DF4CD2B4+111236]
	GetHandleVerifier [0x00007FF7DF4CD462+111666]
	GetHandleVerifier [0x00007FF7DF4B3589+5465]
	BaseThreadInitThunk [0x00007FF8E623E8D7+23]
	RtlUserThreadStart [0x00007FF8E68D14FC+44]

Error extracting venue capacity: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF

In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time

options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

match_id = "35718"
url = f"https://www.cricbuzz.com/cricket-match-facts/{match_id}/kkr-vs-rcb-30th-match-indian-premier-league-2021"
driver.get(url)
time.sleep(3) 

venue_details = {
    "match_venue_stadium": "Not available",
    "match_venue_city": "Not available",
    "match_venue_capacity": "Not available",
    "match_venue_host_teams": "Not available"
}

try:
    venue_block = driver.find_elements(By.CSS_SELECTOR, ".cb-col.cb-col-100.cb-pb-10 .cb-col.cb-col-50")
    for block in venue_block:
        try:
            label = block.find_element(By.TAG_NAME, "span").text.strip().lower()
            value = block.find_elements(By.TAG_NAME, "span")[1].text.strip()
            
            if "stadium" in label or "venue" in label:
                venue_details["match_venue_stadium"] = value
            elif "city" in label:
                venue_details["match_venue_city"] = value
            elif "capacity" in label:
                venue_details["match_venue_capacity"] = value
            elif "host" in label:
                venue_details["match_venue_host_teams"] = value
        except:
            continue

except Exception as e:
    print("❌ Failed to extract venue block:", e)

driver.quit()

print("✅ Extracted Venue Details:\n", venue_details)


✅ Extracted Venue Details:
 {'match_venue_stadium': 'Not available', 'match_venue_city': 'Not available', 'match_venue_capacity': 'Not available', 'match_venue_host_teams': 'Not available'}
