In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# URL of the Cricbuzz page with upcoming international cricket matches
URL = "https://www.cricbuzz.com/cricket-schedule/upcoming-series/international"

# Browser-like User-Agent sent with the request to avoid it being blocked
HEADERS = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'}

# Seconds to wait for the server before giving up. requests applies no
# timeout by default, so without this a stalled connection blocks forever.
REQUEST_TIMEOUT = 15

# Fetch the page; only the network calls live inside the try so that
# unrelated errors (e.g. a parser problem) are not mistaken for fetch errors.
try:
    response = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # Raises requests.exceptions.HTTPError on 4xx/5xx
except requests.exceptions.RequestException as e:
    print(f"Error fetching the page: {e}")
    # Stop with a non-zero status so callers can detect the failure.
    # (`exit()` is an interactive helper from `site` and reported status 0.)
    raise SystemExit(1) from e

# Parse the downloaded HTML with the lxml parser
soup = BeautifulSoup(response.text, "lxml")

# Exact `class` attribute strings used to locate the relevant <div> elements.
# Passing a multi-class string to `class_` matches that exact attribute value.
DATE_CLASS = "cb-lv-grn-strip text-bold"
MATCH_CLASS = "cb-ovr-flo cb-col-50 cb-col cb-mtchs-dy-vnu cb-adjst-lst"
VENUE_CLASS = "cb-font-12 text-gray cb-ovr-flo"

# Sections containing dates (e.g., "Wed, Jan 29 2025").
# Not used by the loop below; retained because the name was defined here originally.
match_days = soup.find_all("div", class_=DATE_CLASS)

# Sections containing matches (teams and details)
matches = soup.find_all("div", class_=MATCH_CLASS)

# Venues for the matches; paired with `matches` by position, as before
venues = soup.find_all("div", class_=VENUE_CLASS)

# Rows of [match name, date, venue], one per match element found
match_data = []

# Iterate over the matches themselves rather than over the date headings:
# indexing matches by date position drops every match beyond the number of
# headings and pairs dates with the wrong fixtures when a day has several.
for i, match in enumerate(matches):
    # Match name (teams playing) is the text of the first link in the section
    link = match.find("a")
    match_name = link.text.strip() if link else "Unknown Match"

    # The date is taken from the closest date strip that appears before this
    # match in the document.
    # NOTE(review): assumes each date strip precedes its matches in the page
    # markup - confirm against the live HTML.
    date_header = match.find_previous("div", class_=DATE_CLASS)
    match_date = date_header.text.strip() if date_header else "Unknown Date"

    # Venue (location); fall back when the venue list is shorter than matches
    venue = venues[i].text.strip() if i < len(venues) else "Unknown Venue"

    match_data.append([match_name, match_date, venue])

# Destination file and column headers for the scraped schedule
CSV_PATH = "cricbuzz_matches_enhanced.csv"
COLUMN_NAMES = ["Match Name", "Date", "Venue"]

# Build a table from the collected rows (each row is [name, date, venue])
df = pd.DataFrame(data=match_data, columns=COLUMN_NAMES)

# Write the table to disk, leaving out the DataFrame's index column
df.to_csv(CSV_PATH, index=False)

print("✅ Data saved to 'cricbuzz_matches_enhanced.csv'")


✅ Data saved to 'cricbuzz_matches_enhanced.csv'
