Scrape all matches from a single series: IPL 2021

In [6]:
import requests
from bs4 import BeautifulSoup
import csv
import json

# Scrape the match list of one Cricbuzz series page (IPL 2021) and export it
# to JSON and CSV files in the current working directory.

series_url = "https://www.cricbuzz.com/cricket-series/3472/indian-premier-league-2021/matches"
base_url = "https://www.cricbuzz.com"

series_name = "Indian Premier League 2021"
series_type = "T20"
year = "2021"
match_type = "T20"

# Without a timeout, requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30

MATCH_CARD_SELECTOR = '.cb-mtch-lst.cb-col.cb-col-100.cb-tms-itm'
MATCH_DETAIL_SELECTOR = '.cb-col.cb-col-100.cb-srs-mtchs-tm-dtl'


def split_teams(match_name):
    """Split a match title into its two team names.

    The title is split on the first standalone word ``vs`` (or ``vs.``), so a
    team name that merely contains the letters "vs" is left intact.

    Args:
        match_name: Match title text, e.g. ``"Team A vs Team B"``.

    Returns:
        A ``(team1, team2)`` tuple; both are empty strings when the title has
        no standalone ``vs`` word.
    """
    words = match_name.split()
    pivot = next(
        (i for i, word in enumerate(words) if word.rstrip(".") == "vs"),
        None,
    )
    if pivot is None:
        return "", ""
    return " ".join(words[:pivot]), " ".join(words[pivot + 1:])


def parse_match_info(info_text):
    """Split the comma-separated detail text of a match card.

    The first field is taken as the match number. With three or more fields
    the last two are taken as date and venue. With exactly two fields the
    second one is taken as the date (indexing from the end would otherwise
    return the match number a second time).

    Args:
        info_text: Raw text of the match detail element.

    Returns:
        A ``(match_no, match_date, match_venue)`` tuple of stripped strings;
        fields that are not present are empty strings.
    """
    fields = [field.strip() for field in info_text.strip().split(',')]
    match_no = fields[0]
    if len(fields) >= 3:
        return match_no, fields[-2], fields[-1]
    if len(fields) == 2:
        return match_no, fields[1], ""
    return match_no, "", ""


def parse_match_card(card):
    """Build one match record from a match-card element.

    Args:
        card: A BeautifulSoup element selected with ``MATCH_CARD_SELECTOR``.

    Returns:
        A dict with the keys written to the JSON/CSV export.

    Raises:
        ValueError: If the card has no link with an ``href`` or no detail
            element.
    """
    link = card.select_one('a')
    if link is None or not link.get('href'):
        raise ValueError("match link with href not found")

    detail = card.select_one(MATCH_DETAIL_SELECTOR)
    if detail is None:
        raise ValueError("match detail element not found")

    match_name = link.text.strip()
    match_no, match_date, match_venue = parse_match_info(detail.text)
    match_team1, match_team2 = split_teams(match_name)

    return {
        "year": year,
        "series_type": series_type,
        "series_name": series_name,
        "match_no": match_no,
        "match_type": match_type,
        "match_name": match_name,
        "match_href": base_url + link['href'],
        "match_team1": match_team1,
        "match_team2": match_team2,
        # The card text carries a single date, used for both start and end.
        "match_datetime_start": match_date,
        "match_date_end": match_date,
        "match_venue": match_venue,
    }


def scrape_matches(url=series_url):
    """Download a series page and parse every match card on it.

    Cards that cannot be parsed are reported on stdout and skipped.

    Args:
        url: Address of the series "matches" page.

    Returns:
        A list of match record dicts (possibly empty).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # the match list.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    cards = soup.select(MATCH_CARD_SELECTOR)
    if not cards:
        print("No match cards matched the CSS selector; "
              "the page layout may have changed.")

    matches = []
    for card in cards:
        try:
            matches.append(parse_match_card(card))
        except ValueError as e:
            print("Error parsing card:", e)
    return matches


def export_matches(matches,
                   json_path="ipl_2021_matches.json",
                   csv_path="ipl_2021_matches.csv"):
    """Write match records to a JSON file and, if any exist, to a CSV file.

    The JSON file is always written (an empty list when there are no
    records); the CSV file is skipped when there are no records because the
    column names are taken from the first record.

    Args:
        matches: List of match record dicts sharing the same keys.
        json_path: Destination of the JSON export.
        csv_path: Destination of the CSV export.
    """
    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump(matches, json_file, indent=4, ensure_ascii=False)

    if matches:
        csv_columns = list(matches[0].keys())
        with open(csv_path, "w", encoding="utf-8", newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=csv_columns)
            writer.writeheader()
            writer.writerows(matches)
        print("\n Data exported to ipl_2021_matches.csv")
    else:
        print("\n No match data found. CSV export skipped.")


match_data = []

# Guarded so the helpers above can be imported without triggering network or
# file I/O; run as a script or notebook cell, this block executes as before.
if __name__ == "__main__":
    try:
        match_data = scrape_matches(series_url)
    except requests.RequestException as e:
        print("Error fetching series page:", e)

    # Preview the first few records.
    for match in match_data[:3]:
        print(match)

    export_matches(match_data)



 No match data found. CSV export skipped.
