In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

# Page to scrape (a cricbuzz.com series "matches" URL) and the request headers.
url = "https://www.cricbuzz.com/cricket-series/3961/icc-mens-t20-world-cup-2022/matches"
# A browser-style User-Agent string is sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0"
}

# Bound the wait so a stalled connection cannot hang the kernel indefinitely.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page,
# which would otherwise produce a sheet consisting only of placeholder rows.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Every anchor carrying the match-title link class.
match_links = soup.find_all("a", class_="text-hvr-underline")

# Maps match number -> [match name, winner, margin, venue].
match_dict = {}

for link in match_links:
    # The title text is read from the anchor's <span>, when it has one.
    match_name = link.span.text.strip() if link.span else None

    match_number = None
    if match_name:
        # Drop the 'Super 12 ' prefix, then read the ordinal ("13th Match" -> 13).
        match_name = match_name.replace("Super 12 ", "")
        match_num_search = re.search(r'(\d+)(?:st|nd|rd|th) Match', match_name)
        if match_num_search:
            match_number = int(match_num_search.group(1))

    # Result and venue are looked up inside the enclosing "cb-col-100" div;
    # links without such an ancestor are skipped entirely.
    parent_div = link.find_parent("div", class_="cb-col-100")
    if not parent_div:
        continue

    # Result line, e.g. "<team> won by <margin>".
    result_tag = parent_div.find("a", class_="cb-text-complete")
    result = result_tag.text.strip() if result_tag else None

    if not result:
        # No result text found: leave both fields unset.
        winner, margin = None, None
    elif "won by" in result:
        parts = result.split("won by")
        winner, margin = parts[0].strip(), parts[1].strip()
    else:
        # Any result text without "won by" is recorded as a draw.
        winner, margin = "Draw", ""

    # Venue: keep only the text after the last comma of the "text-gray" div.
    venue = None
    venue_tag = parent_div.find("div", class_="text-gray")
    if venue_tag:
        venue_text = venue_tag.text.strip()
        if "," in venue_text:
            venue = venue_text.rsplit(",", 1)[-1].strip()

    # Only titles that yielded a (non-zero) match number are stored.
    if match_number and match_name:
        match_dict[match_number] = [match_name, winner, margin, venue]

def _ordinal_suffix(n):
    """Return the English ordinal suffix for n ("st", "nd", "rd" or "th")."""
    # 11, 12 and 13 take "th" even though they end in 1, 2 and 3.
    if 10 <= n % 100 <= 20:
        return "th"
    return {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")


# Build one row per match number 1-42, in order. Numbers that were not
# scraped get a placeholder row so the sheet has no gaps.
data = []
for i in range(1, 43):
    if i in match_dict:
        item = match_dict[i]
    else:
        # Use the proper ordinal ("1st", "22nd", "3rd") rather than a blanket
        # "th", matching the "<N>st|nd|rd|th Match" form of the scraped titles.
        item = [f"No Match Scheduled, {i}{_ordinal_suffix(i)} Match", "Draw", "", ""]
    # Row layout: match number followed by name, winner, margin, venue.
    data.append([i, item[0], item[1], item[2], item[3]])

# Turn the row lists into a table with one labelled column per field.
df = pd.DataFrame(
    data=data,
    columns=["Match No", "Match Name", "Winner", "Margin", "Venue"],
)

# Write the table to an Excel workbook, leaving out the DataFrame index.
df.to_excel(excel_writer="Match_Summary.xlsx", index=False)

# Confirmation message for the notebook output.
print("Excel file 'Match_Summary.xlsx' created successfully!")


Excel file 'Match_Summary.xlsx' created successfully!


In [2]:
pip install openpyxl


Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl

   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openp