In [1]:
import random
import time
import logging
import csv
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth

# Logging: INFO-level root configuration plus a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Chrome command-line switches, added to the options object in this order.
options = Options()
for chrome_flag in (
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "--disable-gpu",
    "--no-sandbox",
):
    options.add_argument(chrome_flag)

# webdriver_manager resolves/installs a chromedriver binary and returns its
# path, which is handed to Selenium through a Service object.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Keyword settings forwarded unchanged to selenium_stealth's `stealth` helper.
stealth_settings = {
    "languages": ["en-US", "en"],
    "vendor": "Google Inc.",
    "platform": "Win32",
    "webgl_vendor": "Intel Inc.",
    "renderer": "Intel Iris OpenGL Engine",
    "fix_hairline": True,
}
stealth(driver, **stealth_settings)

# Clean values (especially BBM)
def clean_data_value(header, value):
    """Return *value* ready for CSV output, given its column *header*.

    Values in the column whose header equals "BBM" (compared
    case-insensitively) are returned with a leading apostrophe; values in
    every other column are returned unchanged.

    NOTE(review): the apostrophe is presumably there to stop spreadsheet
    applications from re-interpreting figures such as "3/25" -- confirm.
    """
    is_bbm_column = header.upper() == "BBM"
    if not is_bbm_column:
        return value
    return "'{}".format(value)

# Extract a single table
def extract_table(table_element):
    """Read one table element into ``{"headers": [...], "data": [...]}``.

    "headers" holds the stripped text of every ``th`` found under
    *table_element*. "data" holds one list per ``tr`` that contains at least
    one ``td``; each cell's stripped text is passed through
    ``clean_data_value`` together with the header at the same position (or
    "" when there are more cells than headers).

    Any exception raised while reading the table is logged as a warning and
    an empty result (both lists empty) is returned instead.
    """
    try:
        header_cells = table_element.find_elements(By.TAG_NAME, "th")
        headers = [cell.text.strip() for cell in header_cells]
        header_count = len(headers)

        data = []
        for tr in table_element.find_elements(By.TAG_NAME, "tr"):
            cells = tr.find_elements(By.TAG_NAME, "td")
            if not cells:
                # Rows without <td> cells (e.g. header-only rows) are skipped.
                continue
            data.append([
                clean_data_value(
                    headers[idx] if idx < header_count else "",
                    cell.text.strip(),
                )
                for idx, cell in enumerate(cells)
            ])
    except Exception as e:
        logger.warning(f"Error extracting table: {e}")
        return {"headers": [], "data": []}

    return {"headers": headers, "data": data}

# Step 1: Get Royal Challengers Bengaluru players
# The whole scrape runs inside try/finally so the Chrome process is always shut
# down, even when the squad page wait times out or another error escapes.
try:
    driver.get("https://www.iplt20.com/teams/royal-challengers-bengaluru/squad")
    wait = WebDriverWait(driver, 15)
    wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "dys-box-color")))
    time.sleep(2)

    players_raw = driver.find_elements(By.CLASS_NAME, "dys-box-color")
    player_links = []

    # Collect name, image URL and detail-page URL up front: the card elements
    # are read before the driver navigates away to the individual player pages.
    for player in players_raw:
        try:
            name = player.find_element(By.CLASS_NAME, "ih-p-name").text.strip()
            img_url = (
                player.find_element(By.CLASS_NAME, "ih-p-img")
                .find_element(By.TAG_NAME, "img")
                .get_attribute("src")
            )
            detail_url = player.find_element(By.TAG_NAME, "a").get_attribute("href")
            player_links.append({"name": name, "img_url": img_url, "detail_url": detail_url})
        except Exception as e:
            logger.warning(f"Skipping player due to error: {e}")

    logger.info(f"Found {len(player_links)} players. Starting to scrape data...")

    # NOTE(review): all_data and timestamp are not read anywhere in this cell;
    # they are kept only in case other cells rely on them -- confirm and remove.
    all_data = []
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    csv_folder = os.path.join("players", "Royal Challengers Bengaluru")
    os.makedirs(csv_folder, exist_ok=True)

    # Names of players whose page could not be scraped or saved.
    failed_players = []

    # Step 2: Visit each player page and extract info
    for player in player_links:
        try:
            logger.info(f"Scraping player: {player['name']}")
            driver.get(player["detail_url"])
            # Randomised pause between page loads.
            time.sleep(random.uniform(2, 4))

            titles = driver.find_elements(By.CLASS_NAME, "sm-pp-table__title")
            tables = driver.find_elements(By.CLASS_NAME, "sm-pp-table")

            batting = {}
            bowling = {}

            # zip pairs each title with the table at the same position and stops
            # at the shorter list, so a title without a matching table no longer
            # raises IndexError and discards the whole player.
            for title_element, table_element in zip(titles, tables):
                title_text = title_element.text.strip()
                table_data = extract_table(table_element)
                if not table_data["data"]:
                    continue

                title_lower = title_text.lower()
                if "bat" in title_lower:
                    batting[title_text] = table_data
                elif "bowl" in title_lower:
                    bowling[title_text] = table_data

            # Step 3: Save everything to CSV
            name_clean = player["name"].replace(" ", "_").replace(".", "").lower()
            file_path = os.path.join(csv_folder, f"{name_clean}.csv")

            with open(file_path, "w", newline='', encoding="utf-8") as f:
                writer = csv.writer(f)

                writer.writerow(["Player Name", player["name"]])
                writer.writerow(["Image URL", player["img_url"]])
                writer.writerow(["Detail Page URL", player["detail_url"]])
                writer.writerow([])

                # Batting and bowling share one layout: a heading row, then per
                # section its title, header row, data rows and a blank line.
                for heading, sections in (
                    ("=== Batting Statistics ===", batting),
                    ("=== Bowling Statistics ===", bowling),
                ):
                    writer.writerow([heading])
                    for section, table in sections.items():
                        writer.writerow([section])
                        if table["headers"]:
                            writer.writerow(table["headers"])
                        # Only tables with at least one data row were stored.
                        writer.writerows(table["data"])
                        writer.writerow([])

            logger.info(f"✓ Saved CSV for: {player['name']}")

        except Exception as e:
            logger.error(f"Error scraping {player['name']}: {str(e)}")
            failed_players.append(player["name"])

    # Only claim full success when every player was actually saved.
    if failed_players:
        logger.warning(
            f"Saved {len(player_links) - len(failed_players)} of {len(player_links)} players "
            f"to folder '{csv_folder}'. Failed: {', '.join(failed_players)}"
        )
    else:
        logger.info(f"✅ All players saved successfully to folder '{csv_folder}'")
finally:
    driver.quit()


INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\ajmal\.wdm\drivers\chromedriver\win64\135.0.7049.95\chromedriver-win32/chromedriver.exe] found in cache
INFO:__main__:Found 22 players. Starting to scrape data...
INFO:__main__:Scraping player: RAJAT PATIDAR
INFO:__main__:✓ Saved CSV for: RAJAT PATIDAR
INFO:__main__:Scraping player: VIRAT KOHLI
INFO:__main__:✓ Saved CSV for: VIRAT KOHLI
INFO:__main__:Scraping player: PHIL SALT
INFO:__main__:✓ Saved CSV for: PHIL SALT
INFO:__main__:Scraping player: JITESH SHARMA
INFO:__main__:✓ Saved CSV for: JITESH SHARMA
INFO:__main__:Scraping player: DEVDUTT PADIKKAL
INFO:__main__:✓ Saved CSV for: DEVDUTT PADIKKAL
INFO:__main__:Scraping player: SWASTIK CHHIKARA
INFO:__main__:✓ Saved CSV for: SWASTIK CHHIKARA
INFO:__main__:Scraping player: LIAM LIVINGSTONE
INFO:__main__:✓ Saved CSV for: LIAM LIVINGSTONE
INFO:__main__:Scraping player: KRUNAL PANDYA
INFO:__main_

In [2]:
import random
import time
import csv
import os
import logging

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth

# Setup logging: INFO-level root configuration and a module-named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Chrome options: each switch below is added in the listed order.
chrome_switches = [
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "--disable-gpu",
    "--no-sandbox",
]
options = Options()
for switch in chrome_switches:
    options.add_argument(switch)

# Start driver using the chromedriver path returned by webdriver_manager.
driver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(service=Service(driver_path), options=options)

# Stealth: settings are forwarded as keyword arguments to selenium_stealth.
stealth_kwargs = dict(
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="Win32",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)
stealth(driver, **stealth_kwargs)

# Team URLs
# Maps each team's display name to its squad page. The URL slug is the display
# name lower-cased with spaces replaced by hyphens; insertion order follows the
# tuple below.
team_urls = {
    team: f"https://www.iplt20.com/teams/{team.lower().replace(' ', '-')}/squad"
    for team in (
        "Chennai Super Kings",
        "Delhi Capitals",
        "Gujarat Titans",
        "Kolkata Knight Riders",
        "Lucknow Super Giants",
        "Mumbai Indians",
        "Punjab Kings",
        "Rajasthan Royals",
        "Royal Challengers Bengaluru",
        "Sunrisers Hyderabad",
    )
}

# Extract table data
def extract_table(table_element):
    """Read one table element into ``{"headers": [...], "data": [...]}``.

    "headers" is the stripped text of every ``th`` under *table_element*;
    "data" has one list of stripped ``td`` texts per ``tr`` that contains at
    least one ``td``. If reading the table raises, the error is logged as a
    warning and a result with two empty lists is returned.
    """
    try:
        headers = []
        for header_cell in table_element.find_elements(By.TAG_NAME, "th"):
            headers.append(header_cell.text.strip())

        # Lazily fetch the <td> cells of each row; rows without any are dropped.
        cells_per_row = (
            tr.find_elements(By.TAG_NAME, "td")
            for tr in table_element.find_elements(By.TAG_NAME, "tr")
        )
        data = [
            [cell.text.strip() for cell in cells]
            for cells in cells_per_row
            if cells
        ]
    except Exception as e:
        logger.warning(f"Error extracting table: {e}")
        return {"headers": [], "data": []}

    return {"headers": headers, "data": data}

# Main loop
# Header row for each team's summary CSV. It must list one title per value
# appended to team_data below (seven per player); the last four titles follow
# the mat / runs / avg / wkts variables.
summary_header = [
    "Player Name", "Image URL", "Detail Page URL",
    "Matches", "Runs", "Average", "Wickets",
]

# Output folder is the same for every team, so create it once up front.
folder = "players_summary"
os.makedirs(folder, exist_ok=True)

# try/finally guarantees the Chrome process is shut down even when a squad page
# wait times out or another error escapes the loop.
try:
    wait = WebDriverWait(driver, 15)

    for team_name, team_url in team_urls.items():
        logger.info(f"Scraping team: {team_name}")
        driver.get(team_url)
        wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "dys-box-color")))
        time.sleep(2)

        players_raw = driver.find_elements(By.CLASS_NAME, "dys-box-color")
        player_links = []

        # Read name, image URL and detail URL from every card before the driver
        # navigates away to the individual player pages.
        for player in players_raw:
            try:
                name = player.find_element(By.CLASS_NAME, "ih-p-name").text.strip()
                img_url = (
                    player.find_element(By.CLASS_NAME, "ih-p-img")
                    .find_element(By.TAG_NAME, "img")
                    .get_attribute("src")
                )
                detail_url = player.find_element(By.TAG_NAME, "a").get_attribute("href")
                player_links.append({"name": name, "img_url": img_url, "detail_url": detail_url})
            except Exception as e:
                logger.warning(f"Skipping player due to error: {e}")

        logger.info(f"Found {len(player_links)} players for {team_name}")

        team_data = []

        for player in player_links:
            try:
                logger.info(f"Scraping player: {player['name']}")
                driver.get(player["detail_url"])
                # Randomised pause between page loads.
                time.sleep(random.uniform(2, 4))

                titles = driver.find_elements(By.CLASS_NAME, "sm-pp-table__title")
                tables = driver.find_elements(By.CLASS_NAME, "sm-pp-table")

                mat = runs = avg = wkts = ""

                # zip stops at the shorter list, so a title without a matching
                # table no longer raises IndexError and drops the player.
                for title_element, table_element in zip(titles, tables):
                    title = title_element.text.strip().lower()
                    table = extract_table(table_element)

                    for row in table["data"]:
                        label = row[0].lower()
                        if "ipl" not in label and "t20" not in label:
                            continue
                        # NOTE(review): the column positions below assume a
                        # fixed layout of the site's tables -- confirm against
                        # the live page.
                        if "bat" in title:
                            mat = row[1] if len(row) > 1 else ""
                            runs = row[4] if len(row) > 4 else ""
                            avg = row[6] if len(row) > 6 else ""
                        elif "bowl" in title:
                            wkts = row[8] if len(row) > 8 else ""
                        # Only the first IPL/T20 row of each table is used.
                        break

                team_data.append([
                    player["name"],
                    player["img_url"],
                    player["detail_url"],
                    mat, runs, avg, wkts,
                ])

            except Exception as e:
                logger.error(f"Error scraping {player['name']}: {str(e)}")
                continue

        # Save team CSV
        team_file = os.path.join(folder, f"{team_name.replace(' ', '_')}.csv")

        with open(team_file, "w", newline='', encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(summary_header)
            writer.writerows(team_data)

        logger.info(f"✅ Saved summary CSV for {team_name}")
finally:
    driver.quit()

logger.info("🎉 All teams processed successfully!")


INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\ajmal\.wdm\drivers\chromedriver\win64\135.0.7049.114\chromedriver-win32/chromedriver.exe] found in cache
INFO:__main__:Scraping team: Chennai Super Kings
INFO:__main__:Found 25 players for Chennai Super Kings
INFO:__main__:Scraping player: MS DHONI
INFO:__main__:Scraping player: DEWALD BREVIS
INFO:__main__:Scraping player: DEVON CONWAY
INFO:__main__:Scraping player: RAHUL TRIPATHI
INFO:__main__:Scraping player: SHAIK RASHEED
INFO:__main__:Scraping player: VANSH BEDI
INFO:__main__:Scraping player: ANDRE SIDDARTH
INFO:__main__:Scraping player: AYUSH MHATRE
INFO:__main__:Scraping player: RACHIN RAVINDRA
INFO:__main__:Scraping player: RAVICHANDRAN ASHWIN
INFO:__main__:Scraping player: VIJAY SHANKAR
INFO:__main__:Scraping player: SAM CURRAN
INFO:__main__:Scraping player: ANSHUL KAMBOJ
INFO:__main__:Scraping player: DEEPAK HOODA
INFO:__main__:Scrapin