## Orange Cap

In [1]:
import os
import time
import csv
import logging
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configure Chrome
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Everything that uses the driver runs inside try/finally so the Chrome
# process is always shut down, including when the page load or the wait for
# the table raises (e.g. a timeout).
try:
    # Apply stealth mode
    stealth(driver,
            languages=["en-US", "en"],
            vendor="Google Inc.",
            platform="Win32",
            webgl_vendor="Intel Inc.",
            renderer="Intel Iris OpenGL Engine",
            fix_hairline=True,
            )

    # Go to IPL Stats 2025 page
    url = "https://www.iplt20.com/stats/2025"
    driver.get(url)

    wait = WebDriverWait(driver, 15)
    # Wait for the Orange Cap table to load; the wait returns the located element
    table = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "np-mostrunsTable")))
    header = table.find_element(By.CLASS_NAME, "st-table__head")
    rows_wrap = table.find_element(By.CLASS_NAME, "st-table-wrap")

    headers = [th.text.strip() for th in header.find_elements(By.TAG_NAME, "th")]

    # Extract table rows; rows that contain no <td> cells are skipped
    data = []
    for row in rows_wrap.find_elements(By.TAG_NAME, "tr"):
        cols = row.find_elements(By.TAG_NAME, "td")
        if cols:
            data.append([col.text.strip() for col in cols])

    # Prepare file path
    folder_name = os.path.join("Stat", "batting")
    os.makedirs(folder_name, exist_ok=True)
    file_path = os.path.join(folder_name, "orange_cap.csv")

    # Update or create CSV
    existing_rows = []
    if os.path.exists(file_path):
        with open(file_path, "r", newline='', encoding="utf-8") as f:
            existing_rows = list(csv.reader(f))
        # The first stored row is the header. Rows are compared as whole
        # tuples, so a player whose stats changed since the last run is
        # appended as an additional row rather than updated in place.
        existing_data = {tuple(row) for row in existing_rows[1:]}
        new_rows = [row for row in data if tuple(row) not in existing_data]
        if new_rows:
            with open(file_path, "a", newline='', encoding="utf-8") as f:
                csv.writer(f).writerows(new_rows)
            logger.info(f"✅ Orange Cap CSV updated with {len(new_rows)} new rows.")
        else:
            logger.info("ℹ️ No new data to update in the Orange Cap CSV.")
    else:
        with open(file_path, "w", newline='', encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(data)
        logger.info(f"✅ Orange Cap CSV created and data saved to {file_path}")

except Exception as e:
    # Top-level boundary of the cell: report the failure instead of raising.
    logger.error(f"Error extracting Orange Cap data: {e}")
finally:
    driver.quit()


INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\ajmal\.wdm\drivers\chromedriver\win64\135.0.7049.114\chromedriver-win32/chromedriver.exe] found in cache
INFO:__main__:✅ Orange Cap CSV created and data saved to Stat\batting\orange_cap.csv


## Most Fifties

In [11]:
import os
import time
import csv
import logging
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth

# Logging for this cell
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Headless Chrome configuration
options = Options()
for chrome_flag in (
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "--disable-gpu",
    "--no-sandbox",
):
    options.add_argument(chrome_flag)
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Stealth settings applied to the freshly created driver
stealth(
    driver,
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="Win32",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)

# Open the IPL 2025 stats page
url = "https://www.iplt20.com/stats/2025"
driver.get(url)
wait = WebDriverWait(driver, 15)

try:
    # The stat tabs must be present before anything can be clicked
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "cSBListItems")))

    # Scroll down, then pause briefly
    driver.execute_script("window.scrollTo(0, 800);")
    time.sleep(2)

    # Locate the 'Most Fifties' tab and click it through JavaScript
    fifties_locator = (
        By.XPATH,
        "//div[contains(text(), 'Most Fifties') and contains(@class, 'cSBListItems')]",
    )
    fifties_tab = wait.until(EC.presence_of_element_located(fifties_locator))
    driver.execute_script("arguments[0].click();", fifties_tab)
    logger.info("✅ Clicked on 'Most Fifties' tab.")

    # Wait for a table element, then allow a short pause for rendering
    wait.until(EC.presence_of_element_located((By.XPATH, "//table")))
    time.sleep(2)

    # Column titles: visible text first, innerText as fallback when it is empty
    header_section = driver.find_element(By.CLASS_NAME, "st-table__head")
    headers = [
        th.text.strip() or th.get_attribute("innerText").strip()
        for th in header_section.find_elements(By.TAG_NAME, "th")
    ]

    # Body rows of the first table on the page; rows without <td> are ignored
    table = driver.find_element(By.XPATH, "//table")
    rows_wrap = table.find_element(By.TAG_NAME, "tbody")
    data = []
    for tr in rows_wrap.find_elements(By.TAG_NAME, "tr"):
        cells = tr.find_elements(By.TAG_NAME, "td")
        if not cells:
            continue
        data.append([cell.text.strip() for cell in cells])

    # Destination CSV
    folder_name = os.path.join("Stat", "batting")
    os.makedirs(folder_name, exist_ok=True)
    file_path = os.path.join(folder_name, "most_fifties.csv")

    existing_rows = []
    if not os.path.exists(file_path):
        # First run: write the header followed by every scraped row
        with open(file_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(data)
        logger.info(f"✅ Most Fifties CSV created and data saved to {file_path}")
    else:
        # Later runs: append only rows that are not already stored verbatim
        with open(file_path, "r", newline="", encoding="utf-8") as f:
            existing_rows = list(csv.reader(f))
        existing_data = existing_rows[1:] if existing_rows else []
        new_rows = [row for row in data if row not in existing_data]
        if not new_rows:
            logger.info("ℹ️ No new data to update in the Most Fifties CSV.")
        else:
            with open(file_path, "a", newline="", encoding="utf-8") as f:
                csv.writer(f).writerows(new_rows)
            logger.info(f"✅ Most Fifties CSV updated with {len(new_rows)} new rows.")

except Exception as e:
    logger.error(f"Error extracting Most Fifties data: {e}")
finally:
    driver.quit()

INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\ajmal\.wdm\drivers\chromedriver\win64\135.0.7049.114\chromedriver-win32/chromedriver.exe] found in cache
INFO:__main__:✅ Clicked on 'Most Fifties' tab.
INFO:__main__:ℹ️ No new data to update in the Most Fifties CSV.


## Batting - Most 100s in an Innings


In [31]:
#Batting - Most 100's in Innings

import csv
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time

# Setup options
options = Options()
options.add_argument("--start-maximized")

# Setup driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Create the complete output directory, including the "batting" sub-folder.
# Creating only the top-level folder would make the CSV write below fail with
# FileNotFoundError when the sub-folder does not exist yet.
folder_name = "Ipl Stat 2025"
folder_name2 = "batting"
output_dir = os.path.join(folder_name, folder_name2)
os.makedirs(output_dir, exist_ok=True)

try:
    # Open the IPL stats page (inside the try so the browser is closed by the
    # finally clause even if navigation fails)
    driver.get("https://www.iplt20.com/stats/2025")

    # Wait for stat tabs to load
    WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((By.CLASS_NAME, "cSBListItems"))
    )

    # Scroll down slightly to make tabs visible
    driver.execute_script("window.scrollTo(0, 800);")
    time.sleep(2)

    # Click on 'Most Centuries'
    centuries_tab = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, "//div[contains(text(), 'Most Centuries') and contains(@class, 'cSBListItems')]"))
    )
    driver.execute_script("arguments[0].click();", centuries_tab)
    print("✅ Clicked on 'Most Centuries' tab.")
    time.sleep(5)

    # Wait for the table to load
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, "//table"))
    )

    # Get headers from the st-table__head; fall back to innerText when the
    # visible text of a header cell is empty
    header_section = driver.find_element(By.CLASS_NAME, "st-table__head")
    headers = [
        th.text.strip() or th.get_attribute("innerText").strip()
        for th in header_section.find_elements(By.TAG_NAME, "th")
    ]

    # Extract row data, skipping rows with no cell text so the CSV does not
    # contain blank lines
    data = []
    for row in driver.find_elements(By.XPATH, "//table/tbody/tr"):
        row_data = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "td")]
        if any(row_data):
            data.append(row_data)

    # Save to CSV (overwrites any previous file)
    file_path = os.path.join(output_dir, "most_centuries.csv")
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(data)

    print(f"✅ Data saved with headers to '{file_path}'")

except Exception as e:
    print("❌ Error scraping Most Centuries:", e)

finally:
    driver.quit()


INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\ajmal\.wdm\drivers\chromedriver\win64\135.0.7049.95\chromedriver.exe] found in cache


✅ Clicked on 'Most Centuries' tab.
✅ Data saved with headers to 'Ipl Stat 2025\batting\most_centuries.csv'


## Best Batting Average

In [12]:
import os
import time
import csv
import logging
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth

# Logging for this cell
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Chrome configuration (flags are added in the same order as before)
CHROME_FLAGS = [
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "--disable-gpu",
    "--no-sandbox",
    "--start-maximized",
]
options = Options()
for chrome_flag in CHROME_FLAGS:
    options.add_argument(chrome_flag)
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Stealth settings applied to the driver
stealth(
    driver,
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="Win32",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)

# Load the IPL 2025 stats page
url = "https://www.iplt20.com/stats/2025"
driver.get(url)
wait = WebDriverWait(driver, 15)

try:
    # Stat tabs have to be present first
    tabs_present = EC.presence_of_element_located((By.CLASS_NAME, "cSBListItems"))
    wait.until(tabs_present)

    # Scroll down and pause
    driver.execute_script("window.scrollTo(0, 800);")
    time.sleep(2)

    # Find and click the 'Best Batting Average' tab via JavaScript
    batting_avg_xpath = "//div[contains(text(), 'Best Batting Average') and contains(@class, 'cSBListItems')]"
    batting_avg_tab = wait.until(
        EC.presence_of_element_located((By.XPATH, batting_avg_xpath))
    )
    driver.execute_script("arguments[0].click();", batting_avg_tab)
    logger.info("✅ Clicked on 'Best Batting Average' tab.")

    # A table must be present; then give the page a moment to render
    wait.until(EC.presence_of_element_located((By.XPATH, "//table")))
    time.sleep(2)

    # Header cells: use visible text, or innerText when the visible text is empty
    header_section = driver.find_element(By.CLASS_NAME, "st-table__head")
    th_elements = header_section.find_elements(By.TAG_NAME, "th")
    headers = [
        th.text.strip() or th.get_attribute("innerText").strip()
        for th in th_elements
    ]

    # Data rows from the <tbody> of the first table; rows lacking <td> are dropped
    table = driver.find_element(By.XPATH, "//table")
    rows_wrap = table.find_element(By.TAG_NAME, "tbody")
    rows = rows_wrap.find_elements(By.TAG_NAME, "tr")
    cells_per_row = [tr.find_elements(By.TAG_NAME, "td") for tr in rows]
    data = [
        [cell.text.strip() for cell in cells]
        for cells in cells_per_row
        if cells
    ]

    # Output location
    folder_name = os.path.join("Stat", "Batting")
    os.makedirs(folder_name, exist_ok=True)
    file_path = os.path.join(folder_name, "best_batting_average.csv")

    existing_rows = []
    file_already_there = os.path.exists(file_path)
    if file_already_there:
        # Read what is stored and append only rows not already present verbatim
        with open(file_path, "r", newline="", encoding="utf-8") as f:
            existing_rows = list(csv.reader(f))
        existing_data = existing_rows[1:] if existing_rows else []
        new_rows = [row for row in data if row not in existing_data]
        if not new_rows:
            logger.info("ℹ️ No new data to update in the Best Batting Average CSV.")
        else:
            with open(file_path, "a", newline="", encoding="utf-8") as f:
                csv.writer(f).writerows(new_rows)
            logger.info(f"✅ Best Batting Average CSV updated with {len(new_rows)} new rows.")
    else:
        # No file yet: header row followed by all scraped rows
        with open(file_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(data)
        logger.info(f"✅ Best Batting Average CSV created and data saved to {file_path}")

except Exception as e:
    logger.error(f"Error extracting Best Batting Average data: {e}")
finally:
    driver.quit()

INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\ajmal\.wdm\drivers\chromedriver\win64\135.0.7049.114\chromedriver-win32/chromedriver.exe] found in cache
INFO:__main__:✅ Clicked on 'Best Batting Average' tab.
INFO:__main__:✅ Best Batting Average CSV created and data saved to Ipl Stat 2025\batting\best_batting_average.csv


## Purple Cap

In [15]:
import os
import time
import csv
import logging
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def fetch_purple_cap_data():
    """Scrape the 'Purple Cap' table from the IPL 2025 stats page into a CSV.

    Starts headless Chrome, opens the stats page, clicks the 'Purple Cap'
    tab and reads the header cells and body rows of the first table on the
    page. The result is stored in ``Stat/Bowling/purple_cap.csv``:

    * if the file does not exist, it is created with the header row and all
      scraped rows;
    * otherwise only rows that are not already stored verbatim are appended
      (a row whose values changed is therefore appended as a new row).

    Any exception is logged and not re-raised; the browser is always closed.
    """
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--start-maximized")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    # From here on the driver exists, so every further step is inside the
    # try block: a failure in stealth() or driver.get() must not leave the
    # Chrome process running.
    try:
        stealth(driver,
                languages=["en-US", "en"],
                vendor="Google Inc.",
                platform="Win32",
                webgl_vendor="Intel Inc.",
                renderer="Intel Iris OpenGL Engine",
                fix_hairline=True,
                )

        url = "https://www.iplt20.com/stats/2025"
        driver.get(url)
        wait = WebDriverWait(driver, 15)

        wait.until(EC.presence_of_element_located((By.CLASS_NAME, "cSBListItems")))
        driver.execute_script("window.scrollTo(0, 800);")
        time.sleep(2)

        purple_cap_tab = wait.until(
            EC.presence_of_element_located((By.XPATH, "//div[contains(text(), 'Purple Cap') and contains(@class, 'cSBListItems')]"))
        )
        # Clicked through JavaScript rather than WebElement.click()
        driver.execute_script("arguments[0].click();", purple_cap_tab)
        logger.info("✅ Clicked on 'Purple Cap' tab.")

        wait.until(EC.presence_of_element_located((By.XPATH, "//table")))
        time.sleep(2)

        # Header text; fall back to innerText when the visible text is empty
        header_section = driver.find_element(By.CLASS_NAME, "st-table__head")
        headers = [
            th.text.strip() or th.get_attribute("innerText").strip()
            for th in header_section.find_elements(By.TAG_NAME, "th")
        ]

        table = driver.find_element(By.XPATH, "//table")
        rows_wrap = table.find_element(By.TAG_NAME, "tbody")

        # Rows that contain no <td> cells are skipped
        data = []
        for row in rows_wrap.find_elements(By.TAG_NAME, "tr"):
            cols = row.find_elements(By.TAG_NAME, "td")
            if cols:
                data.append([col.text.strip() for col in cols])

        folder_name = os.path.join("Stat", "Bowling")
        os.makedirs(folder_name, exist_ok=True)
        file_path = os.path.join(folder_name, "purple_cap.csv")

        if os.path.exists(file_path):
            with open(file_path, "r", newline='', encoding="utf-8") as f:
                existing_rows = list(csv.reader(f))
            # Skip the stored header row; a set of tuples gives constant-time
            # membership checks while keeping the same whole-row comparison.
            existing_data = {tuple(row) for row in existing_rows[1:]}
            new_rows = [row for row in data if tuple(row) not in existing_data]
            if new_rows:
                with open(file_path, "a", newline='', encoding="utf-8") as f:
                    csv.writer(f).writerows(new_rows)
                logger.info(f"✅ Purple Cap CSV updated with {len(new_rows)} new rows.")
            else:
                logger.info("ℹ️ No new data to update in the Purple Cap CSV.")
        else:
            with open(file_path, "w", newline='', encoding="utf-8") as f:
                writer = csv.writer(f)
                writer.writerow(headers)
                writer.writerows(data)
            logger.info(f"✅ Purple Cap CSV created and data saved to {file_path}")

    except Exception as e:
        logger.error(f"Error extracting Purple Cap data: {e}")
    finally:
        driver.quit()

fetch_purple_cap_data()

INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\ajmal\.wdm\drivers\chromedriver\win64\135.0.7049.114\chromedriver-win32/chromedriver.exe] found in cache
INFO:__main__:✅ Clicked on 'Purple Cap' tab.
INFO:__main__:✅ Purple Cap CSV updated with 20 new rows.


## Best Bowling Figures

In [17]:
import os
import time
import csv
import logging
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth

# Logging for this cell
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def fetch_best_bowling_figures_data():
    """Scrape the 'Best Bowling Figures' table of the IPL 2025 stats page.

    Runs headless Chrome, clicks the 'Best Bowling Figures' tab and writes
    the header cells plus every non-empty table row to
    ``Stat/Bowling/best_bowling_figures.csv``, replacing any existing file.

    Raises:
        Exception: whatever the scraping or file writing raised; it is
            logged first and then re-raised. The browser is closed either way.
    """
    # Chrome options, added in a fixed order
    options = Options()
    for chrome_flag in (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-blink-features=AutomationControlled",
    ):
        options.add_argument(chrome_flag)

    # Driver creation and stealth setup
    chrome_service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=chrome_service, options=options)
    stealth(
        driver,
        languages=["en-US", "en"],
        vendor="Google Inc.",
        platform="Win32",
        webgl_vendor="Intel Inc.",
        renderer="Intel Iris OpenGL Engine",
        fix_hairline=True,
    )

    try:
        driver.get("https://www.iplt20.com/stats/2025")
        wait = WebDriverWait(driver, 20)

        # Stat tabs must exist; then scroll and pause
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, "cSBListItems")))
        driver.execute_script("window.scrollTo(0, 800);")
        time.sleep(2)

        # Locate the tab and click it through JavaScript
        tab_xpath = "//div[contains(text(), 'Best Bowling Figures') and contains(@class, 'cSBListItems')]"
        tab = wait.until(EC.presence_of_element_located((By.XPATH, tab_xpath)))
        driver.execute_script("arguments[0].click();", tab)
        logger.info("✅ Clicked on 'Best Bowling Figures' tab.")
        time.sleep(5)

        # A table element has to be present before reading
        wait.until(EC.presence_of_element_located((By.XPATH, "//table")))

        # Header cells: visible text, or innerText when that is empty
        headers = []
        header_section = driver.find_element(By.CLASS_NAME, "st-table__head")
        for th in header_section.find_elements(By.TAG_NAME, "th"):
            header_text = th.text.strip()
            if not header_text:
                header_text = th.get_attribute("innerText").strip()
            headers.append(header_text)

        # Body rows; rows whose cells are all empty are left out
        data = []
        for tr in driver.find_elements(By.XPATH, "//table/tbody/tr"):
            values = [td.text.strip() for td in tr.find_elements(By.TAG_NAME, "td")]
            if not any(values):
                continue
            data.append(values)

        # Write the CSV, overwriting a previous one
        folder_path = os.path.join("Stat", "Bowling")
        os.makedirs(folder_path, exist_ok=True)
        file_path = os.path.join(folder_path, "best_bowling_figures.csv")

        with open(file_path, "w", newline="", encoding="utf-8") as f:
            csv_writer = csv.writer(f)
            csv_writer.writerow(headers)
            csv_writer.writerows(data)

        logger.info(f"✅ Best Bowling Figures data saved to '{file_path}'.")

    except Exception as e:
        logger.error(f"❌ Error scraping Best Bowling Figures: {e}")
        raise

    finally:
        driver.quit()

fetch_best_bowling_figures_data()

INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\ajmal\.wdm\drivers\chromedriver\win64\135.0.7049.114\chromedriver-win32/chromedriver.exe] found in cache
INFO:__main__:✅ Clicked on 'Best Bowling Figures' tab.
INFO:__main__:✅ Best Bowling Figures data saved to 'Ipl Stat 2025\bowling\best_bowling_figures.csv'.


## Bowling - TATA IPL Green Dot Balls

In [36]:
#Bowling - TATA IPL Green Dot Balls

import csv
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time

# Setup Chrome options
options = Options()
options.add_argument("--start-maximized")

# Setup WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Create the complete output folder, including the "Bowling" sub-folder.
# Creating only "Stat" would make the CSV write below fail with
# FileNotFoundError when "Stat/Bowling" does not exist yet.
folder_name = "Stat"
folder_name2 = "Bowling"
output_dir = os.path.join(folder_name, folder_name2)
os.makedirs(output_dir, exist_ok=True)

try:
    # Navigate to IPL stats page (inside the try so the finally clause closes
    # the browser even if navigation fails)
    driver.get("https://www.iplt20.com/stats/2025")

    # Wait for tabs to load
    WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((By.CLASS_NAME, "cSBListItems"))
    )

    # Scroll to load all buttons
    driver.execute_script("window.scrollTo(0, 800);")
    time.sleep(2)

    # Click on 'TATA IPL Green Dot Balls'
    tab = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, "//div[contains(text(), 'TATA IPL Green Dot Balls') and contains(@class, 'cSBListItems')]"))
    )
    driver.execute_script("arguments[0].click();", tab)
    print("✅ Clicked on 'TATA IPL Green Dot Balls' tab.")
    time.sleep(5)

    # Wait for the table
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, "//table"))
    )

    # Extract table headers; fall back to innerText when the visible text of
    # a header cell is empty
    header_section = driver.find_element(By.CLASS_NAME, "st-table__head")
    headers = [
        th.text.strip() or th.get_attribute("innerText").strip()
        for th in header_section.find_elements(By.TAG_NAME, "th")
    ]

    # Extract table rows
    data = []
    for row in driver.find_elements(By.XPATH, "//table/tbody/tr"):
        row_data = [col.text.strip() for col in row.find_elements(By.TAG_NAME, "td")]
        if any(row_data):  # Skip empty rows
            data.append(row_data)

    # Write to CSV (overwrites any previous file)
    file_path = os.path.join(output_dir, "tata_ipl_green_dot_balls.csv")
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(data)

    print(f"✅ Data saved to '{file_path}'")

except Exception as e:
    print("❌ Error scraping TATA IPL Green Dot Balls:", e)

finally:
    driver.quit()


INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\ajmal\.wdm\drivers\chromedriver\win64\135.0.7049.95\chromedriver.exe] found in cache


✅ Clicked on 'TATA IPL Green Dot Balls' tab.
✅ Data saved to 'Ipl Stat 2025\bowling\tata_ipl_green_dot_balls.csv'


In [18]:
import os
import csv
import logging
import time
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth

# Setup logging with more information
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("ipl_scraper.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


def _position_key(row):
    """Sort key for a table row: numeric column 0, non-numeric values last."""
    return int(row[0]) if row[0].isdigit() else float('inf')


def _merge_player_rows(existing_data, new_rows):
    """Merge stored and freshly scraped rows into one row per player.

    Rows are keyed by column 1 (assumed to be the player name); when a key
    occurs more than once the later row wins, so freshly scraped rows
    replace stored ones. Rows with fewer than two columns are dropped. The
    result is sorted by column 0 (assumed to be the position).
    """
    player_dict = {}
    for row in existing_data + new_rows:
        if len(row) > 1:  # Ensure row has enough columns
            player_dict[row[1]] = row
    return sorted(player_dict.values(), key=_position_key)


def fetch_orange_cap_data():
    """Scrape the Orange Cap table of the IPL 2025 stats page into a CSV.

    Loads the page in headless Chrome (up to three attempts), reads the
    header and rows of the ``np-mostrunsTable`` element and writes them to
    ``Stat/Batting/orange_cap.csv``. If that file already exists, it is
    first renamed to a timestamped backup in the same folder and the new
    file contains the stored rows merged with the rows that are new in this
    scrape, one row per player, sorted by position.

    Exceptions are logged with a traceback and not re-raised. The browser
    is closed and the elapsed time is logged in every case.
    """
    start_time = time.time()
    logger.info("Starting Orange Cap data scraping")

    # Configure Chrome with additional options for stability
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")  # Overcome limited resource problems
    options.add_argument("--window-size=1920,1080")  # Consistent window size

    # Stays None when Chrome fails to start, so the cleanup below can tell
    # whether there is a browser to close.
    driver = None
    try:
        driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

        # Apply stealth mode to avoid detection
        stealth(driver,
                languages=["en-US", "en"],
                vendor="Google Inc.",
                platform="Win32",
                webgl_vendor="Intel Inc.",
                renderer="Intel Iris OpenGL Engine",
                fix_hairline=True,
                )

        url = "https://www.iplt20.com/stats/2025"
        logger.info(f"Navigating to {url}")

        # Retry the page load; the last failed attempt re-raises
        max_retries = 3
        for attempt in range(max_retries):
            try:
                driver.get(url)
                # Wait for the specific table to load
                wait = WebDriverWait(driver, 20)
                wait.until(EC.presence_of_element_located((By.CLASS_NAME, "np-mostrunsTable")))
                break
            except Exception as e:
                if attempt < max_retries - 1:
                    logger.warning(f"Attempt {attempt+1} failed: {e}. Retrying...")
                    time.sleep(2)  # Wait before retrying
                else:
                    raise

        # Extract the table data
        table = driver.find_element(By.CLASS_NAME, "np-mostrunsTable")
        header = table.find_element(By.CLASS_NAME, "st-table__head")
        rows_wrap = table.find_element(By.CLASS_NAME, "st-table-wrap")

        # Get column headers
        headers = [th.text.strip() for th in header.find_elements(By.TAG_NAME, "th")]
        logger.info(f"Headers found: {headers}")

        # Get all rows data; rows without <td> cells are skipped
        data = []
        for row in rows_wrap.find_elements(By.TAG_NAME, "tr"):
            cols = row.find_elements(By.TAG_NAME, "td")
            if cols:
                data.append([col.text.strip() for col in cols])

        logger.info(f"Scraped {len(data)} rows of data")

        # Create directory structure
        folder_name = os.path.join("Stat", "Batting")
        os.makedirs(folder_name, exist_ok=True)
        file_path = os.path.join(folder_name, "orange_cap.csv")

        if os.path.exists(file_path):
            # Read and merge BEFORE touching the existing file, so a failure
            # here leaves the current CSV in place.
            with open(file_path, "r", newline='', encoding="utf-8") as f:
                existing_rows = list(csv.reader(f))

            existing_headers = existing_rows[0] if existing_rows else []
            existing_data = existing_rows[1:] if len(existing_rows) > 1 else []

            # Check if headers match
            if existing_headers != headers:
                logger.warning(f"Headers mismatch. Old: {existing_headers}, New: {headers}")

            # Rows of this scrape that are not stored verbatim yet
            existing_rows_set = {tuple(row) for row in existing_data}
            new_rows = [row for row in data if tuple(row) not in existing_rows_set]

            sorted_data = _merge_player_rows(existing_data, new_rows)

            # Keep the previous file as a timestamped backup
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_path = os.path.join(folder_name, f"orange_cap_backup_{timestamp}.csv")
            os.rename(file_path, backup_path)
            logger.info(f"Backup created at {backup_path}")

            # Write to new file
            with open(file_path, "w", newline='', encoding="utf-8") as f:
                writer = csv.writer(f)
                writer.writerow(headers)
                writer.writerows(sorted_data)

            # Report the number of rows actually written, which can differ
            # from the number scraped because stored players are preserved.
            logger.info(f"✅ Orange Cap CSV updated with {len(sorted_data)} total rows")
            for row in new_rows[:3]:  # Log sample of new rows (limit to 3)
                logger.info(f"New row: {row}")
        else:
            # Create new file
            with open(file_path, "w", newline='', encoding="utf-8") as f:
                writer = csv.writer(f)
                writer.writerow(headers)
                writer.writerows(data)
            logger.info(f"✅ Orange Cap CSV created with {len(data)} rows and saved to {file_path}")

    except Exception as e:
        logger.error(f"Error extracting Orange Cap data: {e}", exc_info=True)
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception as quit_error:
                # Best effort: a failing shutdown must not mask the result
                logger.warning(f"Failed to close the browser cleanly: {quit_error}")

        elapsed_time = time.time() - start_time
        logger.info(f"Script execution completed in {elapsed_time:.2f} seconds")

if __name__ == '__main__':
    fetch_orange_cap_data()

INFO:__main__:Starting Orange Cap data scraping
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\ajmal\.wdm\drivers\chromedriver\win64\135.0.7049.114\chromedriver-win32/chromedriver.exe] found in cache
INFO:__main__:Navigating to https://www.iplt20.com/stats/2025
INFO:__main__:Headers found: ['POS', 'PLAYER', 'RUNS', 'MAT', 'INNS', 'NO', 'HS', 'AVG', 'BF', 'SR', '100', '50', '4S', '6S']
INFO:__main__:Scraped 20 rows of data
INFO:__main__:✅ Orange Cap CSV created with 20 rows and saved to Ipl_Stat_2025\batting\orange_cap.csv
INFO:__main__:Script execution completed in 19.28 seconds
