In [1]:
#Goals final
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from datetime import datetime
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# Path to the ChromeDriver
chrome_driver_path = r"C:\Users\Mohammed\chromedriver-win64\chromedriver.exe"

# Set up headless ChromeDriver
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # Run in headless mode (without GUI)
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=options)

# URL of the website
url = 'https://www.indiansuperleague.com/stats/538-138-goals-player-statistics'
driver.get(url)

# Initialize WebDriverWait
wait = WebDriverWait(driver, 30)

# Selectors for the first player (unique)
first_player_name_selector = 'div.player-card > div.player-info > a'
first_player_club_selector = 'div.player-card > div.player-info > div.club-wrap > a > div.name.full-name'
first_player_games_played_selector = 'div.player-card > div.player-info > div.player-meta > div:nth-child(1) > p.text.value'
first_player_goals_selector = 'div.player-card > div.player-info > div.player-meta > div:nth-child(2)'

# Selectors for other players (generalized selectors for all players)
player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-left > div.table-body > div > div.table-data.table-data-player > a'
club_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-club > div > a > div.name.full-name'
games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-games-played > p'
goals_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-goals > p'

# "More" button selector to load additional data if applicable
more_button_selector = 'button.loadmore'

def load_all_data():
    """Function to click on the 'More' button to load additional data"""
    while True:
        try:
            # Wait for the "More" button to appear
            more_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, more_button_selector))
            )
            driver.execute_script("arguments[0].scrollIntoView(true);", more_button)
            time.sleep(2)  # Short pause for smooth scrolling
            driver.execute_script("arguments[0].click();", more_button)
            time.sleep(5)  # Wait for new data to load
        except Exception as e:
            print(f"Error or no more 'More' button to click: {e}")
            break  # Exit the loop if no more 'More' button is found

def extract_first_player_data():
    """Extract data for the first player using unique selectors"""
    data = []
    try:
        first_player_name = driver.find_element(By.CSS_SELECTOR, first_player_name_selector).text
        first_player_club = driver.find_element(By.CSS_SELECTOR, first_player_club_selector).text
        first_player_games_played = driver.find_element(By.CSS_SELECTOR, first_player_games_played_selector).text
        first_player_goals = driver.find_element(By.CSS_SELECTOR, first_player_goals_selector).text
        data.append([first_player_name, first_player_club, first_player_games_played, first_player_goals])
    except Exception as e:
        print(f"Error extracting first player data: {e}")
    return data

def extract_all_player_data():
    """Extract data for all players, including the first player"""
    data = []
    try:
        # Extract data for the first player
        data.extend(extract_first_player_data())
        # Extract data for the remaining players using the generalized selectors
        player_elements = driver.find_elements(By.CSS_SELECTOR, player_name_selector)
        club_elements = driver.find_elements(By.CSS_SELECTOR, club_name_selector)
        games_played_elements = driver.find_elements(By.CSS_SELECTOR, games_played_selector)
        goals_elements = driver.find_elements(By.CSS_SELECTOR, goals_selector)
        player_names = [elem.text for elem in player_elements]
        club_names = [elem.text for elem in club_elements]
        games_played = [elem.text for elem in games_played_elements]
        goals = [elem.text for elem in goals_elements]
        # Ensure all lists have the same length
        max_length = max(len(player_names), len(club_names), len(games_played), len(goals))
        # Extend lists to equal length
        player_names.extend(['N/A'] * (max_length - len(player_names)))
        club_names.extend(['N/A'] * (max_length - len(club_names)))
        games_played.extend(['N/A'] * (max_length - len(games_played)))
        goals.extend(['N/A'] * (max_length - len(goals)))
        # Combine data for all players
        for i in range(max_length):
            data.append([player_names[i], club_names[i], games_played[i], goals[i]])
    except Exception as e:
        print(f"Error while extracting all player data: {e}")
    return data

def save_to_google_sheets(data, spreadsheet_id, sheet_name='Goals'):
    """Append extracted data to Google Sheets"""
    try:
        # Setup Google Sheets API client
        scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive.file"]
        creds = ServiceAccountCredentials.from_json_keyfile_name(r"C:\Users\Mohammed\Downloads\cricket-24-25-775817c1ed58.json", scope)
        client = gspread.authorize(creds)
        
        # Open the Google Sheet by ID and select the sheet
        sheet = client.open_by_key(spreadsheet_id).worksheet(sheet_name)

        # Get current date in format dd/mm/yyyy
        current_date = datetime.now().strftime("%m/%d/%Y")

        # Add date as the first column in each row
        data_with_date = [[current_date] + row for row in data]

        # Append the data
        sheet.append_rows(data_with_date, value_input_option="RAW")
        print(f"Data successfully appended to Google Sheets!")
    except Exception as e:
        print(f"Error occurred while appending to Google Sheets: {e}")

# Load all data by clicking the "More" button if present
load_all_data()

# Extract data from the page
extracted_data = extract_all_player_data()

# Save the extracted data to Google Sheets
if extracted_data:
    save_to_google_sheets(extracted_data, spreadsheet_id="1c_kIcLpdLTY3BKz7y9Cm6_2z0bXwOlY2NqB_VgFKqnc")

# Close the WebDriver
driver.quit()


Error or no more 'More' button to click: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF6628C3AB5+28005]
	(No symbol) [0x00007FF6628283B0]
	(No symbol) [0x00007FF6626C580A]
	(No symbol) [0x00007FF662715A3E]
	(No symbol) [0x00007FF662715D2C]
	(No symbol) [0x00007FF66275EA97]
	(No symbol) [0x00007FF66273BA7F]
	(No symbol) [0x00007FF66275B8B3]
	(No symbol) [0x00007FF66273B7E3]
	(No symbol) [0x00007FF6627075C8]
	(No symbol) [0x00007FF662708731]
	GetHandleVerifier [0x00007FF662BB643D+3118829]
	GetHandleVerifier [0x00007FF662C06C90+3448640]
	GetHandleVerifier [0x00007FF662BFCF0D+3408317]
	GetHandleVerifier [0x00007FF66298A40B+841403]
	(No symbol) [0x00007FF66283340F]
	(No symbol) [0x00007FF66282F484]
	(No symbol) [0x00007FF66282F61D]
	(No symbol) [0x00007FF66281EB79]
	BaseThreadInitThunk [0x00007FF8573E259D+29]
	RtlUserThreadStart [0x00007FF85972AF38+40]

Data successfully appended to Google Sheets!


In [2]:
#Assits
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import datetime
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from selenium.webdriver.chrome.options import Options

# Path to the ChromeDriver
chrome_driver_path = r"C:\Users\Mohammed\chromedriver-win64\chromedriver.exe"
service = Service(chrome_driver_path)

# Setting up headless mode for Chrome
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
driver = webdriver.Chrome(service=service, options=chrome_options)

# URL of the new website
url = 'https://www.indiansuperleague.com/stats/538-153-assists-player-statistics'
driver.get(url)

# Initialize WebDriverWait
wait = WebDriverWait(driver, 30)

# CSS Selectors for player names, club names, games played, and assists
player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-left > div.table-body > div > div.table-data.table-data-player > a'
club_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-club > div > a'
games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-games-played > p'
assists_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-assists > p'

# Generalized CSS Selectors for the first player (to handle random numbers in the class)
first_player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > a'
first_player_club_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > div.club-wrap > a > div.name.full-name'
first_player_games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > div.player-meta > div:nth-child(1)'
first_player_assists_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > div.player-meta > div:nth-child(2)'

more_button_selector = 'button.loadmore'  # Adjust selector for "More" button if present

def load_all_data():
    while True:
        try:
            more_button = driver.find_element(By.CSS_SELECTOR, more_button_selector)
            driver.execute_script("arguments[0].scrollIntoView(true);", more_button)
            time.sleep(2)  # Short pause for smooth scrolling

            driver.execute_script("arguments[0].click();", more_button)
            time.sleep(5)  # Wait for new data to load
        except Exception as e:
            print(f"Error or no more 'More' button to click: {e}")
            break

def extract_data():
    data = []
    try:
        # Extract data for the first player using generalized selectors
        first_player_name = driver.find_element(By.CSS_SELECTOR, first_player_name_selector).text
        first_player_club = driver.find_element(By.CSS_SELECTOR, first_player_club_selector).text
        first_player_games_played = driver.find_element(By.CSS_SELECTOR, first_player_games_played_selector).text
        first_player_assists = driver.find_element(By.CSS_SELECTOR, first_player_assists_selector).text

        data.append([first_player_name, first_player_club, first_player_games_played, first_player_assists])

        # Extract data for all players using general selectors
        player_elements = driver.find_elements(By.CSS_SELECTOR, player_name_selector)
        club_elements = driver.find_elements(By.CSS_SELECTOR, club_name_selector)
        games_played_elements = driver.find_elements(By.CSS_SELECTOR, games_played_selector)
        assists_elements = driver.find_elements(By.CSS_SELECTOR, assists_selector)

        player_names = [elem.text for elem in player_elements]
        club_names = [elem.text for elem in club_elements]
        games_played = [elem.text for elem in games_played_elements]
        assists = [elem.text for elem in assists_elements]

        # Ensure all lists have the same length
        max_length = max(len(player_names), len(club_names), len(games_played), len(assists))
        player_names.extend(['N/A'] * (max_length - len(player_names)))
        club_names.extend(['N/A'] * (max_length - len(club_names)))
        games_played.extend(['N/A'] * (max_length - len(games_played)))
        assists.extend(['N/A'] * (max_length - len(assists)))

        # Extract data for remaining players
        for i in range(max_length):
            data.append([player_names[i], club_names[i], games_played[i], assists[i]])

        return data
    except Exception as e:
        print(f"Error while extracting data: {e}")
        return None

def save_to_google_sheet(data, sheet_id, sheet_name, json_path):
    try:
        # Setup Google Sheets API credentials
        scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
        creds = ServiceAccountCredentials.from_json_keyfile_name(json_path, scope)
        client = gspread.authorize(creds)

        # Open the Google Sheet by ID
        sheet = client.open_by_key(sheet_id).worksheet(sheet_name)

        # Get the current date
        current_date = datetime.datetime.now().strftime("%m/%d/%Y")

        # Prepare data with current date in the first column
        data_with_date = [[current_date] + row for row in data]

        # Append the data to the sheet
        sheet.append_rows(data_with_date)

        print("Data successfully appended to Google Sheet")
    except Exception as e:
        print(f"Error occurred while saving to Google Sheets: {e}")

# Load all data by clicking "More" button if present
load_all_data()

# Extract data from the page
extracted_data = extract_data()

# Save the data to Google Sheets if available
if extracted_data:
    save_to_google_sheet(extracted_data, '1c_kIcLpdLTY3BKz7y9Cm6_2z0bXwOlY2NqB_VgFKqnc', 'Assists', r"C:\Users\Mohammed\Downloads\cricket-24-25-775817c1ed58.json")

# Close the WebDriver
driver.quit()


Error or no more 'More' button to click: Message: no such element: Unable to locate element: {"method":"css selector","selector":"button.loadmore"}
  (Session info: chrome=131.0.6778.205); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF6628C3AB5+28005]
	(No symbol) [0x00007FF6628283B0]
	(No symbol) [0x00007FF6626C580A]
	(No symbol) [0x00007FF662715A3E]
	(No symbol) [0x00007FF662715D2C]
	(No symbol) [0x00007FF66275EA97]
	(No symbol) [0x00007FF66273BA7F]
	(No symbol) [0x00007FF66275B8B3]
	(No symbol) [0x00007FF66273B7E3]
	(No symbol) [0x00007FF6627075C8]
	(No symbol) [0x00007FF662708731]
	GetHandleVerifier [0x00007FF662BB643D+3118829]
	GetHandleVerifier [0x00007FF662C06C90+3448640]
	GetHandleVerifier [0x00007FF662BFCF0D+3408317]
	GetHandleVerifier [0x00007FF66298A40B+841403]
	(No symbol) [0x00007FF66283340F]
	(No symbol) [0x00007FF66282F484]
	(No symb

In [3]:
#Chances created
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from datetime import datetime
from selenium.webdriver.chrome.options import Options
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# Path to the ChromeDriver
chrome_driver_path = r"C:\Users\Mohammed\chromedriver-win64\chromedriver.exe"

# Set up Chrome options to run in headless mode
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")

service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# URL of the new website
url = 'https://www.indiansuperleague.com/stats/538-199-chances-created-player-statistics'
driver.get(url)

# Initialize WebDriverWait
wait = WebDriverWait(driver, 30)

# CSS Selectors for player names, club names, games played, and chances created
player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-left > div.table-body > div > div.table-data.table-data-player > a'
club_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-club > div > a'
games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-games-played > p'
chances_created_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-chances-created > p'

# Unique selectors for the first player
first_player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card.club-505 > div.player-info > a'
first_player_club_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card.club-505 > div.player-info > div.club-wrap > a'
first_player_games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card.club-505 > div.player-info > div.player-meta > div:nth-child(1)'
first_player_chances_created_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card.club-505 > div.player-info > div.player-meta > div:nth-child(2)'

# Full XPath for the "More" button
more_button_xpath = '/html/body/div[1]/div/div/div/main/section[2]/section[5]/div/div/div/div/section/div/div/div[3]/button'

# Authenticate Google Sheets API
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name(r"C:\Users\Mohammed\Downloads\cricket-24-25-775817c1ed58.json", scope)
client = gspread.authorize(creds)

# Access the Google Sheet
spreadsheet = client.open("ISL 24-25")
sheet = spreadsheet.worksheet("Chances Created")

def load_all_data():
    while True:
        try:
            # Locate the "More" button using XPath
            more_button = driver.find_element(By.XPATH, more_button_xpath)
            driver.execute_script("arguments[0].scrollIntoView(true);", more_button)
            time.sleep(2)  # Short pause for smooth scrolling

            # Click the "More" button using JavaScript
            driver.execute_script("arguments[0].click();", more_button)
            time.sleep(5)  # Wait for new data to load

            # Check if the "More" button is still present
            if not driver.find_elements(By.XPATH, more_button_xpath):
                break
        except Exception as e:
            print(f"Error or no more 'More' button to click: {e}")
            break

def extract_data():
    data = []
    try:
        # Extract data for the first player using unique selectors
        first_player_name = driver.find_element(By.CSS_SELECTOR, first_player_name_selector).text
        first_player_club = driver.find_element(By.CSS_SELECTOR, first_player_club_selector).text
        first_player_games_played = driver.find_element(By.CSS_SELECTOR, first_player_games_played_selector).text
        first_player_chances_created = driver.find_element(By.CSS_SELECTOR, first_player_chances_created_selector).text

        data.append([first_player_name, first_player_club, first_player_games_played, first_player_chances_created])

        # Extract data for all players using general selectors
        player_elements = driver.find_elements(By.CSS_SELECTOR, player_name_selector)
        club_elements = driver.find_elements(By.CSS_SELECTOR, club_name_selector)
        games_played_elements = driver.find_elements(By.CSS_SELECTOR, games_played_selector)
        chances_created_elements = driver.find_elements(By.CSS_SELECTOR, chances_created_selector)

        player_names = [elem.text for elem in player_elements]
        club_names = [elem.text for elem in club_elements]
        games_played = [elem.text for elem in games_played_elements]
        chances_created = [elem.text for elem in chances_created_elements]

        # Ensure all lists have the same length
        max_length = max(len(player_names), len(club_names), len(games_played), len(chances_created))

        player_names.extend(['N/A'] * (max_length - len(player_names)))
        club_names.extend(['N/A'] * (max_length - len(club_names)))
        games_played.extend(['N/A'] * (max_length - len(games_played)))
        chances_created.extend(['N/A'] * (max_length - len(chances_created)))

        # Extract data for remaining players
        for i in range(max_length):
            data.append([player_names[i], club_names[i], games_played[i], chances_created[i]])

        return data
    except Exception as e:
        print(f"Error while extracting data: {e}")
        return None

def save_to_google_sheet(data):
    try:
        # Get today's date for the first column
        current_date = datetime.now().strftime("%m/%d/%Y")
        
        # Append the date to each row
        data_with_date = [[current_date] + row for row in data]
        
        # Append data to the Google Sheet
        sheet.append_rows(data_with_date)
        print("Data successfully appended to Google Sheets")
    except Exception as e:
        print(f"Error occurred while saving to Google Sheets: {e}")

# Load all data by clicking "More" button until it disappears
load_all_data()

# Extract data from the page
extracted_data = extract_data()

# Save the data to Google Sheets
if extracted_data:
    save_to_google_sheet(extracted_data)

# Close the WebDriver
driver.quit()


Data successfully appended to Google Sheets


In [4]:
#Clean sheets
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import re
import pandas as pd
import datetime
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# Path to the ChromeDriver and JSON credentials for Google Sheets API
chrome_driver_path = r"C:\Users\Mohammed\chromedriver-win64\chromedriver.exe"
json_keyfile_path = r"C:\Users\Mohammed\Downloads\cricket-24-25-775817c1ed58.json"

# Set up Chrome options for headless mode
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")

# Set up WebDriver and Google Sheets API client
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# URL of the page to scrape
url = 'https://www.indiansuperleague.com/stats/538-141-clean-sheets-player-statistics'
driver.get(url)

# Initialize WebDriverWait
wait = WebDriverWait(driver, 30)

# CSS Selectors for player names, club names, games played, and clean sheets
player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-left > div.table-body > div > div.table-data.table-data-player > a'
club_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-club > div > a'
games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-games-played > p'
clean_sheets_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div > div.table-data.table-data-cleansheet > p'

# Unique selectors for the first player, with dynamic matching
first_player_base_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card.club-'

# Full XPath for the "More" button
more_button_xpath = '/html/body/div[1]/div/div/div/main/section[2]/section[5]/div/div/div/div/section/div/div/div[3]/button'

def load_all_data():
    while True:
        try:
            # Locate the "More" button using XPath
            more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, more_button_xpath))
            )
            driver.execute_script("arguments[0].scrollIntoView(true);", more_button)
            time.sleep(2)  # Short pause for smooth scrolling

            # Click the "More" button using JavaScript
            driver.execute_script("arguments[0].click();", more_button)
            time.sleep(5)  # Wait for new data to load

            # Check if the "More" button is still present
            if not driver.find_elements(By.XPATH, more_button_xpath):
                break

        except Exception as e:
            print(f"Error or no more 'More' button to click: {e}")
            break

def extract_data():
    data = []
    try:
        # Extract data for the first player using dynamic selectors
        first_player_card = driver.find_elements(By.CSS_SELECTOR, 'div.player-card')
        if first_player_card:
            first_player_element = first_player_card[0]

            # Dynamic extraction using regular expressions for the unique player base selector
            player_id = re.search(r"club-(\d+)", first_player_element.get_attribute('class'))
            if player_id:
                player_id = player_id.group(1)
                first_player_name_selector = f'#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card.club-{player_id} > div.player-info > a'
                first_player_club_selector = f'#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card.club-{player_id} > div.player-info > div.club-wrap > a'
                first_player_games_played_selector = f'#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card.club-{player_id} > div.player-info > div.player-meta > div:nth-child(1) > p.text.value'
                first_player_clean_sheets_selector = f'#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card.club-{player_id} > div.player-info > div.player-meta > div:nth-child(2) > p.text.value'

                first_player_name = driver.find_element(By.CSS_SELECTOR, first_player_name_selector).text
                first_player_club = driver.find_element(By.CSS_SELECTOR, first_player_club_selector).text
                first_player_games_played = driver.find_element(By.CSS_SELECTOR, first_player_games_played_selector).text
                first_player_clean_sheets = driver.find_element(By.CSS_SELECTOR, first_player_clean_sheets_selector).text

                # Add the data for the first player
                data.append([first_player_name, first_player_club, first_player_games_played, first_player_clean_sheets])

        # Extract data for all players using general selectors
        player_elements = driver.find_elements(By.CSS_SELECTOR, player_name_selector)
        club_elements = driver.find_elements(By.CSS_SELECTOR, club_name_selector)
        games_played_elements = driver.find_elements(By.CSS_SELECTOR, games_played_selector)
        clean_sheets_elements = driver.find_elements(By.CSS_SELECTOR, clean_sheets_selector)

        player_names = [elem.text for elem in player_elements]
        club_names = [elem.text for elem in club_elements]
        games_played = [elem.text for elem in games_played_elements]
        clean_sheets = [elem.text for elem in clean_sheets_elements]

        # Ensure all lists have the same length
        max_length = max(len(player_names), len(club_names), len(games_played), len(clean_sheets))

        player_names.extend(['N/A'] * (max_length - len(player_names)))
        club_names.extend(['N/A'] * (max_length - len(club_names)))
        games_played.extend(['N/A'] * (max_length - len(games_played)))
        clean_sheets.extend(['N/A'] * (max_length - len(clean_sheets)))

        # Extract data for all players
        for i in range(max_length):
            data.append([player_names[i], club_names[i], games_played[i], clean_sheets[i]])

        return data

    except Exception as e:
        print(f"Error while extracting data: {e}")
        return None

def append_to_google_sheets(data, spreadsheet_id='1c_kIcLpdLTY3BKz7y9Cm6_2z0bXwOlY2NqB_VgFKqnc', sheet_name='Clean Sheets'):
    try:
        # Authenticate with Google Sheets API
        scope = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']
        creds = ServiceAccountCredentials.from_json_keyfile_name(json_keyfile_path, scope)
        client = gspread.authorize(creds)

        # Open the Google Sheet by ID
        sheet = client.open_by_key(spreadsheet_id).worksheet(sheet_name)

        # Get the current data to determine the last row
        current_data = sheet.get_all_values()
        last_row = len(current_data) + 1

        # Add the current date to the data
        today_date = datetime.datetime.now().strftime('%m/%d/%Y')
        for row in data:
            row.insert(0, today_date)  # Insert the date at the start of each row

        # Append data to the Google Sheet
        sheet.insert_rows(data, last_row)

        print(f"Data successfully appended to Google Sheet {spreadsheet_id} in tab '{sheet_name}'.")

    except Exception as e:
        print(f"Error occurred while appending data to Google Sheets: {e}")

# Load all data by clicking "More" button until it disappears
load_all_data()

# Extract data from the page
extracted_data = extract_data()

# Append the data to Google Sheets
if extracted_data:
    append_to_google_sheets(extracted_data)

# Close the WebDriver
driver.quit()


Error or no more 'More' button to click: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF6628C3AB5+28005]
	(No symbol) [0x00007FF6628283B0]
	(No symbol) [0x00007FF6626C580A]
	(No symbol) [0x00007FF662715A3E]
	(No symbol) [0x00007FF662715D2C]
	(No symbol) [0x00007FF66275EA97]
	(No symbol) [0x00007FF66273BA7F]
	(No symbol) [0x00007FF66275B8B3]
	(No symbol) [0x00007FF66273B7E3]
	(No symbol) [0x00007FF6627075C8]
	(No symbol) [0x00007FF662708731]
	GetHandleVerifier [0x00007FF662BB643D+3118829]
	GetHandleVerifier [0x00007FF662C06C90+3448640]
	GetHandleVerifier [0x00007FF662BFCF0D+3408317]
	GetHandleVerifier [0x00007FF66298A40B+841403]
	(No symbol) [0x00007FF66283340F]
	(No symbol) [0x00007FF66282F484]
	(No symbol) [0x00007FF66282F61D]
	(No symbol) [0x00007FF66281EB79]
	BaseThreadInitThunk [0x00007FF8573E259D+29]
	RtlUserThreadStart [0x00007FF85972AF38+40]

Data successfully appended to Google Sheet 1c_kIcLpdLTY3BKz7y9Cm6_2z0bXwOlY2NqB_VgFKqnc in tab 'Clean Sheets'.


In [5]:
#Interceptions
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import datetime
from selenium.webdriver.chrome.options import Options
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# Path to the ChromeDriver
chrome_driver_path = r"C:\Users\Mohammed\chromedriver-win64\chromedriver.exe"
service = Service(chrome_driver_path)

# Set up Chrome options for headless mode
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(service=service, options=chrome_options)

# URL of the interceptions statistics website
url = 'https://www.indiansuperleague.com/stats/538-149-interceptions-player-statistics'
driver.get(url)

# Initialize WebDriverWait
wait = WebDriverWait(driver, 30)

# CSS Selectors for player names, club names, games played, and interceptions
player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-left > div.table-body > div:nth-child(n) > div.table-data.table-data-player > a'
club_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child(n) > div.table-data.table-data-club > div > a'
games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child(n) > div.table-data.table-data-games-played > p'
interceptions_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child(n) > div.table-data.table-data-interceptions > p'

# Updated selectors for the first player (flexible to handle dynamic club number)
first_player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card[class*="club-"] > div.player-info > a'
first_player_club_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card[class*="club-"] > div.player-info > div.club-wrap > a'
first_player_games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card[class*="club-"] > div.player-info > div.player-meta > div:nth-child(1) > p.text.value'
first_player_interceptions_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card[class*="club-"] > div.player-info > div.player-meta > div:nth-child(2) > p.text.value'

# Full XPath for the "More" button
more_button_xpath = '/html/body/div[1]/div/div/div/main/section[2]/section[5]/div/div/div/div/section/div/div/div[3]/button'

# Function to load all data by clicking the "More" button
def load_all_data():
    while True:
        try:
            # Locate the "More" button using XPath
            more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, more_button_xpath))
            )
            driver.execute_script("arguments[0].scrollIntoView(true);", more_button)
            time.sleep(2)  # Short pause for smooth scrolling
            # Click the "More" button using JavaScript
            driver.execute_script("arguments[0].click();", more_button)
            time.sleep(5)  # Wait for new data to load
            # Check if the "More" button is still present
            if not driver.find_elements(By.XPATH, more_button_xpath):
                break
        except Exception as e:
            print(f"Error or no more 'More' button to click: {e}")
            break

# Function to extract data
def extract_data():
    data = []
    try:
        # Extract data for the first player using flexible selectors
        first_player_name = driver.find_element(By.CSS_SELECTOR, first_player_name_selector).text
        first_player_club = driver.find_element(By.CSS_SELECTOR, first_player_club_selector).text
        first_player_games_played = driver.find_element(By.CSS_SELECTOR, first_player_games_played_selector).text
        first_player_interceptions = driver.find_element(By.CSS_SELECTOR, first_player_interceptions_selector).text
        
        data.append([first_player_name, first_player_club, first_player_games_played, first_player_interceptions])

        # Extract data for all players using general selectors
        player_elements = driver.find_elements(By.CSS_SELECTOR, player_name_selector)
        club_elements = driver.find_elements(By.CSS_SELECTOR, club_name_selector)
        games_played_elements = driver.find_elements(By.CSS_SELECTOR, games_played_selector)
        interceptions_elements = driver.find_elements(By.CSS_SELECTOR, interceptions_selector)

        player_names = [elem.text for elem in player_elements]
        club_names = [elem.text for elem in club_elements]
        games_played = [elem.text for elem in games_played_elements]
        interceptions = [elem.text for elem in interceptions_elements]

        # Ensure all lists have the same length
        max_length = max(len(player_names), len(club_names), len(games_played), len(interceptions))

        player_names.extend(['N/A'] * (max_length - len(player_names)))
        club_names.extend(['N/A'] * (max_length - len(club_names)))
        games_played.extend(['N/A'] * (max_length - len(games_played)))
        interceptions.extend(['N/A'] * (max_length - len(interceptions)))

        # Extract data for all players
        for i in range(max_length):
            data.append([player_names[i], club_names[i], games_played[i], interceptions[i]])

        return data

    except Exception as e:
        print(f"Error while extracting data: {e}")
        return None

# Function to save data to Google Sheets
def save_to_google_sheets(data, sheet_name="Interceptions"):
    try:
        # Authenticate with Google Sheets API
        scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
        creds = ServiceAccountCredentials.from_json_keyfile_name(r"C:\Users\Mohammed\Downloads\cricket-24-25-775817c1ed58.json", scope)
        client = gspread.authorize(creds)

        # Open the Google Sheet by ID
        sheet = client.open_by_key('1c_kIcLpdLTY3BKz7y9Cm6_2z0bXwOlY2NqB_VgFKqnc')

        # Open the sheet/tab named "Interceptions"
        try:
            worksheet = sheet.worksheet(sheet_name)
        except gspread.exceptions.WorksheetNotFound:
            worksheet = sheet.add_worksheet(title=sheet_name, rows="100", cols="20")

        # Add the current date to each row of data
        date_today = datetime.datetime.now().strftime("%m/%d/%Y")
        for row in data:
            row.insert(0, date_today)

        # Append the data to the sheet
        worksheet.append_rows(data, value_input_option="RAW")

        print(f"Data successfully written to Google Sheets under '{sheet_name}' tab.")

    except Exception as e:
        print(f"Error occurred while saving to Google Sheets: {e}")

# Load all data by clicking "More" button until it disappears
load_all_data()

# Extract data from the page
extracted_data = extract_data()

# Save the data to Google Sheets
if extracted_data:
    save_to_google_sheets(extracted_data)

# Close the WebDriver
driver.quit()


Data successfully written to Google Sheets under 'Interceptions' tab.


In [6]:
#Saves
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from datetime import datetime
import time

# Path to the ChromeDriver
chrome_driver_path = r"C:\Users\Mohammed\chromedriver-win64\chromedriver.exe"
service = Service(chrome_driver_path)

# Configure headless mode
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(service=service, options=options)

# URL of the website
url = 'https://www.indiansuperleague.com/stats/538-140-saves-player-statistics'
driver.get(url)

# Initialize WebDriverWait
wait = WebDriverWait(driver, 30)

# CSS Selectors and XPaths
player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-left > div.table-body > div:nth-child(n) > div.table-data.table-data-player > a'
club_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child(n) > div.table-data.table-data-club > div > a'
games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child(n) > div.table-data.table-data-games-played > p'
saves_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child(n) > div.table-data.table-data-saves > p'

first_player_name_xpath = "//div[contains(@class, 'player-card')]/div[@class='player-info']/a"
first_player_club_xpath = "//div[contains(@class, 'player-card')]/div[@class='player-info']/div[@class='club-wrap']/a"
first_player_games_played_xpath = "//div[contains(@class, 'player-card')]/div[@class='player-info']/div[@class='player-meta']/div[1]/p[@class='text value']"
first_player_saves_xpath = "//div[contains(@class, 'player-card')]/div[@class='player-info']/div[@class='player-meta']/div[2]/p[@class='text value']"

more_button_xpath = '/html/body/div[1]/div/div/div/main/section[2]/section[5]/div/div/div/div/section/div/div/div[3]/button'

def load_all_data():
    while True:
        try:
            more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, more_button_xpath))
            )
            driver.execute_script("arguments[0].scrollIntoView(true);", more_button)
            time.sleep(2)
            driver.execute_script("arguments[0].click();", more_button)
            time.sleep(5)

            if not driver.find_elements(By.XPATH, more_button_xpath):
                break
        except Exception:
            break

def extract_data():
    data = []
    try:
        first_player_name = driver.find_element(By.XPATH, first_player_name_xpath).text
        first_player_club = driver.find_element(By.XPATH, first_player_club_xpath).text
        first_player_games_played = driver.find_element(By.XPATH, first_player_games_played_xpath).text
        first_player_saves = driver.find_element(By.XPATH, first_player_saves_xpath).text

        data.append([first_player_name, first_player_club, first_player_games_played, first_player_saves])

        player_elements = driver.find_elements(By.CSS_SELECTOR, player_name_selector)
        club_elements = driver.find_elements(By.CSS_SELECTOR, club_name_selector)
        games_played_elements = driver.find_elements(By.CSS_SELECTOR, games_played_selector)
        saves_elements = driver.find_elements(By.CSS_SELECTOR, saves_selector)

        player_names = [elem.text for elem in player_elements]
        club_names = [elem.text for elem in club_elements]
        games_played = [elem.text for elem in games_played_elements]
        saves = [elem.text for elem in saves_elements]

        max_length = max(len(player_names), len(club_names), len(games_played), len(saves))
        player_names.extend(['N/A'] * (max_length - len(player_names)))
        club_names.extend(['N/A'] * (max_length - len(club_names)))
        games_played.extend(['N/A'] * (max_length - len(games_played)))
        saves.extend(['N/A'] * (max_length - len(saves)))

        for i in range(max_length):
            data.append([player_names[i], club_names[i], games_played[i], saves[i]])

        return data

    except Exception as e:
        print(f"Error while extracting data: {e}")
        return None

def append_to_google_sheet(data):
    try:
        scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
        creds = ServiceAccountCredentials.from_json_keyfile_name(
            r"C:\Users\Mohammed\Downloads\cricket-24-25-775817c1ed58.json", scope
        )
        client = gspread.authorize(creds)

        sheet = client.open_by_key("1c_kIcLpdLTY3BKz7y9Cm6_2z0bXwOlY2NqB_VgFKqnc")
        worksheet = sheet.worksheet("Saves")

        date = datetime.now().strftime("%m/%d/%Y")
        for row in data:
            worksheet.append_row([date] + row)

        print("Data successfully appended to Google Sheet.")

    except Exception as e:
        print(f"Error while appending to Google Sheet: {e}")

# Load all data
load_all_data()

# Extract data
extracted_data = extract_data()

# Append data to Google Sheet
if extracted_data:
    append_to_google_sheet(extracted_data)

# Close the WebDriver
driver.quit()

Data successfully appended to Google Sheet.


In [7]:
#Sucessful passes
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from datetime import datetime

# Set up Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode for faster execution

# Set up ChromeDriver path (update to your correct path)
driver_path = r"C:\Users\Mohammed\chromedriver-win64\chromedriver.exe"

# Set up WebDriver
service = Service(driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open the website
url = 'https://www.indiansuperleague.com/stats/538-189-successful-passes-player-statistics'
driver.get(url)

# JavaScript code to scroll and click "More" button
more_button_xpath = '//*[@id="73c8f3c9-6505-4f1d-8111-9a332868005a"]/div/div/div[3]/button'
while True:
    try:
        # Scroll the page down to the "More" button
        more_button = driver.find_element(By.XPATH, more_button_xpath)
        driver.execute_script("arguments[0].scrollIntoView(true);", more_button)
        time.sleep(1)  # Wait for the button to be visible
        
        # Click the "More" button using JavaScript
        driver.execute_script("arguments[0].click();", more_button)
        time.sleep(2)  # Wait for the new data to load
    except:
        break  # Break the loop when the 'More' button is no longer present

# Selectors for player data
player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-left > div.table-body > div:nth-child({}) > div.table-data.table-data-player > a'
club_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child({}) > div.table-data.table-data-club > div > a'
games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child({}) > div.table-data.table-data-games-played > p'
successful_passes_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child({}) > div.table-data.table-data-successful-passes > p'

# Unique selectors for the first player (generalized to match any first player)
first_player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > a'
first_player_club_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > div.club-wrap > a'
first_player_games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > div.player-meta > div:nth-child(1) > p.text.value'
first_player_successful_passes_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > div.player-meta > div:nth-child(2) > p.text.value'

# Initialize lists to store data
players = []
clubs = []
games_played = []
successful_passes = []

# Scrape data for the first player (generalized selectors)
players.append(driver.find_element(By.CSS_SELECTOR, first_player_name_selector).text)
clubs.append(driver.find_element(By.CSS_SELECTOR, first_player_club_selector).text)
games_played.append(driver.find_element(By.CSS_SELECTOR, first_player_games_played_selector).text)
successful_passes.append(driver.find_element(By.CSS_SELECTOR, first_player_successful_passes_selector).text)

# Scrape data for remaining players
index = 2  # Start from the second player
while True:
    try:
        player = driver.find_element(By.CSS_SELECTOR, player_name_selector.format(index)).text
        club = driver.find_element(By.CSS_SELECTOR, club_name_selector.format(index)).text
        games = driver.find_element(By.CSS_SELECTOR, games_played_selector.format(index)).text
        passes = driver.find_element(By.CSS_SELECTOR, successful_passes_selector.format(index)).text
        
        # Append to lists
        players.append(player)
        clubs.append(club)
        games_played.append(games)
        successful_passes.append(passes)
        
        index += 1  # Move to the next player
        
    except:
        break  # If the data is missing or out of range, break the loop

# Close the browser
driver.quit()

# Create a DataFrame using pandas
data = {
    'Date': [datetime.today().strftime('%d/%m/%Y')] * len(players),
    'Player': players,
    'Club': clubs,
    'Games Played': games_played,
    'Successful Passes': successful_passes
}
df = pd.DataFrame(data)

# Google Sheets API authentication
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name(r"C:\Users\Mohammed\Downloads\cricket-24-25-775817c1ed58.json", scope)
client = gspread.authorize(creds)

# Open the Google Sheet and select the specific worksheet
sheet = client.open_by_key("1c_kIcLpdLTY3BKz7y9Cm6_2z0bXwOlY2NqB_VgFKqnc")
worksheet = sheet.worksheet("Successful Passes")

# Append the data to the Google Sheet (without overwriting old data)
existing_data = worksheet.get_all_values()
header = existing_data[0] if existing_data else []

if header:
    next_row = len(existing_data) + 1
else:
    header = ["Date", "Player", "Club", "Games Played", "Successful Passes"]
    next_row = 2  # Start appending below the header

# Append new data to the sheet
worksheet.append_rows(df.values.tolist(), value_input_option='RAW')

print("Data has been successfully appended to the 'Successful Passes' tab.")


Data has been successfully appended to the 'Successful Passes' tab.


In [8]:
#Touches
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from datetime import datetime

# Set up Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode for faster execution

# Set up ChromeDriver path (update to your correct path)
driver_path = r"C:\Users\Mohammed\chromedriver-win64\chromedriver.exe"

# Set up WebDriver
service = Service(driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open the website
url = 'https://www.indiansuperleague.com/stats/538-146-touches-player-statistics'
driver.get(url)

# JavaScript code to scroll and click "More" button
more_button_xpath = '//*[@id="73c8f3c9-6505-4f1d-8111-9a332868005a"]/div/div/div[3]/button'
while True:
    try:
        # Scroll the page down to the "More" button
        more_button = driver.find_element(By.XPATH, more_button_xpath)
        driver.execute_script("arguments[0].scrollIntoView(true);", more_button)
        time.sleep(1)  # Wait for the button to be visible
        # Click the "More" button using JavaScript
        driver.execute_script("arguments[0].click();", more_button)
        time.sleep(2)  # Wait for the new data to load
    except:
        break  # Break the loop when the 'More' button is no longer present

# Selectors for player data
player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-left > div.table-body > div:nth-child({}) > div.table-data.table-data-player > a'
club_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child({}) > div.table-data.table-data-club > div > a'
games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child({}) > div.table-data.table-data-games-played > p'
successful_passes_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-right > div.table-body > div:nth-child({}) > div.table-data.table-data-touches > p'

# Initialize lists to store data
players = []
clubs = []
games_played = []
successful_passes = []

# Unique selectors for the first player (generalized to match any first player)
first_player_name_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > a'
first_player_club_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > div.club-wrap > a'
first_player_games_played_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > div.player-meta > div:nth-child(1) > p.text.value'
first_player_successful_passes_selector = '#\\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.player-card > div.player-info > div.player-meta > div:nth-child(2) > p.text.value'

# Scrape data for the first player (generalized selectors)
try:
    players.append(driver.find_element(By.CSS_SELECTOR, first_player_name_selector).text)
    clubs.append(driver.find_element(By.CSS_SELECTOR, first_player_club_selector).text)
    games_played.append(driver.find_element(By.CSS_SELECTOR, first_player_games_played_selector).text)
    successful_passes.append(driver.find_element(By.CSS_SELECTOR, first_player_successful_passes_selector).text)
except Exception as e:
    print(f"Error scraping first player: {e}")

# Scrape data for remaining players
index = 2  # Start from the second player
while True:
    try:
        player = driver.find_element(By.CSS_SELECTOR, player_name_selector.format(index)).text
        club = driver.find_element(By.CSS_SELECTOR, club_name_selector.format(index)).text
        games = driver.find_element(By.CSS_SELECTOR, games_played_selector.format(index)).text
        passes = driver.find_element(By.CSS_SELECTOR, successful_passes_selector.format(index)).text
        
        # Append to lists
        players.append(player)
        clubs.append(club)
        games_played.append(games)
        successful_passes.append(passes)
        
        index += 1  # Move to the next player
    except Exception as e:
        # If no more elements found or an error occurs, break the loop
        print(f"Finished scraping or encountered an error: {e}")
        break

# Close the browser
driver.quit()

# Print lengths of lists to debug
print("Players:", len(players))
print("Clubs:", len(clubs))
print("Games Played:", len(games_played))
print("Touches:", len(successful_passes))

# Create a DataFrame using pandas
data = pd.DataFrame({
    'Date': [datetime.today().strftime('%d/%m/%Y')] * len(players),
    'Player': players,
    'Club': clubs,
    'Games Played': games_played,
    'Touches': successful_passes
})

# Google Sheets API authentication
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name(r"C:\Users\Mohammed\Downloads\cricket-24-25-775817c1ed58.json", scope)
client = gspread.authorize(creds)

# Open the Google Sheet and select the specific worksheet
sheet = client.open_by_key("1c_kIcLpdLTY3BKz7y9Cm6_2z0bXwOlY2NqB_VgFKqnc")
worksheet = sheet.worksheet("Touches")

# Append the data to the Google Sheet (without overwriting old data)
existing_data = worksheet.get_all_values()
header = existing_data[0] if existing_data else []
if header:
    next_row = len(existing_data) + 1
else:
    header = ["Date", "Player", "Club", "Games Played", "Touches"]
    next_row = 2  # Start appending below the header

# Append new data to the sheet
worksheet.append_rows(data.values.tolist(), value_input_option='RAW')
print("Data has been successfully appended to the 'Touches' tab.")

Finished scraping or encountered an error: Message: no such element: Unable to locate element: {"method":"css selector","selector":"#\37 3c8f3c9-6505-4f1d-8111-9a332868005a > div > div > div.waf-body > div.stats-table.table > div.table-left > div.table-body > div:nth-child(296) > div.table-data.table-data-player > a"}
  (Session info: chrome=131.0.6778.205); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF6628C3AB5+28005]
	(No symbol) [0x00007FF6628283B0]
	(No symbol) [0x00007FF6626C580A]
	(No symbol) [0x00007FF662715A3E]
	(No symbol) [0x00007FF662715D2C]
	(No symbol) [0x00007FF66275EA97]
	(No symbol) [0x00007FF66273BA7F]
	(No symbol) [0x00007FF66275B8B3]
	(No symbol) [0x00007FF66273B7E3]
	(No symbol) [0x00007FF6627075C8]
	(No symbol) [0x00007FF662708731]
	GetHandleVerifier [0x00007FF662BB643D+3118829]
	GetHandleVerifier [0x00007FF662C06C90+3448640]
