In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException
from selenium.common.exceptions import NoSuchElementException
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.common.keys import Keys
import pandas as pd
# import time



def get_data():
    # Set up the Chrome driver
    chrome_drive_path = r"C:/Development/chromedriver.exe"
    service = Service(chrome_drive_path)

    options = ChromeOptions()
    options.add_experimental_option("detach", True)
    
    driver = None
    try:
        driver = webdriver.Chrome(service=service, options=options) 
        driver.maximize_window()
        
        volleyball_link = "https://en.volleyballworld.com/volleyball/competitions/vnl-2022/"
        driver.get(volleyball_link)

        # Accept cookies
        accept_cookies = driver.find_element(By.CSS_SELECTOR, "button#onetrust-accept-btn-handler")
        accept_cookies.click()

        # Click on the statistics button
        stat_tab = driver.find_element(By.LINK_TEXT, "Statistics")
        stat_tab.click()

        # 'See all' button
        see_all = driver.find_element(By.CSS_SELECTOR, "a[href='/volleyball/competitions/vnl-2022/statistics/men/best-scorers/index']")
        see_all.click()

        # Best attackers table
        best_attackers = driver.find_element(By.CSS_SELECTOR, "a[href='/volleyball/competitions/vnl-2022/statistics/men/best-attackers/']")
        best_attackers.click()



        # Wait for the table to load       
        tbody = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="main-content"]/section[3]/div/div/div/div/div/table/tbody'))
        )

        # Get all table rows
        MAX_CLICKS = 15  # Max number of clicks on the 'See more' button
        click_count = 0  # Counter for the number of clicks
        rows = []
        while click_count < MAX_CLICKS:
            # Check if the "See more" button exists
            see_more_button = None
            try:
                see_more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, '//*[@id="main-content"]/section[3]/div/div/div/div/div/a/span'))
                )
            except NoSuchElementException:
                break
                
    
            rows.extend(tbody.find_elements(By.XPATH, './/tr'))
            see_more_button.click()
            
            click_count += 1
            
        
        # Get table data
        data = []
        for tr in tbody.find_elements(By.XPATH, '//tr'):
            row = [item.text for item in tr.find_elements(By.XPATH, './/td')]
            data.append(row)
        # print(data[1:])

        # Get table headers
        headers = []
        thead = driver.find_element(By.XPATH, '//*[@id="main-content"]/section[3]/div/div/div/div/div/table/thead')
        for hr in thead.find_elements(By.XPATH, '//tr'):
            head_row = [i.text for i in hr.find_elements(By.XPATH, './/th')]
            headers.append(head_row)
            # print(headers[0])

        # Create DataFrame
        df = pd.DataFrame(data[1:], columns=headers[0])
        return df
    
    except WebDriverException as e:
        print("An error occurred while running the WebDriver:", e)
        
    finally:
        if driver is not None:
            driver.quit()
    
# Call the function to get the data
result_df = get_data()
if result_df is not None:
    print(result_df)







    Shirt Number                       Player Name Team Points Errors  \
0             14                  Abdel-Aziz Nimir  NED    207     60   
1             17                Esmaeilnezhad Amin  IRI    190     56   
2              2                     Russell Aaron  USA    162     46   
3              1                      Nishida Yuji  JPN    162     52   
4              8                     Defalco Torey  USA    157     40   
..           ...                               ...  ...    ...    ...   
286           20  Honorato Henrique Dantas Nóbrega  BRA      0      0   
287           17                   Phillips Korben  AUS      0      0   
288           13                      Bleeker Mats  NED      0      0   
289           19                   de Weijer Freek  NED      0      0   
290            2                 Grebennikov Jenia  FRA      0      0   

    Attempts Average Per Match Success % Total  
0        143             15.92     50.49   410  
1        105             

In [10]:
result_df.head(13)

Unnamed: 0,Shirt Number,Player Name,Team,Points,Errors,Attempts,Average Per Match,Success %,Total
0,14,Abdel-Aziz Nimir,NED,207,60,143,15.92,50.49,410
1,17,Esmaeilnezhad Amin,IRI,190,56,105,14.62,54.13,351
2,2,Russell Aaron,USA,162,46,116,10.8,50.0,324
3,1,Nishida Yuji,JPN,162,52,81,12.46,54.92,295
4,8,Defalco Torey,USA,157,40,88,14.27,55.09,285
5,23,Nikolov Aleksandar,BUL,150,43,97,12.5,51.72,290
6,5,Ensing Kyle,USA,143,37,89,9.53,53.16,269
7,22,Zhang Jingyin,CHN,140,49,73,14.0,53.44,262
8,7,Maar Stephen Timothy,CAN,139,48,129,11.58,43.99,316
9,12,Lima Bruno,ARG,136,44,89,11.33,50.56,269


In [11]:
result_df.tail(7)

Unnamed: 0,Shirt Number,Player Name,Team,Points,Errors,Attempts,Average Per Match,Success %,Total
284,25,Pazhooman Fazel,IRI,0,0,1,0.0,0.0,1
285,2,Andrade Leonardo Henrique,BRA,0,0,0,,,0
286,20,Honorato Henrique Dantas Nóbrega,BRA,0,0,0,,,0
287,17,Phillips Korben,AUS,0,0,0,,,0
288,13,Bleeker Mats,NED,0,0,0,,,0
289,19,de Weijer Freek,NED,0,0,0,,,0
290,2,Grebennikov Jenia,FRA,0,0,0,,,0


In [7]:
# with pd.ExcelWriter('result.xlsx') as writer:
#     result_df.to_excel(writer, sheet_name='Sheet_1')

In [12]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 291 entries, 0 to 290
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Shirt Number       291 non-null    object
 1   Player Name        291 non-null    object
 2   Team               291 non-null    object
 3   Points             291 non-null    object
 4   Errors             291 non-null    object
 5   Attempts           291 non-null    object
 6   Average Per Match  291 non-null    object
 7   Success %          291 non-null    object
 8   Total              291 non-null    object
dtypes: object(9)
memory usage: 20.6+ KB


In [13]:
result_df.shape

(291, 9)

In [10]:
result_df['Team'].value_counts()

ITA    23
POL    23
SRB    21
USA    20
IRI    19
BRA    19
AUS    18
SLO    18
FRA    18
CAN    17
GER    17
NED    16
JPN    16
BUL    16
CHN    15
ARG    15
Name: Team, dtype: int64

In [14]:
result_df['Team'].unique()

array(['NED', 'IRI', 'USA', 'JPN', 'BUL', 'CHN', 'CAN', 'ARG', 'AUS',
       'ITA', 'SLO', 'FRA', 'POL', 'SRB', 'GER', 'BRA'], dtype=object)

In [15]:
result_df['Team'].nunique()

16

In [17]:
result_df[result_df['Team']=='USA']

Unnamed: 0,Shirt Number,Player Name,Team,Points,Errors,Attempts,Average Per Match,Success %,Total
2,2,Russell Aaron,USA,162,46,116,10.8,50.0,324
4,8,Defalco Torey,USA,157,40,88,14.27,55.09,285
6,5,Ensing Kyle,USA,143,37,89,9.53,53.16,269
36,4,Jendryk II Jeffrey,USA,79,7,34,5.27,65.83,120
50,20,Smith David,USA,67,11,28,4.47,63.21,106
101,9,Hanes Jake,USA,33,8,28,4.12,47.83,69
114,18,Muagututia Garrett,USA,27,7,14,2.45,56.25,48
122,17,Jaeschke Thomas,USA,25,9,33,3.12,37.31,67
138,15,Russell Kyle,USA,20,4,15,2.86,51.28,39
148,6,Stahl Mitchell,USA,15,2,10,1.36,55.56,27


In [18]:
 result_df.isna().any()

Shirt Number         False
Player Name          False
Team                 False
Points               False
Errors               False
Attempts             False
Average Per Match    False
Success %            False
Total                False
dtype: bool