In [46]:
import pandas as pd
import numpy as np
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def create_webdriver():
    """Build and return a Chrome WebDriver configured with the '--headless' argument."""
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    headless_driver = webdriver.Chrome(options=options)
    return headless_driver

def scrape_data_from_table(driver, page_number):
    """Scrape one page of the check-pvp.fr ranking table into a DataFrame.

    Args:
        driver: Selenium WebDriver used to load the page.
        page_number: Page number appended to the ranking URL.

    Returns:
        pandas.DataFrame holding the text of every ``<td>`` cell, one row per
        ``<tr>``. Empty strings are treated as missing; columns and rows that
        are entirely missing are dropped. Remaining columns keep their
        original positional integer labels.

    Raises:
        selenium.common.exceptions.TimeoutException: if the table element is
            not present within 15 seconds.
    """
    # Construct the URL using the page number
    url = f'https://check-pvp.fr/ranking/eu/all-realms/all-factions/all-classes/all-specs/rateatm3v3/desc/{page_number}'

    # Request the page
    driver.get(url)
    # Fixed pause kept from the original; presumably it lets the client-side
    # table finish populating before it is read -- TODO confirm it is needed.
    time.sleep(10)

    # Find the table on the page
    table = WebDriverWait(driver, 15).until(
        EC.presence_of_element_located(
            (By.XPATH, '/html/body/app-root/div/div[3]/app-ranking/div/div[2]/table')
        )
    )
    table_rows = table.find_elements(By.TAG_NAME, 'tr')
    print(f"Found {len(table_rows)} rows on the table from page {page_number}")

    # Extract the cell text of every row; rows without <td> cells produce an
    # empty list and are removed by the row-wise dropna below.
    table_data = [
        [cell.text for cell in row.find_elements(By.TAG_NAME, 'td')]
        for row in table_rows
    ]

    # Convert data to pandas DataFrame.
    # dropna must NOT be called with inplace=True here: in-place calls return
    # None, which previously made this function crash on `df.shape`.
    df = (
        pd.DataFrame(table_data)
        .replace('', np.nan)
        .dropna(axis=1, how='all')
        .dropna(how='all')
    )
    print(df.shape)
    return df


# Create a ChromeDriver
driver = create_webdriver()

# Scrape data from the first 3 pages.
# The try/finally guarantees the browser is shut down even if a page fails,
# so a scraping error does not leave a headless Chrome process behind.
scraped_data = []
try:
    for i in range(1, 4):
        data_frame = scrape_data_from_table(driver, i)
        scraped_data.append(data_frame)
finally:
    driver.quit()

# Concatenate data from all pages, treating empty strings as missing and
# dropping columns/rows that are entirely missing.
# dropna must NOT use inplace=True in a chain: it returns None, which left
# all_data_df as None and broke every later cell that indexes into it.
all_data_df = (
    pd.concat(scraped_data)
    .replace('', np.nan)
    .dropna(axis=1, how='all')
    .dropna(how='all')
)

print(all_data_df)





In [None]:
# Output column name -> positional column label in the scraped frame
column_sources = {
    'RANKING': 0,
    'NAME': 1,
    '2v2': 2,
    '3v3': 9,
    'RBG': 16,
    '2v2EXP': 23,
    '3v3EXP': 24,
    'RBGEXP': 25,
    'ARENA_POINTS': 26,
    'RBG_POINTS': 27,
    'ITEM_LEVEL': 28,
    'ACHIEVEMENT_POINTS': 29,
    'VIEWS': 30,
}

# Copy the selected columns, in mapping order, into a fresh frame under their new names
renamed_data_df = pd.DataFrame()
for new_name, source_position in column_sources.items():
    renamed_data_df[new_name] = all_data_df[source_position]

# Use the ranking as the row index
renamed_data_df = renamed_data_df.set_index('RANKING')

                                  NAME   2v2   3v3   RBG 2v2EXP 3v3EXP RBGEXP  \
RANKING                                                                         
None                              None  None  None   NaN    NaN    NaN   None   
None                              None  None  None   NaN    NaN    NaN   None   
1        Raíku - Ravencrest\n<Bad RNG>   480  2750   NaN    NaN    NaN   3251   
NaN                                NaN  None  None   NaN    NaN    NaN   None   
NaN                                NaN  None  None   NaN    NaN    NaN   None   
...                                ...   ...   ...   ...    ...    ...    ...   
NaN                                NaN  None  None  None   None   None   None   
NaN                                NaN  None  None  None   None   None   None   
NaN                                NaN  None  None  None   None   None   None   
NaN                                NaN  None  None  None   None   None   None   
NaN                         