In [49]:
import pandas as pd
import numpy as np
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def create_webdriver():
    """Return a new Chrome WebDriver configured with the ``--headless`` flag.

    The caller owns the returned driver and is responsible for calling
    ``quit()`` on it when finished.
    """
    options = webdriver.ChromeOptions()
    # Run Chrome without opening a visible browser window.
    options.add_argument('--headless')
    headless_driver = webdriver.Chrome(options=options)
    return headless_driver

def scrape_data_from_table(
    driver,
    page_number,
    page_load_delay=15,
    timeout=15,
    table_xpath='/html/body/app-root/div/div[3]/app-ranking/div/div[2]/table',
):
    """Load one ranking page in ``driver`` and return its table cells as a DataFrame.

    Parameters
    ----------
    driver
        Selenium WebDriver (anything exposing ``get(url)`` that
        ``WebDriverWait`` can poll).
    page_number
        Value interpolated as the last path segment of the ranking URL.
    page_load_delay : float, default 15
        Seconds to sleep unconditionally after requesting the page, before
        looking for the table. Pass ``0`` to rely on the explicit wait alone.
    timeout : float, default 15
        Maximum number of seconds ``WebDriverWait`` waits for the table
        element to be present.
    table_xpath : str
        XPath locating the table element. The default is an absolute path,
        so it depends on the exact page layout.

    Returns
    -------
    pandas.DataFrame
        One row per ``tr`` element found under the table and one column per
        ``td`` position. Empty strings are replaced with NaN and columns
        that are entirely NaN are dropped, so the remaining integer column
        labels may have gaps and may differ from page to page.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        Raised by ``WebDriverWait.until`` when the table is not present
        within ``timeout`` seconds.
    """
    # Construct the URL using the page number
    url = f'https://check-pvp.fr/ranking/eu/all-realms/all-factions/all-classes/all-specs/rateatm3v3/desc/{page_number}'

    # Request the page
    driver.get(url)
    if page_load_delay:
        # Fixed pause on top of the explicit wait below; the explicit wait
        # only checks that the table element exists, not that rows are filled.
        time.sleep(page_load_delay)

    # Find the table on the page
    table = WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.XPATH, table_xpath))
    )
    # find_elements searches all descendants, not only direct children.
    # NOTE(review): the recorded run reports 500+ rows per page; if the page
    # nests tables inside cells, nested rows/cells are included here too --
    # confirm against the page DOM.
    table_rows = table.find_elements(By.TAG_NAME, 'tr')
    print(f"Found {len(table_rows)} rows on the table from page {page_number}")

    # Extract the text of every td cell, row by row. Rows that contain no td
    # cells (e.g. header rows) yield an empty list and become all-NaN rows.
    table_data = [
        [cell.text for cell in row.find_elements(By.TAG_NAME, 'td')]
        for row in table_rows
    ]

    # Convert data to pandas DataFrame, treating empty cells as missing and
    # dropping columns that hold no data at all.
    df = pd.DataFrame(table_data).replace('', np.nan).dropna(axis=1, how='all')
    print(df.shape)
    return df


# Create a ChromeDriver
driver = create_webdriver()

# Scrape data from the first 3 pages
scraped_data = []
try:
    for i in range(1, 4):
        data_frame = scrape_data_from_table(driver, i)
        scraped_data.append(data_frame)
finally:
    # Always shut the browser down, even when a page fails to load;
    # otherwise the headless Chrome process is left running.
    driver.quit()

# Concatenate data from all pages. Columns are aligned by their integer
# labels; ignore_index=True gives every row a unique label instead of
# repeating each page's own 0..n numbering.
all_data_df = (
    pd.concat(scraped_data, ignore_index=True)
    .replace('', np.nan)
    .dropna(axis=1, how='all')
)

print(all_data_df)





Found 553 rows on the table from page 1
(553, 15)
Found 530 rows on the table from page 2
(530, 18)
Found 502 rows on the table from page 3
(502, 13)
       0                              1     2     9     18    25    26    27  \
0    None                           None  None  None  None  None  None  None   
1    None                           None  None  None  None  None  None  None   
2       1  Raíku - Ravencrest\n<Bad RNG>   480  2750   384  3251  3810  2400   
3     NaN                            NaN  None  None  None  None  None  None   
4     NaN                            NaN  None  None  None  None  None  None   
..    ...                            ...   ...   ...   ...   ...   ...   ...   
497   NaN                            NaN   NaN   NaN   NaN   NaN   NaN   NaN   
498   NaN                            NaN   NaN   NaN   NaN   NaN   NaN   NaN   
499   NaN                            NaN   NaN   NaN   NaN   NaN   NaN   NaN   
500   NaN                            NaN   NaN   N

In [54]:
# Pick the scraped cell positions that carry data and give them readable names.
# NOTE(review): the mapping is purely positional. In the output recorded with
# this notebook, the top-ranked rows show no RBG/2v2EXP/3v3EXP values while
# later rows do, so the positions may not line up for every scraped row --
# confirm against the source table.
column_labels = {
    0: 'RANKING',
    1: 'NAME',
    2: '2v2',
    9: '3v3',
    16: 'RBG',
    23: '2v2EXP',
    24: '3v3EXP',
    25: 'RBGEXP',
    26: 'ARENA_POINTS',
    27: 'RBG_POINTS',
    28: 'ITEM_LEVEL',
    29: 'ACHIEVEMENT_POINTS',
    30: 'VIEWS',
}

# Index by ranking, then discard rows that have no value in any other column.
renamed_data_df = (
    all_data_df[list(column_labels)]
    .rename(columns=column_labels)
    .set_index('RANKING')
    .dropna(how='all')
)

In [58]:
# Show the renamed table (indexed by RANKING) as this cell's output.
renamed_data_df

Unnamed: 0_level_0,NAME,2v2,3v3,RBG,2v2EXP,3v3EXP,RBGEXP,ARENA_POINTS,RBG_POINTS,ITEM_LEVEL,ACHIEVEMENT_POINTS,VIEWS
RANKING,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Raíku - Ravencrest\n<Bad RNG>,480,2750,,,,3251,3810,2400,4457,1186,444
2,Gudóngmae - Ravencrest\n<Viciøus PvP>,192,2742,,,,3277,3810,1800,4449,734,431
3,Sính - Ravencrest\n<Bad RNG>,1649,2739,,,,2466,3123,1800,3622,865,432
4,Chãn - Tarren Mill\n<Echo>,768,2733,,,,2600,3113,384,3604,203,444
5,Whaazzform - Tarren Mill\n<Echo>,384,2729,,,,2749,3606,2400,4191,1218,444
...,...,...,...,...,...,...,...,...,...,...,...,...
146,Nttyfromohio - Ravencrest\n<I Zolo This Noob>,1601,2479,384,1606,2479,1900,2589,710,431,18440,39
147,Tael - Elune\n<Above Average>,2419,2478,0,2419,2957,2300,3161,960,439,20200,103
148,Aconethirty - Ravencrest\n<bedge pvp>,2260,2478,192,2940,3509,1700,3824,547,431,15235,15
149,Ryzën - Stormscale\n<its so hard bejb>,1530,2476,384,2391,3066,2400,3290,1186,434,15815,332
