In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

In [2]:
# Open a Chrome session (the driver binary is resolved through
# ChromeDriverManager) and navigate to the Premier League club wins stats page.
url = "https://www.premierleague.com/stats/top/clubs/wins?co=1&se=-1"
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service)
driver.get(url)

In [3]:
# Fixed 5-second pause after the initial navigation, before any scraping starts.
# Presumably this lets the page finish rendering -- TODO confirm it is still
# needed, since the scraping loop also waits explicitly for the table container.
time.sleep(5)

In [4]:
def create_team_id(team_name):
    """Build a short upper-case identifier from a team name.

    Names with two or more words use the first letter of the first word
    followed by the first two letters of the second word
    (``"Manchester United"`` -> ``"MUN"``). Single-word names use their first
    three letters (``"Arsenal"`` -> ``"ARS"``).

    Args:
        team_name: Team name as text. Leading, trailing and repeated
            whitespace is tolerated.

    Returns:
        The identifier in upper case, or an empty string when the name
        contains no words.
    """
    # Decide on the number of words rather than on the presence of a space:
    # a name such as "Arsenal " contains a space but has only one word, and
    # indexing a second word there would raise IndexError.
    parts = team_name.split()
    if len(parts) >= 2:
        return (parts[0][0] + parts[1][:2]).upper()
    if parts:
        return parts[0][:3].upper()
    return ''

In [5]:
# Accumulator for the scraped rows: the scraping loop appends one dict per
# table row with the keys 'Team_Name', 'Wins' and 'Team_ID'.
teams_data = []

In [6]:
# Scrape every page of the club wins table into `teams_data`.
#
# Each pass waits for the table container, reads all rows on the current page,
# appends one record per row and then clicks the "Next" button. The loop ends
# when the sentinel team has been recorded, when the "Next" button is missing,
# hidden or disabled, when the table stays empty for too many attempts, or on
# any unexpected error.
MAX_EMPTY_RETRIES = 5            # Upper bound on consecutive "no rows" retries
LAST_TEAM_NAME = "Swindon Town"  # Team this notebook treats as the last table entry

last_page_check = False  # Set once the sentinel team has been recorded
empty_retries = 0        # Consecutive passes in which the table had no rows

while True:
    try:
        print("Waiting for the table to load...")

        # Block until the table container exists in the DOM (15 s timeout)
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'statsTableContainer'))
        )
        print("Table container located.")

        # Allow time for content inside the table to fully load
        time.sleep(3)

        # Fetch the table rows
        rows = driver.find_elements(By.CSS_SELECTOR, 'tr.table__row')
        print(f"Number of rows found: {len(rows)}")

        if not rows:
            # Retry a bounded number of times; without the bound a table that
            # never renders would keep this loop spinning forever.
            empty_retries += 1
            if empty_retries >= MAX_EMPTY_RETRIES:
                print(f"No rows found after {empty_retries} attempts. Exiting loop.")
                break
            print("No rows found, trying again after waiting.")
            time.sleep(5)  # Wait longer and try again
            continue
        empty_retries = 0

        # Extract data from the table rows
        for row in rows:
            try:
                # Extract team name and wins (both are read as cell text)
                team_name = row.find_element(By.CSS_SELECTOR, 'td.stats-table__name').text
                wins = row.find_element(By.CSS_SELECTOR, 'td.stats-table__main-stat').text

                # Create Team ID
                team_id = create_team_id(team_name)

                # Append data to list
                teams_data.append({
                    'Team_Name': team_name,
                    'Wins': wins,
                    'Team_ID': team_id
                })
                print(f"Added team: {team_name}, Wins: {wins}")

                # Only flag the stop here: the remaining rows of this page are
                # still processed before the loop exits.
                if team_name == LAST_TEAM_NAME:
                    print(f"Reached the last team: {LAST_TEAM_NAME}. Exiting loop.")
                    last_page_check = True

            except Exception as row_error:
                # A bad row is reported and skipped; the rest of the page continues
                print(f"Error processing row: {row_error}")

        if last_page_check:
            break  # Exit loop if we reached the last page

        # Try to locate and click the next button
        try:
            pagination_container = driver.find_element(By.CSS_SELECTOR, 'div.paginationContainer')

            # Find the 'Next' button within the pagination container
            next_button = pagination_container.find_element(
                By.CSS_SELECTOR, 'div.paginationBtn.paginationNextContainer'
            )

            # The button is marked as disabled through its CSS class
            next_is_disabled = 'is-disabled' in next_button.get_attribute('class')

            if next_button.is_displayed() and not next_is_disabled:
                # Scroll the button into view and click it using JavaScript
                driver.execute_script("arguments[0].scrollIntoView();", next_button)
                print("Clicking the next button...")
                driver.execute_script("arguments[0].click();", next_button)
                # Wait for the next page to load
                time.sleep(3)
            else:
                print("Next button is not clickable or is disabled. Exiting loop.")
                break

        except Exception as pagination_error:
            print(f"Error in pagination: {pagination_error}")
            break

    except Exception as e:
        print(f"An error occurred: {e}")
        break


Waiting for the table to load...
Table container located.
Number of rows found: 10
Added team: Manchester United, Wins: 746
Added team: Arsenal, Wins: 676
Added team: Liverpool, Wins: 656
Added team: Chelsea, Wins: 650
Added team: Tottenham Hotspur, Wins: 542
Added team: Manchester City, Wins: 533
Added team: Everton, Wins: 439
Added team: Newcastle United, Wins: 422
Added team: Aston Villa, Wins: 396
Added team: West Ham United, Wins: 361
Clicking the next button...
Waiting for the table to load...
Table container located.
Number of rows found: 10
Added team: Southampton, Wins: 280
Added team: Blackburn Rovers, Wins: 262
Added team: Leeds United, Wins: 223
Added team: Leicester City, Wins: 218
Added team: Fulham, Wins: 192
Added team: Crystal Palace, Wins: 169
Added team: Middlesbrough, Wins: 165
Added team: Sunderland, Wins: 153
Added team: Bolton Wanderers, Wins: 149
Added team: West Bromwich Albion, Wins: 117
Clicking the next button...
Waiting for the table to load...
Table contai

In [7]:
# Build the results table from the scraped records. The columns are named
# explicitly so the frame keeps its schema even when the scrape produced no
# rows; an empty `teams_data` would otherwise give a frame without columns and
# the later cells that index 'Team_Name' would raise KeyError.
df = pd.DataFrame(teams_data, columns=['Team_Name', 'Wins', 'Team_ID'])
df

Unnamed: 0,Team_Name,Wins,Team_ID
0,Manchester United,746,MUN
1,Arsenal,676,ARS
2,Liverpool,656,LIV
3,Chelsea,650,CHE
4,Tottenham Hotspur,542,THO
5,Manchester City,533,MCI
6,Everton,439,EVE
7,Newcastle United,422,NUN
8,Aston Villa,396,AVI
9,West Ham United,361,WHA


In [8]:
# Manual Team_ID overrides: keys are exact Team_Name values, values are the
# IDs that replace the generated ones for those teams.
team_id_corrections = {
    'Swansea City': 'SWA',
    'Bradford City': 'BRA',
    'Coventry City': 'COV',
    'Brighton & Hove Albion': 'BHA',
    'Sheffield United': 'SHU',
    'Wigan Athletic': 'WGA'
}

# Apply every override straight from the mapping, so adding an entry to
# `team_id_corrections` is enough -- no per-team assignment line is needed.
for corrected_name, corrected_id in team_id_corrections.items():
    df.loc[df['Team_Name'] == corrected_name, 'Team_ID'] = corrected_id

# Verify the changes
print(df[df['Team_Name'].isin(team_id_corrections.keys())])


                 Team_Name Wins Team_ID
23           Coventry City   99     COV
28          Wigan Athletic   85     WGA
29            Swansea City   82     SWA
32  Brighton & Hove Albion   80     BHA
39        Sheffield United   56     SHU
45           Bradford City   14     BRA


In [9]:
# Display the DataFrame after the Team_ID corrections
df

Unnamed: 0,Team_Name,Wins,Team_ID
0,Manchester United,746,MUN
1,Arsenal,676,ARS
2,Liverpool,656,LIV
3,Chelsea,650,CHE
4,Tottenham Hotspur,542,THO
5,Manchester City,533,MCI
6,Everton,439,EVE
7,Newcastle United,422,NUN
8,Aston Villa,396,AVI
9,West Ham United,361,WHA


In [10]:
# Number the teams 1..N in their current row order and arrange the columns in
# their final order: Rank, Team_ID, Team_Name, Wins.
column_order = ['Rank', 'Team_ID', 'Team_Name', 'Wins']
df = df.assign(Rank=range(1, len(df) + 1))[column_order]

# Preview the first rows to check the new layout
df.head()


Unnamed: 0,Rank,Team_ID,Team_Name,Wins
0,1,MUN,Manchester United,746
1,2,ARS,Arsenal,676
2,3,LIV,Liverpool,656
3,4,CHE,Chelsea,650
4,5,THO,Tottenham Hotspur,542


In [11]:
# Display the DataFrame with the Rank column added and the columns reordered
df

Unnamed: 0,Rank,Team_ID,Team_Name,Wins
0,1,MUN,Manchester United,746
1,2,ARS,Arsenal,676
2,3,LIV,Liverpool,656
3,4,CHE,Chelsea,650
4,5,THO,Tottenham Hotspur,542
5,6,MCI,Manchester City,533
6,7,EVE,Everton,439
7,8,NUN,Newcastle United,422
8,9,AVI,Aston Villa,396
9,10,WHA,West Ham United,361


In [12]:
# Write the table to a CSV file; index=False keeps the DataFrame index out of the file
df.to_csv('League Power Ranking.csv', index=False)