In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import pandas as pd
from bs4 import BeautifulSoup
import time

In [3]:
driver = webdriver.Chrome()
url = "https://www.transfermarkt.com/premier-league/tabelle/wettbewerb/GB1/saison_id/2012"
driver.get(url)

In [4]:
soup = BeautifulSoup(driver.page_source, 'html.parser')

In [5]:
# Wait for the page to load
time.sleep(5)  # Adjust as needed for your internet speed

In [6]:
table = soup.find('table', {'class':'items'})
rows = table.find_all('tr')

In [7]:
position = []
teams = []
played_games = []
wins = []
draws = []
losses = []
goals = []
goal_diff = []
points = []
team_ids = []

In [8]:
for row in rows[1:]:  # Skip the header row
    cols = row.find_all('td')
    if len(cols) > 0:
        position.append(cols[0].text.strip())  # Position
        teams.append(cols[2].text.strip())  # Team name (adjusted index)
        played_games.append(42)  # Games played (constant value)
        wins.append(cols[4].text.strip())  # Wins
        draws.append(cols[5].text.strip())  # Draws
        losses.append(cols[6].text.strip())  # Losses
        goals.append(cols[7].text.strip())  # Goals (For:Against)
        goal_diff.append(cols[8].text.strip())  # Goal difference (adjusted index)
        points.append(cols[9].text.strip())  # Points (adjusted index)

In [9]:
df = pd.DataFrame({
    'Position': position,
    'Team': teams,
    'Played': played_games,
    'Wins': wins,
    'Draws': draws,
    'Losses': losses,
    'Goals': goals,
    'Goal Difference': goal_diff,
    'Points': points
})

In [10]:
df.head(23)

Unnamed: 0,Position,Team,Played,Wins,Draws,Losses,Goals,Goal Difference,Points
0,1,Man Utd,42,28,5,5,86:43,43,89
1,2,Man City,42,23,9,6,66:34,32,78
2,3,Chelsea,42,22,9,7,75:39,36,75
3,4,Arsenal,42,21,10,7,72:37,35,73
4,5,Tottenham,42,21,9,8,66:46,20,72
5,6,Everton,42,16,15,7,55:40,15,63
6,7,Liverpool,42,16,13,9,71:43,28,61
7,8,West Brom,42,14,7,17,53:57,-4,49
8,9,Swansea,42,11,13,14,47:51,-4,46
9,10,West Ham,42,12,10,16,45:53,-8,46


In [11]:
df.replace({'Man City': 'Manchester City', 'Man Utd': 'Manchester United', 'Sheff Utd': 'Sheffield United', 'Sheff Wed': 'Sheffield Wednesday', 'Nottm Forest': 'Nottingham Forest', 'QPR': 'Queens Park Rangers', 'Wolves': 'Wolverhampton Wanderers', 'Leeds': 'Leeds United', 'Ipswich': 'Ipswich Town', 'Coventry': 'Coventry City', 'Wimbledon FC': 'Wimbledon', 'Tottenham': 'Tottenham Hotspur', 'Norwich': 'Norwich City', 'Blackburn': 'Blackburn Rovers', 'Newcastle': 'Newcastle United', 'West Ham': 'West Ham United', 'Leicester': 'Leicester City', 'Bolton': 'Bolton Wanderers', 'Derby': 'Derby County', 'Barnsley FC': 'Barnsley', 'Charlton': 'Charlton Athletic', 'Bradford': 'Bradford City', 'West Brom': 'West Bromwich Albion', 'Birmingham': 'Birmingham City', 'Wolves': 'Wolverhampton Wanderers', 'Wigan': 'Wigan Athletic', 'Swansea': 'Swansea City'}, inplace=True)

In [12]:
# Close the driver
driver.quit()

In [13]:
df.head(23)

Unnamed: 0,Position,Team,Played,Wins,Draws,Losses,Goals,Goal Difference,Points
0,1,Manchester United,42,28,5,5,86:43,43,89
1,2,Manchester City,42,23,9,6,66:34,32,78
2,3,Chelsea,42,22,9,7,75:39,36,75
3,4,Arsenal,42,21,10,7,72:37,35,73
4,5,Tottenham Hotspur,42,21,9,8,66:46,20,72
5,6,Everton,42,16,15,7,55:40,15,63
6,7,Liverpool,42,16,13,9,71:43,28,61
7,8,West Bromwich Albion,42,14,7,17,53:57,-4,49
8,9,Swansea City,42,11,13,14,47:51,-4,46
9,10,West Ham United,42,12,10,16,45:53,-8,46


In [14]:
df2 = pd.read_csv("EPL Matches 1992_1993_2023_2024.csv", index_col=0)

In [15]:
def add_managers_to_league_table(matches_df, league_table_df, season):
    """
    Add a column to the league table dataframe with managers who managed each team during a given season.
    
    Args:
        matches_df (pd.DataFrame): The dataframe containing match data.
        league_table_df (pd.DataFrame): The league table dataframe.
        season (str): The season to look for managers (e.g., '1992/1993').
    
    Returns:
        pd.DataFrame: The updated league table dataframe with a new 'Manager' column.
    """
    # Filter matches for the provided season
    season_matches = matches_df[matches_df['Season'] == season]

    # Create an empty dictionary to hold team names and their corresponding managers
    team_managers = {}

    # Loop through each match to get managers for the home and away teams
    for _, match in season_matches.iterrows():
        # For Home Team
        home_team = match['Home_Team']
        home_manager = match['Manager_Home']
        
        if home_team not in team_managers:
            team_managers[home_team] = set()  # Use a set to avoid duplicates
        team_managers[home_team].add(home_manager)

        # For Away Team
        away_team = match['Away_Team']
        away_manager = match['Manager_Away']
        
        if away_team not in team_managers:
            team_managers[away_team] = set()  # Use a set to avoid duplicates
        team_managers[away_team].add(away_manager)
    
    # Now, for each team in the league table, find their managers and add to the new column
    managers_list = []
    
    for _, row in league_table_df.iterrows():
        team_name = row['Team']
        
        if team_name in team_managers:
            # Convert the set of managers to a sorted list, then to a string
            managers = ', '.join(sorted(team_managers[team_name]))
        else:
            managers = 'Unknown'
        
        managers_list.append(managers)

    # Add the new column to the league table dataframe
    league_table_df['Manager'] = managers_list

    return league_table_df

In [16]:
updated_league_table = add_managers_to_league_table(df2, df, '2012/2013')
updated_league_table.head(23)

Unnamed: 0,Position,Team,Played,Wins,Draws,Losses,Goals,Goal Difference,Points,Manager
0,1,Manchester United,42,28,5,5,86:43,43,89,Sir Alex Ferguson
1,2,Manchester City,42,23,9,6,66:34,32,78,"Brian Kidd, Roberto Mancini"
2,3,Chelsea,42,22,9,7,75:39,36,75,"Rafael Benitez, Roberto Di Matteo"
3,4,Arsenal,42,21,10,7,72:37,35,73,Arsene Wenger
4,5,Tottenham Hotspur,42,21,9,8,66:46,20,72,Andre Villas-Boas
5,6,Everton,42,16,15,7,55:40,15,63,David Moyes
6,7,Liverpool,42,16,13,9,71:43,28,61,Brendan Rodgers
7,8,West Bromwich Albion,42,14,7,17,53:57,-4,49,Steve Clarke
8,9,Swansea City,42,11,13,14,47:51,-4,46,Michael Laudrup
9,10,West Ham United,42,12,10,16,45:53,-8,46,Sam Allardyce


In [17]:
updated_league_table.replace({'Eamonn Dolan +': 'Eamonn Dolan'}, inplace=True)

In [18]:
updated_league_table.head(23)

Unnamed: 0,Position,Team,Played,Wins,Draws,Losses,Goals,Goal Difference,Points,Manager
0,1,Manchester United,42,28,5,5,86:43,43,89,Sir Alex Ferguson
1,2,Manchester City,42,23,9,6,66:34,32,78,"Brian Kidd, Roberto Mancini"
2,3,Chelsea,42,22,9,7,75:39,36,75,"Rafael Benitez, Roberto Di Matteo"
3,4,Arsenal,42,21,10,7,72:37,35,73,Arsene Wenger
4,5,Tottenham Hotspur,42,21,9,8,66:46,20,72,Andre Villas-Boas
5,6,Everton,42,16,15,7,55:40,15,63,David Moyes
6,7,Liverpool,42,16,13,9,71:43,28,61,Brendan Rodgers
7,8,West Bromwich Albion,42,14,7,17,53:57,-4,49,Steve Clarke
8,9,Swansea City,42,11,13,14,47:51,-4,46,Michael Laudrup
9,10,West Ham United,42,12,10,16,45:53,-8,46,Sam Allardyce


In [19]:
df_manager = pd.read_csv("Manager History.csv")

In [20]:
team_id_mapping = dict(zip(df_manager['Team_Name'], df_manager['Team_ID']))
updated_league_table['Team_ID'] = updated_league_table['Team'].map(team_id_mapping)

In [21]:
updated_league_table.head(23)

Unnamed: 0,Position,Team,Played,Wins,Draws,Losses,Goals,Goal Difference,Points,Manager,Team_ID
0,1,Manchester United,42,28,5,5,86:43,43,89,Sir Alex Ferguson,MUN
1,2,Manchester City,42,23,9,6,66:34,32,78,"Brian Kidd, Roberto Mancini",MCI
2,3,Chelsea,42,22,9,7,75:39,36,75,"Rafael Benitez, Roberto Di Matteo",CHE
3,4,Arsenal,42,21,10,7,72:37,35,73,Arsene Wenger,ARS
4,5,Tottenham Hotspur,42,21,9,8,66:46,20,72,Andre Villas-Boas,THO
5,6,Everton,42,16,15,7,55:40,15,63,David Moyes,EVE
6,7,Liverpool,42,16,13,9,71:43,28,61,Brendan Rodgers,LIV
7,8,West Bromwich Albion,42,14,7,17,53:57,-4,49,Steve Clarke,WBR
8,9,Swansea City,42,11,13,14,47:51,-4,46,Michael Laudrup,SWA
9,10,West Ham United,42,12,10,16,45:53,-8,46,Sam Allardyce,WHA


In [22]:
updated_league_table['Season'] = '2012/2013'
updated_league_table = updated_league_table[['Season', 'Position', 'Team_ID', 'Team', 'Played', 'Wins', 'Draws', 'Losses', 'Goals', 'Goal Difference', 'Points', 'Manager']]
updated_league_table.head(23)

Unnamed: 0,Season,Position,Team_ID,Team,Played,Wins,Draws,Losses,Goals,Goal Difference,Points,Manager
0,2012/2013,1,MUN,Manchester United,42,28,5,5,86:43,43,89,Sir Alex Ferguson
1,2012/2013,2,MCI,Manchester City,42,23,9,6,66:34,32,78,"Brian Kidd, Roberto Mancini"
2,2012/2013,3,CHE,Chelsea,42,22,9,7,75:39,36,75,"Rafael Benitez, Roberto Di Matteo"
3,2012/2013,4,ARS,Arsenal,42,21,10,7,72:37,35,73,Arsene Wenger
4,2012/2013,5,THO,Tottenham Hotspur,42,21,9,8,66:46,20,72,Andre Villas-Boas
5,2012/2013,6,EVE,Everton,42,16,15,7,55:40,15,63,David Moyes
6,2012/2013,7,LIV,Liverpool,42,16,13,9,71:43,28,61,Brendan Rodgers
7,2012/2013,8,WBR,West Bromwich Albion,42,14,7,17,53:57,-4,49,Steve Clarke
8,2012/2013,9,SWA,Swansea City,42,11,13,14,47:51,-4,46,Michael Laudrup
9,2012/2013,10,WHA,West Ham United,42,12,10,16,45:53,-8,46,Sam Allardyce


In [23]:
updated_league_table.to_csv('2012_2013 League Table.csv')