## English League Football Statistics from 2009/2010 season to 2023/2024

#### Import required libraries

In [33]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import glob
import os

#### Import all league data to folders

In [None]:
# Season codes as they appear in the download links, mapped to the season
# label used as the prefix of each saved file name
target_seasons = {
    "2324": "2023_2024",
    "2223": "2022_2023",
    "2122": "2021_2022",
    "2021": "2020_2021",
    "1920": "2019_2020",
    "1819": "2018_2019",
    "1718": "2017_2018",
    "1617": "2016_2017",
    "1516": "2015_2016",
    "1415": "2014_2015",
    "1314": "2013_2014",
    "1213": "2012_2013",
    "1112": "2011_2012",
    "1011": "2010_2011",
    "0910": "2009_2010",
    }

# Division codes as they appear in the download links, mapped to the
# sub-folder each league's files are saved in
target_leagues = {
    "E0": "Premier League",
    "E1": "Championship League",
    "E2": "League 1",
    "E3": "League 2",
    "EC": "National League"
}

# Page that lists the download links
base_url = "https://www.football-data.co.uk/englandm.php"

# Directory to save files
base_dir = "FootballData"

# Seconds to wait on a single HTTP request; without a timeout a stalled
# connection would hang the cell indefinitely
request_timeout = 30

# Create directories if they don't exist
for league_folder in target_leagues.values():
    os.makedirs(os.path.join(base_dir, league_folder), exist_ok=True)

# Fetch the index page and fail loudly on an HTTP error instead of parsing
# an error page that contains no download links
response = requests.get(base_url, timeout=request_timeout)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Collect the link targets once rather than re-scanning the page for every
# season/league combination
hrefs = [link['href'] for link in soup.find_all('a', href=True)]

# Loop through each season and league, find and download files
for season_code, season_name in target_seasons.items():
    for code, league_name in target_leagues.items():
        for href in hrefs:
            if season_code in href and f"{code}.csv" in href:
                file_url = f"https://www.football-data.co.uk/{href}"
                file_name = f"{season_name}_{code}.csv"

                # Full path to save the file
                save_path = os.path.join(base_dir, league_name, file_name)

                # Download the file; skip failed responses so an HTTP error
                # body is never written to disk as if it were CSV data
                file_response = requests.get(file_url, timeout=request_timeout)
                if not file_response.ok:
                    print(f"Skipped {file_url}: HTTP {file_response.status_code}")
                    continue

                with open(save_path, 'wb') as out_file:
                    out_file.write(file_response.content)

                print(f"Downloaded {save_path}")

#### Set options and parameters

In [35]:
# Show every column when a DataFrame is displayed
pd.options.display.max_columns = None

# Root folder that is searched for the league CSV files
folder_path = r"C:\Users\BvanStaden\OneDrive - Premier FMCG (PTY) Ltd\Documents\Code\Python\Football\FootballData"

# Collect every CSV file in the root folder and all of its sub-folders
csv_pattern = os.path.join(folder_path, "**", "*.csv")
csv_files = glob.glob(csv_pattern, recursive=True)

# Accumulator for the DataFrames read from the CSV files
dfs = list()

#### Data import and cleanse

In [36]:
# Start from an empty list so that re-running this cell does not append the
# same files a second time (which would double every total computed later)
dfs = []

# Read every CSV file and tag its rows with the season taken from the file name
for file in csv_files:
    # File names are expected to look like "<YYYY>_<YYYY>_<league code>.csv"
    base_name = os.path.splitext(os.path.basename(file))[0]
    if "_" not in base_name:
        print(f"Skipping {file}: file name does not contain a season prefix")
        continue
    season_part, league_code = base_name.rsplit("_", 1)

    # Format season to "YYYY/YYYY"
    season = season_part.replace("_", "/")

    # Read with an explicit single-byte encoding; skip files that cannot be
    # read so that one bad file does not abort the whole import
    try:
        df = pd.read_csv(file, encoding='ISO-8859-1')  # or use 'utf-16'
    except Exception as e:
        print(f"Error reading {file}: {e}")
        continue

    # Add the season as the first column
    df.insert(0, "Season", season)

    dfs.append(df)

# Source column codes mapped to descriptive column names
column_names = {
    "Div": "League Division",
    "Date": "Match Date (dd/mm/yy)",
    "Time": "Time of match kick off",
    "HomeTeam": "Home Team",
    "AwayTeam": "Away Team",
    "FTHG": "Full Time Home Team Goals",
    "FTAG": "Full Time Away Team Goals",
    "FTR": "Full Time Result (H=Home Win, D=Draw, A=Away Win)",
    "HTHG": "Half Time Home Team Goals",
    "HTAG": "Half Time Away Team Goals",
    "HTR": "Half Time Result (H=Home Win, D=Draw, A=Away Win)",
    "Referee": "Match Referee",
    "HS": "Home Team Shots",
    "AS": "Away Team Shots",
    "HST": "Home Team Shots on Target",
    "AST": "Away Team Shots on Target",
    "HHW": "Home Team Hit Woodwork",
    "AHW": "Away Team Hit Woodwork",
    "HC": "Home Team Corners",
    "AC": "Away Team Corners",
    "HF": "Home Team Fouls Committed",
    "AF": "Away Team Fouls Committed",
    "HFKC": "Home Team Free Kicks Conceded",
    "AFKC": "Away Team Free Kicks Conceded",
    "HO": "Home Team Offsides",
    "AO": "Away Team Offsides",
    "HY": "Home Team Yellow Cards",
    "AY": "Away Team Yellow Cards",
    "HR": "Home Team Red Cards",
    "AR": "Away Team Red Cards"
}

# The 24 columns kept for analysis. Selecting them by name (rather than
# "the first 24 columns") keeps the result independent of which file happens
# to be read first and therefore decides the column order after concat.
kept_columns = {
    "Season",
    "League Division",
    "Match Date (dd/mm/yy)",
    "Home Team",
    "Away Team",
    "Full Time Home Team Goals",
    "Full Time Away Team Goals",
    "Full Time Result (H=Home Win, D=Draw, A=Away Win)",
    "Half Time Home Team Goals",
    "Half Time Away Team Goals",
    "Half Time Result (H=Home Win, D=Draw, A=Away Win)",
    "Match Referee",
    "Home Team Shots",
    "Away Team Shots",
    "Home Team Shots on Target",
    "Away Team Shots on Target",
    "Home Team Fouls Committed",
    "Away Team Fouls Committed",
    "Home Team Corners",
    "Away Team Corners",
    "Home Team Yellow Cards",
    "Away Team Yellow Cards",
    "Home Team Red Cards",
    "Away Team Red Cards",
}

if dfs:  # Check if there are any DataFrames to concatenate
    combined_df = pd.concat(dfs, ignore_index=True)
    combined_df.rename(columns=column_names, inplace=True)

    # Drop unused columns, preserving the existing column order
    combined_df = combined_df[[col for col in combined_df.columns if col in kept_columns]]
else:
    print("No valid data frames to concatenate.")

#### Test data import

In [None]:
# Display sample data.
# Only the last expression of a notebook cell is rendered, so the shape is
# printed explicitly; as a bare expression it would be evaluated and discarded.
print(combined_df.shape)
combined_df.head(5)




#### Points function

In [38]:
def pointsPerSeason(season, league, data=None):
    """Return the points each team earned in one season of one league.

    A win is worth 3 points, a draw 1 point and a loss 0 points. Matches whose
    full-time result is not 'H', 'A' or 'D' contribute 0 points to both teams.

    Parameters:
        season: Season label to match against the "Season" column,
            e.g. "2023/2024".
        league: Value to match against the "League Division" column,
            e.g. "E0".
        data: Optional match DataFrame to use instead of the module-level
            ``combined_df``. It must contain the columns "Season",
            "League Division", "Home Team", "Away Team" and
            "Full Time Result (H=Home Win, D=Draw, A=Away Win)".

    Returns:
        DataFrame with the columns "Season", "Team" and "Points", sorted by
        "Points" in descending order. An empty DataFrame with those columns
        is returned when no match fits the season/league.
    """
    result_col = 'Full Time Result (H=Home Win, D=Draw, A=Away Win)'

    source = combined_df if data is None else data
    matches = source[(source["Season"] == season) & (source["League Division"] == league)]

    # Without this guard the groupby below raises KeyError, because a frame
    # built from zero rows has no "Season"/"Team" columns
    if matches.empty:
        return pd.DataFrame(columns=["Season", "Team", "Points"])

    # Points from each side's point of view; results missing from the mapping
    # (including NaN) become 0 points
    home_points = matches[result_col].map({'H': 3, 'D': 1}).fillna(0).astype("int64")
    away_points = matches[result_col].map({'A': 3, 'D': 1}).fillna(0).astype("int64")

    # One row per team per match: home sides first, then away sides
    points_per_season = pd.concat(
        [
            pd.DataFrame({'Season': matches['Season'], 'Team': matches['Home Team'], 'Points': home_points}),
            pd.DataFrame({'Season': matches['Season'], 'Team': matches['Away Team'], 'Points': away_points}),
        ],
        ignore_index=True,
    )

    # Group by Season and Team and sum the points
    points_summary = points_per_season.groupby(['Season', 'Team'], as_index=False).sum()

    # Sort by number of points
    return points_summary.sort_values(by="Points", ascending=False)

#### Statistics function

In [39]:
# Supported stat_type values mapped to the (home team, away team) source
# columns. For "goals_conceded" the columns are swapped on purpose: a team
# concedes what its opponent scores.
_STAT_COLUMNS = {
    "goals": ("Full Time Home Team Goals", "Full Time Away Team Goals"),
    "halftime_goals": ("Half Time Home Team Goals", "Half Time Away Team Goals"),
    "goals_conceded": ("Full Time Away Team Goals", "Full Time Home Team Goals"),
    "total_shots": ("Home Team Shots", "Away Team Shots"),
    "on_target_shots": ("Home Team Shots on Target", "Away Team Shots on Target"),
    "corners": ("Home Team Corners", "Away Team Corners"),
    "fouls_commited": ("Home Team Fouls Committed", "Away Team Fouls Committed"),
    "yellow_cards": ("Home Team Yellow Cards", "Away Team Yellow Cards"),
    "red_cards": ("Home Team Red Cards", "Away Team Red Cards"),
}


def statsPerSeason(season, league, stat_type, data=None):
    """Return one statistic summed per team for one season of one league.

    Parameters:
        season: Season label to match against the "Season" column,
            e.g. "2023/2024".
        league: Value to match against the "League Division" column,
            e.g. "E0".
        stat_type: Name of the statistic; one of "goals", "halftime_goals",
            "goals_conceded", "total_shots", "on_target_shots", "corners",
            "fouls_commited", "yellow_cards", "red_cards".
        data: Optional match DataFrame to use instead of the module-level
            ``combined_df``. It must contain "Season", "League Division",
            "Home Team", "Away Team" and the two source columns of the
            requested statistic.

    Returns:
        DataFrame with the columns "Season", "Team" and ``stat_type``, sorted
        by the statistic in descending order. An empty DataFrame with those
        columns is returned when no match fits the season/league.

    Raises:
        ValueError: If ``stat_type`` is not one of the supported names.
    """
    # Validate up front so an invalid name is reported even when the
    # season/league filter matches no rows
    if stat_type not in _STAT_COLUMNS:
        supported = ", ".join(repr(name) for name in _STAT_COLUMNS)
        raise ValueError(f"Invalid stat_type {stat_type!r}. Use one of: {supported}.")
    home_col, away_col = _STAT_COLUMNS[stat_type]

    source = combined_df if data is None else data
    matches = source[(source["Season"] == season) & (source["League Division"] == league)]

    # Without this guard the groupby below raises KeyError, because a frame
    # built from zero rows has no "Season"/"Team" columns
    if matches.empty:
        return pd.DataFrame(columns=["Season", "Team", stat_type])

    # One row per team per match: home sides first, then away sides
    stats_per_season = pd.concat(
        [
            pd.DataFrame({"Season": matches["Season"], "Team": matches["Home Team"], stat_type: matches[home_col]}),
            pd.DataFrame({"Season": matches["Season"], "Team": matches["Away Team"], stat_type: matches[away_col]}),
        ],
        ignore_index=True,
    )

    # Group by Season and Team and sum the statistic
    stats_summary = stats_per_season.groupby(["Season", "Team"], as_index=False).sum()

    # Sort by the requested statistic
    return stats_summary.sort_values(by=stat_type, ascending=False)


#### Compare points scored across all divisions for the last 15 seasons

In [None]:
# Scatter-plot every team's points total, one figure per season, with the
# teams of each division drawn as their own series.
#
# stat_type is the column that is plotted. pointsPerSeason returns it as
# 'Points'; the names accepted by statsPerSeason are:
#   "goals", "halftime_goals", "goals_conceded", "total_shots",
#   "on_target_shots", "corners", "fouls_commited", "yellow_cards", "red_cards"
#
# Leagues are selected by division code:
#   E0 = Premier League, E1 = Championship League, E2 = League 1,
#   E3 = League 2, EC = National League

stat_type = 'Points'
# "2023/2024" down to "2009/2010", newest season first
seasons = [f"{start}/{start + 1}" for start in range(2023, 2008, -1)]
leagues = ['E0', 'E1', 'E2', 'E3', 'EC']

for season in seasons:
    # Points table of every league for this season, keyed by league code
    results = {}
    for league in leagues:
        results[league] = pointsPerSeason(season, league)

    # Stack the per-league tables, tagging each row with its league code
    league_frames = []
    for league in leagues:
        table = results[league]
        league_frames.append(
            pd.DataFrame({'Team': table['Team'], stat_type: table[stat_type], 'League': league})
        )
    min_max_df = pd.concat(league_frames, ignore_index=True)

    # One scatter series per league, in order of first appearance
    plt.figure(figsize=(10, 6))
    for league, league_data in min_max_df.groupby('League', sort=False):
        plt.scatter(league_data['Team'], league_data[stat_type], label=league)

    plt.title(f'{stat_type} Comparison between Leagues - {season}')
    plt.xlabel('Teams', fontsize=8)
    plt.ylabel(stat_type)
    plt.xticks(rotation=90)
    plt.legend()
    plt.grid()
    plt.tight_layout()
    plt.show()

#### Compare specific statistics across all divisions for the last 15 seasons

In [None]:
# Scatter-plot one statistic per team, one figure per season, with the teams
# of each division drawn as their own series.
#
# stat_type must be a name accepted by statsPerSeason:
#   "goals", "halftime_goals", "goals_conceded", "total_shots",
#   "on_target_shots", "corners", "fouls_commited", "yellow_cards", "red_cards"
#
# Leagues are selected by division code:
#   E0 = Premier League, E1 = Championship League, E2 = League 1,
#   E3 = League 2, EC = National League
# (use leagues = ['E0'] to look at a single division)

stat_type = 'total_shots'
# "2023/2024" down to "2009/2010", newest season first
seasons = [f"{start}/{start + 1}" for start in range(2023, 2008, -1)]
leagues = ['E0', 'E1', 'E2', 'E3', 'EC']

for season in seasons:
    # Statistic table of every league for this season, keyed by league code
    results = {}
    for league in leagues:
        results[league] = statsPerSeason(season, league, stat_type)

    # Stack the per-league tables, tagging each row with its league code
    league_frames = []
    for league in leagues:
        table = results[league]
        league_frames.append(
            pd.DataFrame({'Team': table['Team'], stat_type: table[stat_type], 'League': league})
        )
    min_max_df = pd.concat(league_frames, ignore_index=True)

    # One scatter series per league, in order of first appearance
    plt.figure(figsize=(10, 6))
    for league, league_data in min_max_df.groupby('League', sort=False):
        plt.scatter(league_data['Team'], league_data[stat_type], label=league)

    plt.title(f'{stat_type} Comparison between Leagues - {season}')
    plt.xlabel('Teams', fontsize=8)
    plt.ylabel(stat_type)
    plt.xticks(rotation=90)
    plt.legend()
    plt.grid()
    plt.tight_layout()
    plt.show()

#### Investigate data in detail

In [None]:
# Parameters
season = "2022/2023"
league = "E0"
stat_type = "goals"

# Points table and the table of the chosen statistic for the same
# season/league
points_df = pointsPerSeason(season, league)
specific_df = statsPerSeason(season, league, stat_type)

# Join the two tables on Season and Team so the result carries a single
# 'Season' column (placing them side by side would duplicate it)
merged1_df = pd.merge(points_df, specific_df, on=['Season', 'Team']).set_index('Team')

# Ratio of the chosen statistic to points; the column keeps the name
# "Goals per Point" because stat_type is "goals" here
merged1_df["Goals per Point"] = merged1_df[stat_type] / merged1_df["Points"]

# Highest ratio first; as the last expression of the cell this is what gets displayed
order1_df = merged1_df.sort_values(by='Goals per Point', ascending=False)
order1_df


### Most effective scoring teams (Total Shots / Goals)

In [None]:
# Names accepted by statsPerSeason:
#   "goals", "halftime_goals", "goals_conceded", "total_shots",
#   "on_target_shots", "corners", "fouls_commited", "yellow_cards", "red_cards"

# Parameters
season = "2015/2016"
league = "E0"
stat_type1 = "goals"
stat_type2 = "total_shots"

# Goals scored per team
goals_df = statsPerSeason(season, league, stat_type1)
# Total shots per team
shots_df = statsPerSeason(season, league, stat_type2)

# Place the two tables side by side, aligned on team name
new_df = pd.concat([goals_df.set_index('Team'), shots_df.set_index('Team')], axis=1)

# The ratio is shots divided by goals, i.e. how many shots a team needs per
# goal, so it is labelled "Shots per Goal"; a lower value means a more
# effective team
new_df["Shots per Goal"] = new_df[stat_type2] / new_df[stat_type1]

# Most effective teams (fewest shots per goal) first
order_df = new_df.sort_values(by='Shots per Goal', ascending=True)

# Set the aesthetic style of the plots
sns.set(style="whitegrid")

# Create a bar plot of shots per goal
plt.figure(figsize=(10, 6))
sns.barplot(x=order_df.index, y='Shots per Goal', data=order_df, palette='hsv')

# Set titles and labels; team names are rotated for readability
plt.title(f'Shots per Goal by Team for Season {season}', fontsize=16)
plt.xlabel('Teams', fontsize=12)
plt.xticks(rotation=90)
plt.ylabel('Shots per Goal', fontsize=12)

# Show the plot
plt.tight_layout()
plt.show()


### Most red cards in specific league

In [None]:
# Scatter-plot the red cards per team, one figure per season, for the
# selected division(s).
#
# stat_type must be a name accepted by statsPerSeason:
#   "goals", "halftime_goals", "goals_conceded", "total_shots",
#   "on_target_shots", "corners", "fouls_commited", "yellow_cards", "red_cards"
#
# Leagues are selected by division code:
#   E0 = Premier League, E1 = Championship League, E2 = League 1,
#   E3 = League 2, EC = National League
# (use leagues = ['E0', 'E1', 'E2', 'E3', 'EC'] to compare all divisions)

stat_type = 'red_cards'
# "2023/2024" down to "2009/2010", newest season first
seasons = [f"{start}/{start + 1}" for start in range(2023, 2008, -1)]
leagues = ['E0']

for season in seasons:
    # Statistic table of every selected league for this season, keyed by code
    results = {}
    for league in leagues:
        results[league] = statsPerSeason(season, league, stat_type)

    # Stack the per-league tables, tagging each row with its league code
    league_frames = []
    for league in leagues:
        table = results[league]
        league_frames.append(
            pd.DataFrame({'Team': table['Team'], stat_type: table[stat_type], 'League': league})
        )
    min_max_df = pd.concat(league_frames, ignore_index=True)

    # One scatter series per league, in order of first appearance
    plt.figure(figsize=(10, 6))
    for league, league_data in min_max_df.groupby('League', sort=False):
        plt.scatter(league_data['Team'], league_data[stat_type], label=league)

    plt.title(f'{stat_type} Comparison between Leagues - {season}')
    plt.xlabel('Teams', fontsize=8)
    plt.ylabel(stat_type)
    plt.xticks(rotation=90)
    plt.legend()
    plt.grid()
    plt.tight_layout()
    plt.show()

### Most goals across all 15 EPL seasons

In [None]:
# Sum one statistic per team over every listed season and plot the totals.
#
# stat_type must be a name accepted by statsPerSeason:
#   "goals", "halftime_goals", "goals_conceded", "total_shots",
#   "on_target_shots", "corners", "fouls_commited", "yellow_cards", "red_cards"

# List of seasons and leagues to analyze
stat_type = 'goals'
seasons = [
    "2023/2024",
    "2022/2023",
    "2021/2022",
    "2020/2021",
    "2019/2020",
    "2018/2019",
    "2017/2018",
    "2016/2017",
    "2015/2016",
    "2014/2015",
    "2013/2014",
    "2012/2013",
    "2011/2012",
    "2010/2011",
    "2009/2010"
    ]
leagues = ['E0']

# Per-season tables are collected in a list and concatenated once at the end,
# rather than re-copying a growing DataFrame on every iteration
season_frames = []

# Loop through each season to accumulate the statistic
for season in seasons:
    # Calculate stats for each league in the season
    results = {league: statsPerSeason(season, league, stat_type) for league in leagues}

    # Combine DataFrames for each league
    season_df = pd.concat(
        [pd.DataFrame({'Team': results[league]['Team'], stat_type: results[league][stat_type], 'League': league}) for league in leagues],
        ignore_index=True
    )

    season_frames.append(season_df)

total_goals = pd.concat(season_frames, ignore_index=True)

# Sum the statistic by team across all seasons
total_goals_summary = total_goals.groupby('Team')[stat_type].sum().reset_index()

# Sort on stat_type (not a hard-coded 'goals') so the cell keeps working when
# a different statistic is selected above
total_goals_summary_order = total_goals_summary.sort_values(by=stat_type, ascending=False)

# Plot the totals for all seasons
plt.figure(figsize=(10, 6))
plt.bar(total_goals_summary_order['Team'], total_goals_summary_order[stat_type], color='orange')
plt.title(f'Total {stat_type.capitalize()} Across All Seasons')
plt.xlabel('Teams')
plt.ylabel(f'Total {stat_type.capitalize()}')
plt.xticks(rotation=90)
plt.grid(axis='y')
plt.tight_layout()
plt.show()

### Points compared to goals

In [None]:
# Parameters
seasons = [
    "2023/2024",
    # "2022/2023",
    # "2021/2022",
    # "2020/2021",
    # "2019/2020"
    ]
league = "E0"
stat_type = "goals"

# One DataFrame per season holding Team, Goals, Points and Season
combined_data = []

# Loop through each season to gather data
for season in seasons:
    # Retrieve goals data
    results_goals = statsPerSeason(season, league, stat_type)
    goals_df = pd.DataFrame({'Team': results_goals['Team'], 'Goals': results_goals[stat_type]})

    # Retrieve points data
    results_points = pointsPerSeason(season, league)
    points_df = pd.DataFrame({'Team': results_points['Team'], 'Points': results_points['Points']})

    # Merge the two DataFrames on Team
    combined_data_df = pd.merge(goals_df, points_df, on='Team', how='inner')
    combined_data_df['Season'] = season  # Add a season column for reference

    combined_data.append(combined_data_df)

# Concatenate all seasons' data into one DataFrame
all_seasons_df = pd.concat(combined_data, ignore_index=True)

# Pivot so that each team is a row and each (value, season) pair a column
pivoted_df = all_seasons_df.pivot(index='Team', columns='Season', values=['Goals', 'Points'])

# pivot() orders the Season columns ascending, whereas the tick labels below
# follow the order of `seasons`. Reorder the columns to match `seasons` so
# every bar is drawn above the season it belongs to.
goals_by_season = pivoted_df['Goals'].reindex(columns=seasons)
points_by_season = pivoted_df['Points'].reindex(columns=seasons)

# Plotting
teams = pivoted_df.index
x = np.arange(len(seasons))  # One x position per season
width = 0.35  # Total width shared by all teams' bars of one kind
bar_width = width / len(teams)

fig, ax = plt.subplots(figsize=(12, 8))

# Plot goals and points for each team
for i, team in enumerate(teams):
    # Shift each team's bars so they sit side by side within a season
    offset = i * bar_width
    ax.bar(x - width/2 + offset, goals_by_season.loc[team],
           bar_width, label=f'{team} Goals')
    ax.bar(x + width/2 + offset, points_by_season.loc[team],
           bar_width, label=f'{team} Points')

# Labels and title
ax.set_xlabel('Season')
ax.set_ylabel('Values')
ax.set_title('Goals and Points by Season for Each Team')
ax.set_xticks(x)
ax.set_xticklabels(seasons, rotation=45)
ax.legend(title="Teams")

plt.tight_layout()
plt.show()

### How much does a point cost a team?

In [None]:
from payrollData import data

# Disable scientific notation
pd.options.display.float_format = '{:.2f}'.format

season = '2023/2024'
league = 'E0'

# Convert payroll to DataFrame
payroll_df = pd.DataFrame(data)

# Filter data for the specified season. The explicit copy makes the filtered
# frame independent, so the column assignment below does not write into a
# slice of the original (which triggers pandas' SettingWithCopyWarning).
payroll_df = payroll_df[payroll_df['season'] == season].copy()

# Convert columns to float
payroll_df[['weekly_gross_eur', 'annual_gross_eur']] = payroll_df[['weekly_gross_eur', 'annual_gross_eur']].astype(float)

# Get points data for season
points_season_df = pointsPerSeason(season, league)

# Merge the two DataFrames on the team/club names.
# NOTE(review): this is an inner merge, so a club whose name is spelled
# differently in the two datasets is dropped without warning - verify.
pointscost_df = pd.merge(points_season_df, payroll_df, left_on='Team', right_on='club')

# Calculate the cost per point
pointscost_df['Cost_per_Point'] = pointscost_df['annual_gross_eur'] / pointscost_df['Points']

# Cheapest point first
pointscost_df = pointscost_df.sort_values(by="Cost_per_Point", ascending=True)

# # Display the result
# print(pointscost_df[['Team', 'annual_gross_eur', 'Points', 'Cost_per_Point']])

# Plotting
plt.figure(figsize=(12, 8))
plt.barh(pointscost_df['Team'], pointscost_df['Cost_per_Point'], color='red')

# Add titles and labels
plt.title(f'Cost per Point for Each Team in Season {season}')
plt.xlabel('Cost per Point (EUR)')
plt.ylabel('Team')

# Format the x-axis to show currency in a readable format (optional)
plt.gca().xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'€{x:,.0f}'))

# Display the plot
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.show()


### How much does a goal cost?

In [None]:
from payrollData import data

# Disable scientific notation
pd.options.display.float_format = '{:.2f}'.format

season = '2022/2023'
league = 'E0'
stat_type = 'goals'

# Convert payroll to DataFrame
payroll_df = pd.DataFrame(data)

# Filter data for the specified season. The explicit copy makes the filtered
# frame independent, so the column assignment below does not write into a
# slice of the original (which triggers pandas' SettingWithCopyWarning).
payroll_df = payroll_df[payroll_df['season'] == season].copy()

# Convert columns to float
payroll_df[['weekly_gross_eur', 'annual_gross_eur']] = payroll_df[['weekly_gross_eur', 'annual_gross_eur']].astype(float)

# Get goals data for season
goals_season_df = statsPerSeason(season, league, stat_type)

# Merge the two DataFrames on the team/club names.
# NOTE(review): this is an inner merge, so a club whose name is spelled
# differently in the two datasets is dropped without warning - verify.
goalscost_df = pd.merge(goals_season_df, payroll_df, left_on='Team', right_on='club')

# Calculate the cost per goal
goalscost_df['Cost_per_Goal'] = goalscost_df['annual_gross_eur'] / goalscost_df['goals']

# Cheapest goal first
goalscost_df = goalscost_df.sort_values(by="Cost_per_Goal", ascending=True)

# # Display the result
# print(goalscost_df[['Team', 'annual_gross_eur', 'goals', 'Cost_per_Goal']])

# Plotting
plt.figure(figsize=(12, 8))
plt.barh(goalscost_df['Team'], goalscost_df['Cost_per_Goal'], color='limegreen')

# Add titles and labels
plt.title(f'Cost per Goal for Each Team in Season {season}')
plt.xlabel('Cost per Goal (EUR)')
plt.ylabel('Team')

# Format the x-axis to show currency in a readable format (optional)
plt.gca().xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'€{x:,.0f}'))

# Display the plot
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.show()
