# Build the league table from the match results using pandas

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Download the match results CSV and load it into a pandas dataframe
MATCH_DATA_URL = "https://raw.githubusercontent.com/MarkWilcock/CourseDatasets/main/Football/EPL%20Data%2020-21.csv"
df_match = pd.read_csv(filepath_or_buffer=MATCH_DATA_URL)
df_match.head()

In [None]:
# Shared column names for a result seen from one team's point of view
# (GF = goals for, GA = goals against)
result_col_list = ('Team', 'Opposing Team', 'GF', 'GA')

# Each match generates two results - one for the home team and one for the away team.
# Home perspective: the team is HomeTeam, so FTHG maps to GF and FTAG maps to GA.
home_cols_list = ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']
df_home = df_match[home_cols_list].rename(
    columns=dict(zip(home_cols_list, result_col_list))
)
df_home.head()


In [None]:

# Away perspective: the same four match columns, listed from the away team's side,
# e.g. for the away team, Goals For (GF) is the number of away goals (FTAG).
away_cols_list = ['AwayTeam', 'HomeTeam', 'FTAG', 'FTHG']
# Map them positionally onto result_col_list: ('Team', 'Opposing Team', 'GF', 'GA')
df_away = df_match[away_cols_list].rename(
    columns=dict(zip(away_cols_list, result_col_list))
)
df_away.head()

In [None]:
# Stack the two perspectives so that every match contributes two rows:
# one result for the home team and one for the away team.
# (pd.concat is used because DataFrame.append was removed in pandas 2.0.)
df_all = pd.concat(objs=[df_home, df_away], axis=0)
df_all.head()

In [None]:
# Flag each result from the team's point of view: 1 if the outcome applies, else 0.
# Exactly one of the three flags is 1 for any result with valid goal counts.
df_all['won'] = np.where(df_all['GF'].gt(df_all['GA']), 1, 0)
df_all['drawn'] = np.where(df_all['GF'].eq(df_all['GA']), 1, 0)
df_all['lost'] = np.where(df_all['GF'].lt(df_all['GA']), 1, 0)
df_all.head()

In [None]:
# Group by team to get the league table: per-team totals of goals for / against
# and of matches won / drawn / lost.
# numeric_only=True keeps the text column 'Opposing Team' out of the aggregation.
# Since pandas 2.0, sum() no longer drops non-numeric columns by default, so
# without it the opponents' names would be concatenated into a meaningless column.
df_league = df_all.groupby(['Team']).sum(numeric_only=True)
df_league.head()

In [None]:

# Every result is exactly one of win / draw / loss (each flagged 1 or 0),
# so adding the three totals gives the number of matches played
df_league['played'] = df_league['won'].add(df_league['drawn']).add(df_league['lost'])

# Teams get 3 points for a win, 1 for a draw and no points for a loss
df_league['points'] = df_league['won'].mul(3).add(df_league['drawn'])

# The goal difference (GD) is goals for minus goals against
df_league['GD'] = df_league['GF'].sub(df_league['GA'])

df_league.head()


In [None]:
# Rank the teams: most points first, ties broken by goal difference (GD) and
# then by goals scored (GF) - all high to low.
# Without the GF tie-breaker, teams level on both points and GD would simply
# keep whatever relative order they had before the sort.
df_league = df_league.sort_values(by=['points', 'GD', 'GF'], ascending=False)

# Now that the league table is in the proper order, assign positions 1..n
df_league['position'] = np.arange(1, len(df_league) + 1)

# Put the columns in the usual expected order
df_league = df_league.loc[:, ['position', 'played', 'won', 'drawn', 'lost', 'GF', 'GA', 'GD', 'points']]

df_league.head(3)