In [149]:
# https://preppindata.blogspot.com/2021/04/2021-week-16-super-league.html

import pandas as pd
import numpy as np


### Input the data

In [150]:
# Load the season's fixture list.
# The path uses a forward slash so it resolves on Windows and on POSIX
# systems alike; a backslash separator is only understood on Windows.
df = pd.read_csv('data/PD 2021 Wk 16 Input.csv')
df

Unnamed: 0,Round Number,Date,Location,Home Team,Away Team,Result
0,1,12/09/2020 12:30,Craven Cottage,Fulham,Arsenal,0 - 3
1,1,12/09/2020 15:00,Selhurst Park,Crystal Palace,Southampton,1 - 0
2,1,12/09/2020 17:30,Anfield,Liverpool,Leeds,4 - 3
3,1,12/09/2020 20:00,London Stadium,West Ham,Newcastle,0 - 2
4,1,13/09/2020 14:00,The Hawthorns,West Brom,Leicester,0 - 3
...,...,...,...,...,...,...
375,38,23/05/2021 16:00,Anfield,Liverpool,Crystal Palace,
376,38,23/05/2021 16:00,Etihad Stadium,Man City,Everton,
377,38,23/05/2021 16:00,Bramall Lane,Sheffield Utd,Burnley,
378,38,23/05/2021 16:00,London Stadium,West Ham,Southampton,


### Calculate the Total Points for each team. The points are as follows: 
- Win - 3 Points
- Draw - 1 Point
- Loss - 0 Points

In [151]:
# Fixtures that have not been played yet have an empty Result; keep only the
# played matches. The check is limited to 'Result' so that a played match is
# not discarded just because some unrelated column happens to be empty.
# .copy() gives an independent frame that is safe to add columns to.
df = df.dropna(subset=['Result']).copy()

# 'Result' is formatted as "<home goals> - <away goals>", e.g. "0 - 3".
# The split yields strings; they are cast to integers further down.
df[['home result', 'away result']] = df['Result'].str.split(' - ', expand=True)
df

Unnamed: 0,Round Number,Date,Location,Home Team,Away Team,Result,home result,away result
0,1,12/09/2020 12:30,Craven Cottage,Fulham,Arsenal,0 - 3,0,3
1,1,12/09/2020 15:00,Selhurst Park,Crystal Palace,Southampton,1 - 0,1,0
2,1,12/09/2020 17:30,Anfield,Liverpool,Leeds,4 - 3,4,3
3,1,12/09/2020 20:00,London Stadium,West Ham,Newcastle,0 - 2,0,2
4,1,13/09/2020 14:00,The Hawthorns,West Brom,Leicester,0 - 3,0,3
...,...,...,...,...,...,...,...,...
312,32,17/04/2021 12:30,St. James' Park,Newcastle,West Ham,3 - 2,3,2
314,32,17/04/2021 20:15,Molineux Stadium,Wolves,Sheffield Utd,1 - 0,1,0
315,32,18/04/2021 13:30,Emirates Stadium,Arsenal,Fulham,1 - 1,1,1
316,32,18/04/2021 16:00,Old Trafford,Man Utd,Burnley,3 - 1,3,1


In [152]:
# The split scores are still strings at this point, and string comparison is
# lexicographic ("10" < "9"), which would award the wrong points for any
# double-digit score. Compare numeric goal counts instead.
home_goals = df['home result'].astype(int)
away_goals = df['away result'].astype(int)

# np.select picks, per row, the value paired with the first condition that
# holds. The condition order is win, draw, loss to line up with `values`.
conditions_home = [
    home_goals > away_goals,
    home_goals == away_goals,
    home_goals < away_goals
]
conditions_away = [
    away_goals > home_goals,
    away_goals == home_goals,
    away_goals < home_goals
]
values = [3, 1, 0]

df['Home Total Points'] = np.select(conditions_home, values)
df['Away Total Points'] = np.select(conditions_away, values)

df.head()

Unnamed: 0,Round Number,Date,Location,Home Team,Away Team,Result,home result,away result,Home Total Points,Away Total Points
0,1,12/09/2020 12:30,Craven Cottage,Fulham,Arsenal,0 - 3,0,3,0,3
1,1,12/09/2020 15:00,Selhurst Park,Crystal Palace,Southampton,1 - 0,1,0,3,0
2,1,12/09/2020 17:30,Anfield,Liverpool,Leeds,4 - 3,4,3,3,0
3,1,12/09/2020 20:00,London Stadium,West Ham,Newcastle,0 - 2,0,2,0,3
4,1,13/09/2020 14:00,The Hawthorns,West Brom,Leicester,0 - 3,0,3,0,3


### Calculate the goal difference for each team. Goal difference is the difference between goals scored and goals conceded

In [153]:
# Cast the split score columns to integers so they can be subtracted.
score_cols = ['home result', 'away result']
df[score_cols] = df[score_cols].astype(int)

# A side's goal difference is its own goals minus the opponent's, so the
# away figure is exactly the negated home figure.
home_margin = df['home result'] - df['away result']
df['Home Goal Difference'] = home_margin
df['Away Goal Difference'] = -home_margin
df.head()

Unnamed: 0,Round Number,Date,Location,Home Team,Away Team,Result,home result,away result,Home Total Points,Away Total Points,Home Goal Difference,Away Goal Difference
0,1,12/09/2020 12:30,Craven Cottage,Fulham,Arsenal,0 - 3,0,3,0,3,-3,3
1,1,12/09/2020 15:00,Selhurst Park,Crystal Palace,Southampton,1 - 0,1,0,3,0,1,-1
2,1,12/09/2020 17:30,Anfield,Liverpool,Leeds,4 - 3,4,3,3,0,1,-1
3,1,12/09/2020 20:00,London Stadium,West Ham,Newcastle,0 - 2,0,2,0,3,-2,2
4,1,13/09/2020 14:00,The Hawthorns,West Brom,Leicester,0 - 3,0,3,0,3,-3,3


In [154]:
# Reshape the match-level frame into one row per team per match: the home
# and away views are given identical column labels so they can be stacked.
shared_cols = ['Round Number', 'Date', 'Location']
column_name = shared_cols + ['Team', 'result', 'Total Points', 'Goal Difference']

home_cols = shared_cols + ['Home Team', 'home result', 'Home Total Points', 'Home Goal Difference']
away_cols = shared_cols + ['Away Team', 'away result', 'Away Total Points', 'Away Goal Difference']

# Map each side-specific label onto the common label in the same position.
df_home = df[home_cols].rename(columns=dict(zip(home_cols, column_name)))
df_away = df[away_cols].rename(columns=dict(zip(away_cols, column_name)))

df_team = pd.concat([df_home, df_away])
df_team

Unnamed: 0,Round Number,Date,Location,Team,result,Total Points,Goal Difference
0,1,12/09/2020 12:30,Craven Cottage,Fulham,0,0,-3
1,1,12/09/2020 15:00,Selhurst Park,Crystal Palace,1,3,1
2,1,12/09/2020 17:30,Anfield,Liverpool,4,3,1
3,1,12/09/2020 20:00,London Stadium,West Ham,0,0,-2
4,1,13/09/2020 14:00,The Hawthorns,West Brom,0,0,-3
...,...,...,...,...,...,...,...
312,32,17/04/2021 12:30,St. James' Park,West Ham,2,0,-1
314,32,17/04/2021 20:15,Molineux Stadium,Sheffield Utd,0,0,-1
315,32,18/04/2021 13:30,Emirates Stadium,Fulham,1,1,0
316,32,18/04/2021 16:00,Old Trafford,Burnley,1,0,-2


### Calculate the current rank/position of each team. This is based on Total Points (high to low) and, in the case of a tie, on Goal Difference (high to low).

In [155]:
# Aggregate the per-match rows into one row per team: matches played
# (count of 'Date'), total points, and total goal difference.
# The table is ordered by Total Points and then by Goal Difference, both high
# to low, so that teams level on points are placed by goal difference rather
# than in an arbitrary order.
df_current = (
    df_team
    .groupby('Team', as_index=False)
    .agg({'Date': 'count', 'Total Points': 'sum', 'Goal Difference': 'sum'})
    .rename(columns={'Date': 'Total Games Played'})
    .sort_values(by=['Total Points', 'Goal Difference'], ascending=False)
)
df_current

Unnamed: 0,Team,Total Games Played,Total Points,Goal Difference
11,Man City,32,74,44
12,Man Utd,32,66,29
9,Leicester,31,56,18
18,West Ham,32,55,11
4,Chelsea,31,54,19
10,Liverpool,32,53,16
16,Spurs,32,50,17
6,Everton,31,49,3
0,Arsenal,32,46,8
8,Leeds,32,46,0


### Assuming that the 'Big 6' didn't play any games this season, recalculate the league table.
- After removing the 6 clubs, how has the position changed for the remaining clubs?

In [156]:
# Look at the match-level frame again before filtering out Big 6 fixtures.
df.head(n=5)


Unnamed: 0,Round Number,Date,Location,Home Team,Away Team,Result,home result,away result,Home Total Points,Away Total Points,Home Goal Difference,Away Goal Difference
0,1,12/09/2020 12:30,Craven Cottage,Fulham,Arsenal,0 - 3,0,3,0,3,-3,3
1,1,12/09/2020 15:00,Selhurst Park,Crystal Palace,Southampton,1 - 0,1,0,3,0,1,-1
2,1,12/09/2020 17:30,Anfield,Liverpool,Leeds,4 - 3,4,3,3,0,1,-1
3,1,12/09/2020 20:00,London Stadium,West Ham,Newcastle,0 - 2,0,2,0,3,-2,2
4,1,13/09/2020 14:00,The Hawthorns,West Brom,Leicester,0 - 3,0,3,0,3,-3,3


In [157]:
Big_6 = ['Man City','Man Utd','Arsenal','Chelsea','Liverpool','Spurs']


def _add_position(table, position_column):
    """Return `table` in league order with a 1-based position column.

    League order is Total Points (high to low), with ties broken by Goal
    Difference (high to low). Positions are consecutive integers taken from
    that order, so teams level on points get distinct positions instead of
    sharing a truncated average rank. The frame's existing index is kept.
    """
    ordered = table.sort_values(by=['Total Points', 'Goal Difference'], ascending=False)
    return ordered.assign(**{position_column: np.arange(1, len(ordered) + 1)})


# Remove every match in which a Big 6 club took part, home or away.
big6_involved = df['Home Team'].isin(Big_6) | df['Away Team'].isin(Big_6)
df_noBig6 = df[~big6_involved]

# Stack the home and away views into one row per team per match.
df_home = df_noBig6[['Round Number', 'Date', 'Location', 'Home Team','home result','Home Total Points','Home Goal Difference']]
df_away = df_noBig6[['Round Number', 'Date', 'Location', 'Away Team','away result','Away Total Points','Away Goal Difference']]

column_name = ['Round Number', 'Date', 'Location', 'Team','result','Total Points','Goal Difference']
df_home.columns = column_name
df_away.columns = column_name

df_team_noBig6 = pd.concat([df_home,df_away])

# Recalculated league table without the Big 6 fixtures.
df_update = (
    df_team_noBig6
    .groupby('Team', as_index=False)
    .agg({'Date': 'count', 'Total Points': 'sum', 'Goal Difference': 'sum'})
    .rename(columns={'Date': 'Total Games Played'})
)
df_update = _add_position(df_update, 'Position')

# Position of every team in the full table, used as the comparison baseline.
df_current = _add_position(df_current, 'Position_old')

df_update = df_update.merge(df_current[['Team','Position_old']], on='Team')
# Positive value = the team sits higher once the Big 6 matches are removed.
df_update['Position Change'] = df_update['Position_old'] - df_update['Position']

df_update = df_update[['Position Change','Position','Team', 'Total Games Played', 'Total Points', 'Goal Difference']]
df_update

### Output

In [158]:
# Write both league tables to CSV: the full table first, then the table
# recalculated without the Big 6 fixtures.
for csv_path, table in (
    (r'output/2021-week16-output1_current.csv', df_current),
    (r'output/2021-week16-output2_update.csv', df_update),
):
    table.to_csv(csv_path)