## FPL Draft 2022-2023 Analysis

Discover why this season is not going as well as previous seasons

In [46]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [2]:
# Local SQLite database that holds the draft league tables.
DB_URL = 'sqlite:///fpl-draft-db.db'

# One engine and one open connection, shared by every read in this notebook.
engine = create_engine(DB_URL)
conn = engine.connect()

In [3]:
# Load every table used by the analysis, in the order the names are unpacked below.
TABLE_NAMES = ('draft_player_info', 'fantasy_player_info', 'player_picks', 'deadlines', 'player_stats')
(draft_player_info, fantasy_player_info, player_picks, deadlines, player_stats) = (
    pd.read_sql_table(table_name, conn) for table_name in TABLE_NAMES
)

# player names and entry ids
players = {
    15606: 'Nicolaj',
    24788: 'Jesus',
    42118: 'Kris',
    154393: 'Mattia',
    16133: 'Ollie',
}

In [4]:
# merge dataframes. code reused from dashboard.py

# match ids in draft player info with fantasy player info
# The two tables are joined on the name, element_type and team columns; the two
# `id` columns that survive the merge (id_x / id_y) become draft_id / fpl_id.
draft_player_info = draft_player_info[['id','first_name','second_name','web_name','draft_rank','element_type', 'team']]
fantasy_player_info = fantasy_player_info[['id','first_name','second_name','web_name','element_type', 'team']]
player_info = draft_player_info.merge(fantasy_player_info, on=['first_name','second_name','web_name','element_type','team'])
player_info = player_info.rename(columns={'id_x':'draft_id','id_y':'fpl_id'})

# player stat dtypes: every object-typed stats column is cast to float
convert_dict = {col:'float' for col in player_stats.select_dtypes('object').columns.to_list()}
player_stats = player_stats.astype(convert_dict)

# merge player picks with player info, stats and deadlines
# (inner joins: picks without a matching player, stats row or deadline are dropped)
picks_detailed = player_picks.merge(player_info, left_on='element', right_on='draft_id')
picks_detailed = picks_detailed.merge(player_stats, left_on=['fpl_id','gw'], right_on=['id', 'gw']).drop(columns=['id'])
picks_detailed = picks_detailed.merge(deadlines[['deadline_time','month','id']], left_on=['gw'], right_on=['id']).drop(columns=['id'])

# overall table
# only include points if they played
ppm = picks_detailed.loc[picks_detailed.played==True, ['team_id','gw','month','stats.total_points']]
# Select the points column explicitly. A positional argument to GroupBy.sum() is
# `numeric_only`, so passing the column name there only worked by accident.
ppm = ppm.groupby(['team_id','gw','month'])['stats.total_points'].sum().reset_index()

# running season total per team, and the rank of that total within each gameweek
# (rows are ordered by team_id, gw after the groupby above, so cumsum runs in gameweek order)
ppm['overall_points'] = ppm.groupby('team_id')['stats.total_points'].cumsum()
ppm['overall_rank'] = ppm.groupby('gw')['overall_points'].rank(method='first', ascending=False).astype(int)

# running total within each month, and the total reached at the end of that month
ppm['cum_pts_month'] = ppm.groupby(['team_id','month'])['stats.total_points'].cumsum()
ppm['eom_pts'] = ppm.groupby(['team_id','month'])['cum_pts_month'].transform('max')
# rank of the month-to-date total within each gameweek; sorting by overall_rank first
# means method='first' breaks ties in favour of the better overall rank
ppm['monthly_rank'] = ppm.sort_values(by=['overall_rank']).groupby(['month','gw'])['cum_pts_month'].rank(method='first', ascending=False).astype(int)

# top scorer of the month so far, per team and gameweek
# cumsum depends on row order, so make the gameweek order explicit (stable sort keeps
# the existing order within a gameweek).
# NOTE(review): this uses every pick row, including rows where `played` is False —
# confirm that bench players are meant to be eligible as month_top_scorer.
player_ppm = picks_detailed.sort_values(by='gw', kind='stable')
player_ppm['player_cum_pts'] = player_ppm.groupby(['team_id', 'month', 'element'])['stats.total_points'].cumsum()
# highest cumulative scorer first (lower draft_rank wins ties), then keep one row per team and gameweek
player_ppm = player_ppm.sort_values(by=['player_cum_pts','draft_rank'], ascending=[False,True])\
    .drop_duplicates(subset=['team_id','gw'])\
    .rename(columns={'web_name':'month_top_scorer'})

overall_table = ppm.merge(player_ppm[['month_top_scorer', 'gw', 'team_id','player_cum_pts']], on=['gw','team_id'],how='left')

## Best and worst gameweeks of the season

In [5]:
# Swap the numeric entry ids for manager names so the tables below are readable.
best_gw = overall_table.replace({'team_id': players})

# Five highest-scoring team gameweeks across the whole league.
(
    best_gw
    .sort_values(by=['stats.total_points'], ascending=False)
    .head()
)

Unnamed: 0,team_id,gw,month,stats.total_points,overall_points,overall_rank,cum_pts_month,eom_pts,monthly_rank,month_top_scorer,player_cum_pts
24,Nicolaj,25,February,101,1153,2,243,243,1,Martinelli,38
28,Nicolaj,29,April,95,1363,2,95,177,1,Wilson,19
121,Kris,29,April,83,1386,1,83,172,2,Toney,11
49,Ollie,19,January,77,899,1,77,166,1,Kane,16
90,Jesus,29,April,76,1245,4,76,163,3,De Bruyne,13


In [6]:
# Five lowest-scoring team gameweeks, ignoring gameweeks with a total of exactly 0.
scored_something = best_gw['stats.total_points'].ne(0)
best_gw.loc[scored_something].sort_values(by=['stats.total_points'], ascending=True).head()

Unnamed: 0,team_id,gw,month,stats.total_points,overall_points,overall_rank,cum_pts_month,eom_pts,monthly_rank,month_top_scorer,player_cum_pts
1,Nicolaj,2,August,19,71,5,71,201,5,Salah,14
66,Jesus,5,August,23,217,4,217,217,4,Jesus,32
134,Mattia,11,October,23,432,4,122,210,4,Foden,32
141,Mattia,18,December,23,645,5,53,53,5,Almirón,11
136,Mattia,13,October,24,495,4,185,210,4,Foden,33


In [7]:
# My own five highest-scoring gameweeks.
my_best = (
    best_gw.loc[best_gw['team_id'] == 'Nicolaj']
    .sort_values(by=['stats.total_points'], ascending=False)
    .head()
)
my_best

Unnamed: 0,team_id,gw,month,stats.total_points,overall_points,overall_rank,cum_pts_month,eom_pts,monthly_rank,month_top_scorer,player_cum_pts
24,Nicolaj,25,February,101,1153,2,243,243,1,Martinelli,38
28,Nicolaj,29,April,95,1363,2,95,177,1,Wilson,19
19,Nicolaj,20,January,63,861,3,97,146,3,Fernandes,25
16,Nicolaj,17,December,60,728,3,60,96,2,Salah,12
7,Nicolaj,8,September,60,294,5,93,93,1,Saka,16


In [8]:
# All of my gameweeks, lowest score first.
my_worst = (
    best_gw.loc[best_gw['team_id'] == 'Nicolaj']
    .sort_values(by=['stats.total_points'], ascending=True)
)
# Show the five lowest, ignoring gameweeks with a total of exactly 0.
my_worst.loc[my_worst['stats.total_points'].ne(0)].head()

Unnamed: 0,team_id,gw,month,stats.total_points,overall_points,overall_rank,cum_pts_month,eom_pts,monthly_rank,month_top_scorer,player_cum_pts
1,Nicolaj,2,August,19,71,5,71,201,5,Salah,14
27,Nicolaj,28,March,27,1268,3,115,115,5,Mitoma,25
25,Nicolaj,26,March,32,1185,3,32,115,4,Mitoma,13
5,Nicolaj,6,September,33,234,5,33,93,5,Haaland,9
18,Nicolaj,19,January,34,798,3,34,146,5,Pope,10


I've had the best gameweeks of the season with 101 points in gw25 and 95 in gw29.

I've also had the worst gameweek of the season with 19 points in gw2

In [93]:
# compare expected vs actual

# Actual / expected stat pairs summed in this cell; `cols` is reused by the cells below.
cols = [
    'stats.goals_scored',
    'stats.expected_goals',
    'stats.assists',
    'stats.expected_assists',
    'stats.goals_conceded',
    'stats.expected_goals_conceded',
]

# My picks (entry id 15606) in gameweek 25.
is_gw25 = picks_detailed['gw'].eq(25)
is_my_team = picks_detailed['team_id'].eq(15606)
gw25 = picks_detailed.loc[is_gw25 & is_my_team]
gw25[cols].sum()

stats.goals_scored                5.00
stats.expected_goals              3.29
stats.assists                     1.00
stats.expected_assists            1.56
stats.goals_conceded              8.00
stats.expected_goals_conceded    13.53
dtype: float64

In [10]:
# My picks (entry id 15606) in gameweek 29, summed over the same stat columns.
# NOTE(review): the saved output of this cell is identical to the gw25 output above —
# re-run to confirm it is not stale.
is_gw29 = picks_detailed['gw'].eq(29)
is_my_team = picks_detailed['team_id'].eq(15606)
gw29 = picks_detailed.loc[is_gw29 & is_my_team]
gw29[cols].sum()

stats.goals_scored                5.00
stats.expected_goals              3.29
stats.assists                     1.00
stats.expected_assists            1.56
stats.goals_conceded              8.00
stats.expected_goals_conceded    13.53
dtype: float64

In [96]:
# My picks (entry id 15606) in gameweek 2, summed over the same stat columns.
is_gw2 = picks_detailed['gw'].eq(2)
is_my_team = picks_detailed['team_id'].eq(15606)
gw2 = picks_detailed.loc[is_gw2 & is_my_team]
gw2[cols].sum()

stats.goals_scored                0.00
stats.expected_goals              1.17
stats.assists                     0.00
stats.expected_assists            1.45
stats.goals_conceded             16.00
stats.expected_goals_conceded    10.43
dtype: float64

The best gameweeks came in double gameweeks where the team outperformed its expected goals (5 scored vs 3.29 xG) and its expected goals conceded (8 conceded vs 13.53 xGC), although assists (1) fell short of expected assists (1.56).

The worst gameweek came at the start of the season, when the team underperformed on all of the same metrics.

In [67]:
# plot xg, xa, xgc vs actual

# Sum every numeric stat per team and gameweek.
# NOTE(review): this sums all pick rows, including rows where `played` is False —
# confirm that bench players are meant to count towards the team totals.
plot_df = picks_detailed.groupby(['team_id','gw']).sum(numeric_only=True).reset_index()

# Actual minus expected: positive means the team got more than the underlying numbers suggest.
plot_df['goal_plus_minus'] = plot_df['stats.goals_scored'] - plot_df['stats.expected_goals']
plot_df['assist_plus_minus'] = plot_df['stats.assists'] - plot_df['stats.expected_assists']
plot_df['gi_plus_minus'] = (plot_df['stats.goals_scored'] + plot_df['stats.assists']) - plot_df['stats.expected_goal_involvements']
# Flipped sign (expected minus actual) so that conceding fewer than expected is also positive.
plot_df['gc_plus_minus'] = plot_df['stats.expected_goals_conceded'] - plot_df['stats.goals_conceded']

# Show manager names instead of entry ids in the legend.
plot_df = plot_df.replace({'team_id':players})

# Title and axis labels so the figure can be read on its own.
fig = px.line(
    plot_df,
    x="gw",
    y="gi_plus_minus",
    color='team_id',
    width=900,
    height=500,
    markers=True,
    title="Goal involvements: actual minus expected, per gameweek",
    labels={
        "gw": "Gameweek",
        "gi_plus_minus": "Goals + assists minus expected goal involvements",
        "team_id": "Manager",
    },
)
fig.show()

The lines fluctuate around zero, indicating that teams aren't getting more or fewer goal involvements than they should. Overperformance here would suggest the team was scoring some fluke goals, while underperformance might suggest that the team is missing easy chances.

## Points on the bench

In [25]:
# Total every numeric stat per team over the picks flagged as not played.
# (`== False` is kept as an explicit comparison; the dtype of `played` is not shown here.)
not_played = picks_detailed.loc[picks_detailed['played'] == False]
bench_points = (
    not_played
    .groupby('team_id')
    .sum(numeric_only=True)
    .reset_index()
    .replace({'team_id': players})
)

# Managers ordered by points scored by picks that did not play, highest first.
bench_points[['team_id', 'stats.total_points']].sort_values(by='stats.total_points', ascending=False)

Unnamed: 0,team_id,stats.total_points
0,Nicolaj,347
2,Jesus,259
3,Kris,256
1,Ollie,201
4,Mattia,160


In [28]:
# Total every numeric stat per team over the picks flagged as substituted in.
subbed_in = picks_detailed.loc[picks_detailed['sub_in'] == True]
pts_subbed_on = (
    subbed_in
    .groupby('team_id')
    .sum(numeric_only=True)
    .reset_index()
    .replace({'team_id': players})
)

# Managers ordered by points gained from substitutes, highest first.
pts_subbed_on[['team_id', 'stats.total_points']].sort_values(by='stats.total_points', ascending=False)

Unnamed: 0,team_id,stats.total_points
3,Kris,87
4,Mattia,76
1,Ollie,41
0,Nicolaj,35
2,Jesus,33


In [61]:
# League-wide mean of each per-manager total: bench points first, then points subbed on.
# numeric_only=True drops the team_id name column from the mean.
for per_manager_totals in (bench_points, pts_subbed_on):
    print(per_manager_totals[['team_id', 'stats.total_points']].mean(numeric_only=True))

stats.total_points    244.6
dtype: float64
stats.total_points    54.4
dtype: float64


Having the most points left on the bench and the second-fewest points subbed on seems to be where my performance has suffered so far this season.

My points left on the bench (347) are about 102 points above the league average of 244.6 (an average that includes my own total). With an average score here, I would be comfortably in first place.

My points subbed on (35) are about 19 points below the league average of 54.4. With an average score, my overall position would not change but I would move within 10 points of first place.

In [99]:
# My picks only (entry id 15606).
my_picks = picks_detailed.loc[picks_detailed['team_id'] == 15606]

# Keep rows that either did not play and were not subbed out, or were subbed in.
unused_and_not_subbed_out = (my_picks['played'] == False) & (my_picks['sub_out'] == False)
came_on = my_picks['sub_in'] == True
my_bench = (
    my_picks.loc[unused_and_not_subbed_out | came_on]
    .sort_values(by='stats.total_points', ascending=False)
)

# How often each player appears in that set with a score of 4 or more.
my_bench.loc[my_bench['stats.total_points'] >= 4, 'web_name'].value_counts()

Pope                8
Saliba              4
Arrizabalaga        3
Raya                3
Wilson              3
Castagne            2
Jesus               2
Alexander-Arnold    2
Robertson           2
Trossard            2
Nketiah             1
Coady               1
Havertz             1
Botman              1
Targett             1
Antony              1
Álvarez             1
João Félix          1
Sancho              1
Bowen               1
Name: web_name, dtype: int64

In [97]:
# Same 4+ point rows as above, counted by the player's team id instead of by name.
scored_four_plus = my_bench['stats.total_points'].ge(4)
my_bench.loc[scored_four_plus, 'team'].value_counts()

15    13
1      9
6      5
12     4
4      3
14     2
10     2
13     1
8      1
19     1
Name: team, dtype: int64

Pope is the player I have benched most often when he scored 4+ points. I have benched a Newcastle player 30 times this season, with 13 of those being 4+ point games. This may reflect some bias from previous seasons, despite Newcastle doing well now under new ownership. However, other goalkeepers such as Arrizabalaga and Raya also feature multiple times on the list, so I may need to re-evaluate my choice of starting keeper.

I've also benched Arsenal players 19 times this season (nine of them 4+ point games). Arsenal are another team that was poor in previous seasons but is doing well this season. This number is a little inflated due to Trossard's move to Arsenal from Brighton.