In [1]:
# Make the project root the working directory so the `src` package and the
# `conf/` directory used by later cells can be resolved.
import os
import warnings

# The original unconditional os.chdir('..') moved one level further up every
# time this cell was re-run. Only step up when the `src` package is not
# already visible from the current directory, so the cell is idempotent.
# NOTE(review): presumes the notebook sits one level below the project root.
if not os.path.isdir('src'):
    os.chdir('..')

# Suppress all warnings to keep cell output readable.
# NOTE(review): this also hides deprecation warnings - consider narrowing it.
warnings.filterwarnings('ignore')

In [2]:
## Summary
# In general, avoid 5.0 mids from teams that have just been promoted; get a 5.0 mid from another team instead
# Otherwise, there's no statistically significant difference at other price points or positions, by different team strength
# Championship goals and assists do not always translate to FPL points the next season
# Patrick Bamford had a good season in 2020/21

# Note this isn't looking only at players who played in the Championship the season before. It includes new signings that season.

# This analysis covers
# Overview of price points by position (link to t-test ELI5/YouTube video)
# Player overview? scatter plot value vs points 
# 5.0 Midfielders Detail -- Box plot this
# Top Performances in Promoted Teams (the outliers)
# Championship goals and assists vs FPL Points

# Other things I assessed 
# Experimented with different thresholds for the definition of games played (mins/gws)
# Built models to predict scores to see if the Championship flag was an important/significant feature. Similar results to the t-tests.

# We're only looking at players in promoted teams.
# Not players who moved from promoted teams to other clubs, as that can be influenced by the strength of the club they are going to.
# E.g. When Tottenham signed Bale etc. This would be a separate piece of analysis assessing the impact of signings
# from different leagues, e.g. "Bundesliga transfers may perform better than La Liga transfers at the different values". It's on my to-do list.


In [3]:
from src.data_prep.join_data import load_combine_fpl_data
from src.analysis.ttest import perform_ttest_on_df, format_result

from src.tools.yaml_loader import load_yaml_file

# Load parameters
# The path is relative, so it is resolved against the current working
# directory (which the first cell changes with os.chdir).
file_path = "conf/parameters.yaml"
parameters = load_yaml_file(file_path)
# Threshold compared against the "count_gws_min_minutes" column further down
# to decide which players are kept in the analysis.
number_gameweeks_played_min = parameters["number_gameweeks_played_min"]

In [10]:
# Season labels in "YYYY-YY" form, from 2016-17 through 2023-24 inclusive.
season_years = [
    f"{start_year}-{(start_year + 1) % 100:02d}"
    for start_year in range(2016, 2024)
]

# Load and save data
# NOTE(review): export_csv=True presumably makes the loader write a CSV as a
# side effect - confirm against load_combine_fpl_data.
df = load_combine_fpl_data(season_years=season_years, export_csv=True)

# Keep only players whose qualifying-gameweek count is at least the configured minimum
df = df[df["count_gws_min_minutes"] >= number_gameweeks_played_min]

# Drop Cole Palmer anomaly season
# Filter with a boolean mask instead of df.drop(<index labels>): drop() removes
# every row carrying a matching label, so if the combined multi-season frame
# has repeated index labels it would silently discard unrelated players too.
is_palmer_anomaly = (df["name"] == "Cole Palmer") & (df["season"] == '2023-24')
df = df[~is_palmer_anomaly]

# Tunable thresholds for the significance analysis, declared together.
# NOTE(review): exact semantics are defined inside src.analysis.ttest - confirm there.
team_strength_threshold = 3
sample_size_threshold = 20

# Run the t-tests on the filtered frame and pass the raw output straight to
# the formatter, which applies the sample size filter and is asked to export a CSV.
result_df = format_result(
    result_df=perform_ttest_on_df(
        df,
        team_strength_threshold=team_strength_threshold,
    ),
    sample_size_threshold=sample_size_threshold,
    export_csv=True,
)


In [11]:
# Display the formatted t-test summary table (a bare last expression triggers rich display).
result_df

Unnamed: 0,Position,Value,Avg. Score Promoted,Avg. Score Not Promoted,Difference,Statistically Significant,Num. Players Promoted,Num. Players Not Promoted,T-Test,P-Value
13,GK,45,115.2,112.7,2.5,No,19,38,0.28,0.781
10,DEF,45,81.1,80.3,0.8,No,73,161,0.21,0.831
11,DEF,50,82.2,88.4,-6.2,No,6,98,-0.36,0.73
2,MID,45,81.0,76.4,4.6,No,17,37,0.75,0.461
4,MID,50,78.0,86.5,-8.6,Yes,24,90,-2.02,0.049
1,MID,55,97.2,100.1,-2.9,No,24,81,-0.43,0.668
0,MID,60,109.0,110.0,-1.0,No,7,43,-0.08,0.939
3,MID,65,116.5,125.9,-9.4,No,2,30,-0.4,0.749
5,FWD,60,108.9,118.1,-9.2,No,8,18,-0.76,0.458
8,FWD,65,121.6,119.3,2.2,No,7,18,0.25,0.805
