In [1]:
import pandas as pd
import os
import janitor
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns; sns.set_style("darkgrid")
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv("https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2023-24/gws/merged_gw.csv")

In [3]:
def accumulated(df):
    # Add a column 'n' with all values set to 1
    df['n'] = 1
    
    # Group by 'name' and 'team'
    grouped = df.groupby(['name', 'team'])
    
    # Define the columns to apply the cumulative sum and then subtract the original value
    cols_to_accumulate = ['goals_scored', 'assists', 'ict_index', 'goals_conceded', 'minutes', 'own_goals', 'bps', 'clean_sheets', 'bonus']
    
    # Apply the cumulative sum and subtraction
    for col in cols_to_accumulate:
        df[col + '_cum'] = grouped[col].cumsum() - df[col]
    
    # For 'total_points', just the cumulative sum is needed
    df['total_points_cum'] = grouped['total_points'].cumsum()
    
    # Select the desired columns
    df = df[['name', 'team', 'position', 'value', 'goals_scored_cum', 'assists_cum', 'ict_index_cum', 'goals_conceded_cum', 'minutes_cum', 'own_goals_cum', 'total_points_cum', 'bps_cum', 'bonus_cum', 'clean_sheets_cum', 'GW', 'kickoff_time']]
    
    # Filter rows where 'ict_index_cum' > 0
    #df = df[df['ict_index_cum'] > 0]
    
    # Drop the temporary 'n' column
    #df.drop(columns=['n'], inplace=True)
    
    # Rename the accumulated columns back to their original names for clarity
    df.rename(columns={col + '_cum': col for col in cols_to_accumulate}, inplace=True)
    df.rename(columns={'total_points_cum': 'total_points'}, inplace=True)
    
    return df

In [4]:
cumulative_df = accumulated(data)

In [5]:
cumulative_df

Unnamed: 0,name,team,position,value,goals_scored,assists,ict_index,goals_conceded,minutes,own_goals,total_points,bps,bonus,clean_sheets,GW,kickoff_time
0,Femi Seriki,Sheffield Utd,DEF,40,0,0,0.0,0,0,0,0,0,0,0,1,2023-08-12T14:00:00Z
1,Jack Hinshelwood,Brighton,MID,45,0,0,0.0,0,0,0,0,0,0,0,1,2023-08-12T14:00:00Z
2,Jadon Sancho,Man Utd,MID,70,0,0,0.0,0,0,0,1,0,0,0,1,2023-08-14T19:00:00Z
3,Rhys Norrington-Davies,Sheffield Utd,DEF,40,0,0,0.0,0,0,0,0,0,0,0,1,2023-08-12T14:00:00Z
4,Vitaly Janelt,Brentford,MID,55,0,0,0.0,0,0,0,2,0,0,0,1,2023-08-13T13:00:00Z
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19505,Kyle Walker,Man City,DEF,55,0,3,86.2,25,2080,0,82,444,8,6,27,2024-03-03T15:30:00Z
19506,Jacob Brown,Luton,FWD,49,2,1,49.1,26,1013,0,39,115,3,1,27,2024-03-02T17:30:00Z
19507,Vicente Guaita,Crystal Palace,GK,44,0,0,0.0,0,0,0,0,0,0,0,27,2024-03-02T15:00:00Z
19508,Braian Ojeda Rodríguez,Nott'm Forest,MID,45,0,0,0.0,0,0,0,0,0,0,0,27,2024-03-02T15:00:00Z


In [6]:
cumulative_df["date"] = pd.to_datetime(cumulative_df["kickoff_time"]).dt.date
cumulative_df.index = cumulative_df["date"]
cumulative_df = cumulative_df.sort_index()
date_range = pd.date_range(start=cumulative_df.index.min(), end = cumulative_df.index.max())
cumulative_df["name_team"] = cumulative_df["name"] + "_" + cumulative_df["team"]
new_df = pd.DataFrame()
for combo in list(cumulative_df["name_team"].unique()):
    new_data = cumulative_df[cumulative_df["name_team"] == combo]
    post = new_data["position"].unique()[0]
    name, team = combo.split("_")[0], combo.split("_")[1]
    new_data_reindexed = new_data.reindex(date_range)
    new_data_reindexed["name"] = name
    new_data_reindexed["team"] = team
    new_data_reindexed["position"] = post
    new_data_reindexed["date"] = new_data_reindexed.index
    new_data_reindexed = new_data_reindexed.ffill(axis=0)
    new_data_reindexed = new_data_reindexed.dropna()
    new_data_reindexed = new_data_reindexed.reset_index(drop=True)
    
    new_df = pd.concat([new_df, new_data_reindexed], axis = 0).reset_index(drop=True)

In [7]:
latest_df = new_df[new_df["date"]==max(new_df["date"])][["name", "team", "total_points", "date", "position", "value", "GW"]].reset_index(drop=True)

In [8]:
gk = latest_df[latest_df["position"]=="GK"].sort_values("total_points", ascending=False).reset_index(drop=True).head(5)

Unnamed: 0,name,team,total_points,date,position,value,GW
0,Alphonse Areola,West Ham,97.0,2024-03-04,GK,42.0,27.0
1,Bernd Leno,Fulham,94.0,2024-03-04,GK,48.0,27.0
2,Jordan Pickford,Everton,93.0,2024-03-04,GK,46.0,27.0
3,André Onana,Man Utd,92.0,2024-03-04,GK,48.0,27.0
4,Alisson Ramses Becker,Liverpool,87.0,2024-03-04,GK,58.0,27.0


In [9]:
defenders = latest_df[latest_df["position"]=="DEF"].sort_values("total_points", ascending=False).reset_index(drop=True).head(10)

Unnamed: 0,name,team,total_points,date,position,value,GW
0,Kieran Trippier,Newcastle,108.0,2024-03-04,DEF,69.0,27.0
1,William Saliba,Arsenal,108.0,2024-03-04,DEF,57.0,27.0
2,Benjamin White,Arsenal,106.0,2024-03-04,DEF,56.0,27.0
3,Trent Alexander-Arnold,Liverpool,103.0,2024-03-04,DEF,85.0,27.0
4,Gabriel dos Santos Magalhães,Arsenal,100.0,2024-03-04,DEF,53.0,27.0
5,Virgil van Dijk,Liverpool,98.0,2024-03-04,DEF,64.0,27.0
6,Fabian Schär,Newcastle,96.0,2024-03-04,DEF,54.0,27.0
7,Pedro Porro,Spurs,87.0,2024-03-04,DEF,57.0,27.0
8,Oleksandr Zinchenko,Arsenal,85.0,2024-03-04,DEF,50.0,27.0
9,Vitalii Mykolenko,Everton,82.0,2024-03-04,DEF,46.0,27.0


In [10]:
midfielders = latest_df[latest_df["position"]=="MID"].sort_values("total_points", ascending=False).reset_index(drop=True).head(10)

Unnamed: 0,name,team,total_points,date,position,value,GW
0,Mohamed Salah,Liverpool,167.0,2024-03-04,MID,130.0,27.0
1,Bukayo Saka,Arsenal,166.0,2024-03-04,MID,92.0,27.0
2,Son Heung-min,Spurs,149.0,2024-03-04,MID,97.0,27.0
3,Phil Foden,Man City,147.0,2024-03-04,MID,81.0,27.0
4,Cole Palmer,Chelsea,128.0,2024-03-04,MID,57.0,27.0
5,Anthony Gordon,Newcastle,127.0,2024-03-04,MID,61.0,27.0
6,Jarrod Bowen,West Ham,125.0,2024-03-04,MID,78.0,27.0
7,Martin Ødegaard,Arsenal,124.0,2024-03-04,MID,85.0,27.0
8,Douglas Luiz Soares de Paulo,Aston Villa,119.0,2024-03-04,MID,56.0,27.0
9,Pascal Groß,Brighton,117.0,2024-03-04,MID,66.0,27.0


In [11]:
forwards = latest_df[latest_df["position"]=="FWD"].sort_values("total_points", ascending=False).reset_index(drop=True).head(10)

Unnamed: 0,name,team,total_points,date,position,value,GW
0,Ollie Watkins,Aston Villa,174.0,2024-03-04,FWD,89.0,27.0
1,Erling Haaland,Man City,147.0,2024-03-04,FWD,145.0,27.0
2,Dominic Solanke,Bournemouth,128.0,2024-03-04,FWD,71.0,27.0
3,Julián Álvarez,Man City,127.0,2024-03-04,FWD,66.0,27.0
4,Carlton Morris,Luton,109.0,2024-03-04,FWD,51.0,27.0
5,Matheus Santos Carneiro Da Cunha,Wolves,108.0,2024-03-04,FWD,56.0,27.0
6,Darwin Núñez Ribeiro,Liverpool,106.0,2024-03-04,FWD,76.0,27.0
7,Alexander Isak,Newcastle,87.0,2024-03-04,FWD,75.0,27.0
8,João Pedro Junqueira de Jesus,Brighton,83.0,2024-03-04,FWD,53.0,27.0
9,Elijah Adebayo,Luton,80.0,2024-03-04,FWD,49.0,27.0


In [12]:
total_df = pd.concat([gk, defenders, midfielders, forwards], axis = 0).sort_values("position", ascending=False).reset_index(drop=True)

In [13]:
total_df

Unnamed: 0,name,team,total_points,date,position,value,GW
0,Son Heung-min,Spurs,149.0,2024-03-04,MID,97.0,27.0
1,Mohamed Salah,Liverpool,167.0,2024-03-04,MID,130.0,27.0
2,Pascal Groß,Brighton,117.0,2024-03-04,MID,66.0,27.0
3,Douglas Luiz Soares de Paulo,Aston Villa,119.0,2024-03-04,MID,56.0,27.0
4,Martin Ødegaard,Arsenal,124.0,2024-03-04,MID,85.0,27.0
5,Jarrod Bowen,West Ham,125.0,2024-03-04,MID,78.0,27.0
6,Anthony Gordon,Newcastle,127.0,2024-03-04,MID,61.0,27.0
7,Cole Palmer,Chelsea,128.0,2024-03-04,MID,57.0,27.0
8,Phil Foden,Man City,147.0,2024-03-04,MID,81.0,27.0
9,Bukayo Saka,Arsenal,166.0,2024-03-04,MID,92.0,27.0


In [14]:
max(total_df["GW"])

27.0