In [1]:
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')

In [2]:
data = pd.read_csv("https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2023-24/gws/merged_gw.csv")

In [3]:
def accumulated(df):
    # Add a column 'n' with all values set to 1
    df['n'] = 1
    
    # Group by 'name' and 'team'
    grouped = df.groupby(['name', 'team'])
    
    # Define the columns to apply the cumulative sum and then subtract the original value
    cols_to_accumulate = ['goals_scored', 'assists', 'ict_index', 'goals_conceded', 'minutes', 'own_goals', 'bps', 'clean_sheets', 'bonus']
    
    # Apply the cumulative sum and subtraction
    for col in cols_to_accumulate:
        df[col + '_cum'] = grouped[col].cumsum() - df[col]
    
    # For 'total_points', just the cumulative sum is needed
    df['total_points_cum'] = grouped['total_points'].cumsum()
    
    # Select the desired columns
    df = df[['name', 'team', 'position', 'value', 'goals_scored_cum', 'assists_cum', 'ict_index_cum', 'goals_conceded_cum', 'minutes_cum', 'own_goals_cum', 'total_points_cum', 'bps_cum', 'bonus_cum', 'clean_sheets_cum', 'GW', 'kickoff_time']]
    
    # Filter rows where 'ict_index_cum' > 0
    #df = df[df['ict_index_cum'] > 0]
    
    # Drop the temporary 'n' column
    #df.drop(columns=['n'], inplace=True)
    
    # Rename the accumulated columns back to their original names for clarity
    df.rename(columns={col + '_cum': col for col in cols_to_accumulate}, inplace=True)
    df.rename(columns={'total_points_cum': 'total_points'}, inplace=True)
    
    return df

In [4]:
cumulative_df = accumulated(data)

In [5]:
cumulative_df

Unnamed: 0,name,team,position,value,goals_scored,assists,ict_index,goals_conceded,minutes,own_goals,total_points,bps,bonus,clean_sheets,GW,kickoff_time
0,Femi Seriki,Sheffield Utd,DEF,40,0,0,0.0,0,0,0,0,0,0,0,1,2023-08-12T14:00:00Z
1,Jack Hinshelwood,Brighton,MID,45,0,0,0.0,0,0,0,0,0,0,0,1,2023-08-12T14:00:00Z
2,Jadon Sancho,Man Utd,MID,70,0,0,0.0,0,0,0,1,0,0,0,1,2023-08-14T19:00:00Z
3,Rhys Norrington-Davies,Sheffield Utd,DEF,40,0,0,0.0,0,0,0,0,0,0,0,1,2023-08-12T14:00:00Z
4,Vitaly Janelt,Brentford,MID,55,0,0,0.0,0,0,0,2,0,0,0,1,2023-08-13T13:00:00Z
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21422,Hákon Valdimarsson,Brentford,GK,40,0,0,0.0,0,0,0,0,0,0,0,29,2024-03-16T15:00:00Z
21423,Andros Townsend,Luton,MID,50,1,4,50.1,20,835,0,51,167,3,3,29,2024-03-16T15:00:00Z
21424,Jacob Brown,Luton,FWD,49,2,1,49.1,26,1013,0,39,115,3,1,29,2024-03-16T15:00:00Z
21425,Braian Ojeda Rodríguez,Nott'm Forest,MID,45,0,0,0.0,0,0,0,0,0,0,0,29,2024-03-16T15:00:00Z


In [6]:
cumulative_df["date"] = pd.to_datetime(cumulative_df["kickoff_time"]).dt.date
cumulative_df.index = cumulative_df["date"]
cumulative_df = cumulative_df.sort_index()
date_range = pd.date_range(start=cumulative_df.index.min(), end = cumulative_df.index.max())
cumulative_df["name_team"] = cumulative_df["name"] + "_" + cumulative_df["team"]
new_df = pd.DataFrame()
for combo in list(cumulative_df["name_team"].unique()):
    new_data = cumulative_df[cumulative_df["name_team"] == combo]
    post = new_data["position"].unique()[0]
    name, team = combo.split("_")[0], combo.split("_")[1]
    new_data_reindexed = new_data.reindex(date_range)
    new_data_reindexed["name"] = name
    new_data_reindexed["team"] = team
    new_data_reindexed["position"] = post
    new_data_reindexed["date"] = new_data_reindexed.index
    new_data_reindexed = new_data_reindexed.ffill(axis=0)
    new_data_reindexed = new_data_reindexed.dropna()
    new_data_reindexed = new_data_reindexed.reset_index(drop=True)
    
    new_df = pd.concat([new_df, new_data_reindexed], axis = 0).reset_index(drop=True)

In [7]:
latest_df = new_df[new_df["date"]==max(new_df["date"])][["name", "team", "total_points", "date", "position", "value", "GW", "minutes"]].reset_index(drop=True)
latest_df["points_per_game"] = (latest_df["total_points"]/latest_df["minutes"])*90
latest_df = latest_df[latest_df["minutes"]>= max(latest_df["GW"])*(0.5*90)]

gk = latest_df[latest_df["position"]=="GK"].sort_values("points_per_game", ascending=False).reset_index(drop=True).head(5)
defenders = latest_df[latest_df["position"]=="DEF"].sort_values("points_per_game", ascending=False).reset_index(drop=True).head(15)
midfielders = latest_df[latest_df["position"]=="MID"].sort_values("points_per_game", ascending=False).reset_index(drop=True).head(15)
forwards = latest_df[latest_df["position"]=="FWD"].sort_values("points_per_game", ascending=False).reset_index(drop=True).head(15)

total_df = pd.concat([gk, defenders, midfielders, forwards], axis = 0).sort_values("position", ascending=False).reset_index(drop=True)

In [8]:
total_df

Unnamed: 0,name,team,total_points,date,position,value,GW,minutes,points_per_game
0,Richarlison de Andrade,Spurs,108.0,2024-03-17,MID,69.0,29.0,1341.0,7.248322
1,Mohamed Salah,Liverpool,168.0,2024-03-17,MID,130.0,28.0,1786.0,8.465845
2,Jarrod Bowen,West Ham,149.0,2024-03-17,MID,80.0,29.0,2427.0,5.52534
3,Mohammed Kudus,West Ham,98.0,2024-03-17,MID,68.0,29.0,1591.0,5.543683
4,Pascal Groß,Brighton,132.0,2024-03-17,MID,66.0,28.0,2122.0,5.598492
5,Scott McTominay,Man Utd,87.0,2024-03-17,MID,46.0,28.0,1374.0,5.69869
6,Hwang Hee-chan,Wolves,105.0,2024-03-17,MID,55.0,28.0,1618.0,5.840544
7,Kai Havertz,Arsenal,110.0,2024-03-17,MID,71.0,28.0,1683.0,5.882353
8,Moussa Diaby,Aston Villa,102.0,2024-03-17,MID,63.0,29.0,1519.0,6.04345
9,James Maddison,Spurs,94.0,2024-03-17,MID,80.0,29.0,1398.0,6.051502
