In [1]:
# import required packages
# from fastai2.tabular.all import *
import re
from pathlib import Path

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

from helpers import *

# show every column when a DataFrame is displayed (no truncation)
pd.set_option('display.max_columns', None)

In [2]:
# project root; all data files below are resolved relative to it
path = Path('.')

In [3]:
# load the training set; season, squad and comp are forced to str so that
# values such as season '2021' are kept as text rather than parsed as numbers
train_df = pd.read_csv(
    path/'data/train_v7.csv',
    index_col=0,
    dtype=dict.fromkeys(['season', 'squad', 'comp'], str),
)

In [4]:
# sanity check: (rows, columns) of the training set just loaded
train_df.shape

(102768, 58)

In [5]:
# model inputs: target season, feature groups and the dependent variable
season = '2021'
cat_vars = ['season', 'position', 'was_home']
cont_vars = ['gw', 'minutes']
dep_var = ['total_points']

# latest gameweek present in the training data for the target season
# (max() of an empty selection is NaN, i.e. the season has not started)
last_gw = train_df.loc[train_df['season'] == season, 'gw'].max()

# predictions start from the gameweek after the last one on record
next_gw = 1 if np.isnan(last_gw) else last_gw + 1

next_gw

22

In [6]:
# read in remaining_season.csv (fixtures still to be played) and sort
remaining_season_df = pd.read_csv(path/'data/remaining_season.csv',
                                  index_col=0).sort_values(by=['gw', 'team'])

# update fields needed for processing: tag the rows with the current season
# and give them a placeholder target of 0
remaining_season_df['season'] = season
remaining_season_df['total_points'] = 0

# need to add an ordered date-like field: match_no is offset by 9000 so the
# resulting strings ('9004-99-99', ...) compare greater than real timestamps
# such as '2020-12-19T17:30:00Z' while keeping fixture order
remaining_season_df['kickoff_time'] = [f'{9000 + x}-99-99' for x in remaining_season_df['match_no']]

# remove match_no, don't need it anymore
remaining_season_df = remaining_season_df.drop('match_no', axis=1)

# append remaining season to end of training set.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and produces the same frame.
# NOTE: this rebinds train_df, so re-running the cell without reloading
# train_df first would append the fixtures a second time.
train_df = pd.concat([train_df, remaining_season_df], sort=False).reset_index(drop=True)

In [7]:
# NOTE: superseded draft of the remaining-season cell, kept commented out for
# reference only. It stamps every future fixture with the same placeholder
# kickoff_time ('9999-99-99') and does not use match_no.
# # read in remaining_season.csv and sort
# remaining_season_df = pd.read_csv(path/'data/remaining_season.csv', 
#                                   index_col=0).sort_values(by=['gw', 'team'])

# remaining_season_df['season'] = season
# remaining_season_df['total_points'] = 0
# remaining_season_df['kickoff_time'] = '9999-99-99'

# # append remaining season to end of training set
# train_df = train_df.append(remaining_season_df, sort=False).reset_index(drop=True)

In [8]:
# add team lag features, then player lag features, for total_points
# (the lists are the look-back windows passed to the helpers)
lag_train_df, team_lag_vars = team_lag_features(train_df, ['total_points'], ['all', 3, 5, 10, 20])
lag_train_df, player_lag_vars = player_lag_features(lag_train_df, ['total_points'], ['all', 1, 2, 3, 5, 10, 20])

# for the opposition we want points *conceded* per game rather than points
# scored: drop each '..._team_pg_last_*_opponent' name and add its
# 'team_conceded' counterpart at the end of the list.
# The pattern is compiled once and reused for both steps; the renamed
# entries no longer match it ('team_conceded_pg' != 'team_pg').
pattern = re.compile('total_points_team_pg_last_.*_opponent')
opponent_conceded_vars = [x.replace('team', 'team_conceded') for x in team_lag_vars if pattern.match(x)]
team_lag_vars = [x for x in team_lag_vars if not pattern.match(x)] + opponent_conceded_vars

In [9]:
# inspect the team lag feature names after the opponent scored/conceded swap
team_lag_vars

['total_points_team_pg_last_all',
 'total_points_team_pg_last_3',
 'total_points_team_pg_last_5',
 'total_points_team_pg_last_10',
 'total_points_team_pg_last_20',
 'total_points_team_conceded_pg_last_all_opponent',
 'total_points_team_conceded_pg_last_3_opponent',
 'total_points_team_conceded_pg_last_5_opponent',
 'total_points_team_conceded_pg_last_10_opponent',
 'total_points_team_conceded_pg_last_20_opponent']

In [10]:
# we want to set gw and season as ordered categorical variables
# need lists with ordered categories
ordered_gws = list(range(1,39))
ordered_seasons = ['1617', '1718', '1819', '1920', '2021']

# set as categories with correct order.
# The `inplace=True` form of set_categories was deprecated in pandas 1.3 and
# removed in 2.0, so the returned Series is assigned back instead. Values not
# listed in the category lists become NaN, exactly as before.
lag_train_df['gw'] = (lag_train_df['gw']
                      .astype('category')
                      .cat.set_categories(ordered_gws, ordered=True))
lag_train_df['season'] = (lag_train_df['season']
                          .astype('category')
                          .cat.set_categories(ordered_seasons, ordered=True))

In [11]:
# spot-check the lag features for one player: his 25 most recent rows
lag_train_df.loc[lag_train_df['player'].eq('Bernd Leno')].tail(25)

Unnamed: 0,player,gw,position,minutes,team,opponent_team,relative_market_value_team,relative_market_value_opponent_team,was_home,total_points,assists,bonus,bps,clean_sheets,creativity,goals_conceded,goals_scored,ict_index,influence,own_goals,penalties_missed,penalties_saved,red_cards,saves,selected,team_a_score,team_h_score,threat,transfers_balance,transfers_in,transfers_out,yellow_cards,kickoff_time,season,play_proba,relative_market_value_team_season,relative_market_value_opponent_team_season,date,squad,comp,shots_total,shots_on_target,touches,pressures,tackles,interceptions,blocks,xg,npxg,xa,sca,gca,passes_completed,passes,passes_pct,carries,dribbles_completed,dribbles,price,short_name,total_points_team,total_points_team_conceded,total_points_team_last_all,total_points_team_conceded_last_all,total_points_team_pg_last_all,total_points_team_conceded_pg_last_all,total_points_team_last_3,total_points_team_conceded_last_3,total_points_team_pg_last_3,total_points_team_conceded_pg_last_3,total_points_team_last_5,total_points_team_conceded_last_5,total_points_team_pg_last_5,total_points_team_conceded_pg_last_5,total_points_team_last_10,total_points_team_conceded_last_10,total_points_team_pg_last_10,total_points_team_conceded_pg_last_10,total_points_team_last_20,total_points_team_conceded_last_20,total_points_team_pg_last_20,total_points_team_conceded_pg_last_20,total_points_team_opponent,total_points_team_conceded_opponent,total_points_team_last_all_opponent,total_points_team_conceded_last_all_opponent,total_points_team_pg_last_all_opponent,total_points_team_conceded_pg_last_all_opponent,total_points_team_last_3_opponent,total_points_team_conceded_last_3_opponent,total_points_team_pg_last_3_opponent,total_points_team_conceded_pg_last_3_opponent,total_points_team_last_5_opponent,total_points_team_conceded_last_5_opponent,total_points_team_pg_last_5_opponent,total_points_team_conceded_pg_last_5_opponent,total_points_team_last_10_opponent,total_points_team_conceded_last_10_o
pponent,total_points_team_pg_last_10_opponent,total_points_team_conceded_pg_last_10_opponent,total_points_team_last_20_opponent,total_points_team_conceded_last_20_opponent,total_points_team_pg_last_20_opponent,total_points_team_conceded_pg_last_20_opponent,minutes_last_all,minutes_last_1,minutes_last_2,minutes_last_3,minutes_last_5,minutes_last_10,minutes_last_20,total_points_last_all,total_points_pg_last_all,total_points_last_1,total_points_pg_last_1,total_points_last_2,total_points_pg_last_2,total_points_last_3,total_points_pg_last_3,total_points_last_5,total_points_pg_last_5,total_points_last_10,total_points_pg_last_10,total_points_last_20,total_points_pg_last_20
97918,Bernd Leno,14,1,90.0,Arsenal,Everton,1.31738,1.145049,False,1,0.0,0.0,10.0,0.0,0.0,2.0,0.0,0.8,8.4,0.0,0.0,0.0,0.0,1.0,577628.0,1.0,2.0,0.0,-35279.0,2232.0,37511.0,0.0,2020-12-19T17:30:00Z,2021,1.0,,,2020-12-19,Arsenal,Premier League,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,29.0,96.6,15.0,0.0,0.0,,,25,41.0,7357,5897.0,44.587879,35.739394,65.0,159.0,21.666667,53.0,141.0,253.0,28.2,50.6,313.0,496.0,31.3,49.6,766.0,825.0,38.3,41.25,41.0,25.0,6835.0,6627.0,41.424242,40.163636,162.0,79.0,54.0,26.333333,237.0,176.0,47.4,35.2,391.0,401.0,39.1,40.1,797.0,777.0,39.85,38.85,6651.0,90.0,180.0,270.0,450.0,900.0,1170.0,265,3.585927,2.0,2.0,4.0,2.0,5.0,1.666667,17.0,3.4,33.0,3.3,45.0,3.461538
98530,Bernd Leno,15,1,90.0,Arsenal,Chelsea,1.31738,1.85944,True,8,0.0,1.0,27.0,0.0,0.0,1.0,0.0,3.6,35.6,0.0,0.0,1.0,0.0,2.0,532869.0,1.0,3.0,0.0,-44555.0,3358.0,47913.0,0.0,2020-12-26T17:30:00Z,2021,1.0,,,2020-12-26,Arsenal,Premier League,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,31.0,54.8,11.0,0.0,0.0,,,60,23.0,7382,5938.0,44.46988,35.771084,74.0,130.0,24.666667,43.333333,117.0,242.0,23.4,48.4,293.0,510.0,29.3,51.0,717.0,853.0,35.85,42.65,23.0,60.0,8224.0,5698.0,49.542169,34.325301,125.0,125.0,41.666667,41.666667,226.0,206.0,45.2,41.2,536.0,346.0,53.6,34.6,1046.0,737.0,52.3,36.85,6741.0,90.0,180.0,270.0,450.0,900.0,1260.0,266,3.551402,1.0,1.0,3.0,1.5,5.0,1.666667,8.0,1.6,32.0,3.2,46.0,3.285714
99123,Bernd Leno,16,1,90.0,Arsenal,Brighton and Hove Albion,1.316625,0.541862,False,6,0.0,0.0,26.0,1.0,0.0,0.0,0.0,2.0,20.2,0.0,0.0,0.0,0.0,2.0,554251.0,1.0,0.0,0.0,16320.0,27036.0,10716.0,0.0,2020-12-29T18:00:00Z,2021,1.0,,,2020-12-29,Arsenal,Premier League,0.0,0.0,43.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,42.0,66.7,28.0,0.0,0.0,,,64,23.0,7442,5961.0,44.562874,35.694611,118.0,96.0,39.333333,32.0,150.0,223.0,30.0,44.6,328.0,472.0,32.8,47.2,711.0,859.0,35.55,42.95,23.0,64.0,4584.0,5868.0,35.534884,45.488372,121.0,117.0,40.333333,39.0,166.0,236.0,33.2,47.2,349.0,430.0,34.9,43.0,707.0,887.0,35.35,44.35,6831.0,90.0,180.0,270.0,450.0,900.0,1350.0,274,3.610013,8.0,8.0,9.0,4.5,11.0,3.666667,14.0,2.8,37.0,3.7,54.0,3.6
99622,Bernd Leno,17,1,90.0,Arsenal,West Bromwich Albion,1.261632,0.258537,False,7,0.0,0.0,27.0,1.0,0.0,0.0,0.0,1.9,19.4,0.0,0.0,0.0,0.0,3.0,615058.0,4.0,0.0,0.0,56845.0,72518.0,15673.0,0.0,2021-01-02T20:00:00Z,2021,1.0,,,2021-01-02,Arsenal,Premier League,0.0,0.0,31.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.0,29.0,79.3,16.0,0.0,0.0,,,81,15.0,7506,5984.0,44.678571,35.619048,149.0,87.0,49.666667,29.0,198.0,176.0,39.6,35.2,370.0,434.0,37.0,43.4,741.0,845.0,37.05,42.25,15.0,81.0,3157.0,4344.0,34.315217,47.217391,62.0,196.0,20.666667,65.333333,116.0,274.0,23.2,54.8,261.0,552.0,26.1,55.2,637.0,1025.0,31.85,51.25,6921.0,90.0,180.0,270.0,450.0,900.0,1440.0,280,3.641092,6.0,6.0,14.0,7.0,15.0,5.0,19.0,3.8,41.0,4.1,60.0,3.75
100175,Bernd Leno,18,1,90.0,Arsenal,Crystal Palace,1.264284,0.475724,True,6,0.0,0.0,25.0,1.0,0.0,0.0,0.0,2.0,20.0,0.0,0.0,0.0,0.0,2.0,975215.0,0.0,0.0,0.0,117258.0,160152.0,42894.0,0.0,2021-01-14T20:00:00Z,2021,1.0,,,2021-01-14,Arsenal,Premier League,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,29.0,31.0,93.5,20.0,0.0,0.0,,,51,51.0,7587,5999.0,44.893491,35.497041,205.0,61.0,68.333333,20.333333,263.0,134.0,52.6,26.8,389.0,426.0,38.9,42.6,795.0,820.0,39.75,41.0,51.0,51.0,6416.0,7418.0,37.964497,43.893491,125.0,121.0,41.666667,40.333333,166.0,262.0,33.2,52.4,365.0,456.0,36.5,45.6,694.0,935.0,34.7,46.75,7011.0,90.0,180.0,270.0,450.0,900.0,1530.0,287,3.684211,7.0,7.0,13.0,6.5,21.0,7.0,24.0,4.8,41.0,4.1,67.0,3.941176
100634,Bernd Leno,19,1,90.0,Arsenal,Newcastle United,1.272515,0.604033,True,6,0.0,0.0,22.0,1.0,0.0,0.0,0.0,1.1,11.2,0.0,0.0,0.0,0.0,1.0,690601.0,0.0,3.0,0.0,-21946.0,28671.0,50617.0,0.0,2021-01-18T20:00:00Z,2021,1.0,,,2021-01-18,Arsenal,Premier League,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,28.0,89.3,12.0,0.0,0.0,,,80,21.0,7638,6050.0,44.929412,35.588235,196.0,89.0,65.333333,29.666667,281.0,153.0,56.2,30.6,422.0,406.0,42.2,40.6,808.0,840.0,40.4,42.0,21.0,80.0,4997.0,5762.0,38.145038,43.984733,97.0,141.0,32.333333,47.0,146.0,233.0,29.2,46.6,323.0,476.0,32.3,47.6,701.0,896.0,35.05,44.8,7101.0,90.0,180.0,270.0,450.0,900.0,1620.0,293,3.713561,6.0,6.0,13.0,6.5,19.0,6.333333,28.0,5.6,45.0,4.5,73.0,4.055556
101553,Bernd Leno,20,1,90.0,Arsenal,Southampton,1.34389,0.538945,False,3,0.0,0.0,15.0,0.0,0.0,1.0,0.0,2.7,27.0,0.0,0.0,0.0,0.0,4.0,722859.0,3.0,1.0,0.0,4915.0,38783.0,33868.0,0.0,2021-01-26T20:15:00Z,2021,1.0,,,2021-01-26,Arsenal,Premier League,0.0,0.0,38.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,26.0,33.0,78.8,14.0,0.0,0.0,,,52,28.0,7718,6071.0,45.134503,35.502924,212.0,87.0,70.666667,29.0,336.0,133.0,67.2,26.6,453.0,375.0,45.3,37.5,869.0,797.0,43.45,39.85,28.0,52.0,6569.0,7305.0,38.415205,42.719298,129.0,138.0,43.0,46.0,204.0,252.0,40.8,50.4,436.0,400.0,43.6,40.0,895.0,702.0,44.75,36.947368,7191.0,90.0,180.0,270.0,450.0,900.0,1710.0,299,3.742178,6.0,6.0,12.0,6.0,19.0,6.333333,33.0,6.6,41.0,4.1,79.0,4.157895
102198,Bernd Leno,21,1,90.0,Arsenal,Manchester United,1.353529,1.679166,True,7,0.0,0.0,25.0,1.0,0.0,0.0,0.0,2.6,26.2,0.0,0.0,0.0,0.0,3.0,719531.0,0.0,0.0,0.0,-5667.0,20995.0,26662.0,0.0,2021-01-30T17:30:00Z,2021,1.0,,,2021-01-30,Arsenal,Premier League,0.0,0.0,28.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,26.0,61.5,12.0,0.0,0.0,,,52,48.0,7770,6099.0,45.174419,35.459302,183.0,100.0,61.0,33.333333,328.0,138.0,65.6,27.6,478.0,361.0,47.8,36.1,877.0,792.0,43.85,39.6,48.0,52.0,7980.0,6087.0,46.395349,35.389535,125.0,116.0,41.666667,38.666667,227.0,174.0,45.4,34.8,492.0,336.0,49.2,33.6,903.0,760.0,45.15,38.0,7281.0,90.0,180.0,270.0,450.0,900.0,1800.0,302,3.733004,3.0,3.0,9.0,4.5,15.0,5.0,28.0,5.6,42.0,4.2,82.0,4.1
102775,Bernd Leno,22,1,90.0,Arsenal,Wolverhampton Wanderers,1.353529,0.971383,False,0,,,,,,,,,,,,,,,,,,,,,,,9004-99-99,2021,1.0,,,,,,,,,,,,,,,,,,,,,,,,5.0,Leno,0,0.0,7822,6147.0,45.213873,35.531792,184.0,97.0,61.333333,32.333333,316.0,163.0,63.2,32.6,514.0,339.0,51.4,33.9,847.0,822.0,42.35,41.1,0.0,0.0,3992.0,3779.0,41.154639,38.958763,107.0,161.0,35.666667,53.666667,176.0,242.0,35.2,48.4,315.0,468.0,31.5,46.8,714.0,900.0,35.7,45.0,7371.0,90.0,180.0,270.0,450.0,900.0,1800.0,309,3.772894,7.0,7.0,10.0,5.0,16.0,5.333333,29.0,5.8,48.0,4.8,82.0,4.1
103425,Bernd Leno,23,1,90.0,Arsenal,Aston Villa,1.353529,0.747374,False,0,,,,,,,,,,,,,,,,,,,,,,,9018-99-99,2021,1.0,,,,,,,,,,,,,,,,,,,,,,,,5.0,Leno,0,0.0,7822,6147.0,44.954023,35.327586,104.0,76.0,34.666667,25.333333,235.0,148.0,47.0,29.6,498.0,282.0,49.8,28.2,801.0,797.0,40.05,39.85,0.0,0.0,2340.0,2355.0,39.661017,39.915254,100.0,72.0,33.333333,24.0,198.0,164.0,39.6,32.8,463.0,324.0,46.3,32.4,984.0,652.0,49.2,32.6,7461.0,90.0,180.0,270.0,450.0,900.0,1800.0,309,3.727382,0.0,0.0,7.0,3.5,10.0,3.333333,22.0,4.4,46.0,4.6,80.0,4.0


In [12]:
# create dataset with latest lag numbers for all future gameweeks
# reuse the season chosen in the features cell rather than repeating the
# literal, so the two cannot drift apart when the season is updated
valid_season = season
valid_gw = next_gw
# number of gameweeks from next_gw through gameweek 38 inclusive
valid_len = 38 - next_gw + 1

train_predict_df, train_idx, valid_idx = create_lag_train(
    lag_train_df,
    cat_vars, cont_vars,
    player_lag_vars, team_lag_vars, dep_var,
    valid_season, valid_gw, valid_len,
)

In [13]:
# re-display the team lag feature names that will be used as model inputs
team_lag_vars

['total_points_team_pg_last_all',
 'total_points_team_pg_last_3',
 'total_points_team_pg_last_5',
 'total_points_team_pg_last_10',
 'total_points_team_pg_last_20',
 'total_points_team_conceded_pg_last_all_opponent',
 'total_points_team_conceded_pg_last_3_opponent',
 'total_points_team_conceded_pg_last_5_opponent',
 'total_points_team_conceded_pg_last_10_opponent',
 'total_points_team_conceded_pg_last_20_opponent']

In [14]:
# take a look at the dataframe: column names, non-null counts and dtypes
train_predict_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114138 entries, 0 to 114137
Data columns (total 33 columns):
 #   Column                                           Non-Null Count   Dtype   
---  ------                                           --------------   -----   
 0   gw                                               114138 non-null  category
 1   minutes                                          114138 non-null  float64 
 2   minutes_last_1                                   114138 non-null  float64 
 3   minutes_last_10                                  114138 non-null  float64 
 4   minutes_last_2                                   114138 non-null  float64 
 5   minutes_last_20                                  114138 non-null  float64 
 6   minutes_last_3                                   114138 non-null  float64 
 7   minutes_last_5                                   114138 non-null  float64 
 8   minutes_last_all                                 114138 non-null  float64 
 9   oppo

In [15]:
# separate the model inputs (all feature groups) from the dependent variable;
# copies are taken so later edits to X/y do not touch train_predict_df
X = train_predict_df[cat_vars + cont_vars + player_lag_vars + team_lag_vars].copy()
y = train_predict_df[dep_var].copy()

In [16]:
# position is a category, not a quantity, so cast it to a string
X['position'] = X['position'].astype(str)

# replace season labels with integer codes
enc = LabelEncoder()
X['season'] = enc.fit_transform(X['season'])

# one dict per row, as expected by DictVectorizer
X_dict = X.to_dict(orient="records")

# dense vectoriser; derived feature names are joined with '_'
dv = DictVectorizer(sparse=False, separator='_')

# fit on the row dicts and encode them: X_encoded
X_encoded = dv.fit_transform(X_dict)

# back to a DataFrame, labelled with the vectoriser's feature names
X_df = pd.DataFrame(X_encoded, columns=dv.feature_names_)

In [17]:
# positional split: train_idx rows are fitted on, valid_idx rows are predicted
X_train, y_train = X_df.iloc[train_idx], y.iloc[train_idx]
X_test, y_test = X_df.iloc[valid_idx], y.iloc[valid_idx]

In [18]:
# instantiate and train the XGB regressor with fixed hyperparameters
model_params = dict(
    gamma=0.42,
    learning_rate=0.047,
    max_depth=4,
    n_estimators=171,
    subsample=0.6,
)

# the hyperparameters are passed straight through as keyword arguments
xg_reg = xgb.XGBRegressor(objective="reg:squarederror", **model_params)

# fit on the training rows, then predict the held-out rows
xg_reg.fit(X_train, y_train)
preds = xg_reg.predict(X_test)

In [19]:
# raw model output: one predicted points value per row selected by valid_idx
preds

array([-0.00608754, -0.00489032,  3.4215202 , ...,  0.00814486,
        3.5626278 ,  0.00814486], dtype=float32)

In [20]:
# attach the predictions to the future fixtures as a new column.
# NOTE(review): preds is assigned positionally, so this assumes the rows
# selected by valid_idx are in the same order as remaining_season_df
# (sorted by gw, team) - confirm against create_lag_train.
remaining_season_df = remaining_season_df.assign(predicted_points=preds)

In [21]:
# preview the first 50 future fixture rows with their predicted points
remaining_season_df.head(50)

Unnamed: 0,gw,team,opponent_team,player,position,price,play_proba,short_name,was_home,relative_market_value_team,relative_market_value_opponent_team,season,minutes,total_points,kickoff_time,predicted_points
5817,22,Arsenal,Wolverhampton Wanderers,Mesut Özil,3,6.7,0.0,Özil,False,1.353529,0.971383,2021,0.0,0,9004-99-99,-0.006088
5818,22,Arsenal,Wolverhampton Wanderers,Sokratis Papastathopoulos,2,4.8,0.0,Sokratis,False,1.353529,0.971383,2021,0.0,0,9004-99-99,-0.00489
5819,22,Arsenal,Wolverhampton Wanderers,David Luiz Moreira Marinho,2,5.4,1.0,David Luiz,False,1.353529,0.971383,2021,90.0,0,9004-99-99,3.42152
5820,22,Arsenal,Wolverhampton Wanderers,Pierre-Emerick Aubameyang,3,11.3,0.5,Aubameyang,False,1.353529,0.971383,2021,43.0,0,9004-99-99,1.677552
5821,22,Arsenal,Wolverhampton Wanderers,Cédric Soares,2,4.6,1.0,Cédric,False,1.353529,0.971383,2021,54.0,0,9004-99-99,0.924389
5822,22,Arsenal,Wolverhampton Wanderers,Alexandre Lacazette,4,8.4,0.75,Lacazette,False,1.353529,0.971383,2021,66.0,0,9004-99-99,3.743907
5823,22,Arsenal,Wolverhampton Wanderers,Shkodran Mustafi,2,5.0,1.0,Mustafi,False,1.353529,0.971383,2021,0.0,0,9004-99-99,0.002651
5824,22,Arsenal,Wolverhampton Wanderers,Bernd Leno,1,5.0,1.0,Leno,False,1.353529,0.971383,2021,90.0,0,9004-99-99,3.6882
5825,22,Arsenal,Wolverhampton Wanderers,Granit Xhaka,3,5.2,1.0,Xhaka,False,1.353529,0.971383,2021,90.0,0,9004-99-99,2.995523
5826,22,Arsenal,Wolverhampton Wanderers,Pablo Marí,2,4.4,0.5,Marí,False,1.353529,0.971383,2021,22.5,0,9004-99-99,1.429808


In [22]:
# the 50 highest predicted scores for the upcoming gameweek
(remaining_season_df
 .loc[remaining_season_df['gw'] == next_gw]
 .sort_values(by='predicted_points', ascending=False)
 .head(50))

Unnamed: 0,gw,team,opponent_team,player,position,price,play_proba,short_name,was_home,relative_market_value_team,relative_market_value_opponent_team,season,minutes,total_points,kickoff_time,predicted_points
5984,22,Manchester City,Burnley,Raheem Sterling,3,11.6,1.0,Sterling,False,2.391235,0.342623,2021,68.2,0,9009-99-99,7.385611
45,22,Liverpool,Brighton and Hove Albion,Mohamed Salah,3,12.6,1.0,Salah,True,2.50403,0.550097,2021,78.6,0,9001-99-99,6.782213
108,22,Manchester United,Southampton,Bruno Miguel Borges Fernandes,3,11.4,1.0,Fernandes,True,1.679166,0.538366,2021,88.666667,0,9003-99-99,6.690511
40,22,Liverpool,Brighton and Hove Albion,Roberto Firmino,4,9.4,1.0,Firmino,True,2.50403,0.550097,2021,62.8,0,9001-99-99,6.434958
5992,22,Manchester City,Burnley,Phil Foden,3,6.2,1.0,Foden,False,2.391235,0.342623,2021,71.166667,0,9009-99-99,6.312886
109,22,Manchester United,Southampton,Anthony Martial,4,8.7,1.0,Martial,True,1.679166,0.538366,2021,80.75,0,9003-99-99,6.244448
5979,22,Manchester City,Burnley,Ilkay Gündogan,3,5.8,1.0,Gündogan,False,2.391235,0.342623,2021,78.2,0,9009-99-99,6.124458
50,22,Liverpool,Brighton and Hove Albion,Trent Alexander-Arnold,2,7.3,1.0,Alexander-Arnold,True,2.50403,0.550097,2021,87.2,0,9001-99-99,6.029211
7,22,Tottenham Hotspur,Chelsea,Heung-Min Son,3,9.8,1.0,Son,True,1.662924,1.81294,2021,90.0,0,9000-99-99,5.72269
5688,22,Chelsea,Tottenham Hotspur,César Azpilicueta,2,5.6,1.0,Azpilicueta,False,1.81294,1.662924,2021,72.0,0,9000-99-99,5.658591


In [23]:
# rename previous week's predictions file
# generate previous week's filename
last_gw = next_gw - 1
filename = 'history/2020-21/predictions_gw' + str(last_gw) + '.csv'

# move with pathlib instead of a shell command so the cell behaves the same
# on Windows, macOS and Linux
current_file = Path('predictions.csv')
archive_file = Path(filename)

if not current_file.exists():
    # nothing to archive (e.g. the first run of the season)
    print(f'{current_file} not found, nothing to archive')
elif archive_file.exists():
    # re-running the notebook in the same gameweek must not overwrite last
    # week's archive with this week's predictions
    print(f'{archive_file} already exists, leaving it untouched')
else:
    archive_file.parent.mkdir(parents=True, exist_ok=True)
    current_file.rename(archive_file)
    print(f'moved {current_file} -> {archive_file}')

        1 file(s) moved.


In [24]:
# write this week's predictions to predictions.csv
## RENAME PREVIOUS WEEK FIRST (the file is overwritten here)
output_cols = [
    'gw', 'player', 'team', 'opponent_team',
    'position', 'price', 'play_proba', 'short_name',
    'was_home', 'predicted_points', 'minutes',
]
predictions = remaining_season_df.loc[:, output_cols]
predictions.to_csv('predictions.csv')