In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import scipy

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import plotly.express as px

from datetime import datetime
from scipy import stats

#Set Display options
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)

In [4]:
#Pick Season
season = "2021-22"

#Import Files
all_players = pd.read_csv (r'./data/' + season + '/Merged_data/all_players_merged.csv', parse_dates= ["news_added"])
#all_teams = pd.read_csv (r'./data/' + season + '/Cleaned_data/all_teams.csv')
all_events = pd.read_csv (r'./data/' + season + '/Cleaned_data/all_events.csv', parse_dates= ["deadline_time"])
player_types = pd.read_csv(r'./data/' + season + '/Cleaned_data/player_types.csv')
game_phases = pd.read_csv(r'./data/' + season + '/Cleaned_data/game_phases.csv')

player_season_history = pd.read_csv(r'./data/' + season + '/Cleaned_data/player_season_history.csv')
player_gameweek_history = pd.read_csv(r'./data/' + season + '/Cleaned_data/player_gameweek_history.csv')
#player_gameweek_history = pd.read_csv(r'./data/2019-20/player_past_history/player_gameweek_history_raw.csv')
player_future_fixture = pd.read_csv(r'./data/' + season + '/Cleaned_data/player_future_fixture.csv', parse_dates = ["kickoff_time"])



In [5]:
player_future_fixture.head()

Unnamed: 0,fixture_id,fixture_code,home_team_id,team_h_score,away_team_id,team_a_score,event,finished,minutes,provisional_start_time,kickoff_time,event_name,is_home,difficulty,player_id,opponent_team_id,team_id
0,191,2210461,1,,20,,26.0,False,0,False,2022-02-24 19:45:00,Gameweek 26,True,3,1,20,1
1,279,2210549,18,,1,,28.0,False,0,False,2022-03-06 14:00:00,Gameweek 28,False,2,1,18,1
2,281,2210551,1,,9,,29.0,False,0,False,2022-03-13 16:30:00,Gameweek 29,True,3,1,9,1
3,261,2210531,1,,11,,29.0,False,0,False,2022-03-16 20:15:00,Gameweek 29,True,4,1,11,1
4,291,2210561,2,,1,,30.0,False,0,False,2022-03-19 12:30:00,Gameweek 30,False,3,1,2,1


In [6]:
def player_future(player_id):
    data = player_future_fixture[player_future_fixture.player_id == player_id]

    player = all_players[all_players.player_id == player_id]
    team = data.team_id.iloc[0]
    goals_scored = player.iloc[0].goals_scored
    player_name = player.iloc[0]['web_name']

    fig = px.line(
        data, 
        x = 'event_name', 
        y = 'difficulty', 
        title = player_name,
        hover_data=['opponent_team_id'])

    fig.show()

In [7]:
player_future(150)

In [8]:
    opponent_strength = player_future_fixture['difficulty']
    opponent_strength
    #opponent_strength = player_analysis.iloc[:,2]

0       3
1       2
2       3
3       4
4       3
       ..
9660    2
9661    2
9662    2
9663    3
9664    3
Name: difficulty, Length: 9665, dtype: int64

In [9]:
def get_SMA(player_id , count):
    data = player_future_fixture[player_future_fixture.player_id == player_id]
    opponent_strength = data['difficulty']
    # List to store moving average results
    SMA2 = list(range(0, len(opponent_strength) - count))

    # Compute moving average
    for ii in range(len(SMA2)):
        # Previous gw index
        index = range(ii, ii + count)
        # Get data for previous gw
        prev_gw2 = opponent_strength.iloc[index]

        # Sum previous gw
        summation2 = np.sum(prev_gw2)

        # Get average
        avg2 = summation2/count
        # Save results to list
        SMA2[ii] = avg2
         
    # Define column label
    label2 = f"Opponent strength SMA"

    
    # Get corresponding gw for moving_avg
    gameweeks2 = opponent_strength.index[:-count]
    gameweek = data.event_name[:-count]
    
    # Convert list into Pandas Series
    SMA2 = pd.Series(SMA2, name = label2, index = gameweek)
    
    
    return SMA2

In [10]:
def player_future_avg(player_id , no_games):
    data = get_SMA(player_id , no_games)

    player = all_players[all_players.player_id == player_id]

    player_name = player.iloc[0]['web_name']
    title = "Average difficulty next " + str(no_games) + " games for " + str(player_name)

    fig = px.line(
        data, 
        title = title)

    fig.show()

In [22]:
player_future_avg(100 , 8)

In [12]:
result = get_SMA(150,4)
result

event_name
Gameweek 26    3.25
Gameweek 27    3.25
Gameweek 28    3.50
Gameweek 29    3.25
Gameweek 30    2.50
Gameweek 31    2.50
Gameweek 32    2.25
Gameweek 33    2.00
Gameweek 34    2.25
Name: Opponent strength SMA, dtype: float64

## FANTASY PLAYER VALUE

In [13]:
all_players.columns

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round',
       'player_code', 'cost_change_event', 'cost_change_event_fall',
       'cost_change_start', 'cost_change_start_fall', 'dreamteam_count',
       'player_type_id', 'ep_next', 'ep_this', 'event_points', 'first_name',
       'form', 'player_id', 'in_dreamteam', 'news', 'news_added', 'now_cost',
       'photo', 'PPG', 'second_name', 'selected_by_percent', 'status',
       'team_id', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'total_minutes',
       'goals_scored', 'assists', 'clean_sheets', 'goals_conceded',
       'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards',
       'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity',
       'threat', 'ict_index', 'influence_rank', 'influence_rank_type',
       'creativity_rank', 'creativity_rank_type', 'threat_rank',
       'thre

In [14]:
top_players = all_players[['web_name' , 'now_cost', 'status' , 'player_type_id', 'total_points' , 'total_minutes' , 'PP90' , 'PPG' , 'PPMM' , 'VAPM']][all_players['total_minutes']>100][all_players['status'] != "i"].sort_values(by='VAPM' , ascending = False).head(60)
top_players.head(20)


Boolean Series key will be reindexed to match DataFrame index.



Unnamed: 0,web_name,now_cost,status,player_type_id,total_points,total_minutes,PP90,PPG,PPMM,VAPM
599,Stones,52,a,2,45,591,6.852792,5.6,1.076923,0.692308
257,Torres,67,u,3,26,303,7.722772,6.5,0.970149,0.671642
535,Sarr,49,a,2,21,343,5.510204,5.2,1.061224,0.653061
475,Tierney,51,a,2,90,1466,5.525239,5.3,1.039216,0.647059
3,Ramsdale,51,a,1,103,1800,5.15,5.2,1.019608,0.627451
557,Branthwaite,40,a,2,9,145,5.586207,4.5,1.125,0.625
587,Alexander-Arnold,85,a,2,156,1953,7.18894,7.1,0.835294,0.6
26,Arrizabalaga,47,a,1,19,360,4.75,4.8,1.021277,0.595745
476,Gabriel,53,a,2,99,1746,5.103093,5.0,0.943396,0.566038
601,Cancelo,71,a,2,142,2147,5.952492,5.9,0.830986,0.549296


In [15]:
top_players[top_players['player_type_id'] == 1].head(3)

Unnamed: 0,web_name,now_cost,status,player_type_id,total_points,total_minutes,PP90,PPG,PPMM,VAPM
3,Ramsdale,51,a,1,103,1800,5.15,5.2,1.019608,0.627451
26,Arrizabalaga,47,a,1,19,360,4.75,4.8,1.021277,0.595745
80,Sá,52,a,1,112,2160,4.666667,4.7,0.903846,0.519231


In [16]:
top_players[top_players['player_type_id'] == 2].head(5)

Unnamed: 0,web_name,now_cost,status,player_type_id,total_points,total_minutes,PP90,PPG,PPMM,VAPM
599,Stones,52,a,2,45,591,6.852792,5.6,1.076923,0.692308
535,Sarr,49,a,2,21,343,5.510204,5.2,1.061224,0.653061
475,Tierney,51,a,2,90,1466,5.525239,5.3,1.039216,0.647059
557,Branthwaite,40,a,2,9,145,5.586207,4.5,1.125,0.625
587,Alexander-Arnold,85,a,2,156,1953,7.18894,7.1,0.835294,0.6


In [17]:
top_players[top_players['player_type_id'] == 3].head(5)

Unnamed: 0,web_name,now_cost,status,player_type_id,total_points,total_minutes,PP90,PPG,PPMM,VAPM
257,Torres,67,u,3,26,303,7.722772,6.5,0.970149,0.671642
353,Bowen,71,a,3,149,2218,6.045987,5.7,0.802817,0.521127
114,Coutinho,73,a,3,29,352,7.414773,5.8,0.794521,0.520548
90,Smith Rowe,58,a,3,103,1373,6.751639,4.9,0.844828,0.5
248,Luis Díaz,80,a,3,12,178,6.067416,6.0,0.75,0.5


In [18]:
top_players[top_players['player_type_id'] == 4].head(3)

Unnamed: 0,web_name,now_cost,status,player_type_id,total_points,total_minutes,PP90,PPG,PPMM,VAPM
403,Weghorst,65,a,4,21,341,5.542522,5.2,0.8,0.492308
461,Dennis,61,a,4,105,1728,5.46875,4.8,0.786885,0.459016


In [19]:
price_gk = top_players['now_cost'][top_players['player_type_id'] == 1].head(2).sum()
price_def = top_players['now_cost'][top_players['player_type_id'] == 2].head(5).sum()
price_mid = top_players['now_cost'][top_players['player_type_id'] == 3].head(5).sum()
price_fwd = top_players['now_cost'][top_players['player_type_id'] == 4].head(3).sum()


team_cost = price_gk + price_def + price_mid + price_fwd
print("Price for the best team is " + str(np.divide(team_cost,10)))

Price for the best team is 85.0


In [20]:
def player_value():

    fig = px.scatter(
        top_players.head(30), 
        x = 'now_cost', 
        y = 'VAPM',
        #size = 'minutes',
        color = 'total_points',
        text = 'web_name',
        hover_data=['PPG'])

    fig.show()

In [21]:
player_value()