In [1]:
import warnings
from io import BytesIO
from pathlib import Path
from urllib.parse import urlparse
from urllib.request import urlopen, urlretrieve
from zipfile import ZipFile, is_zipfile

import pandas as pd
import socceraction.vaep.features as features
import socceraction.vaep.labels as labels
from sklearn.metrics import brier_score_loss, roc_auc_score
from socceraction.spadl.wyscout import convert_to_spadl
from socceraction.vaep.formula import value
from tqdm.notebook import tqdm
from xgboost import XGBClassifier

In [2]:
warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning)

In [3]:
# Download endpoint shared by all four data files; only the file id differs.
_FIGSHARE_FILE_URL = 'https://ndownloader.figshare.com/files/{}'

# Logical name -> download URL.
# 'events' and 'matches' are ZIP files containing one JSON file for each
# competition; 'players' and 'teams' are JSON files.
data_files = {
    name: _FIGSHARE_FILE_URL.format(file_id)
    for name, file_id in (
        ('events', 14464685),
        ('matches', 14464622),
        ('players', 15073721),
        ('teams', 15073697),
    )
}

In [4]:
# Download every data file into the working directory and unpack the ZIP archives.
for url in tqdm(data_files.values()):
    # geturl() reports the URL that was actually retrieved (after any
    # redirect); the local file name is taken from that URL's path.
    # Only the resolved URL is needed here, so the response is closed right
    # away instead of being left open until garbage collection.
    with urlopen(url) as response:
        url_s3 = response.geturl()
    path = Path(urlparse(url_s3).path)
    file_name = path.name
    file_local, _ = urlretrieve(url_s3, file_name)
    # Plain JSON downloads are kept as they are; archives are extracted in place.
    if is_zipfile(file_local):
        with ZipFile(file_local) as zip_file:
            zip_file.extractall()

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))




## Preprocess the Wyscout data

The `read_json_file` function reads and returns the content of a given JSON file. The function handles the encoding of special characters (e.g., accents in names of players and teams) that the `pd.read_json` function cannot handle properly.

In [5]:
def read_json_file(filename):
    """Return the content of ``filename`` as text.

    The file is read as raw bytes and decoded with the ``unicode_escape``
    codec, so escape sequences such as ``\\u00e9`` come back as the
    characters they stand for.
    """
    with open(filename, mode='rb') as handle:
        raw_bytes = handle.read()
    return raw_bytes.decode('unicode_escape')

### Teams and Players (json to h5)

In [12]:
def teams_players_j_h5(result_file,folder_path=''):
    """Store ``teams.json`` and ``players.json`` in an HDF5 file.

    Both JSON files are read from ``folder_path``. The teams are written
    under the key ``teams`` with mode ``'w'`` (starting a fresh file), the
    players are then appended under the key ``players``.

    Args:
        result_file: Name of the HDF5 file, relative to ``folder_path``.
        folder_path: Prefix prepended to every file name.
    """
    table_names = ('teams', 'players')
    # Read both files first, then parse both, so nothing is written unless
    # both inputs could be loaded.
    raw_json = {name: read_json_file(folder_path+name+'.json') for name in table_names}
    frames = {name: pd.read_json(text) for name, text in raw_json.items()}

    target = folder_path+result_file
    for name, write_mode in zip(table_names, ('w', 'a')):
        frames[name].to_hdf(target, key=name, mode=write_mode)

    print('Number of Teams : ',len(frames['teams']))
    print('Number of Players : ',len(frames['players']))
    print("teams_players_j_h5 done it's work")

### Matches of specific competitions (json to h5)

In [17]:
def matches_j_h5(result_file,competitions,folder_path=''):
    """Append the matches of the given competitions to an HDF5 file.

    For every competition the file ``matches_<competition>.json`` (spaces in
    the name replaced by underscores) is read from ``folder_path``; all
    matches are concatenated and stored under the key ``matches``.

    Args:
        result_file: Name of the HDF5 file, relative to ``folder_path``.
        competitions: Iterable of competition names.
        folder_path: Prefix prepended to the input and output file names.
    """
    dfs_matches = []
    for competition in competitions:
        competition_name = competition.replace(' ', '_')
        # The input is looked up in folder_path, just like the output file
        # and like the events files in events_j_h5; previously folder_path
        # was ignored here and the JSON was always read from the working
        # directory.
        file_matches = folder_path+f'matches_{competition_name}.json'
        json_matches = read_json_file(file_matches)
        dfs_matches.append(pd.read_json(json_matches))
    df_matches = pd.concat(dfs_matches)
    df_matches.to_hdf(folder_path+result_file, key='matches', mode='a')
    print('Number of Matches : ',len(df_matches))
    print("matches_j_h5 done it's work")

### Events of specific competitions (json to h5) - the events of each match are stored under `events/match_<match-id>`

In [18]:
def events_j_h5(result_file,competitions,folder_path=''):
    """Append the events of the given competitions to an HDF5 file, one key per match.

    For every competition the file ``events_<competition>.json`` (spaces in
    the name replaced by underscores) is read from ``folder_path``. The
    events are grouped by ``matchId`` and each group is stored under the key
    ``events/match_<matchId>``.

    Args:
        result_file: Name of the HDF5 file, relative to ``folder_path``.
        competitions: Iterable of competition names.
        folder_path: Prefix prepended to the input and output file names.
    """
    store_path = folder_path+result_file
    for competition in competitions:
        source_name = 'events_' + competition.replace(' ', '_') + '.json'
        all_events = pd.read_json(read_json_file(folder_path+source_name))
        for match_id, match_events in all_events.groupby('matchId', as_index=False):
            match_events.to_hdf(store_path, key=f'events/match_{match_id}', mode='a')
        print('Number of Events : ',len(all_events))

    print("events_j_h5 done it's work")

Competitions

In [140]:
# Competitions to process in this run. The entries that were listed here but
# disabled are: 'England', 'France', 'Germany', 'Italy', 'Spain', 'World Cup'.
competitions = ['European Championship']

## Convert the data to the SPADL representation

In [141]:
def convert_h5_spadl(result_file,competitions,folder_path=''):
    """Build the HDF5 file from the JSON data and convert it to SPADL.

    Runs the three JSON-to-HDF5 steps (teams/players, matches, events) and
    then calls ``convert_to_spadl`` to write ``spadl_<result_file>``.

    Args:
        result_file: Name of the intermediate HDF5 file.
        competitions: Iterable of competition names to include.
        folder_path: Prefix prepended to every file name.
    """
    # folder_path is forwarded to every step so that the file handed to
    # convert_to_spadl below is the one the steps actually wrote; previously
    # the steps always ran with their default folder_path.
    teams_players_j_h5(result_file,folder_path)
    matches_j_h5(result_file,competitions,folder_path)
    events_j_h5(result_file,competitions,folder_path)
    convert_to_spadl(folder_path+result_file, folder_path+ 'spadl_'+result_file)
    print("convert_h5_spadl done it's work")

# One HDF5 file is produced per run, named after the first competition in the
# list; the SPADL output uses the same name with a 'spadl_' prefix.
current_competition = competitions[0].replace(' ', '_')
result_file = f'{current_competition}.h5'
result_file_spadl = f'spadl_{result_file}'

convert_h5_spadl(result_file, competitions)

Number of Teams :  142
Number of Players :  3603
teams_players_j_h5 done it's work
Number of Matches :  51
matches_j_h5 done it's work
Number of Events :  78140
events_j_h5 done it's work
...Inserting actiontypes
...Inserting bodyparts
...Inserting results
...Converting games
...Converting players
...Converting teams
...Generating player_games


100%|████████████████████████████████████████████████████████████████████████████████| 51/51 [00:03<00:00, 15.68game/s]


...Converting events to actions


100%|████████████████████████████████████████████████████████████████████████████████| 51/51 [00:23<00:00,  2.15game/s]


convert_h5_spadl done it's work


**TODO:** Try to understand the conversion step above in more detail.


# Value game states

This section generates features and labels for the game states, trains a predictive machine learning model for each label, and values the game states by applying the trained machine learning models.

1. Generate the features to describe the game states;
2. Generate the labels that capture the value of the game states;
3. Compose a dataset by selecting a set of features and the labels of the game states;
4. Train predictive machine learning models using the dataset;
5. Value the game states using the trained predictive machine learning model.


In [142]:
def read_spadl_keys(result_file=result_file,folder_path=''):
    """Load the ``games``, ``actiontypes``, ``bodyparts`` and ``results`` tables.

    Args:
        result_file: Name of the HDF5 file to read, relative to ``folder_path``.
        folder_path: Prefix prepended to ``result_file``.

    Returns:
        Tuple ``(df_games, df_actiontypes, df_bodyparts, df_results)``.
    """
    store_path = folder_path+result_file
    df_games, df_actiontypes, df_bodyparts, df_results = (
        pd.read_hdf(store_path, key=table)
        for table in ('games', 'actiontypes', 'bodyparts', 'results')
    )
    print("read_spadl_keys done it's work")
    return df_games,df_actiontypes,df_bodyparts,df_results
df_games,df_actiontypes,df_bodyparts,df_results = read_spadl_keys(result_file_spadl)

read_spadl_keys done it's work


In [143]:
# Value passed as nb_prev_actions both when building the game states and when
# deriving the feature column names further down.
nb_prev_actions = 3

## Generate game state features

In [144]:
# Feature generators: each one is called with the game states of a game and
# the resulting frames are concatenated column-wise into the feature table.
functions_features = [
    features.actiontype_onehot,
    features.bodypart_onehot,
    features.result_onehot,
    features.goalscore,
    features.startlocation,
    features.endlocation,
    features.movement,
    features.space_delta,
    features.startpolar,
    features.endpolar,
    features.team,
    features.time_delta
]

The following cell generates game states from consecutive actions in each game and computes the features for each game state.

1. Obtain the actions for the game (i.e., `df_actions`) by looping through the games;
2. Construct game states of a given length from the actions (i.e., `dfs_gamestates`);
3. Compute the features for the constructed game states (i.e., `df_features`) by looping through the list of *feature generators*.

In [145]:
def generate_features(df_games=df_games,df_actiontypes=df_actiontypes,df_bodyparts=df_bodyparts,df_results=df_results,
                    folder_path='',result_file_spadl=result_file_spadl,current_competition=current_competition):
    """Compute the game-state features of every game and store them per game.

    For each row of ``df_games`` the actions are read from
    ``actions/game_<game_id>`` in the SPADL file, left-merged with the
    action type, result and body part tables, turned into game states and
    passed through every generator in ``functions_features``. The result is
    written to ``<current_competition>_features.h5`` under ``game_<game_id>``.

    Note: the defaults are bound to the notebook variables that exist when
    this cell is executed.
    """
    spadl_store = folder_path+result_file_spadl
    features_store = folder_path+current_competition+'_features.h5'
    lookup_tables = (df_actiontypes, df_results, df_bodyparts)

    for _, game in tqdm(df_games.iterrows(), total=len(df_games)):
        game_id = game['game_id']
        actions = pd.read_hdf(spadl_store, key=f'actions/game_{game_id}')
        for lookup in lookup_tables:
            actions = actions.merge(lookup, how='left')
        actions = actions.reset_index(drop=True)

        gamestates = features.play_left_to_right(
            features.gamestates(actions, nb_prev_actions=nb_prev_actions),
            game['home_team_id'],
        )

        feature_blocks = [function(gamestates) for function in functions_features]
        pd.concat(feature_blocks, axis=1).to_hdf(features_store, key=f'game_{game_id}')
    print('features is generated for competition : '+current_competition )
generate_features()

HBox(children=(IntProgress(value=0, max=51), HTML(value='')))


features is generated for competition : European_Championship


## Generate game state labels

In [146]:
# Label generators: each one is called with the actions of a game and the
# resulting frames are concatenated column-wise into the label table.
functions_labels = [
    labels.scores,
    labels.concedes
]

The following cell computes the labels for each action:

In [147]:
def generate_labels(df_games=df_games,df_actiontypes=df_actiontypes,df_bodyparts=df_bodyparts,df_results=df_results,
                    folder_path='',result_file_spadl=result_file_spadl,current_competition=current_competition):
    """Compute the labels of every game and store them per game.

    For each row of ``df_games`` the actions are read from
    ``actions/game_<game_id>`` in the SPADL file, left-merged with the
    action type, result and body part tables and passed through every
    generator in ``functions_labels``. The result is written to
    ``<current_competition>_labels.h5`` under ``game_<game_id>``.

    Note: the defaults are bound to the notebook variables that exist when
    this cell is executed.
    """
    spadl_store = folder_path+result_file_spadl
    labels_store = folder_path+current_competition+'_labels.h5'
    lookup_tables = (df_actiontypes, df_results, df_bodyparts)

    for _, game in tqdm(df_games.iterrows(), total=len(df_games)):
        game_id = game['game_id']
        actions = pd.read_hdf(spadl_store, key=f'actions/game_{game_id}')
        for lookup in lookup_tables:
            actions = actions.merge(lookup, how='left')
        actions = actions.reset_index(drop=True)

        label_blocks = [function(actions) for function in functions_labels]
        pd.concat(label_blocks, axis=1).to_hdf(labels_store, key=f'game_{game_id}')
    print('labels file is generated for competition : '+current_competition )

generate_labels()

HBox(children=(IntProgress(value=0, max=51), HTML(value='')))


labels file is generated for competition : European_Championship


## Generate dataset

The following cell generates a list of names for the features to be included in the dataset.

In [148]:
# Column names derived from the same generators and nb_prev_actions that were
# used to compute the features; used below to select columns from the stored frames.
columns_features = features.feature_column_names(functions_features, nb_prev_actions=nb_prev_actions)

The following cell obtains the relevant features and labels for each game and stores them in the `df_features` and `df_labels` `DataFrame` objects.

In [149]:
# Label columns selected from the stored per-game label frames.
columns_labels = ['scores', 'concedes']

def generate_dataset(folder_path='',result_file_spadl=result_file_spadl,current_competition=current_competition):
    """Collect the stored per-game features and labels into two frames.

    Reads ``game_<game_id>`` for every game in the notebook-level
    ``df_games`` from ``<current_competition>_features.h5`` and
    ``<current_competition>_labels.h5``, keeps ``columns_features`` and
    ``columns_labels`` respectively, and stacks the games in order.

    Returns:
        Tuple ``(df_features, df_labels)``, each with a fresh 0..n-1 index.
    """
    def _stack_games(store_suffix, columns):
        # Read one frame per game (in df_games order) and stack them.
        store = folder_path+current_competition+store_suffix
        per_game = []
        for _, game in tqdm(df_games.iterrows(), total=len(df_games)):
            game_id = game['game_id']
            per_game.append(pd.read_hdf(store, key=f'game_{game_id}')[columns])
        return pd.concat(per_game).reset_index(drop=True)

    df_features = _stack_games('_features.h5', columns_features)
    df_labels = _stack_games('_labels.h5', columns_labels)
    print('dataset is generated for competition : '+current_competition )
    return df_features,df_labels
df_features,df_labels=generate_dataset()

HBox(children=(IntProgress(value=0, max=51), HTML(value='')))




HBox(children=(IntProgress(value=0, max=51), HTML(value='')))


dataset is generated for competition : European_Championship


In [150]:
df_features.head(10)

Unnamed: 0,type_pass_a0,type_cross_a0,type_throw_in_a0,type_freekick_crossed_a0,type_freekick_short_a0,type_corner_crossed_a0,type_corner_short_a0,type_take_on_a0,type_foul_a0,type_tackle_a0,...,end_dist_to_goal_a0,end_angle_to_goal_a0,end_dist_to_goal_a1,end_angle_to_goal_a1,end_dist_to_goal_a2,end_angle_to_goal_a2,team_1,team_2,time_delta_1,time_delta_2
0,True,False,False,False,False,False,False,False,False,False,...,80.158369,0.187731,80.158369,0.187731,80.158369,0.187731,True,True,0.0,0.0
1,True,False,False,False,False,False,False,False,False,False,...,78.371487,0.35444,80.158369,0.187731,80.158369,0.187731,True,True,2.191167,2.191167
2,True,False,False,False,False,False,False,False,False,False,...,71.690861,0.258985,78.371487,0.35444,80.158369,0.187731,True,True,2.557667,4.748834
3,True,False,False,False,False,False,False,False,False,False,...,85.321753,0.176247,71.690861,0.258985,78.371487,0.35444,True,True,1.044167,3.601834
4,True,False,False,False,False,False,False,False,False,False,...,79.592694,0.357874,85.321753,0.176247,71.690861,0.258985,True,True,3.048458,4.092625
5,False,False,True,False,False,False,False,False,False,False,...,23.51977,0.895657,41.285553,0.741367,25.783747,0.618987,False,False,13.76709,16.815548
6,True,False,False,False,False,False,False,False,False,False,...,31.518001,1.08558,23.51977,0.895657,41.285553,0.741367,True,False,1.193161,14.960251
7,False,False,False,False,False,False,False,False,False,False,...,89.647658,0.388999,94.506002,0.299464,92.147597,0.200588,False,False,3.420165,4.613326
8,False,False,True,False,False,False,False,False,False,False,...,25.144733,1.045928,40.524098,0.995456,31.518001,1.08558,False,True,10.269111,13.689276
9,True,False,False,False,False,False,False,False,False,False,...,36.732596,0.890698,25.144733,1.045928,40.524098,0.995456,True,False,1.401809,11.67092


In [151]:
# df_features.to_csv('main.csv', index=False)

In [152]:
df_labels.head(10)

Unnamed: 0,scores,concedes
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
5,False,False
6,False,False
7,False,False
8,False,False
9,False,False


## Train classifiers

The following cell trains an XGBoost classifier for each label using the computed features. For each label:
1. Construct an XGBoost classifier with default hyperparameters;
2. Train the classifier using the computed features and the label;
3. Store the trained classifier in the `models` `dict`.

In [153]:
%%time
def train_model(df_features=df_features,df_labels=df_labels,column_labels=['scores','concedes']):
    """Train one XGBoost classifier per label column.

    Args:
        df_features: Feature frame used as training input.
        df_labels: Frame holding one column per label.
        column_labels: Names of the label columns to train a model for.
            The default list is only read, never modified.

    Returns:
        Dict mapping each label name to its fitted ``XGBClassifier``.
    """
    models = {}
    # Iterate the parameter itself. Previously the loop ran over the
    # notebook-level columns_labels and reused the parameter's name as loop
    # variable, so a caller-supplied list was silently ignored.
    for label in column_labels:
        model = XGBClassifier(
            eval_metric='logloss',
            use_label_encoder=False
        )
        model.fit(df_features, df_labels[label])
        models[label] = model
    print('train_model is done!')
    return models
models = train_model()

train_model is done!
Wall time: 31.2 s


## Estimate probabilities

The following cell predicts the labels for the game states using the trained XGBoost classifier. For each label:
1. Retrieve the model for the label;
2. Estimate the probabilities of the labels being `False` and `True` given the computed features;
3. Keep the probabilities for the `True` label;
4. Store the probabilities as a `Series` object in the `dfs_predictions` `dict`.

In [154]:
def predict_model(models=models, columns_label=columns_labels, df_features=df_features):
    """Estimate the probability of each label being ``True``.

    Args:
        models: Dict mapping label name to a fitted classifier.
        columns_label: Names of the labels to predict.
        df_features: Feature frame to predict on.

    Returns:
        DataFrame with one column per label holding the second column of
        ``predict_proba`` (the probability of the positive class).
    """
    dfs_predictions = {}
    # Use the columns_label argument; previously it was accepted but the loop
    # ran over the notebook-level columns_labels instead.
    for label in columns_label:
        probabilities = models[label].predict_proba(df_features)
        dfs_predictions[label] = pd.Series(probabilities[:, 1])
    df_predictions = pd.concat(dfs_predictions, axis=1)
    print('predict_model is done!')
    return df_predictions


df_predictions = predict_model()


predict_model is done!


In [155]:
df_predictions.head(10)

Unnamed: 0,scores,concedes
0,0.000169,0.000454
1,0.00054,0.000671
2,0.000685,0.001587
3,0.002187,0.000697
4,0.002197,0.00034
5,0.004104,3e-05
6,0.002794,3.5e-05
7,0.000152,0.00472
8,0.002308,9.9e-05
9,0.001126,4.8e-05


Store the predictions per game.

In [156]:
def store_predictions_per_game(df_predictions=df_predictions,folder_path='',result_file_spadl=result_file_spadl,current_competition=current_competition):
    """Attach game ids to the predictions and store them per game.

    The ``game_id`` column of every game's actions is read from the SPADL
    file in the order of the notebook-level ``df_games`` and placed next to
    ``df_predictions`` by position, so ``df_predictions`` must hold one row
    per action in that same order. Each game's ``columns_labels`` columns are
    written to ``<current_competition>_predictions.h5`` under ``game_<game_id>``.

    Returns:
        The predictions of all games with an added ``game_id`` column.
        (Previously the loop variable overwrote ``df_predictions`` and only
        the last game's rows were returned.)
    """
    dfs_game_ids = []
    for _, game in tqdm(df_games.iterrows(), total=len(df_games)):
        game_id = game['game_id']
        df_actions = pd.read_hdf(folder_path+result_file_spadl, key=f'actions/game_{game_id}')
        dfs_game_ids.append(df_actions['game_id'])
    df_game_ids = pd.concat(dfs_game_ids, axis=0).astype('int').reset_index(drop=True)

    df_all_predictions = pd.concat([df_predictions, df_game_ids], axis=1)
    predictions_store = folder_path+current_competition+'_predictions.h5'
    # A separate name for the per-game slice keeps the full frame intact for
    # the return value.
    for game_id, df_game_predictions in tqdm(df_all_predictions.groupby('game_id')):
        df_game_predictions = df_game_predictions.reset_index(drop=True)
        df_game_predictions[columns_labels].to_hdf(predictions_store, key=f'game_{game_id}')
    return df_all_predictions
df_predictions = store_predictions_per_game()

HBox(children=(IntProgress(value=0, max=51), HTML(value='')))




HBox(children=(IntProgress(value=0, max=51), HTML(value='')))




In [157]:
df_predictions.head(10)

Unnamed: 0,scores,concedes,game_id
0,0.000169,0.000454,1694440
1,0.00054,0.000671,1694440
2,0.000685,0.001587,1694440
3,0.002187,0.000697,1694440
4,0.002197,0.00034,1694440
5,0.004104,3e-05,1694440
6,0.002794,3.5e-05,1694440
7,0.000152,0.00472,1694440
8,0.002308,9.9e-05,1694440
9,0.001126,4.8e-05,1694440


# Value on-the-ball actions

In [158]:
def calculate_values(df_predictions=df_predictions,folder_path='',result_file_spadl=result_file_spadl,current_competition=current_competition):
    """Compute the VAEP values of all actions and write them to a CSV file.

    For every game in the notebook-level ``df_games`` the actions are read
    from the SPADL file, left-merged with the action type, result, body
    part, player and team tables, and combined with the game's stored
    predictions and the values returned by ``value``.

    Note: the ``df_predictions`` argument is not read; the predictions are
    loaded per game from ``<current_competition>_predictions.h5``.

    Returns:
        One frame with all games, sorted by game, period and time; the same
        frame is saved as ``<current_competition>_values.csv``.
    """
    spadl_store = folder_path+result_file_spadl
    predictions_store = folder_path+current_competition+'_predictions.h5'

    df_players = pd.read_hdf(spadl_store, key='players')
    df_teams = pd.read_hdf(spadl_store, key='teams')
    lookup_tables = (df_actiontypes, df_results, df_bodyparts, df_players, df_teams)

    dfs_values = []
    for _, game in tqdm(df_games.iterrows(), total=len(df_games)):
        game_id = game['game_id']
        actions = pd.read_hdf(spadl_store, key=f'actions/game_{game_id}')
        for lookup in lookup_tables:
            actions = actions.merge(lookup, how='left')
        actions = actions.reset_index(drop=True)

        game_predictions = pd.read_hdf(predictions_store, key=f'game_{game_id}')
        game_values = value(actions, game_predictions['scores'], game_predictions['concedes'])

        dfs_values.append(pd.concat([actions, game_predictions, game_values], axis=1))

    df_values = pd.concat(dfs_values)
    df_values = df_values.sort_values(['game_id', 'period_id', 'time_seconds'])
    df_values = df_values.reset_index(drop=True)
    df_values.to_csv(folder_path+current_competition+'_values.csv', index=False)
    return df_values
df_values=calculate_values()

HBox(children=(IntProgress(value=0, max=51), HTML(value='')))




In [159]:
df_values[
    ['short_name', 'scores', 'concedes', 'offensive_value', 'defensive_value', 'vaep_value']
].head(10)

Unnamed: 0,short_name,scores,concedes,offensive_value,defensive_value,vaep_value
0,O. Giroud,0.001204,2.3e-05,0.0,-0.0,0.0
1,A. Griezmann,0.001902,5.3e-05,0.000699,-3e-05,0.000669
2,N. Kanté,0.000619,0.000156,-0.001284,-0.000104,-0.001387
3,L. Koscielny,0.001191,0.000832,0.000572,-0.000676,-0.000103
4,P. Evra,0.001004,0.000136,0.001004,-0.000136,0.000868
5,C. Săpunaru,0.0002,0.000455,6.3e-05,0.000549,0.000612
6,C. Săpunaru,4.8e-05,0.018807,-0.000152,-0.018352,-0.018504
7,B. Matuidi,0.004314,0.000202,-0.014493,-0.000154,-0.014648
8,C. Tătărușanu,0.005392,0.0007,0.00519,0.003614,0.008804
9,C. Tătărușanu,0.004101,0.001737,-0.001292,-0.001037,-0.002329


# Rate players

In [184]:
result_file_spadl

'spadl_European_Championship.h5'

In [203]:
# Value files written by earlier runs of this notebook, one per league.
dfs = [
    'England_values.csv', 'France_values.csv', 'Germany_values.csv',
    'Italy_values.csv', 'Spain_values.csv',
]
# The loaded frames, in the same order as the file names above.
lst = [pd.read_csv(csv_name) for csv_name in dfs]
    

In [3]:
# The five domestic leagues, and the SPADL file of each ('spadl_<league>.h5').
competitions_leagues = ['England', 'France', 'Germany', 'Italy', 'Spain']
spadle = [f'spadl_{league}.h5' for league in competitions_leagues]


## Rate according to total VAEP value

In [331]:
def calculate_ranking(folder_path='',result_file_spadl=result_file_spadl,current_competition=current_competition,df_values=df_values):
    """Rank players by VAEP value per 90 minutes and write the ranking to CSV.

    Args:
        folder_path: Prefix prepended to every file name.
        result_file_spadl: SPADL HDF5 file providing ``player_games`` and ``games``.
        current_competition: Competition name; selects the minutes threshold
            and names the output file ``<current_competition>_ranking_p90.csv``.
        df_values: Action values with ``player_id``, ``team_name``,
            ``short_name`` and ``vaep_value`` columns.

    Returns:
        The ranking, restricted to players above the minutes threshold and
        sorted by ``vaep_rating`` in descending order.
    """
    df_ranking = (df_values[['player_id', 'team_name', 'short_name', 'vaep_value']]
        .groupby(['player_id', 'team_name', 'short_name'])
        .agg(vaep_count=('vaep_value', 'count'), vaep_sum=('vaep_value', 'sum'))
        .sort_values('vaep_sum', ascending=False)
        .reset_index()
    )
    df_player_games = pd.read_hdf(folder_path+result_file_spadl, key='player_games')
    # Only the games table is needed here. folder_path is forwarded so both
    # reads hit the same file (it used to be dropped for this call).
    df_games, _, _, _ = read_spadl_keys(result_file_spadl, folder_path)

    df_player_games = df_player_games[df_player_games['game_id'].isin(df_games['game_id'])]
    # Total minutes played per player.
    df_minutes_played = (df_player_games[['player_id', 'minutes_played']]
        .groupby('player_id')
        .sum()
        .reset_index())
    df_ranking_p90 = df_ranking.merge(df_minutes_played)

    # Leagues require more than 1800 minutes played, other competitions more than 450.
    min_minutes = 1800 if current_competition in competitions_leagues else 450
    # assign() builds a new frame; the rating used to be assigned on a
    # filtered slice, which triggers pandas' SettingWithCopyWarning.
    df_ranking_p90 = (df_ranking_p90[df_ranking_p90['minutes_played'] > min_minutes]
        .assign(vaep_rating=lambda df: df['vaep_sum'] * 90 / df['minutes_played'])
        .sort_values('vaep_rating', ascending=False)
    )
    df_ranking_p90.to_csv(folder_path+current_competition+'_ranking_p90.csv', index=False)
    return df_ranking_p90
df_ranking_p90 = calculate_ranking()

read_spadl_keys done it's work


In [332]:
# Rank every league with its own SPADL file and value frame. The competition
# name is taken from competitions_leagues: the loop used to index
# `competitions`, a different list whose length and content depend on which
# cell defined it last.
for league, spadl_file, league_values in zip(competitions_leagues, spadle, lst):
    df = calculate_ranking(result_file_spadl=spadl_file, current_competition=league, df_values=league_values)

read_spadl_keys done it's work
read_spadl_keys done it's work
read_spadl_keys done it's work
read_spadl_keys done it's work
read_spadl_keys done it's work


In [333]:
df_ranking_p90.head(10)

Unnamed: 0,player_id,team_name,short_name,vaep_count,vaep_sum,minutes_played,vaep_rating
0,8278.0,Wales,G. Bale,347,3.721007,551.009829,0.607776
5,25714.0,France,D. Payet,438,2.458752,516.544493,0.4284
3,3682.0,France,A. Griezmann,347,2.536231,538.531925,0.423858
2,70134.0,Portugal,Rui Patrício,252,2.939384,653.390335,0.40488
4,14723.0,Germany,T. Kroos,751,2.500215,559.043131,0.402508
7,26010.0,France,O. Giroud,163,1.948772,459.977108,0.381301
6,7926.0,Portugal,Nani,319,2.022372,629.940981,0.288937
14,14716.0,Germany,J. Boateng,532,1.544859,487.526462,0.285189
17,37762.0,Portugal,William Carvalho,397,1.477281,468.631873,0.28371
18,28907.0,Portugal,Raphaël Guerreiro,407,1.421805,467.415754,0.273766


# Dashboard 

In [4]:
# Competitions shown in the dashboard. The names are used as file-name
# prefixes ('<name>_ranking_p90.csv', '<name>_values.csv') and as dict keys
# below. NOTE: this rebinds `competitions` from the conversion section.
competitions = [
    'England',
    'France',
    'Germany',
    'Italy',
    'Spain',
    'European_Championship',
    'World_Cup'
]

In [5]:
import pandas as pd

def preprare_data(competitions=competitions):
    """Load the ranking and value CSV files of every competition.

    Returns:
        Tuple of four dicts keyed by competition name: the ranking frame,
        the values frame, the first ten ``vaep_rating`` entries of the
        ranking and the first ten ``short_name`` entries of the ranking.
    """
    folder_path = ''
    competitions_ranking = {}
    competitions_values = {}
    competitions_vaep_rating = {}
    competitions_player_list = {}
    for name in competitions:
        ranking = pd.read_csv(folder_path+name+'_ranking_p90.csv')
        competitions_ranking[name] = ranking
        competitions_values[name] = pd.read_csv(folder_path+name+'_values.csv')
        # The ranking file is stored sorted, so the first rows are the top players.
        competitions_vaep_rating[name] = ranking['vaep_rating'][:10].to_list()
        competitions_player_list[name] = ranking['short_name'][:10].to_list()
    return competitions_ranking,competitions_values,competitions_vaep_rating,competitions_player_list
competitions_ranking,competitions_values,competitions_vaep_rating,competitions_player_list = preprare_data()


In [6]:
import plotly.graph_objects as go
import dash
import dash_html_components as html
import dash_core_components as dcc
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
import plotly.express as px

In [7]:
# Shared figure layout applied to the bar charts: near-zero margins and the
# same colour for the paper and plot background.
layout = go.Layout(
  margin=go.layout.Margin(
        l=2, # left margin
        r=0, # right margin
        b=0, # bottom margin
        t=0, # top margin
    ),paper_bgcolor='#002b36',plot_bgcolor='#002b36'
)

In [8]:
def fig_bars(competitions=competitions,competitions_player_list=competitions_player_list,competitions_vaep_rating=competitions_vaep_rating):
    """Build one rating chart (line plus bars) per competition.

    Returns:
        Dict mapping competition name to its plotly figure.
    """
    bar_figures = {}
    for name in competitions:
        players = competitions_player_list[name]
        ratings = competitions_vaep_rating[name]

        # The line trace's legend entry carries the competition name.
        fig = px.line(x=players, y=ratings, color=px.Constant(""),
                      labels=dict(x="Players", y="Vaep Rating", color=name))
        fig.add_bar(x=players, y=ratings, name="Player Rating")
        fig.update_layout(layout,font_color='white')
        fig.update_traces(marker_color='#a9bdbd', selector=dict(type='bar'))
        bar_figures[name] = fig
    return bar_figures

bar_figures = fig_bars()
         
                                                                                         

In [8]:
bar_figures['England'].show()   

In [9]:
# The five domestic leagues, and the SPADL file of each ('spadl_<league>.h5').
competitions_leagues = ['England', 'France', 'Germany', 'Italy', 'Spain']
spadle = [f'spadl_{league}.h5' for league in competitions_leagues]


In [10]:
# Prepare the data for the summary boxes, using competitions_values.


def competition_summary(competitions=competitions,competitions_values=competitions_values):
    """Compute the four summary figures shown for every competition.

    Goals are the actions of type ``shot``, ``shot_penalty`` or
    ``shot_freekick`` whose result is ``success``. The per-match averages
    divide by the number of distinct ``game_id`` values in the competition's
    data. The match count used to be hard-coded (306 for Germany, 380 for the
    other leagues, 64 for everything else), which is wrong for any
    competition with a different number of matches.

    NOTE(review): ``shots_per_goal`` counts only actions of type ``shot``
    in the numerator while the goals also include penalties and free kicks;
    kept as in the original - confirm this is intended.

    Args:
        competitions: Competition names to summarise.
        competitions_values: Dict mapping competition name to its values frame
            (needs ``type_name``, ``result_name`` and ``game_id`` columns).

    Returns:
        Tuple of four dicts keyed by competition name: total goals, shots per
        goal, average goals per match and average fouls per match (the last
        three rounded to two decimals).

    Raises:
        ZeroDivisionError: If a competition has no goals or no matches.
    """
    goal_types = ['shot', 'shot_penalty', 'shot_freekick']

    total_goals = {}
    shots_per_goal = {}
    Average_goals_per_match ={}
    Average_fouls_per_match = {}
    for name in competitions:
        df_competition = competitions_values[name]
        type_name = df_competition['type_name']
        is_success = df_competition['result_name'] == 'success'

        # Plain Python ints keep the results usable as Dash component children.
        goals = int((type_name.isin(goal_types) & is_success).sum())
        shots = int((type_name == 'shot').sum())
        fouls = int((type_name == 'foul').sum())
        matches = int(df_competition['game_id'].nunique())

        #c1
        total_goals[name] = goals
        #c2
        shots_per_goal[name] = round(shots/goals,2)
        #c3
        Average_goals_per_match[name] = round(goals/matches,2)
        #c4
        Average_fouls_per_match[name] = round(fouls/matches,2)

    return total_goals,shots_per_goal,Average_goals_per_match,Average_fouls_per_match

total_goals,shots_per_goal,Average_goals_per_match,Average_fouls_per_match = competition_summary()

In [13]:
shots_per_goal

{'England': 8.55,
 'France': 8.34,
 'Germany': 8.28,
 'Italy': 9.0,
 'Spain': 8.04,
 'European_Championship': 9.01,
 'World_Cup': 7.75}

In [12]:
from collections import defaultdict
# Radar charts for the best 3 players, using competitions_values.
def fig_player_radars(competitions_player_list=competitions_player_list,competitions_values=competitions_values,radars_figures=None):
    """Build a radar chart for each of the top three players of every competition.

    Each chart shows the player's number of fouls, passes (scaled down, see
    below), take-ons, tackles and shots. Players are selected by
    ``short_name``.

    Args:
        competitions_player_list: Dict mapping competition name to its ranked
            player names; the first three are used.
        competitions_values: Dict mapping competition name to its values frame.
        radars_figures: Optional dict to append the figures to. A new one is
            created when omitted. The old default was a single shared
            ``defaultdict``, so every further call kept appending to the
            figures of the previous calls.

    Returns:
        Dict mapping competition name to the list of figures, rank 1 first.
    """
    if radars_figures is None:
        radars_figures = defaultdict(list)

    # Colour per rank, first to third.
    rank_colors = ("#FFD700", "#C0C0C0", '#CD7F32')
    axis_labels = ['Fouls','Passes','Take on','Tackles', 'Shots']
    action_types = ['foul', 'pass', 'take_on', 'tackle', 'shot']

    for name in competitions:
        df_competition = competitions_values[name]
        # Pass counts are divided so they fit the scale of the other axes:
        # by 30 for leagues and by 60 for the other competitions.
        pass_divisor = 30 if name in competitions_leagues else 60

        for rank, player in enumerate(competitions_player_list[name][:3]):
            player_types = df_competition.loc[df_competition['short_name'] == player, 'type_name']
            counts = {action: int((player_types == action).sum()) for action in action_types}
            counts['pass'] = counts['pass'] / pass_divisor

            current_player_df = pd.DataFrame(dict(
                r=[counts[action] for action in action_types],
                theta=axis_labels))
            fig = px.line_polar(current_player_df, r='r', theta='theta', line_close=True,template="plotly_dark")

            color = rank_colors[rank]
            fig.update_traces(fill='toself',fillcolor=color,line_color=color,opacity=0.6)
            fig.update_layout(title="Rank : "+str(rank+1)+' '+player,font=dict(
                    family="Courier New, monospace",
                    size=18,
                    color=color
                ),paper_bgcolor='#073642',plot_bgcolor='#073642')
            # setdefault also works when the caller passes a plain dict.
            radars_figures.setdefault(name, []).append(fig)
    return radars_figures
radars_figures = fig_player_radars()

In [15]:
radars_figures['England'][0]

In [13]:
app = dash.Dash(external_stylesheets=[dbc.themes.SOLAR])
app.title = 'Player Ratings'


def _summary_card(title, value_id):
    """Build one summary box: a fixed heading above an (initially empty) H2 with the given id."""
    heading = html.H1(children=title, style={'font-weight': 'bold', 'color':'#839496', 'textAlign':'Center'})
    value_slot = html.H2(id=value_id, style={'font-weight': 'bold','color':'White', 'textAlign':'Center'})
    box = html.Div(
        children=[heading, value_slot],
        style={'backgroundColor':'#073642', 'border':'15px solid #073642', 'border-radius':'20px'},
        className='two columns',
    )
    return dbc.Col(box)


# Dropdown entries in display order; the option values are 1-based positions.
_dropdown_labels = (
    'English Premier League',
    'French  Ligue 1',
    'Germany Bundesliga',
    'Italy Serie A',
    'Spanish la liga',
    'Euro Cup 2016',
    'World Cup 2018',
)

_title_row = dbc.Row(
    dbc.Col(html.H1("Player Rating Dashboard", style={"color": "#839496", "fontSize": 65, "textAlign": "center", })))

_summary_row = dbc.Row([
    _summary_card('Goals', 'goals_Area'),
    _summary_card('Shots per Goal', 'shots_pg_Area'),
    _summary_card('Goals per Match', 'goals_pm_Area'),
    _summary_card('Fouls per Match', 'fouls_pm_Area'),
])

_dropdown_row = dbc.Row([
    dbc.Col(
        dcc.Dropdown(
            id='dropdown',
            optionHeight=40,
            placeholder='Choose Tournement',
            options=[
                {'label': label, 'value': position}
                for position, label in enumerate(_dropdown_labels, start=1)
            ],
            value=1,
        )
    ),
])

_bar_row = dbc.Row(dbc.Col([dcc.Graph(id="Bar_fig")]))

_radar_row = dbc.Row([
    dbc.Col([dcc.Graph(id=graph_id)])
    for graph_id in ("rank_1_fig", "rank_2_fig", "rank_3_fig")
])

app.layout = html.Div([_title_row, _summary_row, _dropdown_row, _bar_row, _radar_row])
@app.callback(
    [Output('Bar_fig', 'figure'),
     Output('rank_1_fig', 'figure'),
     Output('rank_2_fig', 'figure'),
     Output('rank_3_fig', 'figure'),
     Output('goals_Area', 'children'),
     Output('shots_pg_Area', 'children'),
     Output('goals_pm_Area', 'children'),
     Output('fouls_pm_Area', 'children')],
    Input('dropdown', 'value'))
def update(value):
    """Refresh every figure and summary card for the selected tournament.

    Parameters
    ----------
    value : int or None
        1-based position of the selected tournament in ``competitions``
        (the dropdown options use the values 1..7). A cleared dropdown
        (``None``) or a position outside ``competitions`` falls back to
        the first competition instead of raising.

    Returns
    -------
    list
        Eight items, in the same order as the ``Output`` declarations:
        the bar figure, the three radar figures, and the entries of
        ``total_goals``, ``shots_per_goal``, ``Average_goals_per_match``
        and ``Average_fouls_per_match`` for the chosen competition.
    """
    # Dropdown values start at 1 while positions in `competitions` start
    # at 0; anything unusable maps to the first competition.
    if value and 1 <= value <= len(competitions):
        index = value - 1
    else:
        index = 0

    # Resolve the competition key once and reuse it for every lookup.
    competition = competitions[index]
    radars = radars_figures[competition]

    return [
        bar_figures[competition],
        radars[0],
        radars[1],
        radars[2],
        total_goals[competition],
        shots_per_goal[competition],
        Average_goals_per_match[competition],
        Average_fouls_per_match[competition],
    ]
    

In [363]:
# !pip install django_plotly_dash==1.1.4
# !pip install --user channels
# !pip install --user bootstrap4
# !pip install django_plotly_dash==1.1.4
# !pip install  dpd_static_support==0.0.5
# !pip install dash_daq==0.3.1
# !pip install whitenoise==5.0.1

In [14]:
# Start the built-in Flask server for the Dash app. The call blocks this cell
# while the server is running (see the "Press CTRL+C to quit" notice below).
app.run_server()

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [13/Jan/2022 10:03:02] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [13/Jan/2022 10:03:03] "GET /_dash-component-suites/dash_renderer/prop-types@15.v1_9_1m1641045456.7.2.min.js HTTP/1.1" 200 -
127.0.0.1 - - [13/Jan/2022 10:03:03] "GET /_dash-component-suites/dash_renderer/react@16.v1_9_1m1641045456.14.0.min.js HTTP/1.1" 200 -
127.0.0.1 - - [13/Jan/2022 10:03:03] "GET /_dash-component-suites/dash_renderer/polyfill@7.v1_9_1m1641045456.8.7.min.js HTTP/1.1" 200 -
127.0.0.1 - - [13/Jan/2022 10:03:03] "GET /_dash-component-suites/dash_renderer/react-dom@16.v1_9_1m1641045456.14.0.min.js HTTP/1.1" 200 -
127.0.0.1 - - [13/Jan/2022 10:03:03] "GET /_dash-component-suites/dash_core_components/dash_core_components-shared.v1_16_0m1641045459.js HTTP/1.1" 200 -
127.0.0.1 - - [13/Jan/2022 10:03:03] "GET /_dash-component-suites/dash_core_components/dash_core_components.v1_16_0m1641045459.min.js HTTP/1.1" 200 -
127.0.0.1 - - [13/Jan/2022 1

1


127.0.0.1 - - [13/Jan/2022 10:03:06] "GET /_dash-component-suites/dash_core_components/async-plotlyjs.v1_16_0m1617903285.js HTTP/1.1" 200 -
127.0.0.1 - - [13/Jan/2022 10:03:07] "GET /_favicon.ico?v=1.20.0 HTTP/1.1" 200 -
127.0.0.1 - - [13/Jan/2022 11:09:45] "POST /_dash-update-component HTTP/1.1" 200 -


5
