# Graph Neural Networks in football analytics

The aim of this notebook is to explore and prototype methods for applying graph neural networks (GNNs) to football analytics.

In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import sys
import warnings
warnings.filterwarnings('ignore')
sys.path.append("../source")

from bokeh.io import output_notebook, show
output_notebook()


# Match analytics library (In progress)
from Match_Analytics import Match
from Tracking_Dynamics import calc_player_norm_positions
from Tracking_Visualization import plot_sliding_window, play_match,  draw_pitch
from Tracking_Filters import possesion_filter, ball_position_filter, time_window
from Tracking_Statistics import bivariate_normal_distribution

#GNN imports

from spektral.data import Dataset, Graph


### Pass network dataset build

The first step is to read the data from all the available matches and build the pass-network dataset that will be used to train the GNN.

In [51]:
# Build one Match object per match id (matches 1 and 2 of the
# "metrica-sports" data source); m[0] is match 1 and m[1] is match 2.
m = [
    Match(data_source="metrica-sports", match_id=match_id)
    for match_id in (1, 2)
]

Initializing match: 1

Reading team: home
Reading team: away
Filtering dead time...

Match preprocessed successfully.

Initializing match: 2

Reading team: home
Reading team: away
Filtering dead time...

Match preprocessed successfully.



In [79]:
# Build the pass table of the first match: one row per (From, To) player pair
# with the number of passes ("Freq") and the per-pair mean of each pass feature.

# Pass attributes that are averaged per (From, To) pair.
pass_feature_cols = [
    'Start Time [s]',
    'End Time [s]',
    'Start X',
    'Start Y',
    'End X',
    'End Y',
]

match_events = m[0].events
pass_events = match_events[match_events['Type'] == 'PASS']

# One row per distinct pass record, with the number of times it occurs.
pass_rows = (
    pass_events
    .groupby(['From', 'To'] + pass_feature_cols)
    .size()
    .reset_index(name="Freq")
)

# Per-pair totals: summed features and summed pass count.
pair_sums = pass_rows.groupby(['From', 'To']).sum().reset_index()

# Turn the summed features into means by dividing by the pair's pass count.
# The original cell divided by `size['Freq']`, but `size` is never defined in
# this notebook (leftover kernel state), so it failed on a fresh kernel.
data = pair_sums.assign(
    **{col: pair_sums[col] / pair_sums['Freq'] for col in pass_feature_cols}
)

In [130]:
# Edge list of the pass network: passer, receiver and pass count per pair.
A = data.loc[:, ["From", "To", "Freq"]]

In [133]:
# Sorted unique player identifiers that appear as passer or receiver.
# Computed in this cell so that displaying it does not depend on a later cell
# having been executed first (the original raised NameError on a fresh kernel).
vals = np.unique(data[['From', 'To']])
vals

array(['Away_15', 'Away_16', 'Away_17', 'Away_18', 'Away_19', 'Away_20',
       'Away_21', 'Away_22', 'Away_23', 'Away_24', 'Away_25', 'Away_26',
       'Away_27', 'Away_28', 'Home_1', 'Home_10', 'Home_11', 'Home_12',
       'Home_13', 'Home_14', 'Home_2', 'Home_3', 'Home_4', 'Home_5',
       'Home_6', 'Home_7', 'Home_8', 'Home_9'], dtype=object)

In [117]:
# Pass-count adjacency matrix: rows are passers, columns are receivers.

# Sorted unique player identifiers; they label both axes of the matrix.
vals = np.unique(data[['From', 'To']])

# Maps player identifiers to their row/column position in the matrix.
f = pd.Index(vals).get_indexer

# Fill a plain NumPy array and wrap it afterwards. Writing through
# `DataFrame.values` is not guaranteed to modify the frame (it can be a copy or
# read-only, e.g. under pandas Copy-on-Write). The original also indexed with
# `size.From` / `size.To`, but `size` is never defined in this notebook; the
# pairs come from `data`, the same frame that provides `Freq`.
adjacency = np.zeros((len(vals), len(vals)), dtype=np.int64)
adjacency[f(data.From), f(data.To)] = data.Freq.values

df2 = pd.DataFrame(adjacency, index=vals, columns=vals)
df2

Unnamed: 0,Away_15,Away_16,Away_17,Away_18,Away_19,Away_20,Away_21,Away_22,Away_23,Away_24,...,Home_13,Home_14,Home_2,Home_3,Home_4,Home_5,Home_6,Home_7,Home_8,Home_9
Away_15,0,6,5,3,3,1,8,2,1,1,...,0,0,0,0,0,0,0,0,0,0
Away_16,9,0,3,2,2,5,10,2,0,1,...,0,0,0,0,0,0,0,0,0,0
Away_17,2,8,0,4,4,4,5,5,0,0,...,0,0,0,0,0,0,0,0,0,0
Away_18,3,1,3,0,7,0,3,3,0,2,...,0,0,0,0,0,0,0,0,0,0
Away_19,1,3,5,5,0,4,13,1,2,4,...,0,0,0,0,0,0,0,0,0,0
Away_20,1,5,1,4,7,0,7,6,1,0,...,0,0,0,0,0,0,0,0,0,0
Away_21,6,5,10,6,11,7,0,9,5,0,...,0,0,0,0,0,0,0,0,0,0
Away_22,0,2,7,0,3,8,9,0,2,0,...,0,0,0,0,0,0,0,0,0,0
Away_23,1,0,1,2,3,0,4,1,0,1,...,0,0,0,0,0,0,0,0,0,0
Away_24,1,0,0,2,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [124]:
# Run bivariate_normal_distribution on the tracking data of both teams of the
# first match and display the away-team result.
first_match = m[0]

# NOTE(review): the home-team return value is not bound to a name; the call is
# kept in case the function has side effects - confirm whether it is needed.
bivariate_normal_distribution(first_match.tracking_home, first_match.home_players)

a = bivariate_normal_distribution(first_match.tracking_away, first_match.away_players)
a

Unnamed: 0,player_number,player,x_mean,y_mean,normx_mean,normy_mean,cov_x_std,cov_y_std,cov_angle,x_std,y_std,cov_normx_std,cov_normy_std,cov_norm_angle,normx_std,normy_std
0,15,Away_15,14.411639,11.357057,0.555538,0.941425,19.017633,8.200854,0.044696,19.002173,8.236612,0.356465,0.403915,0.283869,0.360413,0.400396
1,16,Away_16,15.966385,-0.941791,0.636475,-0.022351,15.946174,7.281258,0.082624,15.903133,7.374792,0.25838,0.386294,0.041456,0.258654,0.386111
2,17,Away_17,7.871638,6.970445,0.182551,0.60974,23.31269,17.264107,0.534389,21.904702,19.018805,0.436923,1.3402,-0.22187,0.518299,1.310879
3,18,Away_18,-3.424985,8.600321,-0.54463,0.740739,20.718248,15.564909,-0.091163,20.680816,15.61461,0.426183,1.120938,0.089251,0.436087,1.117122
4,19,Away_19,-0.633664,3.922333,-0.416383,0.370813,19.135164,12.600325,0.09036,19.090987,12.667158,0.305576,0.692921,-0.053789,0.307399,0.692114
5,20,Away_20,10.278246,-7.145387,0.289832,-0.517058,17.978829,10.123402,-0.034967,17.971324,10.136719,0.284039,0.58342,-0.244819,0.309734,0.570195
6,21,Away_21,2.021926,-3.734317,-0.219197,-0.250701,19.311157,13.401259,-0.182567,19.145452,13.636943,0.423095,0.742656,0.224655,0.444406,0.730103
7,22,Away_22,4.0273,-21.334377,-0.13848,-1.553007,21.763365,7.546773,-0.19723,21.392615,8.541303,0.54222,0.390003,-0.677578,0.488092,0.455929
8,23,Away_23,-13.731706,-2.130014,-1.247092,-0.126242,17.478597,12.889571,0.019095,17.477144,12.891542,0.38758,0.821538,0.059732,0.389984,0.820399
9,24,Away_24,-13.053088,-0.882836,-1.267668,0.037222,17.733821,11.455665,-0.00965,17.73334,11.45641,0.393704,0.736409,-0.107038,0.399278,0.733401


In [123]:
class MyDataset(Dataset):
    """
    A toy dataset of randomly generated graphs, stored on disk as ``.npz`` files.

    Each graph has a ``(nodes, feats)`` matrix of uniform random node features,
    a random 0/1 ``(nodes, nodes)`` adjacency matrix and a random 0/1 label.

    Parameters
    ----------
    nodes : int
        Number of nodes in every graph.
    feats : int
        Number of features per node.
    n_graphs : int, keyword-only, default 5
        Number of graphs to generate and read.
    seed : int or None, keyword-only, default None
        Seed for the generator used in ``download``. With ``None`` the global
        ``np.random`` state is used, as in the original implementation.
    **kwargs
        Forwarded unchanged to ``Dataset.__init__``.

    NOTE(review): per spektral's Dataset documentation, ``download`` only runs
    when ``self.path`` does not exist yet, so files written by an earlier run
    are reused even if ``nodes``/``feats``/``n_graphs`` differ - confirm, and
    clear the dataset directory when changing these parameters.
    """
    def __init__(self, nodes, feats, *, n_graphs=5, seed=None, **kwargs):
        # These attributes are read by download() and read(), so they are
        # assigned before the parent initialiser runs (same order as before).
        self.nodes = nodes
        self.feats = feats
        self.n_graphs = n_graphs
        self.seed = seed

        super().__init__(**kwargs)

    def download(self):
        """Generate ``n_graphs`` random graphs and save them under ``self.path``."""
        # makedirs also creates missing parent directories, and does not fail
        # if the directory already exists (os.mkdir raised in both cases).
        os.makedirs(self.path, exist_ok=True)

        # A dedicated generator makes the data reproducible when a seed is
        # given; otherwise fall back to the module-level np.random functions.
        rng = np.random if self.seed is None else np.random.RandomState(self.seed)

        # Write one .npz file per graph
        for i in range(self.n_graphs):
            x = rng.rand(self.nodes, self.feats)
            a = rng.randint(0, 2, (self.nodes, self.nodes))
            y = rng.randint(0, 2)

            # np.savez appends the ".npz" extension to the file name
            filename = os.path.join(self.path, f'graph_{i}')
            np.savez(filename, x=x, a=a, y=y)

    def read(self):
        """Load the saved graphs and return them as a list of ``Graph`` objects."""
        output = []

        for i in range(self.n_graphs):
            # The context manager closes the archive once the arrays are read
            with np.load(os.path.join(self.path, f'graph_{i}.npz')) as data:
                output.append(
                    Graph(x=data['x'], a=data['a'], y=data['y'])
                )

        return output

In [39]:
dataset = MyDataset(3, 2)

In [49]:
dataset[2].a

array([[1, 0, 1],
       [1, 0, 1],
       [1, 0, 0]])