# Graph Neural Networks in football analytics

The aim of this notebook is to explore and prototype methods for applying graph neural networks (GNNs) to football analytics.

In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import sys
import warnings
warnings.filterwarnings('ignore')
sys.path.append("../source")

from bokeh.io import output_notebook, show
output_notebook()


# Match analytics library (In progress)
from Match_Analytics import Match
from Tracking_Dynamics import calc_player_norm_positions
from Tracking_Visualization import plot_sliding_window, play_match,  draw_pitch
from Tracking_Filters import possesion_filter, ball_position_filter, time_window
from Tracking_Statistics import bivariate_normal_distribution

#GNN imports

from spektral.data import Dataset, Graph


### Pass network dataset build

The first step is to read the data from all the matches available and build the pass network dataset to train the GNN.

In [51]:
# Load one Match object per match id from the "metrica-sports" data source.
m = [
    Match(data_source="metrica-sports", match_id=match_id)
    for match_id in (1, 2)
]

Initializing match: 1

Reading team: home
Reading team: away
Filtering dead time...

Match preprocessed successfully.

Initializing match: 2

Reading team: home
Reading team: away
Filtering dead time...

Match preprocessed successfully.



In [59]:
# Display the event table of the first loaded match (index 0 of `m`).
m[0].events

Unnamed: 0,Team,Type,Subtype,Period,Start Frame,Start Time [s],End Frame,End Time [s],From,To,Start X,Start Y,End X,End Y
0,Away,SET PIECE,KICK OFF,1,1,0.04,0,0.00,Away_19,,,,,
1,Away,PASS,,1,1,0.04,3,0.12,Away_19,Away_21,-5.30,7.48,5.30,4.76
2,Away,PASS,,1,3,0.12,17,0.68,Away_21,Away_15,5.30,4.76,8.48,19.72
3,Away,PASS,,1,45,1.80,61,2.44,Away_15,Away_19,5.30,21.08,-5.30,12.92
4,Away,PASS,,1,77,3.08,96,3.84,Away_19,Away_21,-5.30,12.24,-1.06,2.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1740,Home,PASS,,2,143361,5734.44,143483,5739.32,Home_12,Home_13,-10.60,-11.56,32.86,30.60
1741,Home,PASS,,2,143578,5743.12,143593,5743.72,Home_13,Home_4,43.46,25.84,38.16,12.92
1742,Home,BALL LOST,INTERCEPTION,2,143598,5743.92,143618,5744.72,Home_4,,39.22,12.92,45.58,7.48
1743,Away,RECOVERY,BLOCKED,2,143617,5744.68,143617,5744.68,Away_16,,47.70,8.16,,


In [60]:
# One row per distinct PASS event of the first match, with its multiplicity
# in "Freq".
# NOTE(review): groupby drops rows whose key columns contain NaN by default,
# so passes with a missing field are excluded here - confirm this is intended.
final_data = (
    m[0].events[m[0].events['Type'] == 'PASS']
    .groupby(['From',
              'To',
              'Start Time [s]',
              'End Time [s]',
              'Start X',
              'Start Y',
              'End X',
              'End Y'])
    .size()
    .reset_index(name="Freq")
)

# Number of passes per (From, To) pair. Only "Freq" is aggregated: summing
# every column would also add up timestamps and pitch coordinates, which
# produces meaningless values.
size = final_data.groupby(['From', 'To'], as_index=False)['Freq'].sum()

In [64]:
# Sorted labels of every player that appears as passer or receiver.
vals = np.unique(size[['From', 'To']])

# Maps player labels to their row/column position in the matrix.
f = pd.Index(vals).get_indexer

# Fill a plain NumPy array and wrap it afterwards. Assigning through
# `DataFrame.values` only works while it happens to be a writable view of
# the frame's data; with pandas Copy-on-Write the array is read-only.
pass_counts = np.zeros((len(vals), len(vals)), dtype=np.int64)
pass_counts[f(size.From), f(size.To)] = size.Freq.values

# Pass-count matrix: rows are passers ("From"), columns are receivers ("To").
df2 = pd.DataFrame(pass_counts, index=vals, columns=vals)
df2

Unnamed: 0,Away_15,Away_16,Away_17,Away_18,Away_19,Away_20,Away_21,Away_22,Away_23,Away_24,...,Home_13,Home_14,Home_2,Home_3,Home_4,Home_5,Home_6,Home_7,Home_8,Home_9
Away_15,0,6,5,3,3,1,8,2,1,1,...,0,0,0,0,0,0,0,0,0,0
Away_16,9,0,3,2,2,5,10,2,0,1,...,0,0,0,0,0,0,0,0,0,0
Away_17,2,8,0,4,4,4,5,5,0,0,...,0,0,0,0,0,0,0,0,0,0
Away_18,3,1,3,0,7,0,3,3,0,2,...,0,0,0,0,0,0,0,0,0,0
Away_19,1,3,5,5,0,4,13,1,2,4,...,0,0,0,0,0,0,0,0,0,0
Away_20,1,5,1,4,7,0,7,6,1,0,...,0,0,0,0,0,0,0,0,0,0
Away_21,6,5,10,6,11,7,0,9,5,0,...,0,0,0,0,0,0,0,0,0,0
Away_22,0,2,7,0,3,8,9,0,2,0,...,0,0,0,0,0,0,0,0,0,0
Away_23,1,0,1,2,3,0,4,1,0,1,...,0,0,0,0,0,0,0,0,0,0
Away_24,1,0,0,2,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
class MyDataset(Dataset):
    """
    A dataset of five random graphs.

    `download` writes the graphs as `.npz` archives under `self.path`;
    `read` loads those archives back as `Graph` objects.

    Parameters
    ----------
    nodes : int
        Number of nodes of every generated graph.
    feats : int
        Number of features per node.
    **kwargs
        Forwarded unchanged to `Dataset.__init__`.

    Notes
    -----
    NOTE(review): `self.path` and the moment `download`/`read` are invoked
    are decided by the base class, which is not visible here. If `download`
    only runs when `self.path` is missing, graphs cached by an earlier run
    with different `nodes`/`feats` would be reused silently - confirm.
    """

    # Number of graphs written by `download` and loaded by `read`.
    _N_RANDOM_GRAPHS = 5

    def __init__(self, nodes, feats, **kwargs):
        self.nodes = nodes
        self.feats = feats

        # Set the attributes first: `download` reads them, and the base
        # initialiser presumably triggers it - TODO confirm.
        super().__init__(**kwargs)

    def _graph_file(self, index):
        """Return the path of the `.npz` archive holding graph `index`."""
        return os.path.join(self.path, f'graph_{index}.npz')

    def download(self):
        """Generate the random graphs and store them under `self.path`."""
        # makedirs instead of mkdir: missing parent directories are created
        # and an already existing directory is not an error.
        os.makedirs(self.path, exist_ok=True)

        for i in range(self._N_RANDOM_GRAPHS):
            x = np.random.rand(self.nodes, self.feats)              # node features
            a = np.random.randint(0, 2, (self.nodes, self.nodes))   # 0/1 matrix
            y = np.random.randint(0, 2)                             # 0/1 label

            np.savez(self._graph_file(i), x=x, a=a, y=y)

    def read(self):
        """Load the stored archives and return them as a list of `Graph`."""
        output = []

        for i in range(self._N_RANDOM_GRAPHS):
            # The context manager closes the archive's file handle once the
            # arrays have been extracted.
            with np.load(self._graph_file(i)) as data:
                output.append(
                    Graph(x=data['x'], a=data['a'], y=data['y'])
                )

        return output

In [39]:
# Build the toy dataset: graphs with 3 nodes and 2 features per node.
dataset = MyDataset(nodes=3, feats=2)

In [49]:
# Show the `a` array (the random nodes x nodes 0/1 matrix) of the graph at index 2.
dataset[2].a

array([[1, 0, 1],
       [1, 0, 1],
       [1, 0, 0]])