## Football Analysis with Metrica Sports Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [2]:
game_id = 2
DATADIR = f'Data/Sample_Game_{game_id}/Sample_Game_{game_id}_RawEventsData.csv'
events = pd.read_csv(DATADIR)
events.head()

Unnamed: 0,Team,Type,Subtype,Period,Start Frame,Start Time [s],End Frame,End Time [s],From,To,Start X,Start Y,End X,End Y
0,Away,SET PIECE,KICK OFF,1,51,2.04,51,2.04,Player23,,,,,
1,Away,PASS,,1,51,2.04,87,3.48,Player23,Player20,0.5,0.5,0.4,0.51
2,Away,PASS,,1,146,5.84,186,7.44,Player20,Player18,0.43,0.5,0.44,0.22
3,Away,PASS,,1,248,9.92,283,11.32,Player18,Player17,0.47,0.19,0.31,0.28
4,Away,PASS,,1,316,12.64,346,13.84,Player17,Player16,0.29,0.32,0.26,0.58


In [3]:
events['Type'].value_counts()

PASS              964
CHALLENGE         311
RECOVERY          248
BALL LOST         233
SET PIECE          80
BALL OUT           49
SHOT               24
FAULT RECEIVED     20
CARD                6
Name: Type, dtype: int64

In [7]:
# Convert positions from Metrica Units to metres (Origin at Center Circle)
def to_metric_coordinates(data, field_dimen=(106,68.)):
    x_columns = [c for c in data.columns if c[-1].lower() == 'x']
    y_columns = [c for c in data.columns if c[-1].lower() == 'y']
    
    data[x_columns] = ( data[x_columns]-0.5 ) * field_dimen[0]
    data[y_columns] = -1 * ( data[y_columns]-0.5 ) * field_dimen[1]
    ''' 
    ------------ ***NOTE*** ------------
    Metrica actually define the origin at the *top*-left of the field, not the bottom-left, as discussed in the YouTube video. 
    I've changed the line above to reflect this. It was originally:
    data[y_columns] = ( data[y_columns]-0.5 ) * field_dimen[1]
    ------------ ********** ------------
    '''
    return data

In [10]:
events = to_metric_coordinates(events)
events.head()

Unnamed: 0,Team,Type,Subtype,Period,Start Frame,Start Time [s],End Frame,End Time [s],From,To,Start X,Start Y,End X,End Y
0,Away,SET PIECE,KICK OFF,1,51,2.04,51,2.04,Player23,,,,,
1,Away,PASS,,1,51,2.04,87,3.48,Player23,Player20,-5671.0,-2278.0,-124772.6,-5422.32
2,Away,PASS,,1,146,5.84,186,7.44,Player20,Player18,-89042.12,-2278.0,-77131.96,85762.96
3,Away,PASS,,1,248,9.92,283,11.32,Player18,Player17,-41401.48,95195.92,-231964.04,66897.04
4,Away,PASS,,1,316,12.64,346,13.84,Player17,Player16,-255784.36,54319.76,-291514.84,-27432.56


In [13]:
# By Team
Home = events[events['Team'] == 'Home']
Away = events[events['Team'] == 'Away']
Home.head()

Unnamed: 0,Team,Type,Subtype,Period,Start Frame,Start Time [s],End Frame,End Time [s],From,To,Start X,Start Y,End X,End Y
8,Home,CHALLENGE,GROUND-WON,1,504,20.16,504,20.16,Player3,,137250.92,-134339.44,,
9,Home,RECOVERY,INTERCEPTION,1,504,20.16,504,20.16,Player3,,137250.92,-134339.44,,
10,Home,BALL OUT,,1,504,20.16,534,21.36,Player3,,137250.92,-134339.44,41969.64,-162638.32
18,Home,CHALLENGE,AERIAL-WON,1,1084,43.36,1084,43.36,Player6,,6239.16,-80886.0,,
19,Home,RECOVERY,INTERCEPTION,1,1084,43.36,1084,43.36,Player6,,6239.16,-80886.0,,


In [14]:
Away.head()

Unnamed: 0,Team,Type,Subtype,Period,Start Frame,Start Time [s],End Frame,End Time [s],From,To,Start X,Start Y,End X,End Y
0,Away,SET PIECE,KICK OFF,1,51,2.04,51,2.04,Player23,,,,,
1,Away,PASS,,1,51,2.04,87,3.48,Player23,Player20,-5671.0,-2278.0,-124772.6,-5422.32
2,Away,PASS,,1,146,5.84,186,7.44,Player20,Player18,-89042.12,-2278.0,-77131.96,85762.96
3,Away,PASS,,1,248,9.92,283,11.32,Player18,Player17,-41401.48,95195.92,-231964.04,66897.04
4,Away,PASS,,1,316,12.64,346,13.84,Player17,Player16,-255784.36,54319.76,-291514.84,-27432.56


In [16]:
Home['Type'].value_counts()

PASS              543
CHALLENGE         160
RECOVERY          135
BALL LOST         120
SET PIECE          34
BALL OUT           27
SHOT               13
FAULT RECEIVED      9
CARD                3
Name: Type, dtype: int64

In [18]:
Away['Type'].value_counts()

PASS              421
CHALLENGE         151
RECOVERY          113
BALL LOST         113
SET PIECE          46
BALL OUT           22
SHOT               11
FAULT RECEIVED     11
CARD                3
Name: Type, dtype: int64