# Visualise StatsPerform tracking data

We would like to showcase how StatsPerform's SportVU tracking data can be visualised.
The tracking data has been preprocessed.
Further, we show how the event data can be combined with the tracking data.

First, let's import some required packages.

In [2]:
import os
import pandas as pd
import numpy as np
import ipywidgets as widgets
from bqplot import *
import qgrid

TypeError: register() missing 1 required positional argument: 'widget'

Next we need to add the path where the following files can be found:
- 1_tracking.parquet --> contains the tracking data of the game 
- event_names.csv --> contains the descriptions for the event types; makes it more readable

Please add the path informaion to the variable **base_path**

In [5]:
# load data
current_directory = os.getcwd()
path_tracking = os.path.join(os.path.join(os.path.dirname(current_directory),'data'),"tracking_set_0")
print(path_tracking)
game_id = 1

path_events = os.path.join(os.path.join(os.path.dirname(current_directory),'data'),"first_10_events")
print(path_events)

# load the tracking data
df_tracking = pd.read_parquet(f'{path_tracking}/{game_id}_tracking.parquet')

# load event names
path_event_csv = os.path.join(os.path.dirname(current_directory),'data')
df_event_names = pd.read_csv(os.path.join(path_event_csv,'event_names.csv'))
dict_event_names = df_event_names.set_index('event_type_id').to_dict()['event_description']


c:\Users\Gabriel\OneDrive\Escritorio\SportsAnalyticsCourse\OptaForum\OptaChallenge_Clustering_Player_Styles\data\tracking_set_0
c:\Users\Gabriel\OneDrive\Escritorio\SportsAnalyticsCourse\OptaForum\OptaChallenge_Clustering_Player_Styles\data\first_10_events


In the following the event data is loaded from the Opta Vision dataset.
The event data can be synchronised with the tracking data using timeelapsed and current_phase.
Timeelapsed contains the time in seconds since the start of a half.
How you can determine the timeelapsed is shown in the function *load_event_data()* in the section event pre-processing.

In [6]:
# load event data
def load_event_data(file_name, base_path):
    # read in event file
    with open(f'{base_path}/{file_name}') as f:
        data=json.loads(f.read())

    f.close()
    
    # transform data into pandas dataframe
    df_events = pd.json_normalize(data['liveData']['event'])
    
    # preprocess event data and keep relevant information only
    # remove not required columns
    df_events = df_events[['typeId','contestantId','periodId','timeMin', 'timeSec','timeStamp','playerId','outcome']]

    # add timeelapsed to each event
    df_events['timestamp'] = pd.to_datetime(df_events.timeStamp).apply(lambda x: x.timestamp())

    df_events = df_events.query('periodId in [1,2]')

    def add_timeelapsed_to_events(df):
        start_time = df.query('typeId==32')['timestamp'].iloc[0]
        df['timestamp_new'] = np.int64((df['timestamp'] - start_time)*1000)

        df['timeelapsed'] = df['timestamp_new'].apply(lambda x: (40 * round(x/40))/1000)

        return df

    df_events = df_events.groupby('periodId').apply(add_timeelapsed_to_events)

    df_events = df_events.drop(columns=['timeStamp','timestamp','timestamp_new'])
    
    # rename some columns
    df_events = df_events.rename(columns=
        {
            'periodId':'current_phase',
            'typeId':'event_type_id',
            'timeMin':'period_minute',
            'timeSec':'period_second'
        }
    )
    
    return df_events

event_file = f'{game_id}.json'

df_events = load_event_data(
    base_path=path_events,
    file_name=event_file
)

# add event descriptions
df_events['event_description'] = df_events['event_type_id'].map(dict_event_names)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df_events = df_events.groupby('periodId').apply(add_timeelapsed_to_events)


#### The tracking data
The tracking data contains the following columns:

+ 'current_phase': the current period
+ 'timeelapsed': the time in seconds of the current period 
+ 'team_id_opta': Opta team id
+ 'player_id': Opta player id
+ 'jersey_no': jersey number of the player
+ 'pos_x': x-coordinate on the pitch; pitch coordinates in [-52.5, 52.5]
+ 'pos_y': y-coordinate on the pitch; pitch coordinates in [-34, 34]
+ 'frame_count': unique identifier for each frame
+ 'team_id': inidicates home(=1)/away(=2); team_id 4 is the ball
+ 'speed': speed
+ 'acc': acceleration
+ 'speed_x': speed regarding x-axis
+ 'speed_y': speed regarding y-axis
+ 'ball_x': x location of the ball
+ 'ball_y': y location of the ball
+ 'ball_speed': ball speed
+ 'ball_acc': ball acceleration
+ 'dop': direction of play of the team ('L'--> 'Left-to-Right; 'R' --> 'Right-to-Left'


In [7]:
df_tracking.head()

Unnamed: 0,current_phase,timeelapsed,team_id_opta,player_id,jersey_no,pos_x,pos_y,speed,frame_count,team_id,acc,speed_x,speed_y,dop,team_in_possession
0,1,0.0,bx0cdmzr2gwr70ez72dorx82p,azuc3tma44xyrbgf5y279o1xx,16.0,-19.477912,4.240699,0.464332,10000,2.0,0.727075,-0.121969,-0.357286,L,bx0cdmzr2gwr70ez72dorx82p
1,1,0.0,bx0cdmzr2gwr70ez72dorx82p,6u2ob6fv950r1qve8uejkq2uh,1.0,-0.117343,-0.234231,0.328293,10000,2.0,0.899067,0.224221,0.126503,L,bx0cdmzr2gwr70ez72dorx82p
2,1,0.0,bx0cdmzr2gwr70ez72dorx82p,7cp51c8zn7y08iyk0hc9ix5nt,5.0,-18.723356,-5.825823,0.288777,10000,2.0,0.587134,-0.227203,0.04311,L,bx0cdmzr2gwr70ez72dorx82p
3,1,0.0,bx0cdmzr2gwr70ez72dorx82p,4u281v53ges3kimtgac0tidm2,23.0,-10.883792,-22.826129,0.701829,10000,2.0,0.970594,-0.494652,-0.291488,L,bx0cdmzr2gwr70ez72dorx82p
4,1,0.0,bx0cdmzr2gwr70ez72dorx82p,7sep6mx2s67mh5fr3raxu7aei,13.0,-13.766865,22.911554,0.666157,10000,2.0,1.217318,0.533158,-0.085514,L,bx0cdmzr2gwr70ez72dorx82p


In [8]:
df_events.columns

Index(['event_type_id', 'contestantId', 'current_phase', 'period_minute',
       'period_second', 'playerId', 'outcome', 'timeelapsed',
       'event_description'],
      dtype='object')

#### The event data
This event data is the Opta event data and contains the following columns:
+ 'event_type_id': the Opta event type identifier; see 'event_description' for an explanation
+ 'contestantId': id of the team
+ 'playerId': id of the player
+ 'current_phase': the current period
+ 'timeelapsed': the time in seconds of the current period
+ 'period_minute': the minute in which the game is currently
+ 'period_second': the second of the minute in which the game is currently
+ 'outcome': outcome of the event, 1=successful, 0=otherwise
+ 'event_description': descriptions of 'event_type_id' (see below)

In [9]:
display(df_events[['event_type_id','event_description']].drop_duplicates().sort_values(by='event_type_id').reset_index(drop=True))
display(df_events.head())

Unnamed: 0,event_type_id,event_description
0,1,Pass
1,2,Offside Pass
2,3,Take On
3,4,Foul
4,5,Out
5,6,Corner Awarded
6,7,Tackle
7,8,Interception
8,10,Save
9,11,Claim


Unnamed: 0,event_type_id,contestantId,current_phase,period_minute,period_second,playerId,outcome,timeelapsed,event_description
2,32,3c3jcs7vc1t6vz5lev162jyv7,1,0,0,,1,0.0,Period start
3,32,bx0cdmzr2gwr70ez72dorx82p,1,0,0,,1,0.0,Period start
4,1,bx0cdmzr2gwr70ez72dorx82p,1,0,0,6u2ob6fv950r1qve8uejkq2uh,1,0.04,Pass
5,1,bx0cdmzr2gwr70ez72dorx82p,1,0,2,azuc3tma44xyrbgf5y279o1xx,1,2.84,Pass
6,1,bx0cdmzr2gwr70ez72dorx82p,1,0,7,7sep6mx2s67mh5fr3raxu7aei,1,7.88,Pass


#### Prepare animation class

Three classes are prepared that are used for animating the tracking data.
The class **BasicPlot** prepares the animation.
It plots the each frame separately.
The class prepares the scatter objects for the teams and the ball and adds the relevant data to the objects.

The class **InteractiveAnimation** uses the the class **BasicPlot** and builds the animation.
The animation widget from ipywidgets is used to allow the user to interact with the animation.

The class **InteractiveEventAnimation** uses the class **InteractiveAnimation** and adds a table to the animation.
The table is used to display the event data and requires the columns *current_phase* and *timeelapsed* for the interaction with the animation.
Further, the table can be used to filter the events and to move the animation to the tracking frame of the event that was clicked on.

In [7]:
class BasicPlot(widgets.VBox):
    """
    base class that constructs a plot of a tracking frame
    """
    def __init__(self, 
                 X=[-57.8, 55], 
                 Y=[-39.5, 37.0],
                 width=506.7,
                 height=346.7, 
                 pitch_img='pitch_white.png', 
                 df_tracking=None, 
                 scaling=1.8):
        """
        init widget object
        """
        super().__init__()
        self.pitch_img = pitch_img
        
        # Init plot
        self.image = self.__init_image(X,Y)
        self.team_scatter = self.__init_scatter(X, Y)
        self.ball_scatter = self.__init_scatter(X, Y, default_size=48)
        self.speed_scatter = self._BasicPlot__init_scatter(X, Y, default_size=15)
        self.line = self.__init_line(X, Y, default_size=10, close_path=False)

        self.default_marks = [
            self.image, 
            self.team_scatter, 
            self.ball_scatter,
            self.speed_scatter,
            self.line,
        ]

        self.fig = Figure(marks=self.default_marks, padding_x=0, padding_y=0, fig_margin={'top':0, 'bottom':0, 'left':30, 'right':30})
        self.fig.layout = widgets.Layout(width=f'{width*scaling}px', height=f'{height*scaling}px')
        self.children = [self.fig]

        # store tracking data
        self.df_tracking = df_tracking
        self.df_tracking['color'] = self.df_tracking['team_id'].apply(self.mapTeamColour)
        self.sample = None
        
        # add unique player identifier to each player
        self.__set_unique_jersey_no()
        
    
    def __init_image(self, X, Y):
        """
        init image upon which players are plotted
        """
        # read pitch image
        image_path = os.path.abspath(self.pitch_img)

        with open(image_path, 'rb') as f:
            raw_image = f.read()
            
        # set image as widget background
        ipyimage = widgets.Image(value=raw_image, format='png')

        scales_image = {'x': LinearScale(), 'y': LinearScale()}
        axes_options = {'x': {'visible': False}, 'y': {'visible': False}}

        image = Image(image=ipyimage, scales=scales_image, axes_options=axes_options)
        
        # Full screen
        image.x = X
        image.y = Y
        
        return image
        
    def __init_scatter(self, X, Y, default_size=64, selected_opacity=0.6, unselected_opacity=1.0):
        """
        init scatter plot that sets the players and the ball to the respective locations
        on the pitch
        """
        scales={'x': LinearScale(min=X[0], max=X[1]), 'y': LinearScale(min=Y[0], max=Y[1])}
        axes_options = {'x': {'visible': False}, 'y': {'visible': False}}
        
        
        team_scatter = Scatter(
                            scales= scales, 
                            default_size=default_size,
                            selected_style={'opacity': selected_opacity, 'stroke': 'Black'},
                            unselected_style={'opacity': unselected_opacity},
                            axes_options=axes_options)
        
        return team_scatter
    
    def __init_line(self, X, Y, close_path, default_size=64, selected_opacity=0.6, unselected_opacity=1.0):
        """
        init scatter plot that sets the players and the ball to the respective locations
        on the pitch
        """
        scales = {'x': LinearScale(min=X[0], max=X[1]), 'y': LinearScale(min=Y[0], max=Y[1])}
        axes_options = {'x': {'visible': False}, 'y': {'visible': False}}

        line = Lines(
            scales=scales,
            default_size=default_size,
            selected_style={'opacity': selected_opacity, 'stroke': 'Black'},
            unselected_style={'opacity': unselected_opacity},
            axes_options=axes_options)
        line.enable_move = False

        return line
    
    def __set_unique_jersey_no(self):
        """
        add unique jersey number to each player
        """
        # add column
        self.df_tracking.loc[:,'u_jersey_no'] = 0
        
        # convert jersey_no to int
        self.df_tracking['jersey_no'] = self.df_tracking['jersey_no'].astype(int)

        # fill column
        self.df_tracking.loc[self.df_tracking.team_id==1, 'u_jersey_no'] = self.df_tracking.loc[self.df_tracking.team_id==1, 'jersey_no'].astype(str)
        self.df_tracking.loc[self.df_tracking.team_id==2, 'u_jersey_no'] = self.df_tracking.loc[self.df_tracking.team_id==2, 'jersey_no'].astype(str) + ' '
        self.df_tracking.loc[self.df_tracking.team_id==4, 'u_jersey_no'] = ' '
    
    def set_data(self, selected_frame):
        """
        set data of scatter plots
        input:
          selected_frame: tuple consisting of (current_phase,timeelapsed)  
        """
        self.sample = self.df_tracking.query(f'current_phase == {selected_frame[0]} and timeelapsed == {selected_frame[1]}')
        self.sample_teams = self.sample.query('team_id != 4')
        self.sample_ball = self.sample.query('team_id == 4')

        # update team scatter
        self.team_scatter.x = self.sample_teams['pos_x']
        self.team_scatter.y = self.sample_teams['pos_y']
        
        # bqplot needs unique values if you want to label each player. I've added dummy spaces for one team. E.g. "1" and "1 "
        self.team_scatter.names=self.sample_teams['u_jersey_no']
        self.team_scatter.colors=self.sample_teams['color'].values.tolist()


        self.ball_scatter.x = self.sample_ball['pos_x']
        self.ball_scatter.y = self.sample_ball['pos_y']
        self.ball_scatter.names=self.sample_ball['u_jersey_no']
        self.ball_scatter.colors=self.sample_ball['color'].values.tolist()
        
        # speed "arrows"
        self.line.x = np.concatenate((self.sample_teams['pos_x'].values.reshape((-1,1)),
                                      (self.sample_teams['pos_x'].values + self.sample_teams['speed_x'].values).reshape((-1,1))),
                                     axis=1)
        self.line.y = np.concatenate((self.sample_teams['pos_y'].values.reshape((-1,1)),
                                      (self.sample_teams['pos_y'].values + self.sample_teams['speed_y'].values).reshape((-1,1))),
                                     axis=1)
        self.line.colors = self.sample_teams['color'].values.tolist()


        self.speed_scatter.x = self.sample_teams['pos_x'].values + self.sample_teams['speed_x'].values
        self.speed_scatter.y = self.sample_teams['pos_y'].values + self.sample_teams['speed_y'].values
        self.speed_scatter.colors = self.sample_teams['color'].values.tolist()
        

    def mapTeamColour(self, team, pretty=False):
        if team == 1:
            return 'blue'
        elif team == 2:
            return 'red'
        elif team == -1:
            return "g"
        elif team == 4:
            return "#ffb04f"
        else:
            return "#3c4766"

        
class InteractiveAnimation(widgets.VBox):
    """
    base class that constructs an interactive plot that allows moving around players/ball
    """
    def __init__(self, df_tracking):
        # store unique frame
        self.__frames = df_tracking[['current_phase','timeelapsed']].drop_duplicates().reset_index(drop=True)
        self.df_tracking = df_tracking.sort_values(by=['current_phase','timeelapsed','team_id', 'jersey_no'])
        
        
    def create_animation(self, 
                         X=[-57.8, 55], 
                         Y=[-39.5, 37.0],
                         width=506.7,
                         height=346.7, 
                         pitch_img='pitch_white.png', 
                         scaling=1.8,
                         step=1,
                         frame_rate=25):
        # create pitch widget
        self.animation_container = BasicPlot(
            X=X,
            Y=Y,
            width=width,
            height=height,
            pitch_img=pitch_img,
            scaling=scaling,
            df_tracking=self.df_tracking
        )
        
        # init animation widget
        self.animation_container.set_data((1,0))
        
        self.control_container = self.__add_to_layout(STEP=step,
                                                      frame_rate=frame_rate)
        
        return widgets.VBox([self.animation_container,
                             self.control_container])
        
    
    def __add_to_layout(self, frame_rate, STEP=1):
        """
        add slider elements to to widget container
        """
        # number of different frame
        no_frames = self.df_tracking.frame_count.nunique()
        
        # add play mode
        self.play = widgets.Play(interval=1000/frame_rate,
                                value=0,
                                step=STEP,
                                max=no_frames,
                                description="Press play",
                                disabled=False)
        
        # add slider
        self.slider = widgets.IntSlider(max=no_frames, continuous_update = False)
        widgets.jslink((self.play, 'value'), (self.slider, 'value'))
        
        # add interactivity
        self.slider.observe(self.__update_data, names='value')
        self.slider.value
        
        # add checkbox for speed arrows
        self.show_speed_arrow = widgets.Checkbox(
            value=True,
            description='Show speed arrows',
            disabled=False
        )
        self.show_speed_arrow.observe(self.__show_arrow, names='value')
        
        return widgets.HBox(
            [self.play, 
             self.slider,
             self.show_speed_arrow,
            ]
        )
        
    def __update_data(self, change):
        """
        update pitch plot
        """
        self.animation_container.set_data(self.__get_frameset(change['new']))
        
        
    def __show_arrow(self, change):
        """
        update pitch plot
        """
        self.animation_container.speed_scatter.visible = change['new']
        self.animation_container.line.visible = change['new']
        
            
    def __get_frameset(self, row):
        """
        helper function to get respective currentphase and timeelapsed of new
        value on the slider object
        """
        frameset = tuple(self.__frames.iloc[row])
        return frameset
    
    
class InteractiveEventAnimation(widgets.VBox):
    """
    base class that constructs an interactive plot that allows moving around players/ball
    """
    def __init__(self, df_tracking, df_events, offset=0):
        """
        init widget object by calling parent class constructor
        input:
            positions_df: dataframe that contains tracking data
            events_df: dataframe that contains the event data for the match
            offset: number of frames that should be started before an event
        """
        # store unique frame
        self.__frames = df_tracking[['current_phase','timeelapsed']].drop_duplicates().reset_index(drop=True)
        self.df_tracking = df_tracking.sort_values(by=['current_phase','timeelapsed','team_id', 'jersey_no'])
        self.df_events = df_events
        self.offset = offset

        
        
    def create_event_animation(self, 
                         X=[-57.8, 55], 
                         Y=[-39.5, 37.0],
                         width=506.7,
                         height=346.7, 
                         pitch_img='pitch_white.png', 
                         scaling=1.8,
                         step=1,
                         frame_rate=10):
        """
        compose and return widget to be displayed
        input:
            X: x-coordinates pitch
            Y: y-coordinates pitch
            width: width (pixel) of image upon which is plotted
            height: height (pixel) of image upon which is plotted
            pitch_img: file that contains image of soccer pitch
            positions_df: dataframe that contains tracking data
            scaling: scale factor for pitch_img
            step: step parameter for widget "play"
            frame_rate: sampling frequency of data; needed to display the animation correctly
        """
        # create pitch widget
        self.__ani_obj = InteractiveAnimation(df_tracking=self.df_tracking)
        animation_container = self.__ani_obj.create_animation(X=X,
                                                              Y=Y,
                                                              width=width,
                                                              height=height,
                                                              pitch_img=pitch_img,
                                                              scaling=scaling,
                                                              step=step,
                                                              frame_rate=frame_rate)
        
        self.__add_to_layout()
        
        return widgets.VBox([animation_container,
                             self.__event_container])
        
    
    def __add_to_layout(self):
        """
        add slider elements to to widget container
        """
        # Define qgrid widget
        qgrid.set_grid_option('maxVisibleRows', 10)
        col_opts = { 
            # 'editable': False,
            'editable': True,

        }
           
        self.__event_container = qgrid.show_grid(self.df_events, show_toolbar=False, column_options=col_opts)
        self.__event_container.layout = widgets.Layout(width='920px')
           
        self.__event_container.observe(self.__on_row_selected, names=['_selected_rows'])
        

            
    def __on_row_selected(self, change):
        """
        callback for row selection: update selected points in scatter plot
        """

        # get selcted event
        filtered_df = self.__event_container.get_changed_df()
        event = filtered_df.iloc[change.new]

        # event = self.events_df.iloc[change.new]
        
        # find index to which slider needs to be set
        idx = self.__frames.query(f'current_phase=={event.current_phase.item()} and timeelapsed=={event.timeelapsed.item()}').index[0]
        
        # set slider
        self.__ani_obj.slider.value = int(idx - self.offset)

## Animate the tracking data
To animate the tracking data we use the class **InteractiveAnimation**.
 1. An object of this class needs to be created.
 2. The animation is created (can take a few seconds when using a full game)
 3. display the animation and get started loooking into the game; use the buttons and the slider to get to the frame you are interested in
 
If the animation isn't displayed please make sure that you ran the following commands in a separate cell once (maybe you need to restart the kernel afterwards):
+ !jupyter nbextension enable --py --sys-prefix bqplot
+ !jupyter nbextension enable --py --sys-prefix qgrid
+ !jupyter nbextension enable --py --sys-prefix widgetsnbextension

In [8]:
iea_obj = InteractiveEventAnimation(
    df_tracking=df_tracking,
    df_events=df_events[['event_description','outcome','playerId','contestantId','current_phase','timeelapsed']]
)

animation = iea_obj.create_event_animation(
    frame_rate=25
)

display(animation)

NameError: name 'qgrid' is not defined