In [1]:
import sys

sys.path.append('C:\\Users\\DavidB\\PycharmProjects\\My_Football_Analytics')
import pandas as pd
import numpy as np
from mplsoccer import Pitch
import matplotlib.pyplot as plt
from floodlight.io import dfl
from lxml import etree
import flatdict
import datetime
from Basics.Data.DFL_data import parse_dfl_pos_data

In [2]:
# Input files: raw position (tracking) data and the match-information XML of the same match.
pos_filepath = 'DFL_04_02_positions_raw_DFL-COM-000001_DFL-MAT-0027AD.xml'
mi_filepath = 'DFL_02_01_matchinformation_DFL-COM-000001_DFL-MAT-0027AD.xml'

# Teamsheets are read from the match-information file; indexed by 'Home' / 'Away' below.
teamsheets = dfl.read_teamsheets_from_mat_info_xml(mi_filepath)

# Lookup tables between teamsheet columns, one per team.
# NOTE(review): the name says "xID" but the link is built from the "tID" column -
# confirm which column is actually intended before relying on this mapping.
links_jID_to_xID = {
    side: teamsheets[side].get_links("jID", "tID") for side in ("Home", "Away")
}
# Maps a player's pID (the FrameSet "PersonId" in the position file) to the jID column.
links_pID_to_jID = {
    side: teamsheets[side].get_links("pID", "jID") for side in ("Home", "Away")
}

In [3]:
# Manual parsing of the DFL position XML into a nested dict ``data``:
#   data['ball']['x' | 'y'], data['ballstatus'], data['possession'],
#   data['GameSection'], data['Time'] and one {'x': [...], 'y': [...]} entry per player,
#   keyed f'Home{jersey}' / f'Away{jersey}'.
# Player lists are NaN-padded so that list position i refers to the same frame for
# every player (first half first, second half appended directly after it).

# Set to False to silence the per-FrameSet debug output.
PRINT_FRAMESET_DEBUG = True


def _append_padded_track(track, frames, n_leading, n_trailing):
    """Append one FrameSet's coordinates to a player's track, NaN-padded on both sides.

    Parameters
    ----------
    track : dict
        Player entry holding the lists ``track['x']`` and ``track['y']``;
        both lists are extended in place.
    frames : list
        ``Frame`` elements of the FrameSet; ``X`` and ``Y`` attributes are read as floats.
    n_leading : int
        Number of NaN samples inserted before the FrameSet's own samples.
    n_trailing : int
        Number of NaN samples inserted after the FrameSet's own samples.

    Raises
    ------
    ValueError
        If a padding count is negative, i.e. the player's frames lie outside the
        frame range of the half.
    """
    if n_leading < 0 or n_trailing < 0:
        raise ValueError(
            f'negative NaN padding ({n_leading}, {n_trailing}): '
            'player frames lie outside the frame range of the half'
        )
    for axis, attribute in (('x', 'X'), ('y', 'Y')):
        track[axis].extend(np.repeat(np.nan, n_leading))
        track[axis].extend(float(frame.get(attribute)) for frame in frames)
        track[axis].extend(np.repeat(np.nan, n_trailing))


# Insertion order of the keys determines the column order of the DataFrame built later.
data = {
    'ball': {'x': [], 'y': []},
    'ballstatus': [],
    'possession': [],
    'GameSection': [],
    'Time': [],
}

# Dummy dates; only the time-of-day part is used to build the 'Time' strings.
# The first half starts at 00:00:00, the second half at 00:45:00.
firsthalf_time = datetime.datetime(100, 1, 1, 0, 0, 0, 0)
secondhalf_time = datetime.datetime(100, 1, 1, 0, 45, 0, 0)
# One frame corresponds to 0.04 seconds (25 fps).
frame_step = datetime.timedelta(milliseconds=40)

# floodlight helper (private API, note the leading underscore) that yields, per period,
# the first and last frame number "N" found in the position data (not starting at 0!).
period_frames, est_framerate = dfl._create_periods_from_dat(pos_filepath)

# Number of list positions occupied by the first half; this is also the index at which
# the second half starts. Loop-invariant, so computed once.
first_half_length = period_frames['firstHalf'][1] - period_frames['firstHalf'][0] + 1

# One FrameSet per object (player or ball) and half.
for _, frame_set in etree.iterparse(pos_filepath, tag="FrameSet"):

    segment = frame_set.get("GameSection")   # firstHalf or secondHalf | constant in FrameSet!
    if PRINT_FRAMESET_DEBUG:
        print(segment)

    if frame_set.get("TeamId").lower() == "ball":          # the ball has its own TeamId ("ball")
        # Every child is one frame with coordinates, ball status and possession.
        for frame in frame_set:
            data['ball']['x'].append(float(frame.get('X')))
            data['ball']['y'].append(float(frame.get('Y')))
            data['ballstatus'].append(float(frame.get("BallStatus")))
            data['possession'].append(float(frame.get("BallPossession")))
            data['GameSection'].append(segment)
            # Running clock per half, advanced by one frame step after every sample.
            if segment == 'firstHalf':
                data['Time'].append(firsthalf_time.time().strftime('%H:%M:%S.%f'))
                firsthalf_time = firsthalf_time + frame_step
            elif segment == 'secondHalf':
                data['Time'].append(secondhalf_time.time().strftime('%H:%M:%S.%f'))
                secondhalf_time = secondhalf_time + frame_step
    else:
        frames = list(frame_set.iterfind("Frame"))

        # A FrameSet without frames carries no positions; skip it instead of failing
        # on frames[0] below.
        if frames:
            # Frame numbers where this player's data starts and ends in this half.
            sf_player = int(frames[0].get("N"))
            ef_player = int(frames[-1].get("N"))

            # Index range of the whole half inside the concatenated lists:
            # the first half starts at 0, any other segment right after the first half.
            if segment == 'firstHalf':
                half_first_frame = period_frames['firstHalf'][0]
                half_last_frame = period_frames['firstHalf'][1]
                start_index = 0
            else:
                half_first_frame = period_frames['secondHalf'][0]
                half_last_frame = period_frames['secondHalf'][1]
                start_index = first_half_length
            end_index = half_last_frame - half_first_frame + start_index

            # Index range actually covered by this player's frames:
            # frame number relative to the half's first frame, shifted by the half's offset.
            start_index_player = sf_player - half_first_frame + start_index
            end_index_player = ef_player - half_first_frame + start_index

            person_id = frame_set.get("PersonId")
            if PRINT_FRAMESET_DEBUG:
                print(person_id)
                print(f'Players first and last frame: {sf_player, ef_player}') # his first frame
                print(f'As indeces this equates to: {start_index_player, end_index_player}')
                print(f'Overall the data for this half starts end ends with the follwing indeced: {start_index, end_index}')

            # Home is checked before Away; persons found in neither teamsheet are ignored.
            team = next(
                (side for side in ("Home", "Away") if person_id in links_pID_to_jID[side]),
                None,
            )
            if team is not None:
                jrsy = links_pID_to_jID[team][person_id]   # kit number from player ID
                player_key = f'{team}{jrsy}'

                # Initialise on first sight only; in the second half the entry usually exists.
                if player_key not in data:
                    data[player_key] = {'x': [], 'y': []}
                    # A player first seen in the second half needs NaNs for the whole first half.
                    if segment == 'secondHalf':
                        data[player_key]['x'].extend(np.repeat(np.nan, start_index))
                        data[player_key]['y'].extend(np.repeat(np.nan, start_index))

                # NaNs before the player's first frame and after his last frame of this half.
                _append_padded_track(
                    data[player_key],
                    frames,
                    start_index_player - start_index,
                    end_index - end_index_player,
                )

    # Release the processed FrameSet's children so the parsed tree does not keep
    # every frame of the file in memory.
    frame_set.clear()



firstHalf
DFL-OBJ-00017V
Players first and last frame: (10001, 77780)
As indeces this equates to: (0, 67779)
Overall the data for this half starts end ends with the follwing indeced: (0, 67779)
firstHalf
DFL-OBJ-0000D9
Players first and last frame: (10001, 77780)
As indeces this equates to: (0, 67779)
Overall the data for this half starts end ends with the follwing indeced: (0, 67779)
firstHalf
firstHalf
DFL-OBJ-0000PC
Players first and last frame: (10001, 77780)
As indeces this equates to: (0, 67779)
Overall the data for this half starts end ends with the follwing indeced: (0, 67779)
firstHalf
DFL-OBJ-0026MA
Players first and last frame: (10001, 77780)
As indeces this equates to: (0, 67779)
Overall the data for this half starts end ends with the follwing indeced: (0, 67779)
firstHalf
DFL-OBJ-0001E6
Players first and last frame: (10001, 77780)
As indeces this equates to: (0, 67779)
Overall the data for this half starts end ends with the follwing indeced: (0, 67779)
firstHalf
DFL-OBJ-00

secondHalf
DFL-OBJ-00000C
Players first and last frame: (100001, 143961)
As indeces this equates to: (67780, 111740)
Overall the data for this half starts end ends with the follwing indeced: (67780, 137218)
secondHalf
DFL-OBJ-00266E
Players first and last frame: (165377, 169439)
As indeces this equates to: (133156, 137218)
Overall the data for this half starts end ends with the follwing indeced: (67780, 137218)
secondHalf
DFL-OBJ-0000IT
Players first and last frame: (100001, 154544)
As indeces this equates to: (67780, 122323)
Overall the data for this half starts end ends with the follwing indeced: (67780, 137218)
secondHalf
DFL-OBJ-0002B9
Players first and last frame: (100001, 169439)
As indeces this equates to: (67780, 137218)
Overall the data for this half starts end ends with the follwing indeced: (67780, 137218)
secondHalf
DFL-OBJ-00001D
Players first and last frame: (100001, 169439)
As indeces this equates to: (67780, 137218)
Overall the data for this half starts end ends with th

In [4]:
# Flatten the nested ``data`` dict so that nested keys are joined with '_'
# (e.g. data['ball']['x'] becomes the key 'ball_x').
flat_data = flatdict.FlatDict(data, delimiter='_')
# orient='index' builds one row per key; transposing turns every key into a column
# and every frame into a row.
df = pd.DataFrame.from_dict(flat_data, orient='index').transpose()
df

Unnamed: 0,ball_x,ball_y,ballstatus,possession,GameSection,Time,Home30_x,Home30_y,Home27_x,Home27_y,...,Home18_x,Home18_y,Away11_x,Away11_y,Away17_x,Away17_y,Home19_x,Home19_y,Home25_x,Home25_y
0,-0.38,-0.28,1.0,2.0,firstHalf,00:00:00.000000,-15.89,18.66,-9.7,-5.98,...,,,,,,,,,,
1,-0.35,-0.3,1.0,2.0,firstHalf,00:00:00.040000,-15.89,18.66,-9.71,-5.99,...,,,,,,,,,,
2,-0.33,-0.32,1.0,2.0,firstHalf,00:00:00.080000,-15.89,18.66,-9.73,-6.0,...,,,,,,,,,,
3,-0.31,-0.33,1.0,2.0,firstHalf,00:00:00.120000,-15.89,18.66,-9.75,-6.02,...,,,,,,,,,,
4,1.09,-0.13,1.0,2.0,firstHalf,00:00:00.160000,-15.88,18.66,-9.77,-6.03,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137214,23.04,6.63,0.0,2.0,secondHalf,01:31:17.360000,,,,,...,26.71,27.6,37.0,-0.68,36.97,10.07,27.27,23.0,37.6,-1.34
137215,23.02,6.62,0.0,2.0,secondHalf,01:31:17.400000,,,,,...,26.72,27.57,36.98,-0.64,36.95,10.11,27.25,23.01,37.58,-1.33
137216,23.01,6.62,0.0,2.0,secondHalf,01:31:17.440000,,,,,...,26.74,27.54,36.96,-0.61,36.92,10.15,27.22,23.02,37.57,-1.31
137217,23.0,6.61,0.0,2.0,secondHalf,01:31:17.480000,,,,,...,26.77,27.53,36.94,-0.57,36.9,10.19,27.2,23.04,37.56,-1.29


In [5]:
# Parse the same two files with the project helper from Basics.Data.DFL_data.
# NOTE: this rebinds ``df``; the frame built by hand in the previous cell is replaced.
df, x_range_data, y_range_data = parse_dfl_pos_data(
    pos_filepath=pos_filepath,
    mi_filepath=mi_filepath,
)
df

Unnamed: 0,ball_x,ball_y,ballstatus,possession,GameSection,Time,Time [s],Home_30_x,Home_30_y,Home_27_x,...,Away_15_y,Away_5_x,Away_5_y,Away_4_x,Away_4_y,Away_11_x,Away_11_y,Away_17_x,Away_17_y,Period
0,-0.38,-0.28,1.0,2.0,firstHalf,00:00:00.000000,0,-15.89,18.66,-9.7,...,33.4,24.99,12.99,,,,,,,1
1,-0.35,-0.3,1.0,2.0,firstHalf,00:00:00.040000,0.04,-15.89,18.66,-9.71,...,33.4,24.99,13.01,,,,,,,1
2,-0.33,-0.32,1.0,2.0,firstHalf,00:00:00.080000,0.08,-15.89,18.66,-9.73,...,33.4,24.99,13.02,,,,,,,1
3,-0.31,-0.33,1.0,2.0,firstHalf,00:00:00.120000,0.12,-15.89,18.66,-9.75,...,33.41,25.01,13.03,,,,,,,1
4,1.09,-0.13,1.0,2.0,firstHalf,00:00:00.160000,0.16,-15.88,18.66,-9.77,...,33.42,25.01,13.05,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137214,23.04,6.63,0.0,2.0,secondHalf,01:31:17.360000,5488.56,,,,...,1.56,8.51,9.1,32.13,33.65,37.0,-0.68,36.97,10.07,2
137215,23.02,6.62,0.0,2.0,secondHalf,01:31:17.400000,5488.6,,,,...,1.57,8.53,9.11,32.1,33.64,36.98,-0.64,36.95,10.11,2
137216,23.01,6.62,0.0,2.0,secondHalf,01:31:17.440000,5488.64,,,,...,1.58,8.54,9.13,32.07,33.63,36.96,-0.61,36.92,10.15,2
137217,23.0,6.61,0.0,2.0,secondHalf,01:31:17.480000,5488.68,,,,...,1.58,8.56,9.13,32.03,33.62,36.94,-0.57,36.9,10.19,2


In [6]:
# Read the pitch description from the match-information XML and show its x-axis limits.
pitch = dfl.read_pitch_from_mat_info_xml(mi_filepath)
pitch.xlim

(-50.0, 50.0)

In [7]:
# Display the Home / Away teamsheets read from the match-information XML.
teamsheets

{'Home': Teamsheet(teamsheet=              player position                 team  jID             pID  \
 0          M. Frantz       RM  Sport-Club Freiburg    8  DFL-OBJ-0000PC   
 1         Ç. Söyüncü      IVL  Sport-Club Freiburg    4  DFL-OBJ-002709   
 2        J. Schuster     None  Sport-Club Freiburg   23  DFL-OBJ-000013   
 3         A. Abrashi      DML  Sport-Club Freiburg    6  DFL-OBJ-00266D   
 4   F. Niederlechner      STL  Sport-Club Freiburg    7  DFL-OBJ-0002GE   
 5         P. Stenzel       RV  Sport-Club Freiburg   15  DFL-OBJ-0026MA   
 6      J. Föhrenbach       LV  Sport-Club Freiburg   25  DFL-OBJ-00266E   
 7        A. Schwolow       TW  Sport-Club Freiburg    1  DFL-OBJ-00001D   
 8           O. Bulut     None  Sport-Club Freiburg   11  DFL-OBJ-000195   
 9        N. Petersen      STR  Sport-Club Freiburg   18  DFL-OBJ-0000IU   
 10    G. Niedermeier     None  Sport-Club Freiburg   24  DFL-OBJ-0000LJ   
 11        M. Philipp      STR  Sport-Club Freiburg   26  DF