In [5]:
import pandas as pd
import numpy as np
from lxml import etree
import xml.etree.ElementTree as ET
import time
from IPython.display import display, HTML

In [6]:
def get_tracking_data(xml_positions, half='firstHalf'):
    """Return a wide dataframe with the tracking data of the ball and all players.

    Every ``FrameSet`` element of the requested game section becomes one group
    of columns; the frames of all FrameSets are aligned on a shared
    ``(N, T)`` row index (frame number, timestamp).

    Resulting layout::

        columns: TeamId -> PersonId -> Position (remaining frame attributes,
                 e.g. X, Y, S, ... and additionally Z, BallPossession,
                 BallStatus for the FrameSet whose TeamId is "BALL")
        index:   N (int frame number), T (timestamp, parsed with pd.to_datetime)

    Note: no rows are dropped here; frames missing for a FrameSet simply show
    up as NaN after the column-wise concatenation.

    :param xml_positions: the parsed xml tracking data (lxml object; anything
        exposing ``.xpath()`` that returns iterable elements with ``.attrib``)
    :param half: the description of the half: either "firstHalf" or "secondHalf", etc.
    :return: a dataframe containing the tracking data: position of all players and the ball
    :raises ValueError: if no FrameSet exists for ``half``, or if a frame value
        cannot be cast to its numeric type
    """
    # Columns shared by the ball and the players. 'T' is deliberately left as
    # a string here: casting it to 'datetime64[ns]' would drop the time zone.
    player_dtypes = {
        'A': float,
        'D': float,
        'M': int,
        'N': int,
        'S': float,
        'X': float,
        'Y': float,
    }
    # The ball additionally carries a height and possession/status flags.
    ball_dtypes = dict(player_dtypes, Z=float, BallPossession=int, BallStatus=int)

    def change_type(df, frameset):
        """Cast the string attributes to numeric types (ball and players differ)."""
        is_ball = frameset.attrib.get('TeamId') == 'BALL'
        return df.astype(dtype=ball_dtypes if is_ball else player_dtypes, errors='raise')

    def create_columns(df, frameset):
        """A utility function to create a multiindex columns"""
        person_id = frameset.attrib.get('PersonId')
        team_id = frameset.attrib.get('TeamId')
        df.columns = pd.MultiIndex.from_product(
            [[team_id], [person_id], df.columns],
            names=['TeamId', 'PersonId', 'Position']
        )
        return df

    framesets = xml_positions.xpath(F'//Positions/FrameSet[@GameSection = "{half}"]')
    if not framesets:
        # pd.concat would fail with an opaque "No objects to concatenate".
        raise ValueError(F'No FrameSet found for GameSection "{half}"')

    tracking_players = pd.concat([
        pd.DataFrame.from_records(
            # Iterating an element yields its children (getchildren() is deprecated).
            [dict(frame.attrib) for frame in frameset]
        ).pipe(change_type, frameset=frameset)
         .set_index(['N', 'T'])
         .pipe(create_columns, frameset=frameset)
        for frameset in framesets
    ], axis=1, sort=False)

    # Cast the 'T' level of the index to datetime while keeping the time zone.
    # set_levels expects the *unique* level values (not one value per row) and
    # no longer supports inplace=True, so convert the level and reassign.
    frame_index = tracking_players.index
    tracking_players.index = frame_index.set_levels(
        pd.to_datetime(frame_index.levels[1]), level=1
    )

    return tracking_players

In [None]:
# Tracking data for the default game section ('firstHalf').
# NOTE(review): xml_positions is not defined in the visible cells - it must be
# created (parsed tracking XML) before this cell can run on a fresh kernel.
positions = get_tracking_data(xml_positions)

# KPI/event table exported to Excel; its frame column is renamed to 'N',
# the same name get_tracking_data uses for the frame-number index level.
kpimerged = pd.read_excel('C:/Users/Jung/Desktop/Q-Index Master/19-06-12_Mainz_GER_EST Sportec Data/DFB_KPI_Merged.xlsx')
kpimergednew = kpimerged.rename(columns={'FRAME_NUMBER': 'N'})

In [None]:
class DataReader:
    """
    Main class to read in xml files provided by the DFB.

    The document is parsed once on construction; its root element is stored in
    ``self.xml_root`` and ``create_dataframe`` turns matching elements into rows.
    """

    def __init__(self, xml_file):
        """Parse ``xml_file`` (a path or file object accepted by ``ET.parse``)."""
        self.xml_root = self._load_data(xml_file)

    def _load_data(self, xml_file):
        """Parse the xml document and return its root element."""
        return ET.parse(xml_file).getroot()

    @staticmethod
    def _event_seconds(event_time):
        """Convert an ISO 8601 ``EventTime`` string to seconds via ``time.mktime``.

        The wall-clock fields of the timestamp are fed to ``time.mktime`` (i.e.
        interpreted in the machine's local zone), sub-second precision is lost.
        """
        # Local import: datetime is not part of the notebook's import cell.
        from datetime import datetime

        if event_time.endswith('Z'):
            # fromisoformat() only understands the 'Z' suffix from Python 3.11 on.
            event_time = event_time[:-1] + '+00:00'
        ts = datetime.fromisoformat(event_time)
        return time.mktime(ts.timetuple())

    def create_dataframe(self, df_cols=None, findall_string=".//Object", time_search=None):
        """
        Create a dataframe from the xml inputs

        Args:
            df_cols (list): Optional, the column names. If no names given, the
              attribute names of the first matching element that has attributes
              are used.
            findall_string (str): The xpath string to iterate trees (https://www.w3schools.com/xml/xml_xpath.asp)
            time_search (float): Optional. When given, only elements whose
              parent is an ``Event`` are kept, and only if the parent's
              ``EventTime`` (in seconds, see ``_event_seconds``) is not larger
              than ``time_search``. I.e. this is a means of getting all passes
              before the frame time.

        Returns:
            pd.DataFrame: one row per matching element, one column per name in
            the column list; attributes missing on an element are ``None``.

        Raises:
            KeyError: if an ``Event`` parent has no ``EventTime`` attribute.
        """
        # Copy caller-supplied names so the argument is never mutated.
        columns = None if df_cols is None else list(df_cols)

        # ElementTree elements do not know their parent; build the lookup only
        # when the time filter actually needs it.
        parent_map = None
        if time_search is not None:
            parent_map = {child: parent for parent in self.xml_root.iter() for child in parent}

        matched_attribs = []
        for node in self.xml_root.findall(findall_string):
            if time_search is not None:
                parent = parent_map.get(node)
                if parent is None or parent.tag != "Event":
                    continue
                # Skip events that happened after the searched time.
                if self._event_seconds(parent.attrib["EventTime"]) > time_search:
                    continue

            # If column names are not provided, take them from the first
            # matching element that carries attributes.
            if columns is None and node.attrib:
                columns = list(node.attrib)

            matched_attribs.append(node.attrib)

        if columns is None:
            columns = []

        # Build the frame once instead of appending row by row (DataFrame.append
        # was removed in pandas 2.0 and was quadratic anyway).
        rows = [[attrib.get(name) for name in columns] for attrib in matched_attribs]
        return pd.DataFrame(rows, columns=columns)


In [None]:
# Folder holding the match data and the player master-data file inside it.
path = 'C:/Users/Jung/Desktop/Q-Index Master/19-06-12_Mainz_GER_EST Sportec Data/'
Germany_player_info = "DFL_01_05_masterdata_DFL-CLU-000N8Y_DFL-SEA-0001K3_player (1).xml"

# Parse the file and turn every <Object> element into one dataframe row
# (create_dataframe's default search string is ".//Object").
germany_team_data = DataReader(f"{path}{Germany_player_info}")
germany_team_meta_df = germany_team_data.create_dataframe()

# Collect the ObjectId column as a plain list and show it.
germany_player_IDs = germany_team_meta_df["ObjectId"].tolist()
print(germany_player_IDs)

In [None]:
def directionplay(half, teamid):
    """Print and return the direction of play derived from the goalkeeper's mean X.

    Reads the notebook globals ``germany_team_meta_df`` (player master data)
    and ``positions`` (tracking data with ``(TeamId, PersonId, Position)``
    columns and the frame number as first index level).

    For every player whose ``PlayingPositionGerman`` is 'Torwart' and whose
    ObjectId occurs as a PersonId in ``positions``, the mean X coordinate over
    a fixed frame window is computed. For ``half == 1`` a negative mean means
    'left to right', otherwise 'right to left'; for any other ``half`` the
    mapping is inverted.

    :param half: 1 for the first half; any other value selects the second-half rules
    :param teamid: TeamId under which the goalkeeper's columns are stored in ``positions``
    :return: the direction string of the last goalkeeper found in ``positions``,
        or None if no goalkeeper is tracked (previously nothing was returned)
    :raises KeyError: if a goalkeeper is tracked, but not under ``teamid``
    """
    # Inclusive frame-number windows, kept exactly as hard-coded before.
    # NOTE(review): these look specific to one match - confirm before reuse.
    if half == 1:
        first_frame, last_frame = 10000, 50000
        negative_side, positive_side = 'left to right', 'right to left'
    else:
        first_frame, last_frame = 10000, 77674
        negative_side, positive_side = 'right to left', 'left to right'

    is_keeper = germany_team_meta_df['PlayingPositionGerman'] == 'Torwart'
    keeper_ids = germany_team_meta_df.loc[is_keeper, 'ObjectId']

    # Exact lookup on the PersonId level. The previous substring test against
    # str(positions.columns) breaks once pandas truncates the index repr and
    # can match partial ids.
    tracked_ids = positions.columns.get_level_values(1)
    frame_numbers = positions.index.get_level_values(0)
    in_window = (frame_numbers >= first_frame) & (frame_numbers <= last_frame)

    dirofplay = None
    for keeper_id in keeper_ids:  # Series.iteritems() was removed in pandas 2.0
        if keeper_id not in tracked_ids:
            continue
        mean_x = positions.loc[in_window, (teamid, keeper_id, 'X')].mean()
        dirofplay = negative_side if mean_x < 0 else positive_side
        print(dirofplay)
    return dirofplay

In [None]:
# Frame numbers of the kick-offs, keyed by '1st half' / '2nd half'.
# NOTE(review): `kickoffs` is not defined in the visible cells; it is used
# here as positional row numbers into kpimergednew - confirm its origin.
half = {}
for i in kickoffs:
    if i == 0:
        # Row 0 is taken as the kick-off of the first half.
        half['1st half'] = kpimergednew.iloc[i].N
        continue
    # Frame gap between this row and the row two positions earlier; a gap of
    # more than 10000 frames marks this row as the second-half kick-off.
    framedif = kpimergednew.iloc[i].N - kpimergednew.iloc[i - 2].N
    if framedif > 10000:
        half['2nd half'] = kpimergednew.iloc[i].N

In [8]:
# Render an HTML <h1> section heading ("Packing horizontal/vertical") in the cell output.
display(HTML('<h1>Packing horizontal/vertikal</h1>'))

In [None]:
# Packing for a single completed pass: how many opponents lie beyond the ball
# position at the moment of the pass (frame t1) compared with the moment of
# reception (frame t2), counted separately along X (horizontal) and Y (vertical).
i = 74  # positional index of the analysed pass within passdf

passdf = kpimergednew[(kpimergednew['SUBTYPE'] == 'Pass') & (kpimergednew['EVALUATION'] == 'successfullyComplete')]
# The former bare `passdf.reset_index(drop=True)` discarded its result and is
# dropped; all row access below is positional (.iloc), so nothing changes.

team_ids = passdf.CUID.unique()
CUID1 = team_ids[0]
CUID2 = team_ids[1]

pass_row = passdf.iloc[i, :]
CUID = pass_row.CUID
PUID1 = pass_row.PUID1
PUID2 = pass_row.PUID2
t1 = int(pass_row.N)        # frame of the pass
t2 = int(pass_row.RECFRM)   # frame of the reception
p1x = float(pass_row.X_TRACKING)
p2x = float(pass_row.XRec)
p1y = float(pass_row.Y_TRACKING)
p2y = float(pass_row.YRec)

if CUID in (CUID1, CUID2):
    # The opponents are the other of the two CUIDs found in the pass table.
    opponent = CUID2 if CUID == CUID1 else CUID1
    attacks_right = directionplayframe(t1, CUID) == 'left to right'

    opp_x_t1 = positions.loc[t1, (opponent, slice(None), ['X'])]
    opp_x_t2 = positions.loc[t2, (opponent, slice(None), ['X'])]
    opp_y_t1 = positions.loc[t1, (opponent, slice(None), ['Y'])]
    opp_y_t2 = positions.loc[t2, (opponent, slice(None), ['Y'])]

    # Horizontal: opponents with a larger X when playing 'left to right',
    # with a smaller X for any other direction.
    if attacks_right:
        moment1_h = np.count_nonzero(opp_x_t1 > p1x)
        moment2_h = np.count_nonzero(opp_x_t2 > p2x)
    else:
        moment1_h = np.count_nonzero(opp_x_t1 < p1x)
        moment2_h = np.count_nonzero(opp_x_t2 < p2x)

    # Vertical: always opponents with a larger Y, independent of the direction.
    moment1_v = np.count_nonzero(opp_y_t1 > p1y)
    moment2_v = np.count_nonzero(opp_y_t2 > p2y)

    pack_h = moment1_h - moment2_h
    pack_v = moment1_v - moment2_v
    print(pack_h)
    print(pack_v)
print(t1, t2)

In [9]:
# Render an HTML <h1> section heading ("Pass 90 degrees in direction of play") in the cell output.
display(HTML('<h1>Pass 90 Grad in Spielrichtung</h1>'))

In [None]:
# Angle (in degrees) between a completed pass and the direction of play.
i = 80  # positional index of the analysed pass within passdf

passdf = kpimergednew[(kpimergednew['SUBTYPE'] == 'Pass') & (kpimergednew['EVALUATION'] == 'successfullyComplete')]
# The former bare `passdf.reset_index(drop=True)` discarded its result and is
# dropped; all row access below is positional (.iloc), so nothing changes.

team_ids = passdf.CUID.unique()
CUID1 = team_ids[0]
CUID2 = team_ids[1]

pass_row = passdf.iloc[i, :]
CUID = pass_row.CUID
PUID1 = pass_row.PUID1
PUID2 = pass_row.PUID2
t1 = int(pass_row.N)
t2 = int(pass_row.RECFRM)
p1 = np.array([float(pass_row.X_TRACKING), float(pass_row.Y_TRACKING)])  # pass origin
p2 = np.array([float(pass_row.XRec), float(pass_row.YRec)])              # reception point

# Evaluate the direction once instead of once per branch.
direction = directionplayframe(t1, CUID)
if direction in ('left to right', 'right to left'):
    # Reference point at the same Y as the pass origin, so p1 -> triangle runs
    # parallel to the X axis. Its X is the largest / smallest X_TRACKING of all
    # completed passes, depending on the direction of play.
    if direction == 'left to right':
        target_x = max(passdf.X_TRACKING)
    else:
        target_x = min(passdf.X_TRACKING)
    triangle = np.array([float(target_x), float(pass_row.Y_TRACKING)])

    passline = p2 - p1
    passtor = triangle - p1
    cosine_angle = np.dot(passline, passtor) / (np.linalg.norm(passline) * np.linalg.norm(passtor))
    # Round-off can push the cosine marginally outside [-1, 1], which makes
    # arccos return NaN for (anti-)parallel vectors; clip it back first.
    angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))
    print(np.degrees(angle))

In [10]:
# Render an HTML <h1> section heading ("Pass 90 degrees towards the goal centre") in the cell output.
display(HTML('<h1>Pass 90 Grad in Richtung Tormitte</h1>'))

In [None]:
# Angle (in degrees) between a completed pass and the line from the pass
# origin to a reference point at Y = 0 on the attacked side.
i = 80  # positional index of the analysed pass within passdf

passdf = kpimergednew[(kpimergednew['SUBTYPE'] == 'Pass') & (kpimergednew['EVALUATION'] == 'successfullyComplete')]
# The former bare `passdf.reset_index(drop=True)` discarded its result and is
# dropped; all row access below is positional (.iloc), so nothing changes.

team_ids = passdf.CUID.unique()
CUID1 = team_ids[0]
CUID2 = team_ids[1]

pass_row = passdf.iloc[i, :]
CUID = pass_row.CUID
PUID1 = pass_row.PUID1
PUID2 = pass_row.PUID2
t1 = int(pass_row.N)
t2 = int(pass_row.RECFRM)
p1 = np.array([float(pass_row.X_TRACKING), float(pass_row.Y_TRACKING)])  # pass origin
p2 = np.array([float(pass_row.XRec), float(pass_row.YRec)])              # reception point

# Evaluate the direction once instead of once per branch.
direction = directionplayframe(t1, CUID)
if direction in ('left to right', 'right to left'):
    # Reference point at Y = 0; its X is the largest / smallest X_TRACKING of
    # all completed passes, depending on the direction of play.
    # NOTE(review): presumably a proxy for the centre of the attacked goal - confirm.
    if direction == 'left to right':
        target_x = max(passdf.X_TRACKING)
    else:
        target_x = min(passdf.X_TRACKING)
    triangle = np.array([float(target_x), 0])

    passline = p2 - p1
    passtor = triangle - p1
    cosine_angle = np.dot(passline, passtor) / (np.linalg.norm(passline) * np.linalg.norm(passtor))
    # Round-off can push the cosine marginally outside [-1, 1], which makes
    # arccos return NaN for (anti-)parallel vectors; clip it back first.
    angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))
    print(np.degrees(angle))