In [1]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import CubicSpline
from mplsoccer.pitch import Pitch
import json
import pandas as pd
import re
from math import isnan, sqrt
from datetime import datetime, timedelta
from smoothingWithCurvatureSigns import performSmoothing, euclidianDistance
from preprocessingStats import players
import time
import pickle



In [2]:
import csv

def read_csv_partially(file_path, start_line, end_line):
    """Return the CSV rows whose zero-based row number lies in [start_line, end_line].

    Args:
        file_path: Path of the CSV file to read.
        start_line: Zero-based number of the first row to keep (inclusive).
        end_line: Zero-based number of the last row to keep (inclusive).

    Returns:
        A list of rows, each row being the list of strings produced by csv.reader.
        Reading stops as soon as a row past end_line is encountered.
    """
    selected_rows = []

    with open(file_path, 'r', newline='') as handle:
        for row_number, record in enumerate(csv.reader(handle)):
            # Everything after end_line is irrelevant, so stop early.
            if row_number > end_line:
                break
            if row_number >= start_line:
                selected_rows.append(record)

    return selected_rows

In [3]:
# Paths to the tracking and lineup CSV files used throughout the notebook.
file_path_skillcorner = '../data/matches/vs FC Lugano/Preprocessed Skillcorner data/1296476_tracking.csv'
lineup_path = '../data/matches/vs FC Lugano/Preprocessed Skillcorner data/1296476_lineup.csv'
# Load CSV rows 0..1500000 of the tracking file (row 0 is kept here and sliced off
# later where a DataFrame is built from partialframes[1:]).
partialframes = read_csv_partially(file_path_skillcorner, 0, 1500000)

# Load CSV rows 0..33 of the lineup file; getTrajectories skips row 0 via lineup[1:].
lineup = read_csv_partially(lineup_path, 0, 33)
# targetIDSkillcorner = 7679
targetIDSkillcorner = 59893
# NOTE(review): not referenced anywhere else in the visible cells — confirm it is still needed.
targetPositionsSkillcorner = []

def getTrajectories(teamName, lineup, partialframes):
    """Group one team's tracking rows into per-player, gap-free trajectory segments.

    Args:
        teamName: Team name matched against column 1 of the lineup rows.
        lineup: Lineup rows; row 0 is skipped, column 2 holds the player ID.
        partialframes: Tracking rows; the columns read are 2 (frame id),
            3 (timestamp, divided by 1000 before datetime.fromtimestamp),
            4 (object id), 5 (x), 6 (y) and 8 (extrapolated flag).

    Returns:
        A dict mapping each player ID (as a string) to a list of segments.
        Each segment is a list of points [x, y, -1, datetime, frame_id]; a new
        segment is started whenever the frame id does not directly follow the
        previous point's frame id.
    """
    playerIDs = {str(entry[2]) for entry in lineup[1:] if entry[1] == teamName}
    targetTrajs = {playerID: [] for playerID in playerIDs}

    for record in partialframes:
        # Skip rows of other objects and rows flagged as extrapolated.
        if record[4] not in playerIDs or record[8] == 'True':
            continue

        segments = targetTrajs[record[4]]
        # The last element of the last stored point is that point's frame id.
        startsNewSegment = not segments or int(segments[-1][-1][-1]) != int(record[2]) - 1
        if startsNewSegment:
            segments.append([])

        point = [
            float(record[5]),
            float(record[6]),
            -1,
            datetime.fromtimestamp(int(record[3]) / 1000.0),
            int(record[2]),
        ]
        segments[-1].append(point)
    return targetTrajs

# Trajectory segments for every lineup player whose team column equals "FC Basel".
targetTrajs = getTrajectories("FC Basel", lineup, partialframes)

In [4]:
def plotMovement(playerID, df, start_frame=34591, end_frame=34764):
    """Plot one player's positions and movement arrows on a SkillCorner pitch.

    Args:
        playerID: Value compared against df['object_id'] to select the player.
        df: DataFrame with at least the columns 'object_id', 'frame_id', 'x', 'y'.
        start_frame: First frame id (inclusive) to draw.
        end_frame: Last frame id (inclusive) to draw.

    Rows whose x or y is NaN are skipped entirely. The axes are zoomed to the
    bounding box of the drawn points plus a margin of 5 on each side; when no
    point is drawn the default pitch limits are kept.
    """
    pitch = Pitch(pitch_type='skillcorner', axis=True, label=True,
                  pitch_length=105, pitch_width=68,
                  pitch_color='white', line_color='black',)

    df = df[df['object_id'] == playerID].copy().reset_index()

    fig, ax = pitch.draw(figsize=(20, 16), constrained_layout=True, tight_layout=False)
    # Bounding box [x_min, x_max, y_min, y_max], seeded with inverted sentinels.
    window = [60,-60,60,-60]
    plottedAny = False
    for i, frame in df.iterrows():
        if frame['frame_id'] >= start_frame and frame['frame_id'] <= end_frame:
            x = float(frame['x'])
            y = float(frame['y'])

            # Check for NaN *before* touching the window: min()/max() with a NaN
            # would poison the bounding box and break set_xlim/set_ylim below.
            if isnan(x) or isnan(y):
                continue
            window = [min(x, window[0]), max(x, window[1]), min(y, window[2]), max(y, window[3])]
            plottedAny = True

            # 'o' instead of 'ro': the colour is given once, via the keyword.
            ax.plot(x, y, 'o', markersize=10, color='blue')
            # i > 0 (not i > 1) so the step from row 0 to row 1 is drawn as well.
            if i > 0:
                x_prev = float(df.loc[i-1, 'x'])
                y_prev = float(df.loc[i-1, 'y'])
                if isnan(x_prev) or isnan(y_prev):
                    continue
                ax.plot([x_prev, x], [y_prev, y], 'r-', linewidth=2)
                ax.arrow(x_prev, y_prev, x - x_prev, y - y_prev, color='r', head_width=0.2, head_length=0.4)

    # Only zoom when something was drawn; the sentinel window would invert the axes.
    if plottedAny:
        ax.set_xlim([window[0]-5, window[1]+5])
        ax.set_ylim([window[2]-5, window[3]+5])
    plt.show()


In [5]:
# add for loop that starts here and smooths every trajectory in the traj list
targetedPlayerID = 59893

# Smoothing iteration counts to evaluate; extractSmoothDataFrames produces one
# DataFrame per entry and forwards each value as `iterations` to performSmoothing.
its = [
    2, 
    5, 
    10,
    25,
    30,
    40,
    50,
    # 10, 
    # 50, 
    # 100, 
    # 150, 
    # 200
]

def extractSmoothDataFrames(targetTrajs, targetedPlayerID, its, freq=10):
    """Smooth one player's trajectories once per iteration count.

    Args:
        targetTrajs: Dict mapping player ID strings to lists of trajectory
            segments (as produced by getTrajectories).
        targetedPlayerID: ID of the player to process; compared as a string.
        its: Iterable of iteration counts forwarded to performSmoothing.
        freq: Factor applied to 'arc_length' to obtain 'speed'.

    Returns:
        A list with one DataFrame per entry of `its`, with the columns
        'x', 'y', 'arc_length', 'time', 'frame_id', 'object_id', 'speed'.
        'speed' is NaN wherever 'arc_length' equals -1. If the player has no
        segment with at least two points, each DataFrame is empty (same
        columns) instead of raising a KeyError.
    """
    columnNames = ['x', 'y', 'arc_length', 'time', 'frame_id', 'object_id']
    playerKey = str(targetedPlayerID)
    # Segments with fewer than two points are not smoothed.
    playerTrajs = [traj for traj in targetTrajs.get(playerKey, []) if len(traj) >= 2]

    smoothDataFrames = []
    for it in its:
        smoothedSegments = []
        for traj in playerTrajs:
            # assumes performSmoothing returns one 5-column row per input point — TODO confirm
            new_traj = pd.DataFrame(performSmoothing(datapoints=traj, iterations=it))
            new_traj[5] = playerKey

            frameIDs = pd.DataFrame(traj)[4]
            # Same diagnostic as before (one 'nan' per missing frame id), vectorised.
            for _ in range(int(frameIDs.isna().sum())):
                print('nan')

            # Carry the original frame ids over to the smoothed points.
            new_traj[4] = frameIDs
            smoothedSegments.append(new_traj)

        if not smoothedSegments:
            # Nothing to smooth: return a typed, empty frame with the usual schema.
            emptyFrame = pd.DataFrame(columns=columnNames + ['speed']).astype(
                {'x': float, 'y': float, 'arc_length': float, 'frame_id': int, 'speed': float}
            )
            smoothDataFrames.append(emptyFrame)
            continue

        # Concatenate once per iteration count instead of once per segment.
        playerSmoothDataFrames = pd.concat(smoothedSegments, axis=0, ignore_index=True)
        playerSmoothDataFrames[4] = playerSmoothDataFrames[4].astype(int)
        playerSmoothDataFrames.columns = columnNames

        playerSmoothDataFrames['speed'] = playerSmoothDataFrames['arc_length']*freq
        # An arc length of -1 marks points without a valid arc length, hence no speed.
        playerSmoothDataFrames.loc[playerSmoothDataFrames['arc_length'] == -1, 'speed'] = None

        playerSmoothDataFrames['frame_id'] = playerSmoothDataFrames['frame_id'].astype(int)
        playerSmoothDataFrames['x'] = playerSmoothDataFrames['x'].astype(float)
        playerSmoothDataFrames['y'] = playerSmoothDataFrames['y'].astype(float)

        smoothDataFrames.append(playerSmoothDataFrames)
    return smoothDataFrames

# Unsmoothed tracking data as a DataFrame; row 0 of partialframes is dropped and
# the column names are assigned explicitly. All values are strings from the CSV
# reader, so frame_id is cast to int here.
originalDF = pd.DataFrame(partialframes[1:])
originalDF.columns = ['match_id', 'half', 'frame_id', 'timestamp', 'object_id', 'x', 'y', 'z', 'extrapolated']
originalDF['frame_id'] = originalDF['frame_id'].astype(int)

# name -> list of smoothed DataFrames, one per entry of `its` (same order).
skillCornerSmoothDataFrames = {}
for name, skillCornerID, statsPerformID in players:
    print(f'Extracting smooth dataframes for {name}')
    # Wall-clock timing of the smoothing for this player.
    start_time = time.time()
    skillCornerSmoothDataFrames[name] = extractSmoothDataFrames(targetTrajs, skillCornerID, its)
    end_time = time.time()
    print(f'Elapsed time for {name}: {end_time - start_time}')

Extracting smooth dataframes for Barry
Elapsed time for Barry: 11.7652108669281
Extracting smooth dataframes for Avdullahu
Elapsed time for Avdullahu: 30.15395498275757
Extracting smooth dataframes for Sigua
Elapsed time for Sigua: 33.3053297996521
Extracting smooth dataframes for Beney
Elapsed time for Beney: 43.22356200218201
Extracting smooth dataframes for Dräger
Elapsed time for Dräger: 31.806608200073242
Extracting smooth dataframes for Palma Veiga
Elapsed time for Palma Veiga: 118.1125431060791
Extracting smooth dataframes for van Breemen
Elapsed time for van Breemen: 66.46555685997009
Extracting smooth dataframes for Frei
Elapsed time for Frei: 67.9970440864563
Extracting smooth dataframes for Schmid
Elapsed time for Schmid: 91.42551398277283
Extracting smooth dataframes for Kade
Elapsed time for Kade: 124.23793005943298
Extracting smooth dataframes for Jovanović
Elapsed time for Jovanović: 139.6988489627838
Extracting smooth dataframes for Xhaka
Elapsed time for Xhaka: 100.376

In [14]:
# name -> list of merged DataFrames (original rows enriched with smoothed values),
# one per entry of skillCornerSmoothDataFrames[name].
mergedSmoothDataFrames = {}
for name, skillCornerID, statsPerformID in players:
    # object_id values in originalDF are strings, hence the str() on the ID.
    playerOriginalDF = originalDF[originalDF['object_id'] == str(skillCornerID)].copy()
    mergedSmoothDataFrames[name] = []

    for i, df in enumerate(skillCornerSmoothDataFrames[name]):
        # Left join: the original x/y become x_x/y_x, the smoothed ones x_y/y_y.
        mergedDF = playerOriginalDF.merge(df, how='left', on=['object_id', 'frame_id'])
        # Prefer the smoothed coordinates, falling back to the original ones.
        mergedDF['x_x'] = mergedDF['x_y'].fillna(mergedDF['x_x'])
        mergedDF['y_x'] = mergedDF['y_y'].fillna(mergedDF['y_x'])

        mergedDF.drop(columns=['x_y', 'y_y', 'time'], inplace=True)
        # Positional rename of the 11 remaining columns.
        mergedDF.columns = ['match_id', 'half', 'frame_id', 'timestamp', 'object_id', 'x', 'y', 'z', 'extrapolated', 'arc_length', 'speed']
        mergedDF['timestamp'] = mergedDF['timestamp'].astype(int)
        mergedDF['frame_id'] = mergedDF['frame_id'].astype(int)
        # Rows without a smoothed counterpart have no arc_length and are removed.
        mergedDF.dropna(subset=['arc_length'], inplace=True)
        # extractMetrics indexes rows by label i-1, so the index is reset to 0..n-1.
        mergedDF.reset_index(drop=True, inplace=True)
        mergedSmoothDataFrames[name].append(mergedDF.copy())

# extractedDF = mergedSmoothDataFrames["Schmid"][0]
# extractedDF

**Player Physical Metrics**

In [15]:
def _sustainedPeakDuration(speeds, timestamps, speedTolerance):
    """Timestamp span around the peak speed while speed stays within speedTolerance of it."""
    rowCount = len(speeds)
    if rowCount == 0 or np.all(np.isnan(speeds)):
        return 0

    peak = int(np.nanargmax(speeds))  # first occurrence, like Series.idxmax
    maxSpeed = speeds[peak]

    # Walk outwards to the first row on each side that leaves the tolerance band.
    high = peak
    while high < rowCount and abs(speeds[high] - maxSpeed) < speedTolerance:
        high += 1
    low = peak
    while low >= 0 and abs(speeds[low] - maxSpeed) < speedTolerance:
        low -= 1

    # Clamp when the band reaches the first/last row; indexing past the data
    # previously raised a KeyError.
    high = min(high, rowCount - 1)
    low = max(low, 0)
    return timestamps[high] - timestamps[low]


def _countSprints(speeds, timestamps, sprintThreshold, minSprintDuration, skipRow=None):
    """Count runs of rows with speed > sprintThreshold lasting longer than minSprintDuration."""
    sprintCount = 0
    startOfSprintIDX = -1
    rowCount = len(speeds)
    for i in range(1, rowCount):  # row 0 is never inspected
        if skipRow is not None and skipRow[i]:
            continue
        if speeds[i] > sprintThreshold:
            if startOfSprintIDX == -1:
                startOfSprintIDX = i
        elif startOfSprintIDX != -1:
            if timestamps[i-1] - timestamps[startOfSprintIDX] > minSprintDuration:
                sprintCount += 1
            startOfSprintIDX = -1

    # A sprint still running at the last row used to be dropped silently.
    if startOfSprintIDX != -1:
        if timestamps[rowCount-1] - timestamps[startOfSprintIDX] > minSprintDuration:
            sprintCount += 1
    return sprintCount


def extractMetrics(playerDF, speedTolerance=0.28, sprintThreshold=6.94, minSprintDuration=700):
    """Compute physical metrics for one player's tracking DataFrame.

    Args:
        playerDF: DataFrame with the columns 'speed', 'timestamp', 'x', 'y' and
            either 'arc_length' (smoothed data) or 'frame_id' (unsmoothed data).
            Rows are processed in positional order; callers in this notebook
            pass frames with a 0..n-1 index.
        speedTolerance: Maximum deviation from the peak speed that still counts
            towards 'maximumSpeedSustained'.
        sprintThreshold: A row belongs to a sprint when its speed exceeds this.
        minSprintDuration: A sprint is counted when the timestamp difference
            between its first and last row exceeds this value.

    Returns:
        Dict with the keys 'maxSpeed', 'distanceCovered',
        'maximumSpeedSustained' and 'sprintCount'. 'maximumSpeedSustained' is
        the timestamp difference between the first row after and the first row
        before the peak that leave the tolerance band (clamped to the data), or
        0 when there is no valid speed at all.
    """
    metrics = {
        'maxSpeed': playerDF['speed'].max(),
        'distanceCovered': 0,
        'maximumSpeedSustained': 0,
        'sprintCount': 0,
    }

    rowCount = len(playerDF)
    speeds = playerDF['speed'].to_numpy(dtype=float)
    timestamps = playerDF['timestamp'].to_numpy()
    xs = playerDF['x'].to_numpy()
    ys = playerDF['y'].to_numpy()

    metrics['maximumSpeedSustained'] = _sustainedPeakDuration(speeds, timestamps, speedTolerance)

    distanceCovered = 0
    skipRow = None
    # handle unsmoothed data
    if "arc_length" not in playerDF.columns:
        frameIDs = playerDF['frame_id'].to_numpy()
        for i in range(1, rowCount):
            # Only consecutive frames contribute to the distance.
            if frameIDs[i] == frameIDs[i-1] + 1:
                distanceCovered += euclidianDistance([xs[i], ys[i]], [xs[i-1], ys[i-1]])
    # handle smoothed data
    else:
        arcLengths = playerDF['arc_length'].to_numpy()
        skipRow = [False] * rowCount
        for i in range(1, rowCount):
            arclength = arcLengths[i]
            prevArclength = arcLengths[i-1]
            if arclength == -1 and prevArclength == -1:
                # Such rows were skipped entirely before (distance and sprint
                # bookkeeping), so they are masked for the sprint count too.
                skipRow[i] = True
                continue
            if arclength == -1 or prevArclength == -1:
                # Exactly one side lacks an arc length: use the straight-line distance.
                distanceCovered += euclidianDistance([xs[i], ys[i]], [xs[i-1], ys[i-1]])
            else:
                distanceCovered += arclength/2 + prevArclength/2

    metrics['sprintCount'] = _countSprints(speeds, timestamps, sprintThreshold, minSprintDuration, skipRow)
    metrics['distanceCovered'] = distanceCovered
    return metrics

In [16]:
# Collect one metrics row per player and iteration count; iteration 0 denotes the
# unsmoothed data.
playerMetrics = pd.DataFrame()
for name, skillCornerID, _ in players:
    playerDF = mergedSmoothDataFrames[name][0]
    # Frame ids present in the first smoothed frame; used to restrict the original data.
    frames = mergedSmoothDataFrames[name][0]['frame_id']

    originaltargetedPlayerDF = originalDF[originalDF['object_id'] == str(skillCornerID)].copy().reset_index(drop=True)
    # Right join on frame_id keeps only the frames listed in `frames`.
    originaltargetedPlayerDF = pd.merge(originaltargetedPlayerDF, frames, how="right", on=['frame_id'])

    originaltargetedPlayerDF['x'] = originaltargetedPlayerDF['x'].astype(float)
    originaltargetedPlayerDF['y'] = originaltargetedPlayerDF['y'].astype(float)

    # NOTE(review): alias of the same object, not a copy.
    extractedDF = originaltargetedPlayerDF
    
    originaltargetedPlayerDF['timestamp'] = originaltargetedPlayerDF['timestamp'].astype(int)

    # Distance to the previous row and to the next row, respectively.
    euclidean_distance1 = np.sqrt((originaltargetedPlayerDF['x'] - originaltargetedPlayerDF['x'].shift(1))**2 + (originaltargetedPlayerDF['y'] - originaltargetedPlayerDF['y'].shift(1))**2)
    euclidean_distance2 = np.sqrt((originaltargetedPlayerDF['x'] - originaltargetedPlayerDF['x'].shift(-1))**2 + (originaltargetedPlayerDF['y'] - originaltargetedPlayerDF['y'].shift(-1))**2)

    # Timestamp difference to the previous row and to the next row, respectively.
    timediff1 = originaltargetedPlayerDF['timestamp'] - originaltargetedPlayerDF['timestamp'].shift(1)
    timediff2 = originaltargetedPlayerDF['timestamp'].shift(-1) - originaltargetedPlayerDF['timestamp']

    # Two-sided speed estimate; the factor 1000 presumably converts from
    # per-millisecond to per-second — verify the timestamp unit.
    originaltargetedPlayerDF['speed'] = (euclidean_distance1+euclidean_distance2)/(timediff1+timediff2)*1000

    # Same quantity as timediff1 + timediff2, stored as a column for the filter below.
    originaltargetedPlayerDF['timediff'] = (originaltargetedPlayerDF['timestamp'] - originaltargetedPlayerDF['timestamp'].shift(1)) + (originaltargetedPlayerDF['timestamp'].shift(-1) - originaltargetedPlayerDF['timestamp'])

    # if timediff is larger than 200, put speed to NaN
    originaltargetedPlayerDF.loc[originaltargetedPlayerDF['timediff'] > 200, 'speed'] = np.nan

    # Metrics of the unsmoothed data (this frame has no 'arc_length' column).
    ogMetrics = pd.DataFrame([extractMetrics(originaltargetedPlayerDF)])
    ogMetrics['iteration'] = 0
    ogMetrics['name'] = name
    playerMetrics = pd.concat([playerMetrics, ogMetrics], axis=0)

    # Metrics of each smoothed variant; its[i] is the iteration count of frame i.
    for i, df in enumerate(mergedSmoothDataFrames[name]):
        metrics = pd.DataFrame([extractMetrics(df)])
        metrics['iteration'] = its[i]
        metrics['name'] = name
        playerMetrics = pd.concat([playerMetrics, metrics], axis=0)


In [17]:
# Fix the column order, then persist the metrics table as a pickle file.
playerMetrics = playerMetrics[['name', 'iteration', 'maxSpeed', 'distanceCovered', 'maximumSpeedSustained', 'sprintCount']]
with open('dataframes/withCurvatureSign/totalMetricsSkillCornerSmallerIts.pkl', 'wb') as f:
    pickle.dump(playerMetrics, f)

In [19]:
# Reload the metrics table written by the previous cell and show one player's rows.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles produced by this notebook.
with open('dataframes/withCurvatureSign/totalMetricsSkillCornerSmallerIts.pkl', 'rb') as f:
    playerMetrics = pickle.load(f)
    print(playerMetrics[playerMetrics['name'] == 'Kade'])


   name  iteration  maxSpeed  distanceCovered  maximumSpeedSustained  \
0  Kade          0  9.101897      6471.924846                    800   
0  Kade          2  9.073897      6470.937821                    800   
0  Kade          5  9.035275      6469.787748                    900   
0  Kade         10  8.982044      6468.247479                    900   
0  Kade         25  8.857682      6463.931492                   1100   
0  Kade         30  8.824976      6462.537230                   1100   
0  Kade         40  8.761599      6459.824212                   1200   
0  Kade         50  8.706413      6457.204391                   1200   

   sprintCount  
0           16  
0           17  
0           17  
0           17  
0           14  
0           14  
0           12  
0           10  


In [11]:
# metrics = []
# for df in skillCornerSmoothDataFrames:
#     metrics.append(extractMetrics(df))
    
# maxSpeedList = list(map(lambda m: m['maxSpeed'], metrics))
# print(maxSpeedList)
# plt.plot(its, maxSpeedList, label='Cumulated Maximum Speed of trajectories')
# plt.xlabel('Iterations')  # Replace 'Index' with the actual x-axis label
# plt.ylabel('Max Speed [m/s]')  # Replace 'Value' with the actual y-axis label  
# plt.show()
# metrics = pd.DataFrame(metrics)
# metrics['iterations'] = its
# metrics = metrics[['iterations', 'maxSpeed', 'maximumSpeedSustained', 'sprintCount', 'distanceCovered']]
# metrics

As we can see, there is a significant jump in speed at frame 49780 when extrapolated data is used. The data at this frame reports a time difference of only 100 ms between the previous and the current frame, which results in a very high computed speed. If we ignore extrapolated frames, we get metrics that are very similar to the smoothed ones.

In [12]:
def smoothSkillcornerDataFrame(tracking_file_path, lineup_path, iterations, lines_to_smooth=2000000, teamName="FC Basel", freq=10):
    """Smooth the trajectories of all players of one team and merge them into the tracking data.

    Args:
        tracking_file_path: Path of the tracking CSV file.
        lineup_path: Path of the lineup CSV file (rows 0..33 are read).
        iterations: Iteration count forwarded to performSmoothing.
        lines_to_smooth: Last CSV row (inclusive, zero-based) read from the tracking file.
        teamName: Team whose players are smoothed.
        freq: Factor applied to 'arc_length' to obtain 'speed' (previously the
            hard-coded value 10).

    Returns:
        DataFrame with the columns 'match_id', 'half', 'frame_id', 'timestamp',
        'object_id', 'x', 'y', 'z', 'extrapolated', 'speed'. 'x' and 'y' are the
        smoothed coordinates; rows without a smoothed counterpart hold NaN in
        'x', 'y' and 'speed'.
    """
    partialframes = read_csv_partially(tracking_file_path, 0, lines_to_smooth)
    lineup = read_csv_partially(lineup_path, 0, 33)

    targetTrajs = getTrajectories(teamName, lineup, partialframes)

    smoothColumns = ['x', 'y', 'arc_length', 'time', 'frame_id', 'object_id']
    smoothedSegments = []

    for player, trajs in targetTrajs.items():
        for traj in trajs:
            # Segments with fewer than two points are not smoothed.
            if(len(traj) < 2):
                continue

            # assumes performSmoothing returns one 5-column row per input point — TODO confirm
            new_traj = pd.DataFrame(performSmoothing(datapoints=traj, iterations=iterations))
            new_traj[5] = player

            frameIDs = pd.DataFrame(traj)[4]
            # Same diagnostic as before (one 'nan' per missing frame id), vectorised.
            for _ in range(int(frameIDs.isna().sum())):
                print('nan')

            # Carry the original frame ids over to the smoothed points.
            new_traj[4] = frameIDs
            smoothedSegments.append(new_traj)

    if smoothedSegments:
        # Concatenate once instead of once per segment.
        smoothDataFrames = pd.concat(smoothedSegments, axis=0, ignore_index=True)
        smoothDataFrames[4] = smoothDataFrames[4].astype(int)
        smoothDataFrames.columns = smoothColumns
    else:
        # Nothing could be smoothed: use a typed, empty frame so the merge below still works.
        smoothDataFrames = pd.DataFrame(columns=smoothColumns).astype(
            {'x': float, 'y': float, 'arc_length': float, 'frame_id': int}
        )

    origDF = pd.DataFrame(partialframes[1:])
    origDF.columns = ['match_id', 'half', 'frame_id', 'timestamp', 'object_id', 'x', 'y', 'z', 'extrapolated']

    origDF['frame_id'] = origDF['frame_id'].astype(int)
    # Left join: original coordinates become x_x/y_x, smoothed ones x_y/y_y.
    mergedDF = origDF.merge(smoothDataFrames, how='left', on=['object_id', 'frame_id'])
    mergedDF['speed'] = mergedDF['arc_length']*freq
    mergedDF = mergedDF[['match_id', 'half', 'frame_id', 'timestamp', 'object_id', 'x_y', 'y_y', 'z', 'extrapolated', 'speed']]
    mergedDF.columns = ['match_id', 'half', 'frame_id', 'timestamp', 'object_id', 'x', 'y', 'z', 'extrapolated', 'speed']

    return mergedDF


In [13]:
# totalMetrics = pd.DataFrame(columns=['name', 'metrics'])
# # with open('dataframes/totalMetrics.pkl', 'wb') as file:
# #     pickle.dump(totalMetrics, file)
# metrics = pd.DataFrame({'name': 'giani', 'metrics': 0}, index=[0])
# totalMetrics = pd.concat([totalMetrics, metrics], axis=0)
# print(totalMetrics)
# # print(totalMetrics)

# with open('dataframes/totalMetricsStatsPerform.pkl', 'rb') as file:
#     loaded_df = pickle.load(file)
#     # print(loaded_df)
#     print(loaded_df[loaded_df['name'] =='Dräger'])