In [2]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import CubicSpline
from mplsoccer.pitch import Pitch
import json
import pandas as pd
import re
from math import isnan, sqrt
from datetime import datetime, timedelta
from smoothing import performSmoothing, euclidianDistance



In [3]:
import csv

def read_csv_partially(file_path, start_line, end_line):
    """Read an inclusive range of rows from a CSV file.

    Rows are numbered from 0 in the order ``csv.reader`` yields them.

    Args:
        file_path: Path of the CSV file to open.
        start_line: Index of the first row to keep.
        end_line: Index of the last row to keep (inclusive).

    Returns:
        A list of rows, each row being a list of strings.
    """
    selected_rows = []

    with open(file_path, 'r', newline='') as csv_file:
        for row_number, fields in enumerate(csv.reader(csv_file)):
            if row_number > end_line:
                # Nothing past end_line is wanted, so stop reading early.
                break
            if row_number >= start_line:
                selected_rows.append(fields)

    return selected_rows

In [5]:
# Input files for match 1384039: tracking export and team lineup.
file_path_skillcorner = '../data/matches/Feb/1384039_tracking.csv'
lineup_path = '../data/matches/Feb/1384039_lineup.csv'

# Player of interest (alternative ID kept for reference: 7679).
targetIDSkillcorner = 10218
targetPositionsSkillcorner = []

# Load at most the first 1,500,001 tracking rows and the first 34 lineup rows
# (row ranges are inclusive and start at 0).
partialframes = read_csv_partially(file_path_skillcorner, start_line=0, end_line=1500000)
lineup = read_csv_partially(lineup_path, start_line=0, end_line=33)

def getTrajectories(teamName, lineup, partialframes):
    """Group a team's non-extrapolated tracking points into gap-free trajectories.

    Args:
        teamName: Team whose players are selected; compared against column 1
            of each lineup row.
        lineup: Lineup rows as lists of strings. The first row is skipped
            (presumably a header -- confirm against the lineup export) and
            column 2 is used as the player ID.
        partialframes: Tracking rows as lists of strings. Columns read here:
            2 = frame number, 3 = timestamp (divided by 1000 before being
            passed to ``datetime.fromtimestamp``), 4 = object ID,
            5 / 6 = x / y, 8 = extrapolated flag (the string ``'True'``).

    Returns:
        dict mapping each selected player ID (str) to a list of trajectories.
        Each trajectory is a list of ``[x, y, 0, datetime, frame]`` points
        whose frame numbers increase by exactly one; a new trajectory is
        started whenever the previous stored frame is not ``frame - 1``.
        Players without any usable row map to an empty list.
    """
    playerIDs = {str(player[2]) for player in lineup[1:] if player[1] == teamName}
    targetTrajs = {ID: [] for ID in playerIDs}

    for playerFrame in partialframes:
        # Blank or truncated CSV rows cannot be indexed below; skip them
        # instead of failing with an IndexError.
        if len(playerFrame) < 9:
            continue

        # Ignore objects outside the team and positions flagged as extrapolated.
        if playerFrame[4] not in playerIDs or playerFrame[8] == 'True':
            continue

        frameNumber = int(playerFrame[2])
        playerTrajs = targetTrajs[playerFrame[4]]

        # Open a new trajectory unless this frame directly follows the last
        # stored point (whose final field is its frame number).
        if not playerTrajs or playerTrajs[-1][-1][-1] != frameNumber - 1:
            playerTrajs.append([])

        playerTrajs[-1].append([
            float(playerFrame[5]),
            float(playerFrame[6]),
            0,
            datetime.fromtimestamp(int(playerFrame[3]) / 1000.0),
            frameNumber,
        ])
    return targetTrajs

# Build the per-player trajectories for FC Basel from the loaded lineup and tracking rows.
targetTrajs = getTrajectories(teamName="FC Basel", lineup=lineup, partialframes=partialframes)

In [6]:
def plotMovement(playerID, df, start_frame=34591, end_frame=34764):
    """Plot one object's positions over a frame range on a SkillCorner pitch.

    Every row of ``df`` whose ``object_id`` equals ``playerID`` and whose
    ``frame_id`` lies in ``[start_frame, end_frame]`` is drawn as a blue dot.
    Consecutive drawn rows are joined by a red line and arrow. Rows with a
    NaN coordinate are skipped and break the line, so no segment is drawn
    across a gap. The axes are zoomed to the drawn points plus a margin of 5
    on each side.

    Args:
        playerID: Value compared against the ``object_id`` column.
        df: DataFrame with ``object_id``, ``frame_id``, ``x`` and ``y``
            columns; ``frame_id`` must be comparable with the frame bounds.
        start_frame: First frame to draw (inclusive).
        end_frame: Last frame to draw (inclusive).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    pitch = Pitch(pitch_type='skillcorner', axis=True, label=True,
                  pitch_length=105, pitch_width=68,
                  pitch_color='white', line_color='black',)

    player_df = df[df['object_id'] == playerID].copy().reset_index()
    in_range = player_df[player_df['frame_id'].between(start_frame, end_frame)]

    fig, ax = pitch.draw(figsize=(20, 16), constrained_layout=True, tight_layout=False)

    drawn_x, drawn_y = [], []
    previous_point = None  # last drawn (x, y); None at the start or after a gap
    for _, frame in in_range.iterrows():
        x = float(frame['x'])
        y = float(frame['y'])

        # Check for NaN before the point can influence the zoom window:
        # NaN axis limits are rejected by matplotlib.
        if isnan(x) or isnan(y):
            previous_point = None
            continue

        drawn_x.append(x)
        drawn_y.append(y)
        # Marker-only format string; the colour comes from the keyword alone.
        ax.plot(x, y, 'o', markersize=10, color='blue')

        if previous_point is not None:
            x_prev, y_prev = previous_point
            ax.plot([x_prev, x], [y_prev, y], 'r-', linewidth=2)
            ax.arrow(x_prev, y_prev, x - x_prev, y - y_prev, color='r', head_width=0.2, head_length=0.4)
        previous_point = (x, y)

    # Zoom only when something was drawn; otherwise keep the pitch's own limits.
    if drawn_x:
        ax.set_xlim([min(drawn_x) - 5, max(drawn_x) + 5])
        ax.set_ylim([min(drawn_y) - 5, max(drawn_y) + 5])
    plt.show()


In [7]:
# pitch = Pitch(pitch_type='skillcorner', axis=True, label=True,
#                   pitch_length=105, pitch_width=68,
#                   pitch_color='white', line_color='black',)

# fig, ax = pitch.draw(figsize=(20, 16), constrained_layout=True, tight_layout=False)
# frame = performSmoothing(giani, 1)
# for i in range(1, len(frame)):
#     x = frame[i][0]
#     y = frame[i][1]
#     if isnan(x) or isnan(y):
#         continue
#     ax.plot(x, y, 'ro', markersize=20, color='blue')
#     if i > 1:
#         x_prev = frame[i-1][0]
#         y_prev = frame[i-1][1]
#         if isnan(x_prev) or isnan(y_prev):
#             continue
#         ax.plot([x_prev, x], [y_prev, y], 'r-')
#         ax.arrow(x_prev, y_prev, x - x_prev, y - y_prev, color='r')
# ax.set_xlim([-2.5, -2.4])
# ax.set_ylim([10.15, 10.2])
# plt.show()

In [8]:
# Smooth every gap-free trajectory of every player, then merge the smoothed
# points back onto the raw tracking rows.
smoothedSegments = []

for player, trajs in targetTrajs.items():
    for traj in trajs:
        # Trajectories with fewer than two points are not smoothed.
        if len(traj) < 2:
            continue

        new_traj = pd.DataFrame(performSmoothing(datapoints=traj, iterations=1))
        new_traj[5] = player
        # Restore the frame number of each input point (its last field),
        # aligned by row position.
        new_traj[4] = pd.Series([point[4] for point in traj])

        smoothedSegments.append(new_traj)

if not smoothedSegments:
    raise ValueError("No trajectory with at least two points was found; nothing to smooth.")

# Concatenate once instead of growing a DataFrame inside the loop.
playerSmoothDataFrames = pd.concat(smoothedSegments, axis=0, ignore_index=True)

# Row 0 of partialframes is left out of the raw frame.
originalDF = pd.DataFrame(partialframes[1:])

playerSmoothDataFrames[4] = playerSmoothDataFrames[4].astype(int)

playerSmoothDataFrames.columns = ['x', 'y', 'arc_length', 'time', 'frame_id', 'object_id']
originalDF.columns = ['match_id', 'half', 'frame_id', 'timestamp', 'object_id', 'x', 'y', 'z', 'extrapolated']

# Both merge keys must share a dtype: frame_id is int on both sides.
originalDF['frame_id'] = originalDF['frame_id'].astype(int)
mergedDF = originalDF.merge(playerSmoothDataFrames, how='left', on=['object_id', 'frame_id'])


In [9]:
# mergedDF

Unnamed: 0,match_id,half,frame_id,timestamp,object_id,x_x,y_x,z,extrapolated,x_y,y_y,arc_length,time
0,1384039,1,100,0,-1,-0.71,0.86,0.23,True,,,,NaT
1,1384039,1,100,0,34469,-39.27,0.41,0.0,True,,,,NaT
2,1384039,1,100,0,22148,-20.31,-2.7,0.0,True,,,,NaT
3,1384039,1,100,0,644216,-19.77,6.54,0.0,True,,,,NaT
4,1384039,1,100,0,59898,-21.07,-12.92,0.0,True,,,,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1067563,1384039,2,61042,3084200,30524,41.17,3.73,0.0,False,,,,NaT
1067564,1384039,2,61042,3084200,5270,30.09,6.81,0.0,False,,,,NaT
1067565,1384039,2,61042,3084200,174962,42.3,29.71,0.0,False,,,,NaT
1067566,1384039,2,61042,3084200,397985,42.84,-13.33,0.0,True,,,,NaT


In [10]:
# Prefer the smoothed coordinates (x_y / y_y) and fall back to the raw ones
# (x_x / y_x) where no smoothed value exists, then restore the plain names.
newDF = (
    mergedDF
    .assign(
        x_x=lambda merged: merged['x_y'].fillna(merged['x_x']),
        y_x=lambda merged: merged['y_y'].fillna(merged['y_x']),
    )
    .drop(columns=['x_y', 'y_y', 'time'])
    .rename(columns={'x_x': 'x', 'y_x': 'y'})
)
newDF['timestamp'] = newDF['timestamp'].astype(int)

# Disabled alternative: derive the speed from per-row timestamp differences.
# newDF['timediff'] = (newDF['timestamp'] - newDF['timestamp'].shift(1))/2 + (newDF['timestamp'].shift(-1) - newDF['timestamp'])/2
# newDF['speed'] = newDF['arc_length']/newDF['timediff']*1000

# Speed as arc length times 10 (presumably 10 frames per second -- confirm).
newDF['speed'] = newDF['arc_length'] * 10

# newDF

Unnamed: 0,match_id,half,frame_id,timestamp,object_id,x,y,z,extrapolated,arc_length,speed
0,1384039,1,100,0,-1,-0.71,0.86,0.23,True,,
1,1384039,1,100,0,34469,-39.27,0.41,0.0,True,,
2,1384039,1,100,0,22148,-20.31,-2.7,0.0,True,,
3,1384039,1,100,0,644216,-19.77,6.54,0.0,True,,
4,1384039,1,100,0,59898,-21.07,-12.92,0.0,True,,
...,...,...,...,...,...,...,...,...,...,...,...
1067563,1384039,2,61042,3084200,30524,41.17,3.73,0.0,False,,
1067564,1384039,2,61042,3084200,5270,30.09,6.81,0.0,False,,
1067565,1384039,2,61042,3084200,174962,42.3,29.71,0.0,False,,
1067566,1384039,2,61042,3084200,397985,42.84,-13.33,0.0,True,,


In [11]:
# plotMovement('3233', originalDF, 49770, 49790)
# plotMovement('3233', newDF, 60940, 60980)

**Player Physical Metrics**

In [12]:
targetedPlayerID = 3233  # ID of the player whose metrics are computed

# Rows of the targeted player only; object_id is compared as a string.
targetedPlayerDF = pd.DataFrame(newDF[newDF['object_id'] == str(targetedPlayerID)])
for coordinate in ('x', 'y'):
    targetedPlayerDF[coordinate] = targetedPlayerDF[coordinate].astype(float)

# Straight-line step lengths to the previous row (1) and to the next row (2);
# the missing neighbour at either end counts as a step of 0.
step_from_previous = targetedPlayerDF[['x', 'y']].diff(1)
step_to_next = targetedPlayerDF[['x', 'y']].diff(-1)
euclidean_distance1 = np.sqrt(step_from_previous['x']**2 + step_from_previous['y']**2).fillna(0)
euclidean_distance2 = np.sqrt(step_to_next['x']**2 + step_to_next['y']**2).fillna(0)

# Rows without a smoothed arc length get the mean of their two adjacent steps.
mean_adjacent_step = (euclidean_distance1 + euclidean_distance2) / 2
targetedPlayerDF['arc_length'] = targetedPlayerDF['arc_length'].fillna(mean_adjacent_step)

# targetedPlayerDF['timediff'] = (targetedPlayerDF['timestamp'] - targetedPlayerDF['timestamp'].shift(1))/2 + (targetedPlayerDF['timestamp'].shift(-1) - targetedPlayerDF['timestamp'])/2
# targetedPlayerDF['speed'] = targetedPlayerDF['arc_length']/targetedPlayerDF['timediff']*1000

# Running totals: cumulative arc length minus half of the step to the next
# row, and the running maximum of the forward-filled speed.
cumulative_arc_length = targetedPlayerDF['arc_length'].cumsum()
targetedPlayerDF['distance_covered'] = cumulative_arc_length - euclidean_distance2 / 2
targetedPlayerDF['max_speed'] = targetedPlayerDF['speed'].ffill().cummax()

# targetedPlayerDF

Unnamed: 0,match_id,half,frame_id,timestamp,object_id,x,y,z,extrapolated,arc_length,speed,distance_covered,max_speed
14,1384039,1,100,0,3233,15.91,-16.62,0.0,True,0.030414,,0.000000,
37,1384039,1,101,100,3233,15.90,-16.68,0.0,True,0.051029,,0.060828,
60,1384039,1,102,200,3233,15.89,-16.72,0.0,True,0.042976,,0.102059,
83,1384039,1,103,300,3233,15.87,-16.76,0.0,True,0.036503,,0.146780,
106,1384039,1,104,400,3233,15.85,-16.78,0.0,True,0.025322,,0.175064,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1067456,1384039,2,61038,3083800,3233,43.85,1.01,0.0,True,0.360048,,9621.472273,12.611391
1067479,1384039,2,61039,3083900,3233,43.63,1.30,0.0,True,0.361028,,9621.836278,12.611391
1067502,1384039,2,61040,3084000,3233,43.42,1.59,0.0,True,0.352374,,9622.194329,12.611391
1067525,1384039,2,61041,3084100,3233,43.23,1.88,0.0,True,0.353627,,9622.541027,12.611391


In [13]:
targetedPlayerID = 3233  # ID of the player whose metrics are computed

# Same metrics as for the smoothed data, but computed from the raw tracking rows.
originaltargetedPlayerDF = pd.DataFrame(originalDF[originalDF['object_id'] == str(targetedPlayerID)])

originaltargetedPlayerDF['arc_length'] = np.nan
for coordinate in ('x', 'y'):
    originaltargetedPlayerDF[coordinate] = originaltargetedPlayerDF[coordinate].astype(float)
originaltargetedPlayerDF['timestamp'] = originaltargetedPlayerDF['timestamp'].astype(int)

# Straight-line step lengths to the previous row (1) and to the next row (2);
# the missing neighbour at either end counts as a step of 0.
raw_step_from_previous = originaltargetedPlayerDF[['x', 'y']].diff(1)
raw_step_to_next = originaltargetedPlayerDF[['x', 'y']].diff(-1)
euclidean_distance1 = np.sqrt(raw_step_from_previous['x']**2 + raw_step_from_previous['y']**2).fillna(0)
euclidean_distance2 = np.sqrt(raw_step_to_next['x']**2 + raw_step_to_next['y']**2).fillna(0)

# Every arc length is NaN at this point, so each row receives the mean of its
# two adjacent steps.
raw_mean_adjacent_step = (euclidean_distance1 + euclidean_distance2) / 2
originaltargetedPlayerDF['arc_length'] = originaltargetedPlayerDF['arc_length'].fillna(raw_mean_adjacent_step)

raw_cumulative_arc_length = originaltargetedPlayerDF['arc_length'].cumsum()
originaltargetedPlayerDF['distance_covered'] = raw_cumulative_arc_length - euclidean_distance2 / 2

# Time attributed to a row: half the gap to the previous row plus half the gap
# to the next row (NaN on the first and last row).
raw_timestamps = originaltargetedPlayerDF['timestamp']
gap_to_previous = raw_timestamps - raw_timestamps.shift(1)
gap_to_next = raw_timestamps.shift(-1) - raw_timestamps
originaltargetedPlayerDF['timediff'] = gap_to_previous / 2 + gap_to_next / 2

# The factor 1000 rescales the timestamp difference (presumably milliseconds -- confirm).
originaltargetedPlayerDF['speed'] = originaltargetedPlayerDF['arc_length'] / originaltargetedPlayerDF['timediff'] * 1000
originaltargetedPlayerDF['max_speed'] = originaltargetedPlayerDF['speed'].ffill().cummax()

# originaltargetedPlayerDF

Unnamed: 0,match_id,half,frame_id,timestamp,object_id,x,y,z,extrapolated,arc_length,distance_covered,timediff,speed,max_speed
14,1384039,1,100,0,3233,15.91,-16.62,0.0,True,0.030414,0.000000,,,
37,1384039,1,101,100,3233,15.90,-16.68,0.0,True,0.051029,0.060828,100.0,0.510293,0.510293
60,1384039,1,102,200,3233,15.89,-16.72,0.0,True,0.042976,0.102059,100.0,0.429762,0.510293
83,1384039,1,103,300,3233,15.87,-16.76,0.0,True,0.036503,0.146780,100.0,0.365028,0.510293
106,1384039,1,104,400,3233,15.85,-16.78,0.0,True,0.025322,0.175064,100.0,0.253225,0.510293
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1067456,1384039,2,61038,3083800,3233,43.85,1.01,0.0,True,0.360048,9749.466249,100.0,3.600477,52.049467
1067479,1384039,2,61039,3083900,3233,43.63,1.30,0.0,True,0.361028,9749.830255,100.0,3.610279,52.049467
1067502,1384039,2,61040,3084000,3233,43.42,1.59,0.0,True,0.352374,9750.188305,100.0,3.523745,52.049467
1067525,1384039,2,61041,3084100,3233,43.23,1.88,0.0,True,0.353627,9750.535004,100.0,3.536269,52.049467


As we can see, there is a significant jump in speed at frame 49780 when extrapolated data is used: the data at this frame still reports a time difference of only 100 ms between the previous and the current frame, which results in a very high speed. If we ignore extrapolated frames, we get metrics that are very similar to the smoothed ones.

In [14]:
def smoothSkillcornerDataFrame(tracking_file_path, lineup_path, iterations, lines_to_smooth=2000000, teamName="FC Basel"):
    """Smooth one team's trajectories and merge them onto the raw tracking rows.

    Args:
        tracking_file_path: Path of the tracking CSV file.
        lineup_path: Path of the lineup CSV file (rows 0 to 33 are read).
        iterations: Number of smoothing iterations passed to ``performSmoothing``.
        lines_to_smooth: Index of the last tracking row to read (inclusive).
        teamName: Team whose players are smoothed.

    Returns:
        DataFrame with one row per raw tracking row (row 0 of the file is
        left out). Both frames carry ``x`` and ``y``, so the merge suffixes
        the raw coordinates with ``_x`` and the smoothed ones with ``_y``.
        It also holds ``arc_length``, ``time`` and
        ``speed`` (``arc_length * 10``; presumably 10 frames per second --
        confirm). Rows without a smoothed counterpart hold NaN / NaT there.

    Raises:
        ValueError: If no trajectory with at least two points exists, e.g.
            because ``teamName`` matches no lineup row.
    """
    partialframes = read_csv_partially(tracking_file_path, 0, lines_to_smooth)
    lineup = read_csv_partially(lineup_path, 0, 33)

    targetTrajs = getTrajectories(teamName, lineup, partialframes)

    smoothedSegments = []
    for player, trajs in targetTrajs.items():
        for traj in trajs:
            # Trajectories with fewer than two points are not smoothed.
            if len(traj) < 2:
                continue

            new_traj = pd.DataFrame(performSmoothing(datapoints=traj, iterations=iterations))
            new_traj[5] = player
            # Restore the frame number of each input point (its last field),
            # aligned by row position.
            new_traj[4] = pd.Series([point[4] for point in traj])

            smoothedSegments.append(new_traj)

    if not smoothedSegments:
        raise ValueError("No trajectory with at least two points was found; nothing to smooth.")

    # Concatenate once instead of growing a DataFrame inside the loop.
    smoothDataFrames = pd.concat(smoothedSegments, axis=0, ignore_index=True)

    origDF = pd.DataFrame(partialframes[1:])

    smoothDataFrames[4] = smoothDataFrames[4].astype(int)

    smoothDataFrames.columns = ['x', 'y', 'arc_length', 'time', 'frame_id', 'object_id']
    origDF.columns = ['match_id', 'half', 'frame_id', 'timestamp', 'object_id', 'x', 'y', 'z', 'extrapolated']

    # Both merge keys must share a dtype: frame_id is int on both sides.
    origDF['frame_id'] = origDF['frame_id'].astype(int)
    mergedDF = origDF.merge(smoothDataFrames, how='left', on=['object_id', 'frame_id'])
    mergedDF['speed'] = mergedDF['arc_length'] * 10

    return mergedDF


In [16]:
# Smooth the FC Basel trajectories with 10 iterations, reading tracking rows 0 to 2,000,000.
resultDF = smoothSkillcornerDataFrame(
    tracking_file_path=file_path_skillcorner,
    lineup_path=lineup_path,
    iterations=10,
    lines_to_smooth=2000000,
    teamName="FC Basel",
)
resultDF

Unnamed: 0,match_id,half,frame_id,timestamp,object_id,x_x,y_x,z,extrapolated,x_y,y_y,arc_length,time
0,1384039,1,100,0,-1,-0.71,0.86,0.23,True,,,,NaT
1,1384039,1,100,0,34469,-39.27,0.41,0.0,True,,,,NaT
2,1384039,1,100,0,22148,-20.31,-2.7,0.0,True,,,,NaT
3,1384039,1,100,0,644216,-19.77,6.54,0.0,True,,,,NaT
4,1384039,1,100,0,59898,-21.07,-12.92,0.0,True,,,,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1067563,1384039,2,61042,3084200,30524,41.17,3.73,0.0,False,,,,NaT
1067564,1384039,2,61042,3084200,5270,30.09,6.81,0.0,False,,,,NaT
1067565,1384039,2,61042,3084200,174962,42.3,29.71,0.0,False,,,,NaT
1067566,1384039,2,61042,3084200,397985,42.84,-13.33,0.0,True,,,,NaT
