# Importation des données

In [1]:
import sqlite3
import pandas as pd

In [2]:
# Open the raw SQLite database and create the cursor used to run queries.
db_path = "../Databases/raw-database.db"
connect = sqlite3.connect(db_path)
cursor = connect.cursor()

In [3]:
# Load the full tracking_data table into a DataFrame.
# Column names come from the cursor description (first item of each entry).
# They are passed to the constructor so that an empty result still yields a
# correctly labelled (empty) frame instead of a length-mismatch ValueError.
req = cursor.execute("SELECT * FROM tracking_data")
res = req.fetchall()
desc = req.description
tracking_data = pd.DataFrame(res, columns=[col[0] for col in desc])

In [4]:
# Total number of rows loaded from the tracking_data table.
nb_tracking_data = tracking_data.shape[0]
nb_tracking_data

467914

---
# Étude des colonnes comportant des valeurs NaN

In [5]:
# For each column, report whether it contains at least one missing value.
tracking_data.isna().any(axis=0)

track_id            False
trackable_object    False
is_visible          False
x                   False
y                   False
vx                   True
vy                   True
speed_norm           True
accel_norm           True
z                    True
frame               False
match_id_SKC        False
dtype: bool

---
# Étude des colonnes "vx", "vy", "speed_norm" et "accel_norm" pour le ballon (trackable_object 55)

In [6]:

# Distinct velocity / speed / acceleration values recorded for the ball
# (trackable_object 55): select the ball rows once, then list each column.
ball_rows = tracking_data[tracking_data.trackable_object == 55]
print(*(ball_rows[column].unique()
        for column in ("vx", "vy", "speed_norm", "accel_norm")))

[nan] [nan] [nan] [nan]


On remarque que ces colonnes ne contiennent que des valeurs NaN pour le ballon : on ne lui attribue ni vitesse ni accélération.

---
# Étude des valeurs NaN pour les colonnes "vx" et "vy", hormis pour le ballon

In [8]:
# Row labels of the tracks with a missing vx or vy, excluding the ball
# (trackable_object 55).
missing_velocity = tracking_data[["vx", "vy"]].isna().any(axis=1)
not_ball = tracking_data["trackable_object"] != 55
index = tracking_data.index[missing_velocity & not_ball]

In [9]:
# Show the rows selected above, with all of their columns.
tracking_data.loc[index, :]

Unnamed: 0,track_id,trackable_object,is_visible,x,y,vx,vy,speed_norm,accel_norm,z,frame,match_id_SKC
250828,33027.0,33027.0,0,41.1,5.89,,,,,,72949,1377411
264174,34965.0,34965.0,0,-31.76,-1.28,,,,,,72579,1385352
264175,33027.0,33027.0,0,0.39,-14.57,,,,,,72579,1385352
264176,20484.0,20484.0,0,2.38,-5.36,,,,,,72579,1385352
264177,13578.0,13578.0,0,11.29,-3.05,,,,,,72579,1385352
264178,7387.0,7387.0,1,35.86,-20.01,,,,,,72579,1385352
264179,25486.0,25486.0,0,17.5,6.91,,,,,,72579,1385352
264180,7350.0,7350.0,0,19.82,2.87,,,,,,72579,1385352
264181,18582.0,18582.0,1,21.49,-15.21,,,,,,72579,1385352
264182,28840.0,28840.0,1,38.98,-9.61,,,,,,72579,1385352


Nous n'avons pas beaucoup d'informations concernant les valeurs manquantes de ces colonnes.
Cependant, d'après la documentation ci-dessous, nous pouvons supposer que ces valeurs NaN correspondent à des tracks d'objets pour lesquels on dispose de moins de 5 frames : SkillCorner ne peut donc pas calculer la vitesse de l'objet sur le track en question.

Source : https://skillcorner.crunch.help/en/glossaries/coordinates-distance-speeds-and-directions