# Importation des données

In [3]:
import sqlite3
import pandas as pd

In [4]:
connect = sqlite3.connect("../raw-database.db")
cursor = connect.cursor()

In [5]:
req = cursor.execute(f"SELECT * FROM freeze_frames")
res = req.fetchall()
desc = req.description
freeze_frames = pd.DataFrame(res)
freeze_frames.columns = [i[0] for i in desc]

---
# Etudes des colonnes comportant des valeurs Nan

In [6]:
freeze_frames.isna().any()

frame              False
timestamp           True
period              True
event_id           False
event_x            False
event_y            False
is_matched         False
match_id_SKC       False
group               True
tackable_object     True
dtype: bool

---
# Etudes des valeurs Nan pour la colonne "timestamp"

In [None]:
# Index des frames correspondants a des valeurs Nan
index = freeze_frames.index[freeze_frames.timestamp.isna()]

In [None]:
# Index des frames correspondant au valeur Nan, ainsi que les frames précédentes et suivantes de ces dernières.
freeze_frames.loc[index.tolist() + (index - 1).tolist() + (index + 1).tolist()].sort_index()

Unnamed: 0,frame,timestamp,period,event_id,event_x,event_y,is_matched,match_id_SKC,group,tackable_object
12999,27215,2024-11-26 00:44:53.500000,1.0,cc95d2bf-4f94-47dc-aa8c-82cd509905f9,-39.8125,20.06,1,1404545,away team,
13000,27888,,,37b9dbf6-1cbb-42b8-b5c3-e1b3934b596e,unknown,unknown,0,1404545,away team,25539.0
13001,37834,2024-11-26 00:47:00.400000,2.0,2d818cdd-e513-405c-bbeb-dd57f8081774,-46.725,-21.505,1,1404545,home team,9451.0
16739,33105,2024-11-26 00:44:52.500000,1.0,c1ad7f3b-7ec6-45c3-9745-ae79cacee74b,34.0375,20.06,1,1483175,away team,
16740,43538,,,f5f29ce5-9e11-465c-85ac-4c69a1b30379,unknown,unknown,0,1483175,,28768.0
16741,45712,2024-11-26 00:48:37.200000,2.0,985ec082-3c00-4ab7-bb39-4e0f5979d445,-20.125,28.645,1,1483175,away team,
17761,25577,2024-11-26 00:32:19.700000,1.0,b5d8596c-ae2a-4061-929b-6e9df0f95ff4,-49.6125,-10.03,1,1499186,home team,
17762,33842,,,5dc500e7-876c-41bd-b353-139debc91e50,unknown,unknown,0,1499186,,
17763,44797,2024-11-26 00:47:01.700000,2.0,914419e5-7a05-4e95-8a8c-7d72eb1efbd1,-42.2625,-26.775,0,1499186,away team,
20175,73572,2024-11-26 01:32:06.200000,2.0,99d2acb7-3a12-4c50-b129-9ffeca155293,-21.875,29.07,1,1547882,home team,


In [None]:
# Importation des events SB correspondants aux frames Nan
params = freeze_frames.loc[index.tolist() + (index - 1).tolist() + (index + 1).tolist()].sort_index().event_id.tolist()
stat = f"SELECT * FROM events WHERE event_id IN ({', '.join('?' * len(params))})"
req = cursor.execute(stat, params)
res = req.fetchall()
desc = req.description
events = pd.DataFrame(res)
events.columns = [i[0] for i in desc]

In [58]:
events.drop(["index_event", "shot_outcome", "y_loc", "z_loc", "y_pass", "y_shot", "z_shot"], axis = 1).sort_values(by = ["match_id_SB", "minute"])

Unnamed: 0,event_id,shot_type,type,match_id_SB,period,possession,pass_cross,pass_type,minute,pass_body_part,player_id,x_loc,x_pass,x_shot
1,cc95d2bf-4f94-47dc-aa8c-82cd509905f9,,Pressure,3894262,1,68,,,44,,5507.0,14.5,,
2,37b9dbf6-1cbb-42b8-b5c3-e1b3934b596e,,Half End,3894262,1,71,,,45,,,,,
0,2d818cdd-e513-405c-bbeb-dd57f8081774,,Pass,3894262,2,75,1.0,,46,Left Foot,7212.0,113.4,104.2,
4,c1ad7f3b-7ec6-45c3-9745-ae79cacee74b,,Pass,3894316,1,74,1.0,,44,Right Foot,4909.0,98.9,120.0,
3,f5f29ce5-9e11-465c-85ac-4c69a1b30379,,Half Start,3894316,2,77,,,45,,,,,
5,985ec082-3c00-4ab7-bb39-4e0f5979d445,,Carry,3894316,2,86,,,48,,46009.0,83.0,,
7,b5d8596c-ae2a-4061-929b-6e9df0f95ff4,,Clearance,3894335,1,66,,,32,,22137.0,3.3,,
8,5dc500e7-876c-41bd-b353-139debc91e50,,Half End,3894335,1,92,,,46,,,,,
6,914419e5-7a05-4e95-8a8c-7d72eb1efbd1,,Pass,3894335,2,97,1.0,,47,Right Foot,23682.0,108.3,106.7,
9,99d2acb7-3a12-4c50-b129-9ffeca155293,,Pass,3894368,2,184,1.0,,92,Right Foot,3363.0,85.0,105.8,


Les valeurs Nan de cette colonne semblent correspondre a des évènements qui référencient une fin de période

---
# Etudes des valeurs Nan pour la colonne "period"

In [None]:
# Index des frames correspondants a des valeurs Nan
index = freeze_frames.index[freeze_frames.period.isna()]

In [None]:
# Index des frames correspondant au valeur Nan, ainsi que les frames précédentes et suivantes de ces dernières.
freeze_frames.loc[index.tolist() + (index - 1).tolist() + (index + 1).tolist()].sort_index()

Unnamed: 0,frame,timestamp,period,event_id,event_x,event_y,is_matched,match_id_SKC,group,tackable_object
12999,27215,2024-11-26 00:44:53.500000,1.0,cc95d2bf-4f94-47dc-aa8c-82cd509905f9,-39.8125,20.06,1,1404545,away team,
13000,27888,,,37b9dbf6-1cbb-42b8-b5c3-e1b3934b596e,unknown,unknown,0,1404545,away team,25539.0
13001,37834,2024-11-26 00:47:00.400000,2.0,2d818cdd-e513-405c-bbeb-dd57f8081774,-46.725,-21.505,1,1404545,home team,9451.0
16739,33105,2024-11-26 00:44:52.500000,1.0,c1ad7f3b-7ec6-45c3-9745-ae79cacee74b,34.0375,20.06,1,1483175,away team,
16740,43538,,,f5f29ce5-9e11-465c-85ac-4c69a1b30379,unknown,unknown,0,1483175,,28768.0
16741,45712,2024-11-26 00:48:37.200000,2.0,985ec082-3c00-4ab7-bb39-4e0f5979d445,-20.125,28.645,1,1483175,away team,
17761,25577,2024-11-26 00:32:19.700000,1.0,b5d8596c-ae2a-4061-929b-6e9df0f95ff4,-49.6125,-10.03,1,1499186,home team,
17762,33842,,,5dc500e7-876c-41bd-b353-139debc91e50,unknown,unknown,0,1499186,,
17763,44797,2024-11-26 00:47:01.700000,2.0,914419e5-7a05-4e95-8a8c-7d72eb1efbd1,-42.2625,-26.775,0,1499186,away team,
20175,73572,2024-11-26 01:32:06.200000,2.0,99d2acb7-3a12-4c50-b129-9ffeca155293,-21.875,29.07,1,1547882,home team,


In [61]:
# Importation des events SB correspondants aux frames Nan
params = freeze_frames.loc[index.tolist() + (index - 1).tolist() + (index + 1).tolist()].sort_index().event_id.tolist()
stat = f"SELECT * FROM events WHERE event_id IN ({', '.join('?' * len(params))})"
req = cursor.execute(stat, params)
res = req.fetchall()
desc = req.description
events = pd.DataFrame(res)
events.columns = [i[0] for i in desc]

In [62]:
events.drop(["index_event", "shot_outcome", "y_loc", "z_loc", "y_pass", "y_shot", "z_shot"], axis = 1).sort_values(by = ["match_id_SB", "minute"])

Unnamed: 0,event_id,shot_type,type,match_id_SB,period,possession,pass_cross,pass_type,minute,pass_body_part,player_id,x_loc,x_pass,x_shot
1,cc95d2bf-4f94-47dc-aa8c-82cd509905f9,,Pressure,3894262,1,68,,,44,,5507.0,14.5,,
2,37b9dbf6-1cbb-42b8-b5c3-e1b3934b596e,,Half End,3894262,1,71,,,45,,,,,
0,2d818cdd-e513-405c-bbeb-dd57f8081774,,Pass,3894262,2,75,1.0,,46,Left Foot,7212.0,113.4,104.2,
4,c1ad7f3b-7ec6-45c3-9745-ae79cacee74b,,Pass,3894316,1,74,1.0,,44,Right Foot,4909.0,98.9,120.0,
3,f5f29ce5-9e11-465c-85ac-4c69a1b30379,,Half Start,3894316,2,77,,,45,,,,,
5,985ec082-3c00-4ab7-bb39-4e0f5979d445,,Carry,3894316,2,86,,,48,,46009.0,83.0,,
7,b5d8596c-ae2a-4061-929b-6e9df0f95ff4,,Clearance,3894335,1,66,,,32,,22137.0,3.3,,
8,5dc500e7-876c-41bd-b353-139debc91e50,,Half End,3894335,1,92,,,46,,,,,
6,914419e5-7a05-4e95-8a8c-7d72eb1efbd1,,Pass,3894335,2,97,1.0,,47,Right Foot,23682.0,108.3,106.7,
9,99d2acb7-3a12-4c50-b129-9ffeca155293,,Pass,3894368,2,184,1.0,,92,Right Foot,3363.0,85.0,105.8,


De même que pour la colonne "timestamp", les frames Nan correspondent à des débuts/fins de périodes

---
# Etudes des valeurs Nan pour la colonne "group"

In [None]:
# Index des frames correspondants a des valeurs Nan
index = freeze_frames.index[freeze_frames.period.isna()]

In [None]:
# Index des frames correspondant au valeur Nan, ainsi que les frames précédentes et suivantes de ces dernières.
freeze_frames.loc[index.tolist() + (index - 1).tolist() + (index + 1).tolist()].sort_index()

Unnamed: 0,frame,timestamp,period,event_id,event_x,event_y,is_matched,match_id_SKC,group,tackable_object
12999,27215,2024-11-26 00:44:53.500000,1.0,cc95d2bf-4f94-47dc-aa8c-82cd509905f9,-39.8125,20.06,1,1404545,away team,
13000,27888,,,37b9dbf6-1cbb-42b8-b5c3-e1b3934b596e,unknown,unknown,0,1404545,away team,25539.0
13001,37834,2024-11-26 00:47:00.400000,2.0,2d818cdd-e513-405c-bbeb-dd57f8081774,-46.725,-21.505,1,1404545,home team,9451.0
16739,33105,2024-11-26 00:44:52.500000,1.0,c1ad7f3b-7ec6-45c3-9745-ae79cacee74b,34.0375,20.06,1,1483175,away team,
16740,43538,,,f5f29ce5-9e11-465c-85ac-4c69a1b30379,unknown,unknown,0,1483175,,28768.0
16741,45712,2024-11-26 00:48:37.200000,2.0,985ec082-3c00-4ab7-bb39-4e0f5979d445,-20.125,28.645,1,1483175,away team,
17761,25577,2024-11-26 00:32:19.700000,1.0,b5d8596c-ae2a-4061-929b-6e9df0f95ff4,-49.6125,-10.03,1,1499186,home team,
17762,33842,,,5dc500e7-876c-41bd-b353-139debc91e50,unknown,unknown,0,1499186,,
17763,44797,2024-11-26 00:47:01.700000,2.0,914419e5-7a05-4e95-8a8c-7d72eb1efbd1,-42.2625,-26.775,0,1499186,away team,
20175,73572,2024-11-26 01:32:06.200000,2.0,99d2acb7-3a12-4c50-b129-9ffeca155293,-21.875,29.07,1,1547882,home team,


In [None]:
# Importation des events SB correspondants aux frames Nan
params = freeze_frames.loc[index.tolist() + (index - 1).tolist() + (index + 1).tolist()].sort_index().event_id.tolist()
stat = f"SELECT * FROM events WHERE event_id IN ({', '.join('?' * len(params))})"
req = cursor.execute(stat, params)
res = req.fetchall()
desc = req.description
events = pd.DataFrame(res)
events.columns = [i[0] for i in desc]

In [None]:
events.drop(["index_event", "shot_outcome", "y_loc", "z_loc", "y_pass", "y_shot", "z_shot"], axis = 1).sort_values(by = ["match_id_SB", "minute"])

Unnamed: 0,event_id,shot_type,type,match_id_SB,period,possession,pass_cross,pass_type,minute,pass_body_part,player_id,x_loc,x_pass,x_shot
1,cc95d2bf-4f94-47dc-aa8c-82cd509905f9,,Pressure,3894262,1,68,,,44,,5507.0,14.5,,
2,37b9dbf6-1cbb-42b8-b5c3-e1b3934b596e,,Half End,3894262,1,71,,,45,,,,,
0,2d818cdd-e513-405c-bbeb-dd57f8081774,,Pass,3894262,2,75,1.0,,46,Left Foot,7212.0,113.4,104.2,
4,c1ad7f3b-7ec6-45c3-9745-ae79cacee74b,,Pass,3894316,1,74,1.0,,44,Right Foot,4909.0,98.9,120.0,
3,f5f29ce5-9e11-465c-85ac-4c69a1b30379,,Half Start,3894316,2,77,,,45,,,,,
5,985ec082-3c00-4ab7-bb39-4e0f5979d445,,Carry,3894316,2,86,,,48,,46009.0,83.0,,
7,b5d8596c-ae2a-4061-929b-6e9df0f95ff4,,Clearance,3894335,1,66,,,32,,22137.0,3.3,,
8,5dc500e7-876c-41bd-b353-139debc91e50,,Half End,3894335,1,92,,,46,,,,,
6,914419e5-7a05-4e95-8a8c-7d72eb1efbd1,,Pass,3894335,2,97,1.0,,47,Right Foot,23682.0,108.3,106.7,
9,99d2acb7-3a12-4c50-b129-9ffeca155293,,Pass,3894368,2,184,1.0,,92,Right Foot,3363.0,85.0,105.8,


De même que pour la colonne "timestamp", les frames Nan correspondent à des débuts/fins de périodes