# Importation des données

In [1]:
import sqlite3
import pandas as pd

In [2]:
# Open the raw SQLite database through a URI in read-write mode ("mode=rw").
# Unlike a plain path, this raises sqlite3.OperationalError when the file is
# missing instead of silently creating an empty database, which would only
# surface later as a confusing "no such table" error.
connect = sqlite3.connect("file:../Databases/raw-database.db?mode=rw", uri=True)
# Cursor reused by the query cells of this notebook.
cursor = connect.cursor()

In [3]:
# Load the whole `freeze_frames` table into a DataFrame.
req = cursor.execute("SELECT * FROM freeze_frames")
res = req.fetchall()
desc = req.description
# Name the columns at construction time: assigning `.columns` afterwards raises
# a length-mismatch ValueError when the query returns no rows.
freeze_frames = pd.DataFrame(res, columns=[col[0] for col in desc])

---
# Étude des colonnes comportant des valeurs NaN

In [4]:
# Per-column flag: True when the column holds at least one missing value.
freeze_frames.isnull().any(axis=0)

frame              False
timestamp           True
period              True
event_id           False
event_x            False
event_y            False
is_matched         False
match_id_SKC       False
group               True
tackable_object     True
dtype: bool

---
# Étude des valeurs NaN pour la colonne "timestamp"

In [5]:
# Row labels of the frames whose "timestamp" value is missing.
missing_timestamp = freeze_frames["timestamp"].isna()
index = freeze_frames.index[missing_timestamp]

In [6]:
# Show the frames selected by `index` (the NaN rows) together with the frames
# located just before and just after them (assumes an integer row index).
# The neighbouring labels are merged with `union`, so a row is not repeated when
# two NaN frames are adjacent, and restricted to labels that exist in the frame,
# so a NaN on the first or last row does not raise a KeyError in `.loc`.
labels = index.union(index - 1).union(index + 1).intersection(freeze_frames.index)
freeze_frames.loc[labels].sort_index()

Unnamed: 0,frame,timestamp,period,event_id,event_x,event_y,is_matched,match_id_SKC,group,tackable_object
13055,64553,2025-01-12 01:31:32.300000,2.0,53d756de-e99d-4ae1-9874-0ec491b7ed85,50.925,-14.28,1,1404545,,
13056,27888,,,37b9dbf6-1cbb-42b8-b5c3-e1b3934b596e,unknown,unknown,0,1404545,away team,25539.0
13057,14115,2025-01-12 00:22:53.500000,1.0,4b0f3b1d-24ab-4814-b90e-138745171671,44.7125,-17.68,1,1404546,away team,
16781,73124,2025-01-12 01:34:18.400000,2.0,e9f6514b-b26a-481e-88e7-f4c400e0f73d,41.3,3.145,0,1483175,home team,
16782,43538,,,f5f29ce5-9e11-465c-85ac-4c69a1b30379,unknown,unknown,0,1483175,,28768.0
16783,9451,2025-01-12 00:05:28.100000,1.0,3571e2a8-90e2-4f28-b483-9c4401e295d0,-52.0625,-24.225,1,1483176,away team,
17781,71749,2025-01-12 01:31:56.900000,2.0,33825d0d-539a-4162-ad18-e12ea00c39f9,-41.475,5.61,0,1499186,away team,
17782,33842,,,5dc500e7-876c-41bd-b353-139debc91e50,unknown,unknown,0,1499186,,
17783,6444,2025-01-12 00:00:31.400000,1.0,1774317d-b1b8-4fda-8dd6-ee9954c380da,18.725,-30.43,1,1499189,away team,
20175,73572,2025-01-12 01:32:06.200000,2.0,99d2acb7-3a12-4c50-b129-9ffeca155293,-21.875,29.07,1,1547882,home team,


In [7]:
# Import the SB events matching the NaN frames and their neighbouring frames.
# Neighbouring labels are deduplicated and restricted to rows that exist, so a
# NaN on the first or last row does not raise a KeyError in `.loc`
# (assumes an integer row index).
labels = index.union(index - 1).union(index + 1).intersection(freeze_frames.index)
params = freeze_frames.loc[labels].sort_index().event_id.tolist()
# One "?" placeholder per event id keeps the query parameterised.
stat = f"SELECT * FROM events WHERE event_id IN ({', '.join('?' * len(params))})"
req = cursor.execute(stat, params)
res = req.fetchall()
desc = req.description
# Name the columns at construction time: assigning `.columns` afterwards raises
# a length-mismatch ValueError when the query returns no rows.
events = pd.DataFrame(res, columns=[col[0] for col in desc])

In [8]:
# Hide the columns that are not displayed here, then order the events by match
# and by minute.
hidden_columns = ["index_event", "shot_outcome", "y_loc", "z_loc", "y_pass", "y_shot", "z_shot"]
events.drop(columns=hidden_columns).sort_values(["match_id_SB", "minute"])

Unnamed: 0,event_id,shot_type,type,match_id_SB,period,possession,pass_cross,pass_type,minute,pass_body_part,player_id_SB,team_id_SB,x_loc,x_pass,x_shot
0,37b9dbf6-1cbb-42b8-b5c3-e1b3934b596e,,Half End,3894262,1,71,,,45,,,136,,,
1,53d756de-e99d-4ae1-9874-0ec491b7ed85,,Ball Receipt*,3894262,2,156,,,90,,97751.0,136,118.2,,
2,4b0f3b1d-24ab-4814-b90e-138745171671,,Pressure,3894263,1,33,,,22,,32607.0,156,8.9,,
3,f5f29ce5-9e11-465c-85ac-4c69a1b30379,,Half Start,3894316,2,77,,,45,,,156,,,
4,e9f6514b-b26a-481e-88e7-f4c400e0f73d,,Ball Receipt*,3894316,2,169,,,94,,212489.0,156,107.2,,
5,3571e2a8-90e2-4f28-b483-9c4401e295d0,,Carry,3894318,1,14,,,5,,5480.0,137,119.5,,
6,5dc500e7-876c-41bd-b353-139debc91e50,,Half End,3894335,1,92,,,46,,,168,,,
7,33825d0d-539a-4162-ad18-e12ea00c39f9,,Clearance,3894335,2,180,,,91,,11820.0,168,12.6,,
8,1774317d-b1b8-4fda-8dd6-ee9954c380da,,Carry,3894336,1,3,,,0,,3235.0,134,81.4,,
9,99d2acb7-3a12-4c50-b129-9ffeca155293,,Pass,3894368,2,184,1.0,,92,Right Foot,3363.0,144,85.0,105.8,


Les valeurs NaN de cette colonne semblent correspondre à des évènements qui marquent un début ou une fin de période (« Half Start » / « Half End »).

---
# Étude des valeurs NaN pour la colonne "period"

In [9]:
# Row labels of the frames whose "period" value is missing.
missing_period = freeze_frames["period"].isna()
index = freeze_frames.index[missing_period]

In [10]:
# Show the frames selected by `index` (the NaN rows) together with the frames
# located just before and just after them (assumes an integer row index).
# The neighbouring labels are merged with `union`, so a row is not repeated when
# two NaN frames are adjacent, and restricted to labels that exist in the frame,
# so a NaN on the first or last row does not raise a KeyError in `.loc`.
labels = index.union(index - 1).union(index + 1).intersection(freeze_frames.index)
freeze_frames.loc[labels].sort_index()

Unnamed: 0,frame,timestamp,period,event_id,event_x,event_y,is_matched,match_id_SKC,group,tackable_object
13055,64553,2025-01-12 01:31:32.300000,2.0,53d756de-e99d-4ae1-9874-0ec491b7ed85,50.925,-14.28,1,1404545,,
13056,27888,,,37b9dbf6-1cbb-42b8-b5c3-e1b3934b596e,unknown,unknown,0,1404545,away team,25539.0
13057,14115,2025-01-12 00:22:53.500000,1.0,4b0f3b1d-24ab-4814-b90e-138745171671,44.7125,-17.68,1,1404546,away team,
16781,73124,2025-01-12 01:34:18.400000,2.0,e9f6514b-b26a-481e-88e7-f4c400e0f73d,41.3,3.145,0,1483175,home team,
16782,43538,,,f5f29ce5-9e11-465c-85ac-4c69a1b30379,unknown,unknown,0,1483175,,28768.0
16783,9451,2025-01-12 00:05:28.100000,1.0,3571e2a8-90e2-4f28-b483-9c4401e295d0,-52.0625,-24.225,1,1483176,away team,
17781,71749,2025-01-12 01:31:56.900000,2.0,33825d0d-539a-4162-ad18-e12ea00c39f9,-41.475,5.61,0,1499186,away team,
17782,33842,,,5dc500e7-876c-41bd-b353-139debc91e50,unknown,unknown,0,1499186,,
17783,6444,2025-01-12 00:00:31.400000,1.0,1774317d-b1b8-4fda-8dd6-ee9954c380da,18.725,-30.43,1,1499189,away team,
20175,73572,2025-01-12 01:32:06.200000,2.0,99d2acb7-3a12-4c50-b129-9ffeca155293,-21.875,29.07,1,1547882,home team,


In [11]:
# Import the SB events matching the NaN frames and their neighbouring frames.
# Neighbouring labels are deduplicated and restricted to rows that exist, so a
# NaN on the first or last row does not raise a KeyError in `.loc`
# (assumes an integer row index).
labels = index.union(index - 1).union(index + 1).intersection(freeze_frames.index)
params = freeze_frames.loc[labels].sort_index().event_id.tolist()
# One "?" placeholder per event id keeps the query parameterised.
stat = f"SELECT * FROM events WHERE event_id IN ({', '.join('?' * len(params))})"
req = cursor.execute(stat, params)
res = req.fetchall()
desc = req.description
# Name the columns at construction time: assigning `.columns` afterwards raises
# a length-mismatch ValueError when the query returns no rows.
events = pd.DataFrame(res, columns=[col[0] for col in desc])

In [12]:
# Hide the columns that are not displayed here, then order the events by match
# and by minute.
hidden_columns = ["index_event", "shot_outcome", "y_loc", "z_loc", "y_pass", "y_shot", "z_shot"]
events.drop(columns=hidden_columns).sort_values(["match_id_SB", "minute"])

Unnamed: 0,event_id,shot_type,type,match_id_SB,period,possession,pass_cross,pass_type,minute,pass_body_part,player_id_SB,team_id_SB,x_loc,x_pass,x_shot
0,37b9dbf6-1cbb-42b8-b5c3-e1b3934b596e,,Half End,3894262,1,71,,,45,,,136,,,
1,53d756de-e99d-4ae1-9874-0ec491b7ed85,,Ball Receipt*,3894262,2,156,,,90,,97751.0,136,118.2,,
2,4b0f3b1d-24ab-4814-b90e-138745171671,,Pressure,3894263,1,33,,,22,,32607.0,156,8.9,,
3,f5f29ce5-9e11-465c-85ac-4c69a1b30379,,Half Start,3894316,2,77,,,45,,,156,,,
4,e9f6514b-b26a-481e-88e7-f4c400e0f73d,,Ball Receipt*,3894316,2,169,,,94,,212489.0,156,107.2,,
5,3571e2a8-90e2-4f28-b483-9c4401e295d0,,Carry,3894318,1,14,,,5,,5480.0,137,119.5,,
6,5dc500e7-876c-41bd-b353-139debc91e50,,Half End,3894335,1,92,,,46,,,168,,,
7,33825d0d-539a-4162-ad18-e12ea00c39f9,,Clearance,3894335,2,180,,,91,,11820.0,168,12.6,,
8,1774317d-b1b8-4fda-8dd6-ee9954c380da,,Carry,3894336,1,3,,,0,,3235.0,134,81.4,,
9,99d2acb7-3a12-4c50-b129-9ffeca155293,,Pass,3894368,2,184,1.0,,92,Right Foot,3363.0,144,85.0,105.8,


De même que pour la colonne "timestamp", les frames NaN correspondent à des débuts ou des fins de période.

---
# Étude des valeurs NaN pour la colonne "group"

In [13]:
# Row labels of the frames whose "group" value is missing.
missing_group = freeze_frames["group"].isna()
index = freeze_frames.index[missing_group]

In [14]:
# First ten event ids among the frames selected by `index`.
freeze_frames.loc[index, "event_id"].head(10)

3      c1a7a598-9690-4a32-a733-b7b3b585df83
4      d3b91062-3398-4235-8bbc-3571d2fd2d9c
27     b94a4ca1-d637-4cfc-a6ad-4fb6f71c95c6
53     f9bde630-3159-4e49-9dc3-93873bdb37ee
96     4e6b6d44-2c5c-4585-bd86-1aef4aeacbbb
111    bae5c26c-8572-46cf-afca-b10d3693bbd5
131    c87aaee5-70a3-4cb3-8fda-b462a52109cf
225    a80fb33a-1e68-4654-a241-8771822d23b3
226    d8bf82d2-6674-4bf7-b5c8-9915d40121bf
227    d5a94c29-f3c9-4aad-a4a5-3d3720cb56d9
Name: event_id, dtype: object

In [15]:
# Frames selected by `index` (the NaN rows) together with the frames located
# just before and just after them (assumes an integer row index).
# Labels are merged with `union`, which removes repeated labels directly:
# `drop_duplicates()` compared row values and could also drop distinct frames
# holding identical values. They are then restricted to labels that exist in
# the frame, so a NaN on the first or last row does not raise a KeyError.
labels = index.union(index - 1).union(index + 1).intersection(freeze_frames.index)
multi_frames = freeze_frames.loc[labels].sort_index()

In [16]:
# Display the selected NaN frames together with their neighbouring frames.
multi_frames

Unnamed: 0,frame,timestamp,period,event_id,event_x,event_y,is_matched,match_id_SKC,group,tackable_object
2,4854,2025-01-12 00:04:32.400000,1.0,3ea6ecd9-bc39-4ce5-9b47-1eda239f2705,44.1,1.615,1,1020089,away team,
3,5752,2025-01-12 00:06:02.200000,1.0,c1a7a598-9690-4a32-a733-b7b3b585df83,-43.4,-27.455,1,1020089,,
4,5755,2025-01-12 00:06:02.500000,1.0,d3b91062-3398-4235-8bbc-3571d2fd2d9c,-43.75,-29.24,0,1020089,,
5,5796,2025-01-12 00:06:06.600000,1.0,1ea8cb69-9095-4a7e-bc1a-e1ca8c4984b6,-48.825,28.475,1,1020089,home team,
26,27860,2025-01-12 00:42:53,1.0,c18d6b9e-4113-4157-a57d-aee79d0f2f51,46.55,10.71,1,1020089,home team,12327.0
...,...,...,...,...,...,...,...,...,...,...
20478,69055,2025-01-12 01:26:25.500000,2.0,1b48bd45-91c9-44ca-a4d2-5cc26510b359,26.95,-22.185,1,1547887,,
20479,69067,2025-01-12 01:26:26.700000,2.0,55cebbd1-a06e-4595-8d4d-db9e9f74d529,42.35,-4.165,1,1547887,home team,
20538,50122,2025-01-12 00:52:46.200000,2.0,c7284660-2243-4117-8e27-f6ae465e38f3,40.1625,-30.175,0,1547888,home team,
20539,50177,2025-01-12 00:52:51.700000,2.0,f5234205-3b40-4aa4-a39d-2aae0f7c99cd,48.2125,33.915,0,1547888,,


In [17]:
# Import the SB events matching the frames kept in `multi_frames`.
params = multi_frames.event_id.tolist()
# One "?" placeholder per event id keeps the query parameterised.
# NOTE: SQLite caps the number of bound parameters per statement
# (SQLITE_MAX_VARIABLE_NUMBER, 999 by default before SQLite 3.32.0); a very
# long id list may have to be queried in batches on older builds.
stat = f"SELECT * FROM events WHERE event_id IN ({', '.join('?' * len(params))})"
req = cursor.execute(stat, params)
res = req.fetchall()
desc = req.description
# Name the columns at construction time: assigning `.columns` afterwards raises
# a length-mismatch ValueError when the query returns no rows.
events = pd.DataFrame(res, columns=[col[0] for col in desc])

In [18]:
# Keep only the events attached to the frames selected by `index` (the NaN
# rows), without the columns that are not displayed here.
hidden_columns = ["index_event", "shot_outcome", "y_loc", "z_loc", "y_pass", "y_shot", "z_shot"]
nan_event_ids = freeze_frames.loc[index, "event_id"]
events.drop(columns=hidden_columns).loc[events["event_id"].isin(nan_event_ids)]

Unnamed: 0,event_id,shot_type,type,match_id_SB,period,possession,pass_cross,pass_type,minute,pass_body_part,player_id_SB,team_id_SB,x_loc,x_pass,x_shot
1,4e6b6d44-2c5c-4585-bd86-1aef4aeacbbb,,Player Off,3894037,1,68,,,33,,316514.0,147,,,
4,bae5c26c-8572-46cf-afca-b10d3693bbd5,,Substitution,3894037,2,121,,,59,,212489.0,156,,,
7,c87aaee5-70a3-4cb3-8fda-b462a52109cf,,Ball Receipt*,3894037,2,170,,,88,,3712.0,147,104.0,,
9,e4ec3204-6ddb-4202-9743-6ce36a43bd2a,,Injury Stoppage,3894038,2,82,,,46,,4448.0,141,,,
12,0dfc0964-d330-45e3-af1a-74e053e59a4e,,Ball Receipt*,3894038,2,130,,,81,,28238.0,141,91.6,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1493,9d6ff703-8ce0-4aa9-a650-da819e1d142b,,Ball Receipt*,3894370,1,31,,,18,,3451.0,137,114.6,,
1496,700c61dd-2608-4497-8355-fc143d2c2043,,Pass,3894370,1,49,,Goal Kick,27,Right Foot,2947.0,137,6.0,12.4,
1499,873a5f6b-ff24-43db-a983-a88164800daf,,Pressure,3894371,1,36,,,17,,29649.0,156,105.3,,
1502,1b48bd45-91c9-44ca-a4d2-5cc26510b359,,Pass,3894373,2,174,1.0,Free Kick,86,Right Foot,4413.0,164,90.8,108.5,


Les valeurs NaN de cette colonne correspondent à des events où le ballon est en dehors du terrain.

---
# Étude des valeurs NaN pour la colonne "tackable_object"

In [19]:
# Row labels of the frames whose "tackable_object" value is missing.
missing_tackable_object = freeze_frames["tackable_object"].isna()
index = freeze_frames.index[missing_tackable_object]

In [20]:
# Event ids of the frames selected by `index`.
freeze_frames.loc[index, "event_id"]

0        54cbad0c-1f05-4708-9b7f-2cadeae0636b
2        3ea6ecd9-bc39-4ce5-9b47-1eda239f2705
3        c1a7a598-9690-4a32-a733-b7b3b585df83
4        d3b91062-3398-4235-8bbc-3571d2fd2d9c
5        1ea8cb69-9095-4a7e-bc1a-e1ca8c4984b6
                         ...                 
20571    4667ef16-2566-41b8-98d9-4606aabf5d05
20572    ece56aa9-a9c7-48b5-8dbb-aa043cdec502
20573    4b410e07-5965-4dc0-b4e0-4664b770413b
20574    bf588ed7-391f-48d6-ab20-cd38b3e571da
20575    5954f7ff-0805-41e3-af67-b277cb4343ce
Name: event_id, Length: 15473, dtype: object

In [21]:
# Frames selected by `index` (the NaN rows), ordered by row label.
nan_rows = freeze_frames.loc[index]
multi_frames = nan_rows.sort_index()

Les valeurs NaN de cette colonne correspondent à des events où aucun joueur n'a le ballon.