In [1]:
# Importation des bibliothèques nécessaires
import polars as pl
import psycopg2
import os
from minio import Minio

In [None]:
# Variables d'environnement
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "http://localhost:9000")
POSTGRES_HOST = os.getenv("POSTGRES_HOST", "localhost")
POSTGRES_USER = os.getenv("POSTGRES_USER", "user")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "password")
POSTGRES_DB = os.getenv("POSTGRES_DB", "accidents")


In [3]:
# Variables de configuration
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "http://localhost:9000")
ACCESS_KEY = "minioadmin"
SECRET_KEY = "minioadmin"
BUCKET_NAME = "accidents"
FILE_NAME = "Motor.csv"
LOCAL_FILE_PATH = f"../data/{FILE_NAME}"
DOWNLOAD_PATH = f"./data/{FILE_NAME}_downloaded.csv"


In [4]:
# Connexion à Minio
minio_client = Minio(
    MINIO_ENDPOINT.replace("http://", "").replace("https://", ""),
    access_key="minioadmin",
    secret_key="minioadmin",
    secure=False
)

In [5]:
# Vérifier si le bucket existe, sinon le créer
if not minio_client.bucket_exists(BUCKET_NAME):
    minio_client.make_bucket(BUCKET_NAME)
    print(f"Bucket '{BUCKET_NAME}' créé avec succès.")
else:
    print(f"Bucket '{BUCKET_NAME}' déjà existant.")


MaxRetryError: HTTPConnectionPool(host='minio', port=9000): Max retries exceeded with url: /accidents?location= (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x0000017735263A10>: Failed to resolve 'minio' ([Errno 11001] getaddrinfo failed)"))

In [None]:
# Upload du fichier dans MinIO
minio_client.fput_object(BUCKET_NAME, FILE_NAME, LOCAL_FILE_PATH)
print(f" Fichier {FILE_NAME} uploadé avec succès dans MinIO.")

📤 Fichier Motor_Vehicle_Crashes_-_Vehicle_Information__Three_Year_Window(1).csv uploadé avec succès dans MinIO.


In [None]:
# Télécharger le fichier depuis MinIO
minio_client.fget_object(BUCKET_NAME, FILE_NAME, DOWNLOAD_PATH)
print(f" Fichier {FILE_NAME} téléchargé depuis MinIO sous '{DOWNLOAD_PATH}'.")

📥 Fichier Motor_Vehicle_Crashes_-_Vehicle_Information__Three_Year_Window(1).csv téléchargé depuis MinIO sous './data/Motor_Vehicle_Crashes_-_Vehicle_Information__Three_Year_Window(1).csv_downloaded.csv'.


In [None]:
# Lister les fichiers dans le bucket
objects = minio_client.list_objects(BUCKET_NAME)
print(" Fichiers dans MinIO:")
for obj in objects:
    print(f"- {obj.object_name}")


📂 Fichiers dans MinIO:
- Motor_Vehicle_Crashes_-_Vehicle_Information__Three_Year_Window(1).csv


In [None]:
# Charger et afficher les données avec Polars
df = pl.read_csv(DOWNLOAD_PATH)
print(" Aperçu des données:")
print(df.head())

🔍 Aperçu des données:
shape: (5, 19)
┌──────┬─────────────────┬───────────────────┬───────────────────────┬───┬──────────────┬───────────────────────┬──────────────────────┬───────────────────┐
│ Year ┆ Case Vehicle ID ┆ Vehicle Body Type ┆ Registration Class    ┆ … ┆ Contributing ┆ Contributing Factor 2 ┆ Event Type           ┆ Partial VIN       │
│ ---  ┆ ---             ┆ ---               ┆ ---                   ┆   ┆ Factor 2     ┆ Descript…             ┆ ---                  ┆ ---               │
│ i64  ┆ i64             ┆ str               ┆ str                   ┆   ┆ ---          ┆ ---                   ┆ str                  ┆ str               │
│      ┆                 ┆                   ┆                       ┆   ┆ str          ┆ str                   ┆                      ┆                   │
╞══════╪═════════════════╪═══════════════════╪═══════════════════════╪═══╪══════════════╪═══════════════════════╪══════════════════════╪═══════════════════╡
│ 2021 ┆ 18127660    

In [10]:
df.shape

(3551191, 19)

In [11]:
df.schema

Schema([('Year', Int64),
        ('Case Vehicle ID', Int64),
        ('Vehicle Body Type', String),
        ('Registration Class', String),
        ('Action Prior to Accident', String),
        ('Type / Axles of Truck or Bus', String),
        ('Direction of Travel', String),
        ('Fuel Type', String),
        ('Vehicle Year', Int64),
        ('State of Registration', String),
        ('Number of Occupants', Int64),
        ('Engine Cylinders', Int64),
        ('Vehicle Make', String),
        ('Contributing Factor 1', String),
        ('Contributing Factor 1 Description', String),
        ('Contributing Factor 2', String),
        ('Contributing Factor 2 Description', String),
        ('Event Type', String),
        ('Partial VIN', String)])

In [30]:
df.null_count().sum()

Year,Case Vehicle ID,Vehicle Body Type,Registration Class,Action Prior to Accident,Type / Axles of Truck or Bus,Direction of Travel,Fuel Type,Vehicle Year,State of Registration,Number of Occupants,Engine Cylinders,Vehicle Make,Contributing Factor 1,Contributing Factor 1 Description,Contributing Factor 2,Contributing Factor 2 Description,Event Type,Partial VIN
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0,0,647036,310389,363097,851040,643121,0,0,0,0,71325,720590


In [32]:
df.describe()

statistic,Year,Case Vehicle ID,Vehicle Body Type,Registration Class,Action Prior to Accident,Type / Axles of Truck or Bus,Direction of Travel,Fuel Type,Vehicle Year,State of Registration,Number of Occupants,Engine Cylinders,Vehicle Make,Contributing Factor 1,Contributing Factor 1 Description,Contributing Factor 2,Contributing Factor 2 Description,Event Type,Partial VIN
str,f64,f64,str,str,str,str,str,str,f64,str,f64,f64,str,str,str,str,str,str,str
"""count""",3551191.0,3551191.0,"""3551191""","""3551191""","""3551191""","""3551191""","""3551191""","""3551191""",2904155.0,"""3240802""",3188094.0,2700151.0,"""2908070""","""3551191""","""3551191""","""3551191""","""3551191""","""3479866""","""2830601"""
"""null_count""",0.0,0.0,"""0""","""0""","""0""","""0""","""0""","""0""",647036.0,"""310389""",363097.0,851040.0,"""643121""","""0""","""0""","""0""","""0""","""71325""","""720590"""
"""mean""",2020.960034,18634000.0,,,,,,,2013.284109,,1.231648,5.115332,,,,,,,
"""std""",1.450807,1291100.0,,,,,,,6.374365,,1.222841,1.464921,,,,,,,
"""min""",2019.0,15350252.0,"""2 DOOR SEDAN""","""AGRICULTURAL COMMERCIAL""","""Avoiding Object in Roadway""","""2 axle box trailer and 3 axle …","""East""","""Compressed Natural Gas""",20.0,"""AB""",0.0,0.0,"""A""","""ENVMT""","""Accelerator Defective""","""ENVMT""","""Accelerator Defective""","""Animal, Collision With""","""0"""
"""25%""",2020.0,17518829.0,,,,,,,2009.0,,1.0,4.0,,,,,,,
"""50%""",2021.0,18657414.0,,,,,,,2015.0,,1.0,4.0,,,,,,,
"""75%""",2022.0,19747770.0,,,,,,,2018.0,,1.0,6.0,,,,,,,
"""max""",2023.0,21380596.0,"""WELL SERVICING RIG""","""VAS VOLUNTEER AMBULANCE""","""Unknown""","""Unknown""","""West""","""Propane""",2024.0,"""ZS""",687.0,16.0,"""ZZ""","""VEHICLE""","""Windshield Inadequate""","""VEHICLE""","""Windshield Inadequate""","""Unknown""","""yv1ah99x81054962"""


In [33]:
df.drop_nulls()

Year,Case Vehicle ID,Vehicle Body Type,Registration Class,Action Prior to Accident,Type / Axles of Truck or Bus,Direction of Travel,Fuel Type,Vehicle Year,State of Registration,Number of Occupants,Engine Cylinders,Vehicle Make,Contributing Factor 1,Contributing Factor 1 Description,Contributing Factor 2,Contributing Factor 2 Description,Event Type,Partial VIN
i64,i64,str,str,str,str,str,str,i64,str,i64,i64,str,str,str,str,str,str,str
2021,18126564,"""SUBURBAN""","""PASSENGER OR SUBURBAN""","""Parked""","""Not Entered""","""West""","""Gas""",2020,"""NY""",0,4,"""MAZDA""","""HUMAN""","""Not Entered""","""HUMAN""","""Not Entered""","""Other Motor Vehicle, Collision…","""JM3KFBCM4L0771428"""
2021,18126574,"""SUBURBAN""","""PASSENGER OR SUBURBAN""","""Other""","""Not Entered""","""Northwest""","""Gas""",2020,"""NY""",1,4,"""HYUND""","""HUMAN""","""Not Entered""","""HUMAN""","""Not Entered""","""Not Entered""","""KM8J3CA40LU281000"""
2021,18126576,"""SUBURBAN""","""PASSENGER OR SUBURBAN""","""Parked""","""Not Entered""","""South""","""Gas""",2009,"""NY""",3,6,"""HONDA""","""HUMAN""","""Not Entered""","""HUMAN""","""Not Entered""","""Not Entered""","""5FNRL386X9B027780"""
2021,18126580,"""4 DOOR SEDAN""","""PASSENGER OR SUBURBAN""","""Going Straight Ahead""","""Not Entered""","""South""","""Gas""",2020,"""NY""",1,4,"""NISSA""","""HUMAN""","""Not Entered""","""HUMAN""","""Not Entered""","""Not Entered""","""1N4BL4CV5LC134543"""
2021,18126578,"""4 DOOR SEDAN""","""PASSENGER OR SUBURBAN""","""Going Straight Ahead""","""Not Entered""","""South""","""Gas""",2011,"""NY""",1,4,"""CHEVR""","""HUMAN""","""Not Entered""","""HUMAN""","""Not Entered""","""Not Entered""","""1G1RC6E49BU103290"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2023,21363354,"""SUBURBAN""","""PASSENGER OR SUBURBAN""","""Going Straight Ahead""","""Not Entered""","""West""","""Gas""",2019,"""NY""",2,6,"""HONDA""","""HUMAN""","""Not Applicable""","""ENVMT""","""Animal's Action""","""Not Applicable""","""5FNRL6H57KB133134"""
2023,21366619,"""4 DOOR SEDAN""","""OMNIBUS - TAXI""","""Unknown""","""Not Entered""","""Unknown""","""Gas""",2022,"""NY""",2,4,"""TOYOT""","""HUMAN""","""Not Entered""","""HUMAN""","""Not Entered""","""Not Entered""","""4T1T11AK2NU700503"""
2023,21368709,"""SUBURBAN""","""PASSENGER OR SUBURBAN""","""Unknown""","""Not Entered""","""Unknown""","""Gas""",2021,"""NY""",1,4,"""ME/BE""","""HUMAN""","""Not Entered""","""HUMAN""","""Not Entered""","""Not Entered""","""4JGFB4KB3MA568328"""
2023,21371790,"""SUBURBAN""","""PASSENGER OR SUBURBAN""","""Unknown""","""Not Entered""","""Unknown""","""Gas""",2019,"""NY""",1,4,"""HONDA""","""HUMAN""","""Not Entered""","""HUMAN""","""Not Entered""","""Not Entered""","""JHLRW2H51KX006677"""


## verification de valeur manquante


In [13]:
missing_values = df.select([pl.col(c).is_null().sum().alias(c) for c in df.columns])
print(missing_values)

shape: (1, 19)
┌──────┬─────────────────┬───────────────────┬────────────────────┬───┬───────────────────────┬─────────────────────────────────┬────────────┬─────────────┐
│ Year ┆ Case Vehicle ID ┆ Vehicle Body Type ┆ Registration Class ┆ … ┆ Contributing Factor 2 ┆ Contributing Factor 2 Descript… ┆ Event Type ┆ Partial VIN │
│ ---  ┆ ---             ┆ ---               ┆ ---                ┆   ┆ ---                   ┆ ---                             ┆ ---        ┆ ---         │
│ u32  ┆ u32             ┆ u32               ┆ u32                ┆   ┆ u32                   ┆ u32                             ┆ u32        ┆ u32         │
╞══════╪═════════════════╪═══════════════════╪════════════════════╪═══╪═══════════════════════╪═════════════════════════════════╪════════════╪═════════════╡
│ 0    ┆ 0               ┆ 0                 ┆ 0                  ┆ … ┆ 0                     ┆ 0                               ┆ 71325      ┆ 720590      │
└──────┴─────────────────┴─────────────────

### les valeur dupliquées

In [16]:
df.unique()

Year,Case Vehicle ID,Vehicle Body Type,Registration Class,Action Prior to Accident,Type / Axles of Truck or Bus,Direction of Travel,Fuel Type,Vehicle Year,State of Registration,Number of Occupants,Engine Cylinders,Vehicle Make,Contributing Factor 1,Contributing Factor 1 Description,Contributing Factor 2,Contributing Factor 2 Description,Event Type,Partial VIN
i64,i64,str,str,str,str,str,str,i64,str,i64,i64,str,str,str,str,str,str,str
2019,16361279,"""SUBURBAN""","""OMNIBUS - TAXI""","""Making Left Turn""","""Not Entered""","""South""","""Gas""",2018,"""NY""",1,6,"""TOYOT""","""HUMAN""","""Unknown""","""HUMAN""","""Not Applicable""","""Not Entered""","""5TDJZRFH7JS526233"""
2020,17555250,"""4 DOOR SEDAN""","""PASSENGER OR SUBURBAN""","""Going Straight Ahead""","""Not Entered""","""North""","""Gas""",2012,"""NY""",2,4,"""NISSA""","""HUMAN""","""Not Applicable""","""HUMAN""","""Not Applicable""","""Not Applicable""","""3N1BC1CP6CK273328"""
2022,19934353,"""PICKUP TRUCK""","""PASSENGER OR SUBURBAN""","""Starting in Traffic""","""Not Entered""","""West""","""Gas""",2017,"""NY""",1,6,"""RAM""","""HUMAN""","""Not Applicable""","""HUMAN""","""Following Too Closely""","""Not Applicable""","""1C6RR7FG3HS793442"""
2023,20036110,"""SUBURBAN""","""PASSENGER OR SUBURBAN""","""Parked""","""Not Entered""","""South""","""Gas""",2020,"""NY""",0,6,"""HONDA""","""HUMAN""","""Not Applicable""","""HUMAN""","""Not Applicable""","""Not Entered""","""5FNYF6H79LB043313"""
2019,17238183,"""PICKUP TRUCK""","""COMMERCIAL""","""Going Straight Ahead""","""Not Entered""","""East""","""Diesel""",2015,"""NY""",1,8,"""GMC""","""ENVMT""","""Animal's Action""","""HUMAN""","""Not Applicable""","""Not Applicable""","""1GT424E86FF605714"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2020,17922685,"""BICYCLIST""","""Not Entered""","""Going Straight Ahead""","""Not Entered""","""Northwest""","""Not Entered""",,,1,,,"""HUMAN""","""Not Applicable""","""HUMAN""","""Unknown""","""Not Entered""",
2020,17595602,"""UNKNOWN VEHICLE""","""Not Entered""","""Going Straight Ahead""","""Not Entered""","""South""","""Not Entered""",,,,,,"""HUMAN""","""Not Applicable""","""HUMAN""","""Passing or Lane Usage Improper""","""Not Applicable""",
2022,19516227,"""4 DOOR SEDAN""","""PASSENGER OR SUBURBAN""","""Overtaking/Passing""","""Not Entered""","""West""","""Gas""",2017,"""NY""",1,4,"""FORD""","""HUMAN""","""Not Applicable""","""HUMAN""","""Passing or Lane Usage Improper""","""Not Applicable""","""3FA6P0T95HR268432"""
2019,16664839,"""4 DOOR SEDAN""","""PASSENGER OR SUBURBAN""","""Entering Parked Position""","""Not Entered""","""North""","""Gas""",2006,"""NY""",2,4,"""TOYOT""","""HUMAN""","""Driver Inattention/Distraction…","""HUMAN""","""Not Applicable""","""Building/Wall, Collision With …","""4T1BE30K36U692037"""


In [49]:
### suppression de colonne qui ne contient rien
df = df.drop("Type / Axles of Truck or Bus")

## selection de type de colonnes

In [17]:

numeric_cols = df.select([col for col in df.columns if df[col].dtype in [pl.Int32, pl.Int64, pl.Float32, pl.Float64]])


print(numeric_cols)

In [25]:
categorical_cols = df.select([col for col in df.columns if df[col].dtype == pl.Utf8])

In [24]:
print(categorical_cols)

shape: (3_551_191, 14)
┌───────────────────┬──────────────┬─────────────────┬─────────────────┬───┬──────────────┬──────────────────────────┬─────────────┬───────────────────┐
│ Vehicle Body Type ┆ Registration ┆ Action Prior to ┆ Type / Axles of ┆ … ┆ Contributing ┆ Contributing Factor 2    ┆ Event Type  ┆ Partial VIN       │
│ ---               ┆ Class        ┆ Accident        ┆ Truck or Bus    ┆   ┆ Factor 2     ┆ Descript…                ┆ ---         ┆ ---               │
│ str               ┆ ---          ┆ ---             ┆ ---             ┆   ┆ ---          ┆ ---                      ┆ str         ┆ str               │
│                   ┆ str          ┆ str             ┆ str             ┆   ┆ str          ┆ str                      ┆             ┆                   │
╞═══════════════════╪══════════════╪═════════════════╪═════════════════╪═══╪══════════════╪══════════════════════════╪═════════════╪═══════════════════╡
│ UNKNOWN VEHICLE   ┆ Not Entered  ┆ Going Straight  ┆ Not 

## comptages de valeur uniques de type de vehicule

In [40]:
unique_vehicle_make_count = df["Vehicle Make"].n_unique()
print(f"Nombre de valeurs uniques dans 'Vehicle Make' : {unique_vehicle_make_count}")

Nombre de valeurs uniques dans 'Vehicle Make' : 3317


# analyse temporelle 

In [41]:
accidents_per_year = df.group_by("Year").agg(pl.count().alias("accident_count"))

print(accidents_per_year)

shape: (5, 2)
┌──────┬────────────────┐
│ Year ┆ accident_count │
│ ---  ┆ ---            │
│ i64  ┆ u32            │
╞══════╪════════════════╡
│ 2020 ┆ 598987         │
│ 2022 ┆ 720559         │
│ 2019 ┆ 835976         │
│ 2021 ┆ 691442         │
│ 2023 ┆ 704227         │
└──────┴────────────────┘


  accidents_per_year = df.group_by("Year").agg(pl.count().alias("accident_count"))


## analyse de distribution de donnée catégorielles

In [50]:
for column in categorical_cols:
    distribution = df.group_by(column).agg(pl.count().alias(f"{column}_count"))
    print(f"Distribution de la colonne '{column}':")
    print(distribution)
    print("\n")

  distribution = df.group_by(column).agg(pl.count().alias(f"{column}_count"))


Distribution de la colonne 'shape: (3_551_191,)
Series: 'Vehicle Body Type' [str]
[
	"UNKNOWN VEHICLE"
	"SUBURBAN"
	"SUBURBAN"
	"SEDAN"
	"SUBURBAN"
	…
	"SUBURBAN"
	"SUBURBAN"
	"UNKNOWN VEHICLE"
	"TRACTOR"
	"4 DOOR SEDAN"
]':
shape: (64, 2)
┌───────────────────────────┬─────────────────────┐
│ Vehicle Body Type         ┆ shape: (3_551_191,) │
│ ---                       ┆ Series: 'V…         │
│ str                       ┆ ---                 │
│                           ┆ u32                 │
╞═══════════════════════════╪═════════════════════╡
│ UTILITY                   ┆ 9351                │
│ BUS (OMNIBUS)             ┆ 27794               │
│ ALL TERRAIN VEHICLE       ┆ 483                 │
│ SNOW PLOW                 ┆ 192                 │
│ LOW SPEED VEHICLE - TRUCK ┆ 2                   │
│ …                         ┆ …                   │
│ LOCOMOTIVE                ┆ 31                  │
│ TRUCK W/ SMALL WHEELS     ┆ 1104                │
│ 2 DOOR SEDAN              ┆ 77

## type d'evennement apres l'accident

In [52]:
event_type_distribution = df.select("Event Type").group_by("Event Type").agg(pl.count().alias("count"))

print(event_type_distribution)

  event_type_distribution = df.select("Event Type").group_by("Event Type").agg(pl.count().alias("count"))


shape: (36, 2)
┌─────────────────────────────────┬───────┐
│ Event Type                      ┆ count │
│ ---                             ┆ ---   │
│ str                             ┆ u32   │
╞═════════════════════════════════╪═══════╡
│ Submersion, Non-Collision       ┆ 250   │
│ Fence, Collision With Fixed Ob… ┆ 4792  │
│ Other Pedestrian                ┆ 465   │
│ Bicyclist, Collision With       ┆ 575   │
│ Overturned, Non-Collision       ┆ 6687  │
│ …                               ┆ …     │
│ Ran Off Roadway Only, Non-Coll… ┆ 72    │
│ null                            ┆ 71325 │
│ Earth Embankment/Rock Cut/Ditc… ┆ 12647 │
│ Animal, Collision With          ┆ 629   │
│ Light Support/Utility Pole, Co… ┆ 9724  │
└─────────────────────────────────┴───────┘


## relation entre 'Event Type' et 'Vehicle Body Type'


In [59]:
relationship = df.group_by("Event Type").agg(pl.col("Vehicle Body Type").unique().alias("vehicle_body_types"))

print(relationship)

shape: (36, 2)
┌─────────────────────────────────┬─────────────────────────────────┐
│ Event Type                      ┆ vehicle_body_types              │
│ ---                             ┆ ---                             │
│ str                             ┆ list[str]                       │
╞═════════════════════════════════╪═════════════════════════════════╡
│ Culver/Head Wall, Collision Wi… ┆ ["2 DOOR SEDAN", "POLICE VEHIC… │
│ Other*, Non-Collision           ┆ ["TRACTOR", "TOW TRUCK", … "2 … │
│ Sign Post, Collision With Fixe… ┆ ["TRACTOR", "4 DOOR SEDAN", … … │
│ Not Entered                     ┆ ["CEMENT MIXER", "2 DOOR SEDAN… │
│ Bicyclist, Collision With       ┆ ["CONVERTIBLE", "UNKNOWN TRUCK… │
│ …                               ┆ …                               │
│ Tree, Collision With Fixed Obj… ┆ ["DUMP", "SEDAN", … "UNKNOWN T… │
│ Overturned, Non-Collision       ┆ ["MOPED", "VAN TRUCK", … "TRAC… │
│ Animal, Collision With          ┆ ["4 DOOR SEDAN", "UNKNOWN TRUC… │
│ Not

##  Calculer le nombre d'accidents pour chaque type de véhicule.

In [60]:
df_filtered = df.select(["Vehicle Body Type", "Case Vehicle ID"])

In [62]:
accidents_by_vehicle_type = df_filtered.group_by("Vehicle Body Type").agg(pl.count("Case Vehicle ID").alias("accident_count"))


In [68]:
accidents_by_vehicle_type_sorted = accidents_by_vehicle_type.sort("accident_count", descending=True)


In [69]:
print(accidents_by_vehicle_type_sorted)

shape: (64, 2)
┌───────────────────────────┬────────────────┐
│ Vehicle Body Type         ┆ accident_count │
│ ---                       ┆ ---            │
│ str                       ┆ u32            │
╞═══════════════════════════╪════════════════╡
│ SUBURBAN                  ┆ 1381738        │
│ 4 DOOR SEDAN              ┆ 1094922        │
│ PICKUP TRUCK              ┆ 256761         │
│ UNKNOWN VEHICLE           ┆ 220999         │
│ SEDAN                     ┆ 110017         │
│ …                         ┆ …              │
│ WELL SERVICING RIG        ┆ 14             │
│ WELL DRILLER              ┆ 9              │
│ HEARSE - INVALID          ┆ 5              │
│ FEED PROCESSING MACHINE   ┆ 2              │
│ LOW SPEED VEHICLE - TRUCK ┆ 2              │
└───────────────────────────┴────────────────┘


## l'analyse des occupants par catégorie de véhicule

In [72]:
df_filter = df.select(["Vehicle Body Type", "Number of Occupants"])


In [88]:
occupants_by_vehicle_type = df.group_by("Vehicle Body Type").agg(
    pl.col("Number of Occupants").sum().alias("total_occupants")
)

In [90]:
occupants_by_vehicle_type_sorted = occupants_by_vehicle_type.sort("total_occupants", descending=True)


In [91]:
print(occupants_by_vehicle_type_sorted)

shape: (64, 2)
┌───────────────────────────┬─────────────────┐
│ Vehicle Body Type         ┆ total_occupants │
│ ---                       ┆ ---             │
│ str                       ┆ i64             │
╞═══════════════════════════╪═════════════════╡
│ SUBURBAN                  ┆ 1683936         │
│ 4 DOOR SEDAN              ┆ 1264589         │
│ PICKUP TRUCK              ┆ 286722          │
│ SEDAN                     ┆ 108316          │
│ BUS (OMNIBUS)             ┆ 100294          │
│ …                         ┆ …               │
│ HEARSE - INVALID          ┆ 6               │
│ FEED PROCESSING MACHINE   ┆ 2               │
│ LOW SPEED VEHICLE - TRUCK ┆ 1               │
│ PEDESTRIAN                ┆ 0               │
│ OTHER PEDESTRIAN          ┆ 0               │
└───────────────────────────┴─────────────────┘
