In [24]:
import pandas as pd

## For Emissions CSV

In [25]:
# Load the per-timestep emissions export (";"-separated).
# Forward slashes keep this relative path valid on Windows as well as on
# Linux/macOS; a backslash-separated path only resolves on Windows.
df_emissions = pd.read_csv("../outputs/csvs/emissions.csv", sep=";")
df_emissions.head()

Unnamed: 0,timestep_time,vehicle_CO,vehicle_CO2,vehicle_HC,vehicle_NOx,vehicle_PMx,vehicle_angle,vehicle_eclass,vehicle_electricity,vehicle_fuel,vehicle_id,vehicle_lane,vehicle_noise,vehicle_pos,vehicle_route,vehicle_speed,vehicle_type,vehicle_waiting,vehicle_x,vehicle_y
0,0.0,112.58,2298.06,0.56,0.96,0.09,90.0,HBEFA3/PC_G_EU6,0.0,733.06,flow_0.0,A4B4_0,55.94,4.6,r_upper,0.0,car,0.0,7.8,398.4
1,1.0,102.77,3237.74,0.54,1.38,0.09,90.0,HBEFA3/PC_G_EU6,0.0,1032.87,flow_0.0,A4B4_0,66.54,6.82,r_upper,2.22,car,0.0,10.02,398.4
2,2.0,101.6,4718.86,0.58,2.05,0.12,90.0,HBEFA3/PC_G_EU6,0.0,1505.4,flow_0.0,A4B4_0,68.89,11.55,r_upper,4.73,car,0.0,14.75,398.4
3,3.0,97.84,5741.3,0.59,2.51,0.13,90.0,HBEFA3/PC_G_EU6,0.0,1831.62,flow_0.0,A4B4_0,69.16,18.62,r_upper,7.07,car,0.0,21.82,398.4
4,4.0,99.44,6887.45,0.63,3.02,0.15,90.0,HBEFA3/PC_G_EU6,0.0,2197.29,flow_0.0,A4B4_0,70.0,27.97,r_upper,9.36,car,0.0,31.17,398.4


In [26]:
# Order by flow, then car number within the flow, then timestep

# vehicle_id looks like "flow_<flow>.<car>". Split it into two integer columns so
# the sort below is numeric ("flow_0.2" before "flow_0.10") rather than lexicographic.
df_emissions["car_id"] = df_emissions["vehicle_id"].str.split(".", n=1).str[1].astype(int)
# expand=False returns a Series (not a one-column DataFrame) for the single capture group.
df_emissions["flow_id"] = df_emissions["vehicle_id"].str.extract(r"flow_(\d+)\.\d+", expand=False).astype(int)

# Sort by flow_id, car_id and timestep_time (all ascending)
df_emissions = df_emissions.sort_values(by=["flow_id", "car_id", "timestep_time"], ascending=[True, True, True])

df_emissions.head()

Unnamed: 0,timestep_time,vehicle_CO,vehicle_CO2,vehicle_HC,vehicle_NOx,vehicle_PMx,vehicle_angle,vehicle_eclass,vehicle_electricity,vehicle_fuel,...,vehicle_noise,vehicle_pos,vehicle_route,vehicle_speed,vehicle_type,vehicle_waiting,vehicle_x,vehicle_y,car_id,flow_id
0,0.0,112.58,2298.06,0.56,0.96,0.09,90.0,HBEFA3/PC_G_EU6,0.0,733.06,...,55.94,4.6,r_upper,0.0,car,0.0,7.8,398.4,0,0
1,1.0,102.77,3237.74,0.54,1.38,0.09,90.0,HBEFA3/PC_G_EU6,0.0,1032.87,...,66.54,6.82,r_upper,2.22,car,0.0,10.02,398.4,0,0
2,2.0,101.6,4718.86,0.58,2.05,0.12,90.0,HBEFA3/PC_G_EU6,0.0,1505.4,...,68.89,11.55,r_upper,4.73,car,0.0,14.75,398.4,0,0
3,3.0,97.84,5741.3,0.59,2.51,0.13,90.0,HBEFA3/PC_G_EU6,0.0,1831.62,...,69.16,18.62,r_upper,7.07,car,0.0,21.82,398.4,0,0
4,4.0,99.44,6887.45,0.63,3.02,0.15,90.0,HBEFA3/PC_G_EU6,0.0,2197.29,...,70.0,27.97,r_upper,9.36,car,0.0,31.17,398.4,0,0


In [27]:
def checkMissing(df):
    """Print, for each flow in ``df``, the car ids missing from its id sequence.

    For every distinct ``flow_id`` the car ids present are compared with the full
    integer range between the smallest and the largest id seen in that flow.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with integer ``flow_id`` and ``car_id`` columns.

    Returns
    -------
    None
        The findings are only printed.
    """
    # Take the flows from the frame that was passed in (not from a notebook
    # global) so the check works for any DataFrame; sorted for stable output.
    for f in sorted(df["flow_id"].unique().tolist()):
        print("Checking missing values for flow_id",f)

        # A set gives O(1) membership tests for the gap search below.
        present_ids = set(df.loc[df["flow_id"] == f, "car_id"].tolist())
        first_id, last_id = min(present_ids), max(present_ids)

        missing_ids = [i for i in range(first_id, last_id + 1) if i not in present_ids]

        # For each gap, record the direct neighbours that do exist (None otherwise).
        missing_info = [{"missing_id": m,
                        "prev_id": m-1 if m-1 in present_ids else None,
                        "next_id": m+1 if m+1 in present_ids else None} for m in missing_ids]

        print("Missing IDs:", missing_ids)
        print("Details:", missing_info)

In [28]:
# Report, per flow, any gaps in the car_id sequence of the emissions data
checkMissing(df_emissions)

Checking missing values for flow_id 0
Missing IDs: []
Details: []


In [31]:
# Mean CO2 per vehicle: one row per vehicle_id, with the mean stored in "avg_co2"
avg_df_emissions = (
    df_emissions
    .groupby("vehicle_id", as_index=False)["vehicle_CO2"]
    .mean()
    .rename(columns={"vehicle_CO2": "avg_co2"})
)
avg_df_emissions.head(20)

Unnamed: 0,vehicle_id,avg_co2
0,flow_0.0,2883.211356
1,flow_0.1,2607.713651
2,flow_0.10,2748.313906
3,flow_0.100,2799.264667
4,flow_0.101,2700.032903
5,flow_0.102,2732.728254
6,flow_0.103,2662.146452
7,flow_0.104,2523.215909
8,flow_0.105,2711.455323
9,flow_0.106,2819.286066


Let's sort them in ascending order of car spawn number within each flow:


In [32]:
def orderSpawn(df):
    """Return ``df`` sorted numerically by flow and by car number within the flow.

    ``vehicle_id`` values look like ``"flow_<flow>.<car>"``. Sorting on the raw
    string is lexicographic ("flow_0.10" before "flow_0.2"), so both numbers are
    parsed to integers and used as sort keys.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string ``vehicle_id`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A sorted copy of ``df`` with an integer ``flow_id`` column; the original
        index labels are kept.
    """
    vehicle_ids = df["vehicle_id"]

    # assign() works on a copy, so the caller's frame does not gain helper columns.
    keyed = df.assign(
        flow_id=vehicle_ids.str.extract(r"flow_(\d+)\.\d+", expand=False).astype(int),
        car_num=vehicle_ids.str.split(".", n=1).str[1].astype(int),
    )

    # Sort by flow_id first, then by car_num; car_num is only a temporary sort key.
    return keyed.sort_values(by=["flow_id", "car_num"], ascending=[True, True]).drop(columns=["car_num"])

In [34]:
# Show the per-vehicle averages sorted numerically by flow and car number
orderSpawn(avg_df_emissions)

Unnamed: 0,vehicle_id,avg_co2,flow_id
0,flow_0.0,2883.211356,0
1,flow_0.1,2607.713651,0
112,flow_0.2,2855.893167,0
223,flow_0.3,2844.316230,0
334,flow_0.4,2715.087500,0
...,...,...,...
551,flow_0.595,2789.989333,0
552,flow_0.596,2870.550000,0
553,flow_0.597,3182.498889,0
554,flow_0.598,3973.378333,0


## For Tripinfo CSV

In [35]:
# Load the trip info export (";"-separated).
# Forward slashes keep this relative path valid on Windows as well as on
# Linux/macOS; a backslash-separated path only resolves on Windows.
df_tripinfo = pd.read_csv("../outputs/csvs/tripinfo.csv", sep=";")
df_tripinfo.head()

Unnamed: 0,tripinfo_arrival,tripinfo_arrivalLane,tripinfo_arrivalPos,tripinfo_arrivalSpeed,tripinfo_depart,tripinfo_departDelay,tripinfo_departLane,tripinfo_departPos,tripinfo_departSpeed,tripinfo_devices,...,tripinfo_vaporized,tripinfo_waitingCount,tripinfo_waitingTime,emissions_CO2_abs,emissions_CO_abs,emissions_HC_abs,emissions_NOx_abs,emissions_PMx_abs,emissions_electricity_abs,emissions_fuel_abs
0,59.0,E2D2_0,85.6,13.64,0.0,0.0,A4B4_0,4.6,0.0,tripinfo_flow_0.0 emissions_flow_0.0,...,,0,0.0,172925.16,1879.17,12.67,69.42,2.97,0.0,55171.16
1,69.0,E2D2_0,85.6,12.17,6.0,0.0,A4B4_0,4.6,0.0,tripinfo_flow_0.1 emissions_flow_0.1,...,,0,0.0,161987.89,1609.13,10.99,62.59,2.48,0.0,51682.56
2,72.0,E2D2_0,85.6,13.48,12.0,0.0,A4B4_0,4.6,0.0,tripinfo_flow_0.2 emissions_flow_0.2,...,,0,0.0,171097.7,1902.89,12.72,68.74,2.94,0.0,54587.99
3,79.0,E2D2_0,85.6,13.81,18.0,0.0,A4B4_0,4.6,0.0,tripinfo_flow_0.3 emissions_flow_0.3,...,,0,0.0,174507.68,1843.26,12.51,69.02,2.85,0.0,55676.42
4,84.0,E2D2_0,85.6,13.48,24.0,0.0,A4B4_0,4.6,0.0,tripinfo_flow_0.4 emissions_flow_0.4,...,,0,0.0,164820.74,1749.41,11.83,65.58,2.74,0.0,52585.7


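The vehicle identifier here follows the same `flow_<flow>.<car>` format as `vehicle_id`, so the helpers above can be reused; they only need the ID column name to be configurable, either through a global variable or as a function argument.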