In [1]:
import pandas as pd
import geopandas as gpd
import osmnx as ox

In [2]:
# Every Helsinki input file sits in the same directory. Keeping that directory
# in one constant means relocating the data only requires editing this line
# instead of five separate string literals.
DATA_DIR = "/users/eleves-b/2023/wenrui.dai/Desktop/environment/Helsinki"

# Origin-destination table. Later cells rename its ORIGIN_CODE9 /
# DESTINATION_CODE9 columns to from_id / to_id and read its
# EXTRAPOLATED_NUMBER_OF_USERS column.
path = f"{DATA_DIR}/Locomizer_OD_hexes_R9_45_2023_i4_dw5.csv"
td = pd.read_csv(path)

# Four trip tables (two dates, each with a plain and an "_n" variant); they are
# loaded in the next cell.
path1 = f"{DATA_DIR}/Helsinki_H3_r5rDI_2025-07-01_TD_414.csv"
path2 = f"{DATA_DIR}/Helsinki_H3_r5rDI_2025-07-01_TD_414_n.csv"
path3 = f"{DATA_DIR}/Helsinki_H3_r5rDI_2025-07-02_TD_415.csv"
path4 = f"{DATA_DIR}/Helsinki_H3_r5rDI_2025-07-02_TD_415_n.csv"

In [3]:
# Load the four trip tables in the same order as their path constants.
m14, n14, m15, n15 = (
    pd.read_csv(csv_path) for csv_path in (path1, path2, path3, path4)
)

In [4]:
# Emission factor per transport mode, keyed by the labels that
# compute_mode_emissions looks up in the trips' "mode" column (the lookup is
# an exact, case-sensitive match). Each factor is multiplied by
# distance / 1000, so it is expressed per 1000 distance units
# (presumably g CO2 per passenger-km -- TODO confirm unit and data source).
# Mode labels that are absent from this dict contribute zero emissions.
emission_factors = {
    "WALK": 0,
    "BUS": 57.6,
    "SUBWAY": 25,
    "RAIL": 25,
    "TRAM": 25,
    "FERRY": 36
}

In [5]:
def compute_mode_emissions(
    trips: pd.DataFrame,
    factors: dict,
    required_modes=("ferry", "bus", "rail"),
) -> pd.DataFrame:
    """Aggregate per-row emissions into one row per (to_id, from_id) pair.

    Every row of ``trips`` is assigned ``distance / 1000 * factors[mode]`` as
    its emission. The rows are then summed per origin-destination pair, both
    overall and separately for each mode.

    Parameters
    ----------
    trips : pd.DataFrame
        Must contain the columns ``"to_id"``, ``"from_id"``, ``"mode"`` and
        ``"distance"``. ``distance`` is divided by 1000 before the factor is
        applied (presumably metres -> kilometres; confirm against the data
        source). The input frame is not modified.
    factors : dict
        Maps a mode label to its emission factor. Labels are matched against
        the ``"mode"`` column exactly (case-sensitive).
    required_modes : iterable of str, optional
        Modes whose ``carbon_emission_<mode>`` column is guaranteed to exist
        in the result; it is filled with 0.0 when the mode never occurs in
        ``trips``. Names are lower-cased to build the column name. The default
        reproduces the three columns that used to be hard-coded.

    Returns
    -------
    pd.DataFrame
        Columns ``to_id``, ``from_id``, ``carbon_emission`` (sum over all
        modes) and one ``carbon_emission_<mode>`` column (lower-case mode
        name) for every mode present in ``trips`` or listed in
        ``required_modes``.

    Warns
    -----
    UserWarning
        When ``trips`` contains non-null mode labels that have no entry in
        ``factors``. Those rows still count as zero emissions, exactly as
        before, but the gap is no longer silent.
    """
    # Local import: the notebook's import cell does not provide `warnings`.
    import warnings

    df = trips.copy()

    factor_per_row = df["mode"].map(factors)

    # A NaN factor on a row with a real mode label means the label is missing
    # from `factors`. Report it so that a typo or an unexpected mode does not
    # silently deflate the totals.
    unmapped = df.loc[factor_per_row.isna() & df["mode"].notna(), "mode"].unique()
    if len(unmapped) > 0:
        names = ", ".join(sorted(map(str, unmapped)))
        warnings.warn(
            f"compute_mode_emissions: no emission factor for mode(s) {names}; "
            "their emissions are counted as 0",
            stacklevel=2,
        )

    df["emission"] = (df["distance"] / 1000.0) * factor_per_row.fillna(0)

    # Total emissions (all modes combined).
    total = (df.groupby(["to_id", "from_id"], as_index=False)["emission"]
               .sum()
               .rename(columns={"emission": "carbon_emission"}))

    # Per-mode emissions -> wide table, columns renamed to carbon_emission_<mode>.
    by_mode_wide = (df.groupby(["to_id", "from_id", "mode"], as_index=False)["emission"].sum()
                      .pivot(index=["to_id", "from_id"], columns="mode", values="emission")
                      .fillna(0))
    by_mode_wide.columns = [f"carbon_emission_{str(c).lower()}" for c in by_mode_wide.columns]
    by_mode_wide = by_mode_wide.reset_index()

    out = total.merge(by_mode_wide, on=["to_id", "from_id"], how="left")

    # Make sure the requested per-mode columns exist even when a mode is unused.
    for mode in required_modes:
        col = f"carbon_emission_{str(mode).lower()}"
        if col not in out.columns:
            out[col] = 0.0

    return out


In [6]:
# Per-OD-pair emissions for each of the four trip tables, all computed with the
# same factor table.
df1, df2, df3, df4 = (
    compute_mode_emissions(trip_table, emission_factors)
    for trip_table in (m14, n14, m15, n15)
)

In [7]:
def merge_with_users_and_totals(em_df: pd.DataFrame, users_df: pd.DataFrame) -> pd.DataFrame:
    """Attach user counts to per-OD emissions and scale every emission column.

    ``em_df`` is left-joined to ``users_df`` on ``from_id`` / ``to_id``. For
    ``carbon_emission`` and each ``carbon_emission_<mode>`` column a matching
    ``total_emission`` / ``total_emission_<mode>`` column is added, holding the
    emission multiplied by ``EXTRAPOLATED_NUMBER_OF_USERS``. Missing user
    counts and missing emissions are treated as 0 in that product; the
    ``EXTRAPOLATED_NUMBER_OF_USERS`` column itself keeps its NaNs. An
    ``"Unnamed: 0"`` column, if present after the join, is removed.
    """
    prefix = "carbon_emission_"

    merged = pd.merge(em_df, users_df, how="left", on=["from_id", "to_id"])
    n_users = merged["EXTRAPOLATED_NUMBER_OF_USERS"].fillna(0)

    # Snapshot the per-mode columns before new columns are appended.
    mode_columns = [name for name in merged.columns if name.startswith(prefix)]

    merged["total_emission"] = merged["carbon_emission"].fillna(0) * n_users
    for name in mode_columns:
        suffix = name[len(prefix):]
        merged["total_emission_" + suffix] = merged[name].fillna(0) * n_users

    # Drop the stray index column only when the join actually brought one in.
    return merged.drop(columns=["Unnamed: 0"], errors="ignore")

# Align the OD table's key columns with the from_id / to_id names used by the
# emission tables so the two can be joined.
_od_key_names = {"ORIGIN_CODE9": "from_id", "DESTINATION_CODE9": "to_id"}
td_renamed = td.rename(columns=_od_key_names)

In [8]:
# Join every emission table with the OD user counts and add the scaled totals.
merged_df1, merged_df2, merged_df3, merged_df4 = (
    merge_with_users_and_totals(emissions, td_renamed)
    for emissions in (df1, df2, df3, df4)
)

In [9]:
# Discard rows that have no user count (EXTRAPOLATED_NUMBER_OF_USERS is NaN),
# e.g. OD pairs the left join could not match in the user table.
merged_df1, merged_df2, merged_df3, merged_df4 = (
    frame.dropna(subset=["EXTRAPOLATED_NUMBER_OF_USERS"])
    for frame in (merged_df1, merged_df2, merged_df3, merged_df4)
)

In [10]:
def group_totals(df):
    """Sum every ``total_emission*`` column per origin (``from_id``).

    Returns a frame with ``from_id`` as a regular column followed by the
    summed ``total_emission`` columns; all other columns are left out.
    """
    total_columns = []
    for name in df.columns:
        if name.startswith("total_emission"):
            total_columns.append(name)

    per_origin = df.groupby("from_id", as_index=False)
    return per_origin[total_columns].sum()

# Collapse each merged table to one row of emission totals per origin.
DF1, DF2, DF3, DF4 = (
    group_totals(merged)
    for merged in (merged_df1, merged_df2, merged_df3, merged_df4)
)

In [11]:
path5 = "/users/eleves-b/2023/wenrui.dai/Desktop/environment/Helsinki/h3_polygons_Helsinki.gpkg"

# Polygon layer; its "ID" column is renamed so it can be joined on from_id.
h3 = gpd.read_file(path5)
h3_renamed = h3.rename(columns={"ID": "from_id"})

# Left-join the polygons onto each per-origin totals table, then wrap the
# result as a GeoDataFrame that carries the polygon layer's CRS.
GDF1, GDF2, GDF3, GDF4 = (
    gpd.GeoDataFrame(
        pd.merge(origin_totals, h3_renamed, on="from_id", how="left"),
        geometry="geometry",
        crs=h3.crs,
    )
    for origin_totals in (DF1, DF2, DF3, DF4)
)

In [16]:
# Preview the first scenario: per-origin emission totals joined to polygon geometry.
GDF1

Unnamed: 0,from_id,total_emission,total_emission_bus,total_emission_rail,total_emission_subway,total_emission_tram,total_emission_walk,total_emission_ferry,geometry
0,890899610dbffff,0.0000,0.0000,0.0,0.0000,0.0,0.0,0.0,"POLYGON ((24.5931 60.24495, 24.59578 60.24427,..."
1,8908996125bffff,0.0000,0.0000,0.0,0.0000,0.0,0.0,0.0,"POLYGON ((24.62679 60.22659, 24.62947 60.22591..."
2,89089961267ffff,0.0000,0.0000,0.0,0.0000,0.0,0.0,0.0,"POLYGON ((24.64072 60.23779, 24.64341 60.23711..."
3,89089961293ffff,0.0000,0.0000,0.0,0.0000,0.0,0.0,0.0,"POLYGON ((24.60702 60.23815, 24.6097 60.23747,..."
4,8908996130bffff,0.0000,0.0000,0.0,0.0000,0.0,0.0,0.0,"POLYGON ((24.64368 60.2534, 24.64637 60.25272,..."
...,...,...,...,...,...,...,...,...,...
2477,891126dad17ffff,0.0000,0.0000,0.0,0.0000,0.0,0.0,0.0,"POLYGON ((25.06857 60.15948, 25.07125 60.15879..."
2478,891126dad87ffff,0.0000,0.0000,0.0,0.0000,0.0,0.0,0.0,"POLYGON ((25.06052 60.16156, 25.0632 60.16087,..."
2479,891126dad8fffff,55941.4053,39068.0928,0.0,16873.3125,0.0,0.0,0.0,"POLYGON ((25.06342 60.15917, 25.06611 60.15847..."
2480,891126dadabffff,0.0000,0.0000,0.0,0.0000,0.0,0.0,0.0,"POLYGON ((25.07081 60.1622, 25.07349 60.1615, ..."


In [17]:
output_dir = "/users/eleves-b/2023/wenrui.dai/Desktop/environment/Helsinki"

# Pair every scenario layer with its GeoPackage destination, then write them
# out in the same order as before.
_exports = (
    (GDF1, f"{output_dir}/TD_414M.gpkg"),
    (GDF2, f"{output_dir}/TD_414N.gpkg"),
    (GDF3, f"{output_dir}/TD_415M.gpkg"),
    (GDF4, f"{output_dir}/TD_415N.gpkg"),
)
for _layer, _destination in _exports:
    _layer.to_file(_destination, driver="GPKG")