In [5]:
import pandas as pd

# Merge the September and October trip files (concat aligns on column names),
# drop rows that are exact duplicates across all columns, order the result by
# the started_at column ascending, and renumber the index from zero.
monthly_paths = ["DC_bike/202509-tripdata.csv", "DC_bike/202510-tripdata.csv"]
df1, df2 = (pd.read_csv(path) for path in monthly_paths)
out = (
    pd.concat([df1, df2], ignore_index=True)
    .drop_duplicates()
    .sort_values(by="started_at", ascending=True)
    .reset_index(drop=True)
)

# Save the combined table without writing the index column.
out.to_csv("DC_bike/20250910-tripdata.csv", index=False)

In [18]:
import pandas as pd

# Build the origin/destination table for the October trips: keep only the
# trip timestamps and endpoint coordinates, drop incomplete rows, and pack
# each endpoint into a (lng, lat) tuple.
trip_columns = [
    "started_at", "ended_at",
    "start_lat", "start_lng",
    "end_lat", "end_lng",
]
df = pd.read_csv("DC_bike/202510-tripdata.csv", usecols=trip_columns)

print(f"line counts before cleaning: {len(df)}")
# Every loaded column is needed below, so a missing value in any of them
# disqualifies the row.
df = df.dropna(subset=trip_columns)
print(f"line counts after cleaning: {len(df)}")

# Pair the coordinate columns with zip instead of a row-wise apply(axis=1):
# the resulting (lng, lat) tuples are the same, but no per-row Series is
# built, which matters on several hundred thousand rows.
df["start_location"] = list(zip(df["start_lng"], df["start_lat"]))
df["end_location"] = list(zip(df["end_lng"], df["end_lat"]))

df = df.rename(columns={
    "started_at": "start_time",
    "ended_at":   "end_time"
})

# Tuples are serialised by to_csv as text such as "(-77.0, 38.9)".
out = df[["start_time", "start_location", "end_time", "end_location"]]
out.to_csv("DC_bike/bike_network-10.csv", index=False)

line counts before cleaning: 624869
line counts after cleaning: 624527


In [27]:
import pandas as pd
import geopandas as gpd
import ast
import networkx as nx
from shapely.geometry import Point
from networkx.algorithms.community import louvain_communities

def run_louvain_from_data(trans_data, shp_path, out_path="partition.csv",
                          name_col="NAME", resolution=2.0, seed=42):
    """Partition the regions of a polygon layer into Louvain communities.

    Each trip's start and end point is assigned to the polygon it intersects.
    An undirected graph is then built whose nodes are region names and whose
    edge weights count the trips between two different regions (in either
    direction). Trips that start and end in the same region, and trips with
    an endpoint outside every polygon, are ignored.

    Parameters
    ----------
    trans_data : pandas.DataFrame
        Must contain ``start_location`` and ``end_location`` columns holding
        coordinate pairs, either as tuples or as their string form (e.g. as
        read back from CSV). The pairs are passed to ``Point`` and treated as
        EPSG:4326 coordinates. The frame is not modified.
    shp_path : str
        Path of the polygon layer; it is reprojected to EPSG:4326.
    out_path : str
        CSV file the partition is written to (UTF-8 with BOM).
    name_col : str
        Column of the polygon layer used as the region (node) identifier.
    resolution : float
        Resolution passed to ``louvain_communities``.
    seed : int
        Random seed passed to ``louvain_communities``.

    Returns
    -------
    pandas.DataFrame
        One row per region with columns ``anc`` and ``community_id``.
    """
    def parse_loc(x):
        # Locations loaded from CSV arrive as strings such as "(-77.0, 38.9)".
        if isinstance(x, str):
            return tuple(ast.literal_eval(x))
        return x

    # Work on a copy so the caller's DataFrame is left untouched.
    df = trans_data.copy()
    df["start_location"] = df["start_location"].apply(parse_loc)
    df["end_location"] = df["end_location"].apply(parse_loc)

    gdf_streets = gpd.read_file(shp_path).to_crs(epsg=4326)

    def region_names(locations):
        # Return one region name (or NaN) per location, in input order.
        points = gpd.GeoDataFrame(
            geometry=[Point(xy) for xy in locations],
            crs="EPSG:4326"
        )
        joined = gpd.sjoin(points, gdf_streets, how="left", predicate="intersects")
        # A point lying on a shared boundary intersects several polygons and
        # yields several joined rows; keep the first match so the result has
        # exactly one entry per input point.
        joined = joined[~joined.index.duplicated(keep="first")]
        return joined[name_col].reindex(points.index).to_numpy()

    df["start_anc"] = region_names(df["start_location"])
    df["end_anc"] = region_names(df["end_location"])

    # Drop trips with an endpoint that matched no polygon.
    df = df.dropna(subset=["start_anc", "end_anc"])

    G = nx.Graph()
    # Iterate the two columns directly (same row order as iterrows, without
    # building a Series per row).
    for u, v in zip(df["start_anc"], df["end_anc"]):
        if u == v:
            continue
        if G.has_edge(u, v):
            G[u][v]["weight"] += 1
        else:
            G.add_edge(u, v, weight=1)

    print(f"Nodes: {G.number_of_nodes()}, edges: {G.number_of_edges()}")

    communities = louvain_communities(G, weight="weight", seed=seed, resolution=resolution)
    partition = {node: i for i, comm in enumerate(communities) for node in comm}

    # Write out the result.
    partition_df = pd.DataFrame(list(partition.items()), columns=["anc", "community_id"])
    partition_df.to_csv(out_path, index=False, encoding="utf-8-sig")

    print(f"number of {len(communities)} communities, saved to {out_path}")
    return partition_df


# Load the October origin/destination table and partition the ANC polygons
# into communities; the partition is also written to the given CSV path.
trans_data = pd.read_csv("DC_bike/bike_network-10.csv")
partition_df = run_louvain_from_data(
    trans_data,
    "DC_bike/gis/Advisory_Neighborhood_Commissions_from_2023.shp",
    out_path="output/DC_bike/anc_partition.csv",
)

Nodes: 46, edges: 866
number of 11 communities, saved to output/DC_bike/anc_partition.csv


In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import distinctipy
import itertools

def plot_community_map(
    streets_shp_path,              # polygon shapefile that gets coloured by community
    partition_csv_path,            # partition CSV (key column + community_id)
    out_path=None,
    draw_district_labels=True,     # currently has no effect (see docstring)
    districts_shp_path=None,       # optional boundary layer drawn on top
    name_col="name",               # join key in the polygon shapefile
    partition_key="street"         # join key in the partition CSV
):
    """Draw a map of polygons coloured and hatched by community id.

    Parameters
    ----------
    streets_shp_path : str
        Polygon layer to colour.
    partition_csv_path : str
        CSV with a ``community_id`` column and a key column named
        ``partition_key``.
    out_path : str or None
        If given, the figure is saved there at 300 dpi before being shown.
    draw_district_labels : bool
        Accepted for backward compatibility; district labels are not drawn
        by this version of the function.
    districts_shp_path : str or None
        Optional layer whose boundaries are overlaid in red. It is
        reprojected to the CRS of the polygon layer when they differ.
        When None, no overlay is drawn.
    name_col, partition_key : str
        Columns used to join the polygon layer with the partition. The
        defaults keep the original ``name`` / ``street`` pairing; pass other
        names when the inputs use different columns (for example a partition
        whose key column is ``anc``).

    Returns
    -------
    geopandas.GeoDataFrame
        The polygon layer left-joined with the partition.
    """
    # 1) Load the inputs.
    gdf_streets = gpd.read_file(streets_shp_path)
    df_partition = pd.read_csv(partition_csv_path)

    # 2) Load the optional overlay and align its CRS with the polygon layer.
    gdf_districts = None
    if districts_shp_path is not None:
        gdf_districts = gpd.read_file(districts_shp_path)
        if gdf_districts.crs != gdf_streets.crs:
            gdf_districts = gdf_districts.to_crs(gdf_streets.crs)

    # 3) Attach the community id to each polygon; polygons without a match
    #    keep a missing community_id.
    merged = gdf_streets.merge(
        df_partition, how="left", left_on=name_col, right_on=partition_key
    )

    # 4) One colour per community, hatch patterns reused cyclically.
    communities = sorted(merged["community_id"].dropna().unique())
    num_comms = len(communities)
    colors = distinctipy.get_colors(num_comms) if num_comms > 0 else []
    hatches = ['/', '\\', '|', '-', '+', 'x', 'o', 'O', '.', '*']
    hatch_cycle = itertools.cycle(hatches)

    # 5) Draw.
    fig, ax = plt.subplots(figsize=(12, 12))

    # (A) Unassigned polygons first (light gray fill, black outline). Skipped
    #     when every polygon has a community, so no empty frame is plotted.
    unassigned = merged[merged["community_id"].isna()]
    if not unassigned.empty:
        unassigned.plot(
            color="lightgray", edgecolor="black", linewidth=0.4, ax=ax, label="未分配"
        )

    # (B) One layer per community, with a matching legend entry.
    legend_patches = []
    for color, cid in zip(colors, communities):
        hatch = next(hatch_cycle)
        subset = merged[merged["community_id"] == cid]
        subset.plot(
            facecolor=color,
            edgecolor="black",
            linewidth=0.5,
            hatch=hatch,
            ax=ax
        )
        legend_patches.append(
            mpatches.Patch(facecolor=color, edgecolor="black", hatch=hatch, label=f"Community {int(cid)}")
        )

    # (C) Overlay the boundary layer last (outline only, slightly thicker
    #     line, high zorder) so it stays visible above the fills.
    if gdf_districts is not None:
        gdf_districts.boundary.plot(ax=ax, edgecolor="red", linewidth=1.2, zorder=10)

    # Legend, title and layout.
    if legend_patches:
        ax.legend(
            handles=legend_patches,
            title="Community label",
            loc="upper left",
            bbox_to_anchor=(1.02, 1),
            borderaxespad=0,
            fontsize=9
        )
    ax.set_title("Static bike community", fontsize=15, fontweight="bold")
    ax.axis("off")

    plt.tight_layout()
    if out_path is not None:
        plt.savefig(out_path, dpi=300, bbox_inches="tight")
    plt.show()
    return merged

# Render the community map and keep the joined GeoDataFrame.
# NOTE(review): the input paths below are Shanghai layers under
# transport_data/gis/ plus street_partition.csv, while the figure is saved
# under plot/DC_bike/ — confirm these are the intended inputs.
community_map_kwargs = {
    "streets_shp_path": "transport_data/gis/上海市社区村界矢量数据.shp",
    "partition_csv_path": "street_partition.csv",
    "districts_shp_path": "transport_data/gis/区级数据.shp",
    "out_path": "plot/DC_bike/community_map.png",
}
merged_gdf = plot_community_map(**community_map_kwargs)

In [4]:
import geopandas as gpd


# Load the Advisory Neighborhood Commissions polygon layer and show it as the
# cell output to inspect its columns and geometry.
anc_shp_path = "DC_bike/gis/Advisory_Neighborhood_Commissions_from_2023.shp"
gdf = gpd.read_file(anc_shp_path)

gdf

Unnamed: 0,ANC_ID,NAME,WEB_URL,GIS_ID,SE_ANNO_CA,GLOBALID,CREATED,EDITED,OBJECTID,SHAPEAREA,SHAPELEN,geometry
0,1A,ANC 1A,http://anc.dc.gov/page/advisory-neighborhood-c...,ANCPly_321,,{88F50930-8B79-42D6-BF7A-C31F5A413033},2022-12-21,2022-12-21,321,0,0,"POLYGON ((-8575663.221 4710684.843, -8575663.1..."
1,1B,ANC 1B,http://anc.dc.gov/page/advisory-neighborhood-c...,ANCPly_322,,{E35D70DC-39D5-4FED-9223-20E919625E4D},2022-12-21,2022-12-21,322,0,0,"POLYGON ((-8573606.021 4709417.436, -8573616.9..."
2,1C,ANC 1C,http://anc.dc.gov/page/advisory-neighborhood-c...,ANCPly_323,,{03D85E81-71B0-49F3-A12A-DACCBDDB39DA},2022-12-21,2022-12-21,323,0,0,"POLYGON ((-8576740.225 4709415.623, -8576742.7..."
3,1D,ANC 1D,http://anc.dc.gov/page/advisory-neighborhood-c...,ANCPly_324,,{03F8CF93-E16E-4508-81B6-F935AB200196},2022-12-21,2022-12-21,324,0,0,"POLYGON ((-8575783.338 4711213.775, -8575785.1..."
4,1E,ANC 1E,http://anc.dc.gov/page/advisory-neighborhood-c...,ANCPly_325,,{1C89F951-10EE-4B79-A4FA-67BFED95FCF5},2022-12-21,2022-12-21,325,0,0,"POLYGON ((-8574038.587 4709899.279, -8574054.5..."
5,2A,ANC 2A,http://anc.dc.gov/page/advisory-neighborhood-c...,ANCPly_326,,{6B04B45A-C9A9-44CE-AF80-B79F4F349D77},2022-12-21,2022-12-21,326,0,0,"POLYGON ((-8575347.281 4706227.201, -8575348.0..."
6,2B,ANC 2B,http://anc.dc.gov/page/advisory-neighborhood-c...,ANCPly_327,,{66AB8B2C-019C-4DD3-9FE3-A4471C5BB73E},2022-12-21,2022-12-21,327,0,0,"POLYGON ((-8576793.658 4708163.251, -8576793.5..."
7,2C,ANC 2C,http://anc.dc.gov/page/advisory-neighborhood-c...,ANCPly_328,,{CBDBE6C2-4056-4E1D-88C3-5D5CC6CE4B5E},2022-12-21,2022-12-21,328,0,0,"POLYGON ((-8573290.007 4705689.299, -8573290.0..."
8,2D,ANC 2D,http://anc.dc.gov/page/advisory-neighborhood-c...,ANCPly_329,,{ECAFA288-0C8D-4881-9A0D-BCA9A55FD208},2022-12-21,2022-12-21,329,0,0,"POLYGON ((-8578066.297 4709874.677, -8578079.3..."
9,2E,ANC 2E,http://anc.dc.gov/page/advisory-neighborhood-c...,ANCPly_330,,{57B002D6-8E45-490A-9804-591BB79A60A0},2022-12-21,2022-12-21,330,0,0,"POLYGON ((-8578698.383 4706178.543, -8578724.3..."
