# Preparation

In [2]:
import pandas as pd
import networkx as nx
import netwulf as nw


In [3]:
# Load the OD table from the local data directory. Later cells read its
# origin_id, destination_id, flow and cost columns.
# NOTE(review): the file name suggests NYC taxi origin-destination flows - confirm provenance.
od_f = pd.read_csv("data/NYCTAXI_OD_f.csv")

# od_f["flow"].value_counts()
# Cell output: the number of distinct values in the origin_id column.
od_f["origin_id"].nunique()

263

In [4]:
# od_edge = od_f.loc[od_f["flow"]>0, ["origin_id", "destination_id", "flow","cost"]]
# od_edge.reset_index(drop=True, inplace=True)
# od_edge
# od_edge.to_csv("data/NYCTAXI_OD_edge.csv",index=False)

# Construct network

In [7]:
# Build the directed OD graph: every row with a positive flow becomes an edge
# origin -> destination carrying its flow and cost; rows without a positive
# flow only register their two endpoints as nodes, so such zones still appear
# in the graph even if they end up isolated.
# NOTE(review): iterrows() yields each row as a single Series, so integer ids
# are presumably upcast to float when other columns are float (node labels
# such as 136.0) - confirm before switching to another iteration method.
G = nx.DiGraph()
for _, record in od_f.iterrows():
    origin = record["origin_id"]
    destination = record["destination_id"]
    if not record["flow"] > 0:
        G.add_node(origin)
        G.add_node(destination)
        continue
    G.add_edge(origin, destination, flow=record["flow"], cost=record["cost"])

In [8]:
# Persist the graph built above to a GEXF file next to the input data.
nx.write_gexf(G, "data/NYCTAXI_OD_f.gexf")

In [5]:
# G = nx.barabasi_albert_graph(100,m=1)
# stylized_network, config = nw.visualize(G, plot_in_cell_below=False)
# fig, ax = nw.draw_netwulf(stylized_network, figsize=(10,10))
# plt.savefig("myfigure.pdf")

# Basic statistics

In [56]:
# G.edges
# G.get_edge_data(0.0, 137.0)

In [75]:
def get_network_statistics(G):
    """Summarise basic size, degree and distance statistics of a directed graph.

    Parameters
    ----------
    G : networkx.DiGraph
        Graph to describe. It must expose ``in_degree`` (i.e. be directed) and
        contain at least one node; an empty graph raises ``ZeroDivisionError``.

    Returns
    -------
    dict
        A mapping with the following keys:

        - ``"Number of nodes"`` / ``"Number of edges"``: graph size.
        - ``"Average degree"``: mean total (in + out) degree per node.
        - ``"Average inout-degree"``: mean in-degree per node; in a directed
          graph this equals the mean out-degree, since both sum to the
          number of edges.
        - ``"Hubs"``: list of the node(s) with the highest total degree
          (several entries when there is a tie).
        - ``"Diameter"``: the value of ``nx.diameter(G)``, or the string
          ``"Network is not connected."`` when networkx reports that the
          diameter is undefined.
    """
    # Basic size statistics.
    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()

    # Total degree per node, materialised once and reused below.
    degree_dict = dict(G.degree())
    avg_degree = sum(degree_dict.values()) / num_nodes

    in_degrees = [deg for _, deg in G.in_degree()]
    avg_in_degree = sum(in_degrees) / len(in_degrees)

    # Hubs: nodes whose total degree equals the maximum. The maximum is
    # computed once instead of once per node.
    max_degree = max(degree_dict.values())
    hubs = [node for node, degree in degree_dict.items() if degree == max_degree]

    # Diameter (longest shortest path). networkx raises NetworkXError when some
    # pair of nodes is unreachable; only that case is mapped to the message so
    # that unrelated failures (and KeyboardInterrupt) still propagate.
    try:
        diameter = nx.diameter(G)
    except nx.NetworkXError:
        diameter = "Network is not connected."

    return {
        "Number of nodes": num_nodes,
        "Number of edges": num_edges,
        "Average degree": avg_degree,
        "Average inout-degree": avg_in_degree,
        "Hubs": hubs,
        "Diameter": diameter
    }

In [76]:
# Compute the summary for the OD graph and print one "name: value" line per statistic.
statistics = get_network_statistics(G)
for key, value in statistics.items():
    print(f"{key}: {value}")

Number of nodes: 263
Number of edges: 23340
Average degree: 177.49049429657794
Average inout-degree: 88.74524714828897
Hubs: [136.0]
Diameter: Network is not connected.


# Structure

在有向图中，节点 i 的聚类系数定义为：实际经过 i 的有向三角形数量，占 i 与其邻居 j、k 之间所有可能构成的有向三角形数量的比例（例如 i -> j, j -> k, k -> i 就是其中一种有向三角形）。这里的有向三元组指由三个节点组成的有序三元组 (i, j, k)，边的方向不同即视为不同的三元组。

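对于有向图（DiGraph），nx.average_clustering() 会直接按有向聚类系数逐节点计算后取平均，并不会把图当作无向图处理。count_zeros 参数与图是否有向无关：它只决定聚类系数为 0 的节点是否计入平均值。设置 count_zeros=False 时，平均值只在聚类系数非零的节点上计算，因此结果不会低于默认的 count_zeros=True。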

In [79]:
# Mean clustering coefficient over the nodes of G. count_zeros=False leaves
# nodes whose coefficient is 0 out of the mean (it does not control whether
# the graph is treated as directed), so the value is at least as large as the
# count_zeros=True mean.
avg_clustering = nx.average_clustering(G, count_zeros=False)
avg_clustering

0.6362071861121511