# Preparation

In [12]:
import pandas as pd
import networkx as nx
import netwulf as nw
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
# Relation table; its origin_id column later supplies the full node set
# that construct_graph registers on every graph.
rel = pd.read_csv("data/NYCTAXI202401-202403.rel")
rel.head()

Unnamed: 0,rel_id,type,origin_id,destination_id,cost
0,0,geo,0,0,0.0
1,1,geo,0,1,31349.0
2,2,geo,0,2,33747.0
3,3,geo,0,3,17205.0
4,4,geo,0,4,14927.0


In [3]:
# Aggregated origin-destination flows over the whole observation window.
df_total = pd.read_csv("data/total/NYCTAXI_OD_Total.csv")

# One origin-destination table per time period, read in chronological order.
(
    df_morningrush,
    df_morning,
    df_noon,
    df_afternoon,
    df_eveningrush,
    df_night,
) = map(
    pd.read_csv,
    (
        "data/timeperiod/NYCTAXI_OD_Morning Rush.csv",
        "data/timeperiod/NYCTAXI_OD_Morning.csv",
        "data/timeperiod/NYCTAXI_OD_Noon.csv",
        "data/timeperiod/NYCTAXI_OD_Afternoon.csv",
        "data/timeperiod/NYCTAXI_OD_Evening Rush.csv",
        "data/timeperiod/NYCTAXI_OD_Night.csv",
    ),
)

# df_total["flow"].value_counts()
# df_total["origin_id"].nunique()

In [22]:
# Show the aggregated OD table (origin_id, destination_id, flow); the rendered
# output below elides the middle rows.
df_total

Unnamed: 0,origin_id,destination_id,flow
0,0,47,1.000000
1,0,99,1.000000
2,0,186,1.000000
3,0,208,1.000000
4,0,230,1.000000
...,...,...,...
25966,262,257,1.250000
25967,262,258,1.000000
25968,262,259,1.054054
25969,262,260,1.093525


# Construct network

In [4]:
# One empty directed graph per time period, plus one for the aggregated flows.
(
    G_morningrush,
    G_morning,
    G_noon,
    G_afternoon,
    G_eveningrush,
    G_night,
    G_total,
) = (nx.DiGraph() for _ in range(7))


In [5]:
def construct_graph(G, df, nodes=None):
    """Populate ``G`` in place with the origin-destination flows listed in ``df``.

    Parameters
    ----------
    G : graph
        Any object exposing ``add_edge(u, v, **attrs)`` and
        ``add_nodes_from(iterable)``; the notebook passes ``networkx.DiGraph``.
    df : pandas.DataFrame
        Must contain ``origin_id``, ``destination_id`` and ``flow`` columns.
        Each row becomes one edge carrying a ``flow`` attribute.
    nodes : iterable, optional
        Node ids that must exist in ``G`` even if no edge touches them.
        When omitted, the unique ``origin_id`` values of the notebook-level
        ``rel`` table are used.

    Returns
    -------
    None
        ``G`` is modified in place.
    """
    # Iterate the columns directly instead of DataFrame.iterrows(): iterrows()
    # builds one Series per row, which upcasts the integer ids to float because
    # ``flow`` is float (nodes ended up labelled 131.0 instead of 131).
    # tolist() yields native Python values in each column's own dtype.
    origins = df["origin_id"].tolist()
    destinations = df["destination_id"].tolist()
    flows = df["flow"].tolist()
    for origin, destination, flow in zip(origins, destinations, flows):
        G.add_edge(origin, destination, flow=flow)

    if nodes is None:
        # Fall back to the notebook-level relation table.
        nodes = rel["origin_id"].unique().tolist()
    # Register zones without any recorded trip so every graph shares one node set.
    G.add_nodes_from(nodes)
    
# Fill every graph from its matching OD table: aggregate first, then each period.
for graph, frame in (
    (G_total, df_total),
    (G_morningrush, df_morningrush),
    (G_morning, df_morning),
    (G_noon, df_noon),
    (G_afternoon, df_afternoon),
    (G_eveningrush, df_eveningrush),
    (G_night, df_night),
):
    construct_graph(graph, frame)


In [6]:
# nx.write_gexf(G_total,"data/total/NYCTAXI_OD_Total.gexf")

# nx.write_gexf(G_morningrush,"data/timeperiod/NYCTAXI_OD_Morning Rush.gexf")
# nx.write_gexf(G_morning,"data/timeperiod/NYCTAXI_OD_Morning.gexf")
# nx.write_gexf(G_noon,"data/timeperiod/NYCTAXI_OD_Noon.gexf")
# nx.write_gexf(G_afternoon,"data/timeperiod/NYCTAXI_OD_Afternoon.gexf")
# nx.write_gexf(G_eveningrush,"data/timeperiod/NYCTAXI_OD_Evening Rush.gexf")
# nx.write_gexf(G_night,"data/timeperiod/NYCTAXI_OD_Night.gexf")

In [13]:
# 服务器上不可用
# stylized_network, config = nw.visualize(G_total, plot_in_cell_below=False)
# fig, ax = nw.draw_netwulf(stylized_network, figsize=(10,10))
# plt.savefig("myfigure.pdf")

# Basic statistics

In [20]:
def get_network_statistics(G):
    """Summarise size, degree and distance statistics of a directed graph.

    Parameters
    ----------
    G : networkx.DiGraph
        Graph to describe. It must expose ``number_of_nodes()``,
        ``number_of_edges()``, ``degree()`` and ``in_degree()``.

    Returns
    -------
    dict
        ``"Number of nodes"``, ``"Number of edges"``, ``"Average degree"``
        (in + out), ``"Average inout-degree"`` (mean in-degree),
        ``"Hubs"`` (list of the nodes sharing the maximum total degree) and
        ``"Diameter"`` (an int, or the string ``"Network is not connected."``
        when ``nx.diameter`` raises ``NetworkXError`` or the graph is empty).
    """
    not_connected = "Network is not connected."

    # Basic size statistics.
    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()

    degree_dict = dict(G.degree())
    in_degrees = [deg for _, deg in G.in_degree()]

    # Average degrees; an empty graph reports 0.0 instead of raising
    # ZeroDivisionError.
    avg_degree = sum(degree_dict.values()) / num_nodes if num_nodes else 0.0
    avg_in_degree = sum(in_degrees) / len(in_degrees) if in_degrees else 0.0

    # Hubs: every node whose total degree equals the maximum. The maximum is
    # computed once rather than once per node.
    max_degree = max(degree_dict.values(), default=None)
    hubs = [node for node, degree in degree_dict.items() if degree == max_degree]

    # Diameter (longest shortest path). Only NetworkXError is treated as
    # "not connected"; any other failure propagates instead of being hidden.
    # An empty graph has no diameter either and gets the same sentinel.
    try:
        diameter = nx.diameter(G) if num_nodes else not_connected
    except nx.NetworkXError:
        diameter = not_connected

    return {
        "Number of nodes": num_nodes,
        "Number of edges": num_edges,
        "Average degree": avg_degree,
        "Average inout-degree": avg_in_degree,
        "Hubs": hubs,
        "Diameter": diameter,
    }

def print_statistics(G):
    """Print a 45-dash rule, then one ``key: value`` line per statistic of ``G``.

    The statistics come from ``get_network_statistics(G)`` and are printed in
    the order that function returns them.
    """
    rule = "-" * 45
    print(rule)
    for label, figure in get_network_statistics(G).items():
        print(f"{label}: {figure}")

In [21]:
# Report the aggregate network first, then each time period in chronological order.
for graph in (
    G_total,
    G_morningrush,
    G_morning,
    G_noon,
    G_afternoon,
    G_eveningrush,
    G_night,
):
    print_statistics(graph)

---------------------------------------------
Number of nodes: 263
Number of edges: 25971
Average degree: 197.49809885931558
Average inout-degree: 98.74904942965779
Hubs: [131.0]
Diameter: Network is not connected.
---------------------------------------------
Number of nodes: 263
Number of edges: 10225
Average degree: 77.75665399239544
Average inout-degree: 38.87832699619772
Hubs: [131.0]
Diameter: Network is not connected.
---------------------------------------------
Number of nodes: 263
Number of edges: 12728
Average degree: 96.79087452471482
Average inout-degree: 48.39543726235741
Hubs: [131.0]
Diameter: Network is not connected.
---------------------------------------------
Number of nodes: 263
Number of edges: 11169
Average degree: 84.93536121673004
Average inout-degree: 42.46768060836502
Hubs: [131.0]
Diameter: Network is not connected.
---------------------------------------------
Number of nodes: 263
Number of edges: 11717
Average degree: 89.10266159695817
Average inout-degre

# Structure

在有向图中，节点 i 的聚类系数定义为：经过 i 的实际有向三角形数量，与 i 的邻居之间所有可能形成的有向三角形数量之比。例如，当 j 和 k 都是 i 的邻居时，i -> j、j -> k、k -> i 就构成一个经过 i 的有向三角形。这里的有向三元组指由三个节点组成、并区分边方向的有序组合。

对于有向图（DiGraph），nx.average_clustering() 会自动采用有向图版本的聚类系数，不会把图当作无向图处理，也不需要额外参数。count_zeros 参数只决定取平均时是否包含聚类系数为 0 的节点：默认 count_zeros=True 会包含这些节点；设置为 False 时只对聚类系数非零的节点取平均，因此得到的平均值通常更高。

In [None]:
# No notebook-level variable named `G` exists (it is only a function parameter
# above), so the aggregate graph is used here.
# For a DiGraph, average_clustering already applies the directed definition;
# count_zeros=False only leaves zero-clustering nodes out of the mean.
avg_clustering = nx.average_clustering(G_total, count_zeros=False)
avg_clustering