In [1]:
import osmnx as ox
import networkx as nx
import igraph as ig
import pandas as pd
import numpy as np
import time
from tqdm import tqdm

In [2]:
def change_type(x):
    """Convert ``x`` to ``int``, or return the sentinel ``-1`` if it cannot be converted.

    Parameters
    ----------
    x : object
        Value to convert, e.g. a numeric string or a number.

    Returns
    -------
    int
        ``int(x)`` on success, ``-1`` when ``int(x)`` raises a conversion error.

    Notes
    -----
    Only conversion failures are mapped to ``-1``. Other exceptions, such as
    ``KeyboardInterrupt`` during a long ``apply``, propagate to the caller.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported type; ValueError: non-numeric text or NaN;
        # OverflowError: infinite floats.
        return -1
def get_seconds(x):
    """Return the length of the duration ``x`` in seconds.

    ``x`` is anything ``pd.Timedelta`` accepts, for example the string
    ``'0 days 00:01:46'``. The result is a float.
    """
    duration = pd.Timedelta(x)
    return duration.total_seconds()

In [3]:
# Load the sampled connection table; the three column names are supplied here
# rather than taken from the file.
sample_columns = ['D', 'O', 'no_use_time']
data = pd.read_csv('./share_data/sample/sample_all2_60_15_0', names=sample_columns)

In [5]:
# Keep rows whose minutes field (the two characters just before ":SS") sorts below "15".
# NOTE(review): only the minutes field is compared, as text; the hours and days parts
# are ignored -- confirm that every duration in this file is shorter than one hour.
minutes_field = data['no_use_time'].str.slice(-5, -3)
data = data.loc[minutes_field < '15']

In [6]:
# Preview the filtered frame (the notebook renders a truncated view of it).
data

Unnamed: 0,D,O,no_use_time
0,643,0,0 days 00:01:46
1,663,0,0 days 00:01:59
2,665,0,0 days 00:01:59
3,675,0,0 days 00:02:05
4,723,0,0 days 00:02:05
...,...,...,...
29200978,30332,26810,0 days 00:04:33
29200979,30333,26810,0 days 00:04:33
29200980,30351,26810,0 days 00:04:37
29200981,30368,26810,0 days 00:04:43


In [10]:
 r1 = data.shape[0]  # number of rows before cleaning
# Parse both endpoint columns as integers; change_type maps unparseable values to -1.
# assign() returns a new frame, so nothing is written into `data` itself, which comes
# from boolean filtering and would otherwise trigger a chained-assignment warning.
data = data.assign(
    new_D=data['D'].apply(change_type),
    new_O=data['O'].apply(change_type),
)
# Keep rows where both endpoints parsed, then one row per (new_D, new_O) pair.
both_parsed = (data['new_D'] != -1) & (data['new_O'] != -1)
new_data = data[both_parsed].drop_duplicates(subset=['new_D', 'new_O'])
# Number of valid connections
r2 = len(new_data)
# Number of nodes
r3 = len(set(new_data['new_D']) | set(new_data['new_O']))
# Convert the duration strings to whole seconds, again on a new frame rather than a slice.
new_data = new_data.assign(
    no_use_time=new_data['no_use_time'].apply(get_seconds).astype('int32')
)
# Directed graph with one edge new_O -> new_D per remaining row, weighted by no_use_time.
G = nx.DiGraph()
weighted_rows = zip(new_data['new_O'], new_data['new_D'], new_data['no_use_time'])
G.add_weighted_edges_from(weighted_rows)
# Split every node into a "left" copy (the original int id) and a "right" copy
# (the same id as a string), so each edge u -> v of G becomes u -> str(v).
new_edges = []
left, right = [], []
weighted_edges = []
# Read each weight together with its edge. G.edges() is grouped by source node in
# node-insertion order, which is not the DataFrame row order, so pairing the edges
# with new_data['no_use_time'] by position would attach weights to the wrong edges.
for origin, dest, weight in G.edges(data='weight'):
    dest_copy = str(dest)
    new_edges.append((origin, dest_copy))
    left.append(origin)
    right.append(dest_copy)
    weighted_edges.append((origin, dest_copy, weight))
# Build the bipartite graph (directed)
new_G = nx.DiGraph()
new_G.add_nodes_from(left, bipartite=0)
new_G.add_nodes_from(right, bipartite=1)
new_G.add_weighted_edges_from(weighted_edges)
# Sanity check: fail here instead of silently discarding the result of the check.
assert nx.is_bipartite(new_G), 'split graph is expected to be bipartite'
# Note: the returned matching contains every matched pair twice, once keyed by each endpoint.
match = nx.bipartite.hopcroft_karp_matching(new_G, left)
# Keep only the entries keyed by an int node, i.e. one entry per matched pair.
match_dict = {node: partner for node, partner in match.items() if isinstance(node, int)}
# Number of matched pairs
r4 = len(match_dict)
# Minimum number of covering paths
r6 = G.number_of_nodes() - r4
# Build the dispatch routes from the matched edges.
# Each matched pair k -> v says node v directly follows node k on a route. In a matching
# every node has at most one successor and at most one predecessor, so the routes are
# found by starting at each node without a predecessor and following successors.
# This is linear in the number of matched edges, unlike repeatedly merging path pairs.
successor = {k: int(v) for k, v in match_dict.items()}
has_predecessor = set(successor.values())
path = []
for start in successor:
    if start in has_predecessor:
        continue  # not the first node of its route
    route = [start]
    while route[-1] in successor:
        route.append(successor[route[-1]])
    path.append(route)
# Matched edges that lie on a cycle have no start node and are skipped above;
# fail loudly instead of dropping them silently.
if sum(len(route) - 1 for route in path) != len(successor):
    raise ValueError('matched edges contain a cycle; cannot split them into simple routes')
# Total number of edges over all routes (a route with k nodes has k - 1 edges).
s = sum(len(route) - 1 for route in path)
# All nodes that appear on some route, in route order.
match_nodes = [node for route in path for node in route]
# Number of matched paths
r7 = len(path)
# Number of matched nodes
r5 = len(match_nodes)
# Index the rows by their (new_O, new_D) pair so matched edges can be looked up directly.
new_data = new_data.set_index([new_data.new_O, new_data.new_D], drop=True)
ind = [(origin, int(dest)) for origin, dest in match_dict.items()]
no_use_time_sum = new_data.loc[ind, 'no_use_time'].sum()
# Total no_use_time (empty-driving time) over the matched edges
r8 = no_use_time_sum
# no_use_time per matched node
r9 = r8 / r5

In [15]:
# Show the number of matched nodes (r5) and the number of matched paths (r7).
r5, r7

(30284, 4436)

In [26]:
# Reduce every route to its [first node, last node] pair.
od_match = [[route[0], route[-1]] for route in path]

In [7]:
# Load the per-day sample table; later cells read its pickup_datetime and
# dropoff_datetime columns.
day_sample_path = './data/day_data/sample_data_2.csv'
all_data = pd.read_csv(day_sample_path)

In [14]:
# Number of rows in the per-day sample table.
all_data.shape[0]

30380

In [23]:
# Index labels of all_data that do not appear on any matched route.
# NOTE(review): assumes node ids in match_nodes are all_data index labels -- confirm.
no_match = set(all_data.index).difference(match_nodes)

In [33]:
# Show (unmatched nodes + matched routes, unmatched nodes).
len(no_match) + len(od_match), len(no_match)

(4532, 96)

In [34]:
# Turn the set into a list so its elements can be enumerated by position.
no_match = [*no_match]

In [30]:
# Placeholder with one row per unmatched node plus one row per matched route.
# The row count is derived from the data instead of being copied from an earlier output.
qj_data = pd.DataFrame(np.zeros((len(no_match) + len(od_match), 2)))

In [31]:
# Name the two columns; later cells fill 'O' with pickup times and 'D' with dropoff times.
interval_columns = ['O', 'D']
qj_data.columns = interval_columns

In [37]:
# Unmatched nodes fill the first len(no_match) rows with their own pickup/dropoff values.
# Labels in no_match are looked up in all_data's index, as the per-row version did.
unmatched = all_data.loc[list(no_match), ['pickup_datetime', 'dropoff_datetime']]
# The columns start out as float zeros; switch them to object so the values copied from
# all_data can be stored without relying on an implicit dtype change.
qj_data = qj_data.astype({'O': object, 'D': object})
# Write with .loc on the frame itself; chained `qj_data['O'][i] = ...` writes to an
# intermediate Series and is not guaranteed to update qj_data.
unmatched_rows = qj_data.index[:len(unmatched)]
qj_data.loc[unmatched_rows, 'O'] = unmatched['pickup_datetime'].to_numpy()
qj_data.loc[unmatched_rows, 'D'] = unmatched['dropoff_datetime'].to_numpy()

In [39]:
# Matched routes fill the rows after the unmatched block: 'O' takes the pickup value of
# the route's first node and 'D' the dropoff value of its last node.
# The offset comes from len(no_match) rather than a hard-coded count.
route_offset = len(no_match)
route_starts = [first for first, _ in od_match]
route_ends = [last for _, last in od_match]
# Make sure the columns can hold non-float values before writing (no-op if already object).
qj_data = qj_data.astype({'O': object, 'D': object})
# Write with .loc on the frame itself instead of chained `qj_data['O'][row] = ...`.
route_rows = qj_data.index[route_offset:route_offset + len(od_match)]
qj_data.loc[route_rows, 'O'] = all_data.loc[route_starts, 'pickup_datetime'].to_numpy()
qj_data.loc[route_rows, 'D'] = all_data.loc[route_ends, 'dropoff_datetime'].to_numpy()

In [40]:
# Preview the interval table (column 'O' holds pickup values, 'D' dropoff values).
qj_data

Unnamed: 0,O,D
0,2014-01-01 01:13:00,2014-01-01 01:40:48
1,2014-01-01 01:06:00,2014-01-01 01:31:42
2,2014-01-01 00:12:30,2014-01-01 00:20:14
3,2014-01-01 00:06:21,2014-01-01 00:28:33
4,2014-01-01 00:12:37,2014-01-01 00:32:30
...,...,...
4527,2014-01-01 00:01:00,2014-01-01 01:12:20
4528,2014-01-01 00:02:00,2014-01-01 01:19:25
4529,2014-01-01 00:00:00,2014-01-01 01:18:34
4530,2014-01-01 00:03:34,2014-01-01 01:14:06


In [45]:
# Placeholder with two rows per row of qj_data (one for its 'O' value, one for its 'D' value).
# The size follows qj_data instead of a hard-coded count.
sort_data = pd.DataFrame(np.zeros((2 * len(qj_data), 2)), columns=['point', 'values'])

In [47]:
# All 'O' values first, followed by all 'D' values.
sort_data['point'] = [*qj_data['O'], *qj_data['D']]

In [49]:
# +1 marks an 'O' point and -1 a 'D' point, in the same order as the 'point' column.
# The count follows qj_data instead of a hard-coded number.
interval_count = len(qj_data)
sort_data['values'] = [1] * interval_count + [-1] * interval_count

In [52]:
# Order by point, breaking ties by value so that a -1 row sorts before a +1 row
# carrying the same point.
sort_keys = ['point', 'values']
sort_data = sort_data.sort_values(by=sort_keys)

In [54]:
# Renumber the rows in sorted order; the previous index is kept as a column.
sort_data = sort_data.reset_index()

In [55]:
# Preview the sorted +1/-1 point table.
sort_data

Unnamed: 0,index,point,values
0,285,2014-01-01 00:00:00,1
1,288,2014-01-01 00:00:00,1
2,1110,2014-01-01 00:00:00,1
3,1228,2014-01-01 00:00:00,1
4,1268,2014-01-01 00:00:00,1
...,...,...,...
9059,6164,2014-01-01 01:47:59,-1
9060,4579,2014-01-01 01:48:51,-1
9061,7208,2014-01-01 01:49:57,-1
9062,4559,2014-01-01 01:53:16,-1


In [58]:
# Running sum of the +1/-1 markers in row order; show its maximum.
running_total = sort_data['values'].cumsum()
running_total.max()

4337