In [1]:
import json
import geohash
import collections
import pandas as pd
from geopy.distance import geodesic
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from shapely.ops import nearest_points

In [2]:
def grid_dict(jsonpath):
    """
    Build a reverse index from geohash grid cell to the fences associated with it.

    The file at ``jsonpath`` holds a single JSON object that maps each fence ID
    to a list of grid cells; this function inverts that mapping.

    :type jsonpath: String, the path of the JSON file that stores each fence ID and its grids
    :rtype: collections.defaultdict(set), grid -> set of fence IDs (exactly as they appear as JSON keys)
    """
    # json.load parses the whole document (the previous readline() only saw the
    # first line, so pretty-printed JSON failed), and the context manager closes
    # the handle even if reading raises.
    with open(jsonpath, "r") as f:
        fence_grid = json.load(f)
    grid_fence = collections.defaultdict(set)
    for fence_id, grids in fence_grid.items():
        for grid in grids:
            grid_fence[grid].add(fence_id)
    return grid_fence

def point_grids(point, precision=7):
    """
    Return the geohash cell that contains ``point`` plus the cells around it.

    :type point: Tuple or List, format: (Latitude, Longitude)
    :type precision: int, geohash length to encode with; defaults to 7, the value that used to be hard-coded
    :rtype: List, related geohash grids to current point; centre cell first, each cell listed once
    """
    center = geohash.encode(point[0], point[1], precision)
    # Widen the centre cell to its 3x3 block so that a fence lying just across a
    # cell border is not missed (the "wrong nearest point" problem).
    nears = geohash.expand(center)
    # expand() may itself include the centre cell (python-geohash appends it to
    # the neighbours), so drop duplicates while keeping the centre-first order.
    return list(dict.fromkeys([center] + nears))

In [3]:
# Load the two input tables used by the rest of the notebook: one row per
# parking fence and one row per bike, read from the cleaned-data directory.
fences = pd.read_csv('../cleaned_data/suggestions/fences_fillall.csv')
bikes = pd.read_csv('../cleaned_data/suggestions/bikes_fillall.csv')

In [4]:
# Print the schema summary of the fence table (columns, non-null counts, dtypes).
fences.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14071 entries, 0 to 14070
Data columns (total 37 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   FENCE_ID      14071 non-null  object 
 1   LATITUDE_0    14071 non-null  float64
 2   LONGITUDE_0   14071 non-null  float64
 3   LATITUDE_1    14071 non-null  float64
 4   LONGITUDE_1   14071 non-null  float64
 5   LATITUDE_2    14071 non-null  float64
 6   LONGITUDE_2   14071 non-null  float64
 7   LATITUDE_3    14071 non-null  float64
 8   LONGITUDE_3   14071 non-null  float64
 9   LATITUDE_4    14071 non-null  float64
 10  LONGITUDE_4   14071 non-null  float64
 11  F_LATITUDE    14071 non-null  float64
 12  F_LONGITUDE   14071 non-null  float64
 13  ROAD          14071 non-null  object 
 14  AREA          14071 non-null  float64
 15  FID           14071 non-null  int64  
 16  LENGTH        14071 non-null  float64
 17  WIDTH         14071 non-null  float64
 18  21_0          14071 non-nu

我们需要一个衡量停车围栏是否"又多又挤"的指标。因此与 solution 中对 cluster 的处理方式相同,先分别计算每个围栏 FLOW 与 FLOW_DENSITY 的 z-score,再将二者相加得到 MIXED_SCORE。

In [5]:
# Standardise FLOW and FLOW_DENSITY over all fences (zero mean, unit sample
# standard deviation) and add the two z-scores into one crowding indicator.
flow_zscore = fences['FLOW'].sub(fences['FLOW'].mean()).div(fences['FLOW'].std())
density_zscore = (
    fences['FLOW_DENSITY']
    .sub(fences['FLOW_DENSITY'].mean())
    .div(fences['FLOW_DENSITY'].std())
)
fences['MIXED_SCORE'] = flow_zscore + density_zscore

In [6]:
# Reverse index: geohash grid cell -> set of fence IDs, built by grid_dict from
# the precomputed fence -> grids JSON file.
grid_fences = grid_dict('../cleaned_data/middlewares/fence_grids.json')

In [7]:
# Use one bike that belongs to the fence with the highest FLOW as the example
# user position.
key = fences.sort_values("FLOW", ascending=False)['FID'].iloc[0]
a_bike = bikes[bikes['FID'] == key].iloc[0]
bike_pos = (a_bike['LATITUDE'], a_bike['LONGITUDE'])
# shapely sees x=latitude, y=longitude here; the fence polygons below are built
# in the same (lat, lon) order, so the two stay consistent.
point = Point(bike_pos)
# The dispatch range may exceed 153 m, so the 3x3 block of 7-character geohash
# cells is used, which gives a span of more than 400 m. A shorter (coarser)
# geohash could be considered for real dispatching.
bike_near_grids = point_grids(bike_pos)

# Empirical parameters, meant to be tuned; the values here are arbitrary placeholders.
max_dist = 300
dist_weight = 0.5
active_day_weight = 0.2
flow_weight = 0.3

# Gather every fence registered in any of the nearby grid cells.
near_fences = set()
for grid in bike_near_grids:
    # .get() keeps the lookup read-only: indexing the defaultdict directly would
    # insert an empty set for every cell that has no fence.
    near_fences.update(grid_fences.get(grid, ()))
# Sort numerically so the row order of the result is the same on every run
# (iteration order of a set of strings changes between kernel restarts).
near_fences = sorted(near_fences, key=int)

near_f, mixed_score, active_days, distances, density, flow = [], [], [], [], [], []

# No road-network graph is available, so straight-line geographic distance is
# used as a stand-in; with graph data, Dijkstra's algorithm would be used instead.
for fid in near_fences:
    fid = int(fid)
    # NOTE(review): the lookups below treat the fence ID as the row label /
    # position of `fences` — presumably FID equals the RangeIndex; confirm.
    curfence_mixed_score = fences.loc[fid, 'MIXED_SCORE']
    curfence_active_day = fences.loc[fid, 'ACTIVE_DAYS']
    # Columns 1..10 are the five (LATITUDE_i, LONGITUDE_i) corner pairs.
    curfence_position = fences.iloc[fid, 1:11]
    curfence_density = fences.loc[fid, 'FLOW_DENSITY']
    curfence_flow = fences.loc[fid, 'FLOW']
    # .iloc makes the positional access explicit; plain [] with integer keys on
    # a string-labelled Series is deprecated in pandas 2.x.
    polygon_data = [
        (curfence_position.iloc[i], curfence_position.iloc[i + 1])
        for i in range(0, 10, 2)
    ]
    polygon = Polygon(polygon_data)
    # Closest point of the fence polygon to the bike; shapely works on planar
    # coordinates, so "closest" is measured in degree space here.
    boundary_obj = nearest_points(polygon, point)[0]
    nearest_point = (boundary_obj.x, boundary_obj.y)
    dist = geodesic(bike_pos, nearest_point).meters
    if dist < max_dist:
        mixed_score.append(curfence_mixed_score)
        active_days.append(curfence_active_day)
        distances.append(dist)
        near_f.append(fid)
        flow.append(curfence_flow)
        density.append(curfence_density)

In [8]:
# One row per candidate fence within max_dist, assembled from the parallel
# lists filled by the search loop.
near_df = pd.DataFrame({
    'FID': near_f,
    'ACTIVE_DAYS': active_days,
    'DENSITY': density,
    'FLOW': flow,
    'MIXED_SCORE': mixed_score,
    'DISTANCE': distances,
})


def get_zscore(dfseries):
    """
    Standardise a numeric Series to zero mean and unit sample standard deviation.

    :type dfseries: pandas.Series of numbers
    :rtype: pandas.Series with the same index; all zeros when the input has no
            spread (constant values or a single row)
    """
    centered = dfseries - dfseries.mean()
    spread = dfseries.std()
    # std() is 0 for a constant column and NaN for a single row; dividing by it
    # would turn every downstream score into NaN/inf. Such a column carries no
    # ranking information, so it contributes 0 instead.
    if pd.isna(spread) or spread == 0:
        return centered * 0.0
    return centered / spread

# Weighted sum of the standardised criteria. Rows are then sorted ascending,
# so the fence with the lowest combined score comes first.
near_df['SCORE'] = (
    get_zscore(near_df['ACTIVE_DAYS']) * active_day_weight
    + get_zscore(near_df['MIXED_SCORE']) * flow_weight
    + get_zscore(near_df['DISTANCE']) * dist_weight
)
sorted_df = near_df.sort_values("SCORE")

In [9]:
# Candidate fences ordered by ascending SCORE; the first row is the one picked below.
sorted_df

Unnamed: 0,FID,ACTIVE_DAYS,DENSITY,FLOW,MIXED_SCORE,DISTANCE,SCORE
12,1758,0,-0.761995,-9,-0.735531,180.466975,-0.651051
0,11440,2,0.245084,3,0.15047,173.929824,-0.489922
11,6490,5,47.920961,565,41.854054,9.412627,-0.487807
22,9678,0,-2.61564,-18,-1.849687,212.511926,-0.401907
2,9679,0,-2.221277,-17,-1.648761,214.396922,-0.379916
19,1759,3,0.259898,3,0.156535,179.992604,-0.339905
8,1752,0,-0.552064,-6,-0.531159,216.450368,-0.331223
16,9732,0,0.0,0,-0.068293,219.936623,-0.288041
21,9750,3,0.587687,4,0.330206,188.271769,-0.262832
3,9675,1,-0.128349,-1,-0.160314,213.607034,-0.248932


In [10]:
# The first row of sorted_df has the lowest SCORE; pull its fields out one by
# one so each keeps its own dtype.
idx = sorted_df['FID'].iat[0]
# Label lookup in `fences`; presumably FID matches the row label — TODO confirm.
fence_name = fences.loc[idx, 'FENCE_ID']
dist = sorted_df['DISTANCE'].iat[0]
flow_there = sorted_df['FLOW'].iat[0]
density_there = sorted_df['DENSITY'].iat[0]

In [11]:
# Report the recommendation: the target fence's name, its distance from the
# user in metres, and that fence's flow and flow density.
print("将用户引导至{}, 距离当前用户为{}米, 该点流量为{}, 流量密度为{}".format(fence_name, dist, flow_there, density_there))

将用户引导至会展路(塔埔路至吕岭路段 )_L_2, 距离当前用户为180.466974998578米, 该点流量为-9, 流量密度为-0.7619954224846628
