In [1]:
import json
import geohash
import collections
import pandas as pd
from geopy.distance import geodesic
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from shapely.ops import nearest_points

In [2]:
def grid_dict(jsonpath):
    """
    type jsonpath: String, the path where stores fenceID and its grids
    """
    f=open(jsonpath,"r")
    content=f.readline()
    f.close()
    fence_grid = json.loads(content)
    grid_fence = collections.defaultdict(set)
    for k, v in fence_grid.items():
        for grid in v:
            grid_fence[grid].add(k)
    return grid_fence

def point_grids(point):
    """
    :type point: Tuple or List, format: (Latitude, Longitude)
    :rtype: List, related geohash grids to current point
    """
    center = geohash.encode(point[0], point[1], 7)
    # 将center扩展为九宫格,避免"错误最近点"现象出现
    nears = geohash.expand(center)
    return [center] + nears

In [3]:
fences = pd.read_csv('../cleaned_data/suggestions/fences_fillall.csv')
bikes = pd.read_csv('../cleaned_data/suggestions/bikes_fillall.csv')

In [4]:
fences.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14071 entries, 0 to 14070
Data columns (total 37 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   FENCE_ID      14071 non-null  object 
 1   LATITUDE_0    14071 non-null  float64
 2   LONGITUDE_0   14071 non-null  float64
 3   LATITUDE_1    14071 non-null  float64
 4   LONGITUDE_1   14071 non-null  float64
 5   LATITUDE_2    14071 non-null  float64
 6   LONGITUDE_2   14071 non-null  float64
 7   LATITUDE_3    14071 non-null  float64
 8   LONGITUDE_3   14071 non-null  float64
 9   LATITUDE_4    14071 non-null  float64
 10  LONGITUDE_4   14071 non-null  float64
 11  F_LATITUDE    14071 non-null  float64
 12  F_LONGITUDE   14071 non-null  float64
 13  ROAD          14071 non-null  object 
 14  AREA          14071 non-null  float64
 15  FID           14071 non-null  int64  
 16  LENGTH        14071 non-null  float64
 17  WIDTH         14071 non-null  float64
 18  21_0          14071 non-nu

In [5]:
grid_fences = grid_dict('../cleaned_data/middlewares/fence_grids.json')

In [6]:
key = fences.sort_values("FLOW", ascending=False)['FID'].iloc[0]
a_bike = bikes[bikes['FID'] == key].iloc[0]
bike_pos = (a_bike['LATITUDE'], a_bike['LONGITUDE'])
point = Point(bike_pos)
# 由于调度范围可能大于153米,所以使用了7位geohash九宫格.这样选择跨度可以达到400米以上. 在实际调度过程中可考虑使用更低位的geohash
bike_near_grids = point_grids(bike_pos)

# 经验参数,可以调整.这里是乱填的
max_dist = 300
dist_weight = 0.5
active_day_weight = 0.2
flow_weight = 0.3

near_fences = []
for grid in bike_near_grids:
    near_fences += grid_fences[grid]
near_fences = list(set(near_fences))

near_f, flow, active_days, distances = [], [], [], []

# 因为缺乏图数据所以这里的距离只能使用地理直线距离代替,若能获取图数据,则使用Dijkstra算法
for fid in near_fences:
    fid = int(fid)
    curfence_flow = fences.loc[fid, 'FLOW']
    curfence_active_day = fences.loc[fid, 'ACTIVE_DAYS']
    curfence_position = fences.iloc[fid, 1:11]
    polygon_data = []
    for i in range(0, 10, 2):
        polygon_data.append((curfence_position[i], curfence_position[i + 1]))
    polygon = Polygon(polygon_data)
    boundary_obj = nearest_points(polygon, point)[0]
    nearest_point = boundary_obj.bounds[:2]
    dist = geodesic(bike_pos, nearest_point).meters
    if dist < max_dist:
        flow.append(curfence_flow)
        active_days.append(curfence_active_day)
        distances.append(dist)
        near_f.append(fid)

In [7]:
near_df = pd.DataFrame()
near_df['FID'] = near_f
near_df['ACTIVE_DAYS'] = active_days
near_df['FLOW'] = flow
near_df['DISTANCE'] = distances

def get_zscore(dfseries):
    return (dfseries - dfseries.mean())/dfseries.std()

near_df['SCORE'] = get_zscore(near_df['ACTIVE_DAYS']) * active_day_weight + get_zscore(near_df['FLOW']) * flow_weight + get_zscore(near_df['DISTANCE']) * dist_weight
sorted_df = near_df.sort_values("SCORE")

In [15]:
sorted_df

Unnamed: 0,FID,ACTIVE_DAYS,FLOW,DISTANCE,SCORE
10,1758,0,-9,180.466975,-0.649823
15,11440,2,3,173.929824,-0.48818
19,6490,5,565,9.412627,-0.467737
16,9678,0,-18,212.511926,-0.388687
12,9679,0,-17,214.396922,-0.370153
23,1759,3,3,179.992604,-0.33833
1,1752,0,-6,216.450368,-0.329395
6,9732,0,0,219.936623,-0.286503
20,9750,3,4,188.271769,-0.263962
5,9675,1,-1,213.607034,-0.246936


In [19]:
idx = sorted_df['FID'].iloc[0]
fence_name = fences['FENCE_ID'][idx]
dist = sorted_df['DISTANCE'].iloc[0]
flow_there = sorted_df['FLOW'].iloc[0]

In [20]:
print("将用户引导至{}, 距离当前用户为{}米, 该点流量为{}".format(fence_name, dist, flow_there))

将用户引导至会展路(塔埔路至吕岭路段 )_L_2, 距离当前用户为180.466974998578米, 该点流量为-9
