In [6]:
from collections import namedtuple
import geopandas as gpd
import matplotlib.pyplot as plot
from shapely import geometry
import pandas as pd
import itertools
from collections import deque
import numpy as np
from tqdm import tqdm

# prepare data

In [7]:
routes = gpd.read_file('./data/nanshan_traj.shp')
routes=routes.to_crs('EPSG:4326')
routes = routes[routes['age'] != 16]
district=gpd.read_file('./data/nanshan_grid.shp')

In [8]:
routes = routes.dropna(subset=['geometry'])

In [9]:
print(set(routes['age']))
print(set(routes['gender']))

{4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}
{1.0, 2.0}


In [10]:
young_men = routes[(routes['age'] >= 4) & (routes['age'] <= 7) & (routes['gender'] == 1)]
young_women = routes[(routes['age'] >= 4) & (routes['age'] <= 7) & (routes['gender'] == 2)]
middle_men =  routes[(routes['age'] >= 8) & (routes['age'] <= 13) & (routes['gender'] == 1)]
middle_women = routes[(routes['age'] >= 8) & (routes['age'] <= 13) & (routes['gender'] == 2)]
old_men = routes[(routes['age'] >= 14) & (routes['gender'] == 1)]
old_women = routes[(routes['age'] >= 14) & (routes['gender'] == 2)]

In [11]:
print(len(young_men),len(young_women),len(middle_men),len(middle_women),len(old_men),len(old_women))

34924 15735 29324 13808 679 458


# Function

In [12]:
def getFnidByPoint(first_point,second_point):
    """
    return the fnid of each line

    Args:
        first_point : start position of line
        second_point : end position of line

    Returns:
        fnid_list: fnid list
    """    
    fnid_list=[]
    geometry_first_point=gpd.GeoSeries(geometry.Point(first_point),crs=4326)
    geometry_second_point=gpd.GeoSeries(geometry.Point(second_point),crs=4326)
    first_point_=gpd.GeoDataFrame(geometry=geometry_first_point,crs=4326)
    second_point_=gpd.GeoDataFrame(geometry=geometry_second_point,crs=4326)
    intersect_first=gpd.overlay(df1=district,df2=first_point_,how='intersection',keep_geom_type=False)
    fnid_list.extend(list(intersect_first['fnid']))
    # if distance>250m,then interpolation,
    # the projection coordinate system of ShenZhen is UTM 50N(EPSG:3406)
    UTM_first_point = geometry_first_point.to_crs(32650)
    UTM_second_point=geometry_second_point.to_crs(32650)
    dist=UTM_first_point.distance(UTM_second_point)
    delta=int(dist//50)
    for i in range(delta):
        interpolation_x=first_point[0]+i*(second_point[0]-first_point[0])/delta
        interpolation_y=first_point[1]+i*(second_point[1]-first_point[1])/delta
        geometry_interpolation=gpd.GeoSeries(geometry.Point([interpolation_x,interpolation_y]),crs=4326)
        interpolation_point=gpd.GeoDataFrame(geometry=geometry_interpolation,crs=4326)
        intersect_=gpd.overlay(df1=district,df2=interpolation_point,how='intersection',keep_geom_type=False)
        fnid_list.extend(list(intersect_['fnid']))
    intersect_second=gpd.overlay(df1=district,df2=second_point_,how='intersection',keep_geom_type=False)
    fnid_list.extend(list(intersect_second['fnid']))
    return fnid_list

In [13]:
def routeToFnid(routes):
    length=len(routes)
    print('the length of routes is {}'.format(length))
    routes_states=[]
    for i in range(length):
        geometry_=routes.iloc[i,-1]
        q=deque(geometry_.coords)
        len_q=len(q)
        first_point=q.popleft()
        states=[]
        states_unique=[]
        while q:
            second_point=q.popleft()
            fnid_list=getFnidByPoint(first_point,second_point)
            if fnid_list:
                states.extend(fnid_list)
            first_point=second_point
        for key,_ in itertools.groupby(states):
            states_unique.append(key)
        print("the {} route finished".format(i))
        routes_states.append(states_unique)
    routes_states=np.array(routes_states)
    # np.save(save_file,routes_states) 
    return routes_states

In [14]:
actions = [0, 1, 2, 3, 4]
dirs = {0: 'r', 1: 'l', 2: 'd', 3: 'u', 4: 's'}
#        right,    left,   down,   up ,   stay
Step=namedtuple('Step',['state','action'])
def getActionOfStates(route_state):
    state_action=[]
    length=len(route_state)
    first_state=route_state[0]
    if length==1:
        step=Step(state=first_state,action=4)
        state_action.append(step)
        return state_action
    for i in range(1,length):
        second_state=route_state[i]

        def getAction(first_s,second_s):
            if second_s-first_s==1:
                return 0
            elif second_s- first_s==-1:
                return 1
            elif second_s-first_s==357:
                return 3
            elif second_s-first_s==-357:
                return 2

        idx_minux=second_state-first_state
        if idx_minux==358:
            second_state=second_state-1
            act=getAction(first_state,second_state)
            step=Step(state=first_state,action=act)
            state_action.append(step)
            first_state=second_state
            second_state+=1
        elif idx_minux==356:
            second_state=second_state+1
            act=getAction(first_state,second_state)
            step=Step(state=first_state,action=act)
            state_action.append(step)
            first_state=second_state
            second_state-=1
        elif idx_minux==-358:
            second_state=second_state+1
            act=getAction(first_state,second_state)
            step=Step(state=first_state,action=act)
            state_action.append(step)
            first_state=second_state
            second_state-=1
        elif idx_minux==-356:
            second_state=second_state-1
            act=getAction(first_state,second_state)
            step=Step(state=first_state,action=act)
            state_action.append(step)
            first_state=second_state
            second_state+=1
        act=getAction(first_state,second_state)
        step=Step(state=first_state,action=act)
        state_action.append(step)
        first_state=second_state
    step=Step(state=second_state,action=4)
    state_action.append(step)
    return state_action

In [23]:
import random

def randomSelectLines(geodataframe, percent_to_select=1.0):
    total_lines = len(geodataframe)
    if total_lines<10000:
        random_line_indices = random.sample(range(total_lines), 100)
        randomly_selected_lines = geodataframe.iloc[random_line_indices]
        return randomly_selected_lines

    num_lines_to_select = int(total_lines * (percent_to_select / 100))
    random_line_indices = random.sample(range(total_lines), num_lines_to_select)
    randomly_selected_lines = geodataframe.iloc[random_line_indices]
    return randomly_selected_lines

In [17]:
def routeToTuple(routes,save_file):
    routes=randomSelectLines(routes)
    routes_states=routeToFnid(routes)
    state_action_tuple=[]
    for route_state in tqdm(routes_states):
        sta_act=getActionOfStates(route_state)
        state_action_tuple.append(sta_act)
    print(state_action_tuple[0])
    state_action_tuple=np.array(state_action_tuple)
    np.save(save_file,state_action_tuple) 

# main function

In [24]:
# 第一个0男，1女；第二个0青，1中，2老
routeToTuple(young_men,'./data/routes_states/0_0_states_tuple.npy')

the length of routes is 349
the 0 route finished
the 1 route finished
the 2 route finished
the 3 route finished
the 4 route finished
the 5 route finished
the 6 route finished
the 7 route finished
the 8 route finished
the 9 route finished
the 10 route finished
the 11 route finished
the 12 route finished
the 13 route finished
the 14 route finished
the 15 route finished
the 16 route finished
the 17 route finished
the 18 route finished
the 19 route finished
the 20 route finished
the 21 route finished
the 22 route finished
the 23 route finished
the 24 route finished
the 25 route finished
the 26 route finished
the 27 route finished
the 28 route finished
the 29 route finished
the 30 route finished
the 31 route finished
the 32 route finished
the 33 route finished
the 34 route finished
the 35 route finished
the 36 route finished
the 37 route finished
the 38 route finished
the 39 route finished
the 40 route finished
the 41 route finished
the 42 route finished
the 43 route finished
the 44 route f



the 348 route finished


100%|██████████| 349/349 [00:00<00:00, 58150.09it/s]

[Step(state=12205.0, action=0), Step(state=12206.0, action=2), Step(state=11849.0, action=0), Step(state=11850.0, action=1), Step(state=11849.0, action=0), Step(state=11850.0, action=2), Step(state=11493.0, action=0), Step(state=11494.0, action=2), Step(state=11137.0, action=0), Step(state=11138.0, action=2), Step(state=10781.0, action=4)]



  if __name__ == '__main__':


In [25]:
routeToTuple(middle_men,'./data/routes_states/0_1_states_tuple.npy')

the length of routes is 293
the 0 route finished
the 1 route finished
the 2 route finished
the 3 route finished
the 4 route finished
the 5 route finished
the 6 route finished
the 7 route finished
the 8 route finished
the 9 route finished
the 10 route finished
the 11 route finished
the 12 route finished
the 13 route finished
the 14 route finished
the 15 route finished
the 16 route finished
the 17 route finished
the 18 route finished
the 19 route finished
the 20 route finished
the 21 route finished
the 22 route finished
the 23 route finished
the 24 route finished
the 25 route finished
the 26 route finished
the 27 route finished
the 28 route finished
the 29 route finished
the 30 route finished
the 31 route finished
the 32 route finished
the 33 route finished
the 34 route finished
the 35 route finished
the 36 route finished
the 37 route finished
the 38 route finished
the 39 route finished
the 40 route finished
the 41 route finished
the 42 route finished
the 43 route finished
the 44 route f



the 292 route finished


100%|██████████| 293/293 [00:00<00:00, 58609.84it/s]

[Step(state=7931.0, action=2), Step(state=7574.0, action=2), Step(state=7217.0, action=2), Step(state=6860.0, action=1), Step(state=6859.0, action=1), Step(state=6858.0, action=2), Step(state=6501.0, action=1), Step(state=6500.0, action=1), Step(state=6499.0, action=1), Step(state=6498.0, action=3), Step(state=6855.0, action=1), Step(state=6854.0, action=1), Step(state=6853.0, action=3), Step(state=7210.0, action=2), Step(state=6853.0, action=1), Step(state=6852.0, action=4)]



  if __name__ == '__main__':


In [26]:
routeToTuple(old_men,'./data/routes_states/0_2_states_tuple.npy')

the length of routes is 100
the 0 route finished
the 1 route finished
the 2 route finished
the 3 route finished
the 4 route finished
the 5 route finished
the 6 route finished
the 7 route finished
the 8 route finished
the 9 route finished
the 10 route finished
the 11 route finished
the 12 route finished
the 13 route finished
the 14 route finished
the 15 route finished
the 16 route finished
the 17 route finished
the 18 route finished
the 19 route finished
the 20 route finished
the 21 route finished
the 22 route finished
the 23 route finished
the 24 route finished
the 25 route finished
the 26 route finished
the 27 route finished
the 28 route finished
the 29 route finished
the 30 route finished
the 31 route finished
the 32 route finished
the 33 route finished
the 34 route finished
the 35 route finished
the 36 route finished
the 37 route finished
the 38 route finished
the 39 route finished
the 40 route finished
the 41 route finished
the 42 route finished
the 43 route finished
the 44 route f



the 99 route finished


100%|██████████| 100/100 [00:00<00:00, 49967.88it/s]

[Step(state=12211.0, action=2), Step(state=11854.0, action=2), Step(state=11497.0, action=2), Step(state=11140.0, action=2), Step(state=10783.0, action=2), Step(state=10426.0, action=2), Step(state=10069.0, action=2), Step(state=9712.0, action=4)]



  if __name__ == '__main__':


In [27]:
routeToTuple(young_women,'./data/routes_states/1_0_states_tuple.npy')

the length of routes is 157
the 0 route finished
the 1 route finished
the 2 route finished
the 3 route finished
the 4 route finished
the 5 route finished
the 6 route finished
the 7 route finished
the 8 route finished
the 9 route finished
the 10 route finished
the 11 route finished
the 12 route finished
the 13 route finished
the 14 route finished
the 15 route finished
the 16 route finished
the 17 route finished
the 18 route finished
the 19 route finished
the 20 route finished
the 21 route finished
the 22 route finished
the 23 route finished
the 24 route finished
the 25 route finished
the 26 route finished
the 27 route finished
the 28 route finished
the 29 route finished
the 30 route finished
the 31 route finished
the 32 route finished
the 33 route finished
the 34 route finished
the 35 route finished
the 36 route finished
the 37 route finished
the 38 route finished
the 39 route finished
the 40 route finished
the 41 route finished
the 42 route finished
the 43 route finished
the 44 route f



the 156 route finished


100%|██████████| 157/157 [00:00<00:00, 78449.57it/s]

[Step(state=10428.0, action=1), Step(state=10427.0, action=1), Step(state=10426.0, action=2), Step(state=10069.0, action=1), Step(state=10068.0, action=2), Step(state=9711.0, action=1), Step(state=9710.0, action=1), Step(state=9709.0, action=1), Step(state=9708.0, action=0), Step(state=9709.0, action=2), Step(state=9352.0, action=1), Step(state=9351.0, action=4)]



  if __name__ == '__main__':


In [28]:
routeToTuple(middle_women,'./data/routes_states/1_1_states_tuple.npy')

the length of routes is 138
the 0 route finished
the 1 route finished
the 2 route finished
the 3 route finished
the 4 route finished
the 5 route finished
the 6 route finished
the 7 route finished
the 8 route finished
the 9 route finished
the 10 route finished
the 11 route finished
the 12 route finished
the 13 route finished
the 14 route finished
the 15 route finished
the 16 route finished
the 17 route finished
the 18 route finished
the 19 route finished
the 20 route finished
the 21 route finished
the 22 route finished
the 23 route finished
the 24 route finished
the 25 route finished
the 26 route finished
the 27 route finished
the 28 route finished
the 29 route finished
the 30 route finished
the 31 route finished
the 32 route finished
the 33 route finished
the 34 route finished
the 35 route finished
the 36 route finished
the 37 route finished
the 38 route finished
the 39 route finished
the 40 route finished
the 41 route finished
the 42 route finished
the 43 route finished
the 44 route f



the 137 route finished


100%|██████████| 138/138 [00:00<00:00, 68980.33it/s]

[Step(state=13283.0, action=2), Step(state=12926.0, action=2), Step(state=12569.0, action=1), Step(state=12568.0, action=1), Step(state=12567.0, action=1), Step(state=12566.0, action=1), Step(state=12565.0, action=1), Step(state=12564.0, action=3), Step(state=12921.0, action=1), Step(state=12920.0, action=2), Step(state=12563.0, action=1), Step(state=12562.0, action=1), Step(state=12561.0, action=1), Step(state=12560.0, action=2), Step(state=12203.0, action=2), Step(state=11846.0, action=1), Step(state=11845.0, action=2), Step(state=11488.0, action=1), Step(state=11487.0, action=4)]



  if __name__ == '__main__':


In [29]:
routeToTuple(old_women,'./data/routes_states/1_2_states_tuple.npy')

the length of routes is 100
the 0 route finished
the 1 route finished
the 2 route finished
the 3 route finished
the 4 route finished
the 5 route finished
the 6 route finished
the 7 route finished
the 8 route finished
the 9 route finished
the 10 route finished
the 11 route finished
the 12 route finished
the 13 route finished
the 14 route finished
the 15 route finished
the 16 route finished
the 17 route finished
the 18 route finished
the 19 route finished
the 20 route finished
the 21 route finished
the 22 route finished
the 23 route finished
the 24 route finished
the 25 route finished
the 26 route finished
the 27 route finished
the 28 route finished
the 29 route finished
the 30 route finished
the 31 route finished
the 32 route finished
the 33 route finished
the 34 route finished
the 35 route finished
the 36 route finished
the 37 route finished
the 38 route finished
the 39 route finished
the 40 route finished
the 41 route finished
the 42 route finished
the 43 route finished
the 44 route f



the 99 route finished


100%|██████████| 100/100 [00:00<00:00, 49991.70it/s]

[Step(state=13643.0, action=0), Step(state=13644.0, action=2), Step(state=13287.0, action=2), Step(state=12930.0, action=2), Step(state=12573.0, action=2), Step(state=12216.0, action=1), Step(state=12215.0, action=1), Step(state=12214.0, action=2), Step(state=11857.0, action=4)]



  if __name__ == '__main__':
