In [None]:
import geopandas as gpd
import matplotlib.pyplot as plot
from shapely import geometry
from shapely.geometry import LineString, Point, box
import geopandas as gpd
import pandas as pd
import itertools
from collections import deque,namedtuple
import numpy as np
import concurrent.futures
from tqdm import tqdm

# Prepare data

In [None]:
# Load the trajectories and reproject them to WGS84 (EPSG:4326).
routes = gpd.read_file('./data/nanshan_traj.shp').to_crs('EPSG:4326')
# Discard every row whose age code equals 16.
routes = routes.loc[routes['age'] != 16]
# Grid polygons, used with whatever CRS the shapefile declares.
# NOTE(review): nothing reprojects `district`; confirm it is already EPSG:4326.
district = gpd.read_file('./data/nanshan_grid.shp')

In [None]:
# Keep only the rows that actually carry a geometry.
routes = routes.loc[routes['geometry'].notna()]

In [None]:
# Show the distinct codes found in the age and gender columns, one set per line.
print(set(routes['age']), set(routes['gender']), sep='\n')

In [None]:
# Split the routes into six cohorts: gender code (1 or 2) x age-code band
# (4-7, 8-13, 14 and above). `between` is inclusive on both ends.
young_men = routes[routes['age'].between(4, 7) & routes['gender'].eq(1)]
young_women = routes[routes['age'].between(4, 7) & routes['gender'].eq(2)]
middle_men = routes[routes['age'].between(8, 13) & routes['gender'].eq(1)]
middle_women = routes[routes['age'].between(8, 13) & routes['gender'].eq(2)]
old_men = routes[routes['age'].ge(14) & routes['gender'].eq(1)]
old_women = routes[routes['age'].ge(14) & routes['gender'].eq(2)]

In [None]:
# Row count of each cohort, space separated on one line.
print(*map(len, (young_men, young_women, middle_men, middle_women, old_men, old_women)))

# Functions

In [None]:
def getFnidByPoint(first_point, second_point):
    """Return the grid-cell ids (``fnid``) hit along one route segment, in travel order.

    The segment is sampled with evenly spaced points (about one per 50 units
    of its length after projection to EPSG:32650, i.e. metres) and every
    sample is intersected with the module-level ``district`` grid.

    Parameters
    ----------
    first_point, second_point : sequence of float
        Segment end points; index 0 is x and index 1 is y, interpreted as
        EPSG:4326 coordinates.

    Returns
    -------
    list
        ``fnid`` values of the cells containing the samples, ordered from
        ``first_point`` towards ``second_point``. Consecutive samples in the
        same cell yield repeated ids; the list is empty when no sample falls
        inside any cell.
    """
    # Measure the segment in a projected CRS so the length is in metres.
    segment = gpd.GeoSeries([LineString([first_point, second_point])], crs=4326)
    dist = segment.to_crs(32650).length.iloc[0]

    # One sample per 50 m, but never fewer than the two end points: otherwise
    # a segment shorter than 50 m would only sample its start and the last
    # vertex of a route could be lost.
    n_samples = max(int(dist // 50), 1) + 1
    x_values = np.linspace(first_point[0], second_point[0], n_samples)
    y_values = np.linspace(first_point[1], second_point[1], n_samples)
    interpolation_points = [Point(x, y) for x, y in zip(x_values, y_values)]

    # Tag each sample with its position along the segment: overlay does not
    # promise to return rows in the order of the input points, and callers
    # rely on the travel order of the returned ids.
    samples = gpd.GeoDataFrame(
        {'_sample_order': list(range(n_samples))},
        geometry=interpolation_points,
        crs=4326,
    )
    intersect = gpd.overlay(district, samples, how='intersection', keep_geom_type=False)
    if intersect.empty:
        return []

    intersect = intersect.sort_values('_sample_order', kind='mergesort')
    return intersect['fnid'].tolist()

In [None]:
def routeToFnid(routes):
    """Convert every route geometry into the sequence of grid cells it visits.

    Parameters
    ----------
    routes : GeoDataFrame
        Must expose ``.geometry``; each geometry must expose ``.coords``.

    Returns
    -------
    numpy.ndarray
        1-D object array with one entry per route. Each entry is a list of
        ``fnid`` values in first-visit order with duplicates removed. Routes
        with fewer than two coordinates, or that hit no cell, give ``[]``.
    """
    def process_route(route):
        """Walk the route segment by segment and collect the cells in visit order."""
        coords = list(route.coords)
        # A dict is used as an ordered set: it de-duplicates while keeping
        # first-visit order, which a plain set would destroy.
        visited = {}
        for first_point, second_point in zip(coords, coords[1:]):
            visited.update(dict.fromkeys(getFnidByPoint(first_point, second_point)))
        return list(visited)

    print('the length of routes is {}'.format(len(routes)))
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map yields results in input order, so entry i belongs to route i.
        results = list(tqdm(executor.map(process_route, routes.geometry), total=len(routes)))

    # Routes have different lengths; np.array() on such ragged lists raises
    # on recent NumPy releases, so fill an object array explicitly.
    routes_states = np.empty(len(results), dtype=object)
    for i, states_unique in enumerate(results):
        routes_states[i] = states_unique
    return routes_states

In [None]:
# Action codes and their direction labels: right, left, down, up, stay.
actions = [0, 1, 2, 3, 4]
dirs = {0: 'r', 1: 'l', 2: 'd', 3: 'u', 4: 's'}
Step=namedtuple('Step',['state','action'])

def getActionOfStates(route_state, grid_width=357):
    """Pair every state of a route with the action that leads to the next state.

    The action is derived from the difference between consecutive state ids:
    ``+1`` -> 0 (right), ``-1`` -> 1 (left), ``+grid_width`` -> 3 (up),
    ``-grid_width`` -> 2 (down); any other difference -> 4 (stay). The last
    state always gets action 4.

    Parameters
    ----------
    route_state : sequence of numbers
        State ids in visit order.
    grid_width : int, optional
        Id offset between vertically adjacent cells. Defaults to 357.
        NOTE(review): presumably the number of columns in the grid; confirm.

    Returns
    -------
    list of Step
        One ``Step(state, action)`` per input state; empty for empty input.
    """
    # An empty route has no final state to append; the original code crashed here.
    if len(route_state) == 0:
        return []

    move_by_diff = {1: 0, -1: 1, grid_width: 3, -grid_width: 2}

    # np.diff has one element fewer than route_state, so zip stops right
    # before the final state, which is appended separately below.
    state_action = [
        Step(state=state, action=move_by_diff.get(step_diff, 4))
        for state, step_diff in zip(route_state, np.diff(route_state).tolist())
    ]
    state_action.append(Step(state=route_state[-1], action=4))

    return state_action


In [None]:
import random

# If the amount of data is too large, downsampling can be performed
def randomSelectLines(geodataframe, percent_to_select=0.7, seed=None, min_lines=1000):
    """Return a random subset of the rows, in random order.

    Parameters
    ----------
    geodataframe : DataFrame-like
        Anything supporting ``len()`` and positional ``.iloc[list]`` indexing.
    percent_to_select : float, optional
        Fraction of rows to keep when the input has at least ``min_lines`` rows.
    seed : optional
        When given, a dedicated ``random.Random(seed)`` is used so the draw is
        reproducible. When ``None`` the global ``random`` state is used.
    min_lines : int, optional
        Inputs with fewer rows than this are not downsampled: every row is
        returned, only shuffled.

    Returns
    -------
    Same type as ``geodataframe.iloc[...]``
        ``int(len * fraction)`` distinct rows.
    """
    total_lines = len(geodataframe)
    # Small inputs keep every row; large ones keep the requested fraction.
    fraction = 1 if total_lines < min_lines else percent_to_select
    num_lines_to_select = int(total_lines * fraction)

    rng = random if seed is None else random.Random(seed)
    random_line_indices = rng.sample(range(total_lines), num_lines_to_select)
    return geodataframe.iloc[random_line_indices]

In [None]:
def routeToTuple(routes,save_file):
    """Turn a cohort of routes into per-route (state, action) steps and save them.

    The routes are downsampled with ``randomSelectLines``, converted to cell
    sequences with ``routeToFnid`` and annotated with ``getActionOfStates``.
    The result is written to ``save_file`` with ``np.save`` as a 1-D object
    array holding one list of steps per route, so it is stored via pickle and
    must be read back with ``allow_pickle=True``.

    Parameters
    ----------
    routes : GeoDataFrame
        Routes of one cohort.
    save_file : str
        Destination path passed to ``np.save``.
    """
    routes=randomSelectLines(routes)
    routes_states=routeToFnid(routes)
    per_route_steps=[getActionOfStates(route_state) for route_state in tqdm(routes_states)]

    # Preview the first route, if there is one.
    if per_route_steps:
        print(per_route_steps[0])

    # Routes differ in length: np.array() on the ragged list raises on recent
    # NumPy releases (and would flatten equal-length routes into a plain
    # numeric array), so fill an object array explicitly.
    state_action_tuple=np.empty(len(per_route_steps),dtype=object)
    for i, steps in enumerate(per_route_steps):
        state_action_tuple[i]=steps
    np.save(save_file,state_action_tuple)

# Main: build and save the state-action tuples for each cohort

In [None]:
# File-name code: first digit 0 = male, 1 = female; second digit 0 = young, 1 = middle-aged, 2 = old.
routeToTuple(young_men, save_file='./data/routes_states/0_0_states_tuple.npy')

In [None]:
# middle_men cohort -> file code 0_1
routeToTuple(middle_men, save_file='./data/routes_states/0_1_states_tuple.npy')

In [None]:
# old_men cohort -> file code 0_2
routeToTuple(old_men, save_file='./data/routes_states/0_2_states_tuple.npy')

In [None]:
# young_women cohort -> file code 1_0
routeToTuple(young_women, save_file='./data/routes_states/1_0_states_tuple.npy')

In [None]:
# middle_women cohort -> file code 1_1
routeToTuple(middle_women, save_file='./data/routes_states/1_1_states_tuple.npy')

In [None]:
# old_women cohort -> file code 1_2
routeToTuple(old_women, save_file='./data/routes_states/1_2_states_tuple.npy')

In [None]:
import numpy as np
# Read one saved result back as a sanity check; the cell displays the array.
# allow_pickle=True unpickles arbitrary objects, so only use it on files you
# produced yourself.
np.load(file='./data/routes_states/1_2_states_tuple.npy', allow_pickle=True)