In [1]:
import numpy as np

In [2]:
def generate_node(num_node, node_length = 1):
    """Lay out node coordinates on a square grid.

    The grid side is ``int(sqrt(num_node))``.  Grid node ``k`` sits in column
    ``k % side`` and row ``k // side``; coordinates start at 1 and neighbouring
    nodes are ``node_length`` apart.  When ``num_node`` is not a perfect square
    the rows past ``side * side`` are left at (0, 0).

    Returns a float array of shape ``(num_node, 2)`` with one (x, y) per node.
    """
    side = int(np.sqrt(num_node))
    coords = np.zeros((num_node, 2))
    grid_ids = np.arange(side * side)
    # x advances fastest, y advances once per completed row
    coords[grid_ids, 0] = (grid_ids % side) * node_length + 1
    coords[grid_ids, 1] = (grid_ids // side) * node_length + 1
    return coords

In [3]:
def generate_stay(num_stay, num_node):
    """Draw ``num_stay`` stay nodes so that no stay is adjacent to an earlier one.

    After each draw, every node listed by ``adjacent_node`` for the chosen node
    (which includes the node itself, its distance being 0) is removed from the
    candidate pool, so later stays can neither repeat nor neighbour earlier ones.

    Returns:
        Float array of shape ``(num_stay, 1)`` holding the chosen node indices.

    Raises:
        ValueError: if the candidate pool is exhausted before ``num_stay``
            stays have been placed.
    """
    adjacent = adjacent_node(num_node)
    result = np.zeros((num_stay, 1))
    choice_set = list(range(num_node))
    for i in range(num_stay):
        if i > 0:
            # drop everything within reach of the previous stay
            choice_set = list(set(choice_set) - set(adjacent[int(result[i - 1, 0])]))
        if not choice_set:
            raise ValueError(
                "cannot place %d non-adjacent stays on %d nodes" % (num_stay, num_node))
        # choice(..., 1) returns a length-1 array; take its element explicitly,
        # since storing an ndim>0 array into a scalar slot is deprecated in NumPy.
        result[i, 0] = np.random.choice(choice_set, 1)[0]
    return result
            

In [4]:
def adjacent_node(num_node):
    """List, for every node, the nodes reachable within one time step.

    Two nodes count as adjacent when their Euclidean distance on the grid from
    ``generate_node`` is at most ``sqrt(3)``; a node is therefore always listed
    among its own neighbours (distance 0).  Everyone is assumed to move at the
    same constant velocity.

    Returns a list of ``num_node`` integer index arrays, one per node.
    """
    coords = generate_node(num_node)
    reach = np.sqrt(3)
    xs = coords[:, 0]
    ys = coords[:, 1]
    neighbours = []
    for k in range(num_node):
        # distance from node k to every node at once
        gap = np.sqrt((xs[k] - xs)**2 + (ys[k] - ys)**2)
        neighbours.append(np.where(gap <= reach)[0])
    return neighbours

In [5]:
def generate_route(origin, destination, num_node):
    """Random-walk a route from ``origin`` to ``destination`` over adjacent nodes.

    At every step the next node is drawn from the neighbourhood of the current
    node with probability proportional to ``exp(-d**2)``, where ``d`` is the
    candidate's Euclidean distance to the destination.  Once the route has more
    than one entry, its last two nodes are excluded from the candidates so the
    walk does not step straight back.

    NOTE(review): on the very first step the origin itself is still a
    candidate, because a node's neighbourhood contains the node; this matches
    the original behaviour - confirm it is intended.

    Args:
        origin: start node index (anything ``int()`` accepts).
        destination: end node index (anything ``int()`` accepts).
        num_node: total number of nodes in the grid.

    Returns:
        List of int node indices, starting at ``origin`` and ending at
        ``destination``.

    Raises:
        ValueError: if the walk reaches a node with no admissible next node.
    """
    origin = int(origin)
    destination = int(destination)
    nodes = generate_node(num_node)
    adjacent = adjacent_node(num_node)
    target = nodes[destination]

    route = [origin]
    current = origin
    while current != destination:
        candidates = set(adjacent[current])
        if len(route) > 1:
            candidates -= set(route[-2:])
        possible_set = list(candidates)
        if not possible_set:
            raise ValueError("no admissible next node from node %d" % current)

        sq_dist = np.sum((nodes[possible_set] - target)**2, axis = 1)
        # Shift by the smallest squared distance before exponentiating.  The
        # normalised probabilities are unchanged, but the largest weight is
        # exactly 1, so the sum cannot underflow to 0 (NaN probabilities) when
        # the destination is far away on a large grid.
        weights = np.exp(-(sq_dist - sq_dist.min()))
        probability = weights / weights.sum()

        # choice(..., 1) returns a length-1 array; index it before int(),
        # because int() on an ndim>0 array is deprecated in NumPy.
        next_step = int(np.random.choice(possible_set, 1, p = probability)[0])
        route.append(next_step)
        current = next_step
    return route
            

In [6]:
def generate_trajectory(time_length, num_node, num_stay):
    """Generate one person's trajectory as a closed tour over ``num_stay`` stays.

    The person stays at ``stays[0]``, travels to ``stays[1]``, stays there, and
    so on; after the last stay they travel back to ``stays[0]`` and remain
    there until the end.  Time not spent travelling is split as follows: 40%
    each (rounded down) to the first two stay periods, the rest is divided
    randomly over the remaining ``num_stay - 1`` periods, and whatever is lost
    to rounding is added to the first period.

    Args:
        time_length: number of time steps in the trajectory.
        num_node: total number of nodes in the grid.
        num_stay: number of distinct stay locations (the code indexes
            ``stay_time[1]``, so at least 1 is required).

    Returns:
        trajectory: float array of length ``time_length`` with the node
            occupied at each time step.
        stays: array of shape ``(num_stay, 1)`` from ``generate_stay``.
        stay_time: float array of length ``num_stay + 1``; entry ``i`` is the
            time spent at ``stays[i]`` and the last entry is the final period
            back at ``stays[0]``.

    Raises:
        ValueError: if the generated routes alone need more than
            ``time_length`` steps.
    """
    stays = generate_stay(num_stay, num_node)

    # One route per stay; the route leaving the last stay closes the tour.
    total_route = []
    for i in range(num_stay):
        destination = stays[(i + 1) % num_stay, 0]
        total_route.append(generate_route(stays[i, 0], destination, num_node))
    route_time = sum(len(route) for route in total_route)

    left_time = time_length - route_time
    if left_time < 0:
        raise ValueError(
            "routes take %d time steps but time_length is only %d"
            % (route_time, time_length))

    stay_time = np.zeros(num_stay + 1)
    stay_time[0] = int(np.floor(left_time * 0.4))
    stay_time[1] = int(np.floor(left_time * 0.4))
    left_time = left_time - stay_time[0] - stay_time[1]
    # NOTE(review): the lower bound divides by zero when exactly one time step
    # is left to distribute here.
    probability = np.random.uniform(low = num_stay/(left_time - 1), high = 1, size = num_stay - 1)
    probability = probability / np.sum(probability)

    stay_time[2:] = np.floor(left_time * probability)
    # give the steps lost to rounding to the first stay
    stay_time[0] = stay_time[0] + (left_time - np.sum(stay_time[2:]))

    trajectory = np.zeros(time_length)
    index = 0
    for i in range(num_stay):
        duration = int(stay_time[i])
        trajectory[index : index + duration] = np.full(duration, stays[i, 0])
        index = index + duration
        route = total_route[i]
        trajectory[index : index + len(route)] = route
        index = index + len(route)
    # final period back at the first stay (indexed explicitly rather than via
    # the loop variable left over from the loop above)
    last_duration = int(stay_time[num_stay])
    trajectory[index : index + last_duration] = np.full(last_duration, stays[0, 0])
    return trajectory, stays, stay_time

In [7]:
def generate_observation_probability(num_node, population):
    """Build the observation probability matrix for one time step.

    Row ``i`` is the distribution of the observed node given that the actual
    node is ``i`` (rows: actual state, columns: observed data).  Entry
    ``(i, j)`` is proportional to ``exp(-coeff_i * dist(i, j)**2)``, where
    ``coeff_i`` falls linearly from 5 at the least populated node to 1 at the
    most populated one.  Each row sums to 1.

    Args:
        num_node: total number of nodes in the grid.
        population: per-node population for this time step; entry ``i`` belongs
            to node ``i``.

    Returns:
        Float array of shape ``(num_node, num_node)``.
    """
    nodes = generate_node(num_node)
    coeff_min = 1
    coeff_max = 5

    population = np.asarray(population, dtype = float)
    min_population = np.min(population)
    spread = np.max(population) - min_population
    if spread > 0:
        scaled = (population - min_population) / spread
    else:
        # All populations equal: min-max scaling would be 0/0 and poison the
        # whole matrix with NaN, so treat every node as the minimum (scaled 0).
        scaled = np.zeros_like(population)
    coeff = (coeff_max - (coeff_max - coeff_min) * scaled).ravel()[:num_node]

    # Pairwise squared distances in one shot; this function is called once per
    # time step, so the former Python double loop dominated the run time.
    offsets = nodes[:, np.newaxis, :] - nodes[np.newaxis, :, :]
    sq_dist = np.sum(offsets**2, axis = 2)
    result = np.exp(-coeff[:, np.newaxis] * sq_dist)
    return result / np.sum(result, axis = 1, keepdims = True)

In [8]:
def generate_observed(num_node, trajectory, population):
    """Generate one noisy observed node per time step of ``trajectory``.

    For time step ``i`` the observation matrix is built from column ``i`` of
    ``population``, and the observed node is drawn from the row belonging to
    the actual node ``trajectory[i]``.

    Args:
        num_node: total number of nodes in the grid.
        trajectory: sequence of actual node indices, one per time step.
        population: 2-D array indexed ``[node, time step]``.

    Returns:
        Float array with the same length as ``trajectory`` holding the
        observed node indices.
    """
    choice_set = list(range(num_node))

    result = np.zeros(len(trajectory))
    for i, actual in enumerate(trajectory):
        probability = generate_observation_probability(num_node, population[:, i])
        choice_prob = probability[int(actual), :]
        # choice(..., 1) returns a length-1 array; take its element explicitly,
        # since storing an ndim>0 array into a scalar slot is deprecated in NumPy.
        result[i] = np.random.choice(choice_set, 1, p = choice_prob)[0]
    return result

In [9]:
def generate_population(num_node, time_length):
    """Draw a random population count for every node and time step.

    The counts feed the observation probability.  Returns an integer array of
    shape ``(num_node, time_length)`` with values from 1 to 99 inclusive.
    """
    shape = (num_node, time_length)
    return np.random.randint(low = 1, high = 100, size = shape)

In [10]:
# Experiment configuration.
num_node = 64        # grid nodes; generate_node lays out an int(sqrt(num_node)) x int(sqrt(num_node)) grid
num_stay = 5         # stay locations per person
time_length = 100    # time steps per trajectory
num_people = 50      # people in the test set

# Every generator below draws from NumPy's global RNG; seed it so that a fresh
# top-to-bottom run reproduces the same data.
random_seed = 0
np.random.seed(random_seed)

In [11]:
# Build the test set: one trajectory and one observation sequence per person.
population = generate_population(num_node, time_length)
stay_times = []
trajectory_rows = []
observation_rows = []
stay_columns = []
for i in range(num_people):
    trajectory, stay, stay_time = generate_trajectory(time_length, num_node, num_stay)
    observation = generate_observed(num_node, trajectory, population)
    stay_times.append(stay_time)
    trajectory_rows.append(trajectory)
    observation_rows.append(observation)
    stay_columns.append(stay)

# Stack once after the loop instead of re-copying the growing arrays on every
# iteration.
trajectories = np.vstack(trajectory_rows)    # one row per person
observations = np.vstack(observation_rows)   # one row per person
stays = np.hstack(stay_columns)              # one column per person

In [17]:
# Build the training set (500 people).
# NOTE(review): this cell reassigns `trajectories` and `observations`, which
# the cell saving the *_test_complex.npy files further down also reads.  Its
# execution count (In [17]) shows it was originally run after that save; run
# top-to-bottom it executes first - confirm the intended cell order.
num_people = 500
trajectory_rows = []
observation_rows = []
for i in range(num_people):
    trajectory, stay, stay_time = generate_trajectory(time_length, num_node, num_stay)
    observation = generate_observed(num_node, trajectory, population)
    trajectory_rows.append(trajectory)
    observation_rows.append(observation)

# Stack once after the loop instead of re-copying the growing arrays on every
# iteration.
trajectories = np.vstack(trajectory_rows)    # one row per person
observations = np.vstack(observation_rows)   # one row per person

In [12]:
def extract_od(stay, num_node):
    """Count origin-destination trips between consecutive stays.

    Args:
        stay: iterable with one sequence of stay node indices per person.
        num_node: total number of nodes; sets the size of the matrix.

    Returns:
        Float array of shape ``(num_node, num_node)``; entry
        ``[origin, destination]`` is the number of trips (row: origin,
        column: destination).
    """
    result = np.zeros((num_node, num_node))
    for person in stay:
        person = np.asarray(person).astype(int)
        if person.size == 0:
            # nobody to count; there is no first or last stay to close the tour
            continue
        for origin, destination in zip(person[:-1], person[1:]):
            if origin != destination:
                result[origin, destination] += 1
        # Closing trip from the last stay back to the first one.  It is counted
        # unconditionally, and indexed from the end of the sequence so it also
        # works for a person with a single stay.
        result[person[-1], person[0]] += 1

    return result

In [13]:
def organize_stay_time(stay, stay_time, num_node):
    """Group stay durations by the node at which they were spent.

    Args:
        stay: iterable with one sequence of stay node indices per person.
        stay_time: per person, a sequence with one duration per stay followed
            by one extra entry, which is credited to the person's first stay.
        num_node: total number of nodes; sets the number of buckets.

    Returns:
        List of ``num_node`` lists; list ``k`` holds every duration recorded
        for node ``k``.
    """
    result = [[] for _ in range(num_node)]
    for i, person_stay in enumerate(stay):
        person_stay = np.asarray(person_stay).astype(int)
        person_stay_time = np.asarray(stay_time[i]).astype(int)
        if person_stay.size == 0:
            # no stays, hence no node to credit the durations to
            continue
        for node, duration in zip(person_stay, person_stay_time):
            result[node].append(duration)
        # The entry after the per-stay durations goes to the first stay;
        # indexed by length rather than by a loop variable left over from above.
        result[person_stay[0]].append(person_stay_time[len(person_stay)])
    return result
        

In [14]:
# `stays` holds one column per person; transpose to one row per person.
time_stay = organize_stay_time(stays.T, stay_times, num_node)

In [15]:
# Origin-destination counts; `stays` is transposed to one row per person.
stay_node = extract_od(stays.T, num_node)

In [18]:
# Persist the training set (whatever `trajectories` / `observations` hold now).
np.save(file = 'trajectories_train_complex.npy', arr = trajectories)
np.save(file = 'observations_train_complex.npy', arr = observations)

In [16]:
# Persist the test set together with its OD matrix, stay durations and stay nodes.
# NOTE(review): `trajectories` / `observations` are also reassigned by the
# 500-person training cell that appears earlier in the file (In [17]); run
# top-to-bottom, the first two files below would receive the training arrays -
# confirm the intended cell order.
np.save(file = 'trajectories_test_complex.npy', arr = trajectories)
np.save(file = 'observations_test_complex.npy', arr = observations)
np.save(file = 'od_test_complex.npy', arr = stay_node)
np.save(file = 'stay_time_complex.npy', arr = stay_times)
np.save(file = 'stay_region_complex.npy', arr = np.transpose(stays))

multi_data: observations with oscillation (several observed nodes at a single time step)

In [19]:
def generate_multi_observed(num_node, trajectory, population, percentage):
    """Generate observations in which some time steps carry several readings.

    For every time step a uniform draw is compared with ``percentage``: below
    it, between 2 and 5 readings are sampled for that step (oscillation);
    otherwise a single reading is sampled.  Readings come from the observation
    row of the actual node, built from that step's population column.

    Returns a list with one integer array of readings per time step.
    """
    candidates = list(range(num_node))

    observed = []
    for step, actual in enumerate(trajectory):
        obs_matrix = generate_observation_probability(num_node, population[:, step])
        row = obs_matrix[int(actual), :]

        oscillates = np.random.rand(1) < percentage
        # the reading count is only drawn for oscillating steps
        sample_count = np.random.randint(2, 6) if oscillates else 1
        observed.append(np.random.choice(candidates, sample_count, p = row))

    return observed

In [20]:
# case for 10% oscillation
num_people = 50
percentage = 0.1
observations = []    # per person: list of per-time-step reading arrays
stay_times = []
trajectory_rows = []
stay_columns = []
for i in range(num_people):
    trajectory, stay, stay_time = generate_trajectory(time_length, num_node, num_stay)
    observation = generate_multi_observed(num_node, trajectory, population, percentage)
    observations.append(observation)
    stay_times.append(stay_time)
    trajectory_rows.append(trajectory)
    stay_columns.append(stay)

# Stack once after the loop instead of re-copying the growing arrays on every
# iteration.
trajectories = np.vstack(trajectory_rows)    # one row per person
stays = np.hstack(stay_columns)              # one column per person

In [21]:
# Summarise and persist the 10% oscillation test set.
time_stay = organize_stay_time(np.transpose(stays), stay_times, num_node)
stay_node = extract_od(np.transpose(stays), num_node)
np.save('trajectories_test_multi1.npy', trajectories)
# The per-step reading arrays differ in length, so the nested list is ragged.
# Recent NumPy refuses to build an array from it implicitly; request an object
# array explicitly.  Reading the file back needs np.load(..., allow_pickle=True).
np.save('observations_test_multi1.npy', np.array(observations, dtype = object))
np.save('od_test_multi1.npy', stay_node)
np.save('stay_time_multi1.npy', stay_times)
np.save('stay_region_multi1.npy', np.transpose(stays))

In [22]:
# case for 30% oscillation
num_people = 50
percentage = 0.3
observations = []    # per person: list of per-time-step reading arrays
stay_times = []
trajectory_rows = []
stay_columns = []
for i in range(num_people):
    trajectory, stay, stay_time = generate_trajectory(time_length, num_node, num_stay)
    observation = generate_multi_observed(num_node, trajectory, population, percentage)
    observations.append(observation)
    stay_times.append(stay_time)
    trajectory_rows.append(trajectory)
    stay_columns.append(stay)

# Stack once after the loop instead of re-copying the growing arrays on every
# iteration.
trajectories = np.vstack(trajectory_rows)    # one row per person
stays = np.hstack(stay_columns)              # one column per person

In [23]:
# Summarise and persist the 30% oscillation test set.
time_stay = organize_stay_time(np.transpose(stays), stay_times, num_node)
stay_node = extract_od(np.transpose(stays), num_node)
np.save('trajectories_test_multi2.npy', trajectories)
# The per-step reading arrays differ in length, so the nested list is ragged.
# Recent NumPy refuses to build an array from it implicitly; request an object
# array explicitly.  Reading the file back needs np.load(..., allow_pickle=True).
np.save('observations_test_multi2.npy', np.array(observations, dtype = object))
np.save('od_test_multi2.npy', stay_node)
np.save('stay_time_multi2.npy', stay_times)
np.save('stay_region_multi2.npy', np.transpose(stays))

In [24]:
# case for 50% oscillation
num_people = 50
percentage = 0.5
observations = []    # per person: list of per-time-step reading arrays
stay_times = []
trajectory_rows = []
stay_columns = []
for i in range(num_people):
    trajectory, stay, stay_time = generate_trajectory(time_length, num_node, num_stay)
    observation = generate_multi_observed(num_node, trajectory, population, percentage)
    observations.append(observation)
    stay_times.append(stay_time)
    trajectory_rows.append(trajectory)
    stay_columns.append(stay)

# Stack once after the loop instead of re-copying the growing arrays on every
# iteration.
trajectories = np.vstack(trajectory_rows)    # one row per person
stays = np.hstack(stay_columns)              # one column per person

In [25]:
# Summarise and persist the 50% oscillation test set.
time_stay = organize_stay_time(np.transpose(stays), stay_times, num_node)
stay_node = extract_od(np.transpose(stays), num_node)
np.save('trajectories_test_multi3.npy', trajectories)
# The per-step reading arrays differ in length, so the nested list is ragged.
# Recent NumPy refuses to build an array from it implicitly; request an object
# array explicitly.  Reading the file back needs np.load(..., allow_pickle=True).
np.save('observations_test_multi3.npy', np.array(observations, dtype = object))
np.save('od_test_multi3.npy', stay_node)
np.save('stay_time_multi3.npy', stay_times)
np.save('stay_region_multi3.npy', np.transpose(stays))

In [26]:
# Persist the population table shared by all generated data sets.
np.save(file = 'population.npy', arr = population)