In [2]:
from skmob.models.epr import DensityEPR, SpatialEPR
import skmob
import geopandas as gpd
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm

In [3]:
# load a spatial tesellation on which to perform the simulation
url = skmob.utils.constants.NY_COUNTIES_2011
tessellation = gpd.read_file(url)
# starting and end times of the simulation
start_time = pd.to_datetime('2020/08/13 00:00:00')
true_start_time = pd.to_datetime('2020/08/20 00:00:00')
end_time = pd.to_datetime('2020/09/03 00:00:00')

## DensityEPR

In [6]:
## override if necessary
class ConstantTimeDensityEPR(DensityEPR):
    def __init__(self):
        super().__init__()
        
        
def apply_minute(tdf, minutes=10):
    unix_minutes = minutes * 60
    return tdf['time'].apply(lambda x: x - (x % unix_minutes))

def gauusian_bridge(tdf, uid, random_state, minutes):
    unix_minutes = minutes * 60
    tmp_dict = {"uid": [], "time": [], "lat": [], "lng": []}
    table_size = len(tdf)
    for i in range(-1, table_size):
        if (i == table_size - 1):
            time_diff = (int(end_time.timestamp()) - tdf['time'].iloc[i])
            curr_time = tdf['time'].iloc[i]
            i -= 10
        elif (i == -1):
            time_diff = tdf['time'].iloc[0] - int(true_start_time.timestamp())
            curr_time = int(true_start_time.timestamp())
            i += 10
        else:
            time_diff = (tdf['time'].iloc[i+1] - tdf['time'].iloc[i])
            curr_time = tdf['time'].iloc[i]
        
        time_times = time_diff // unix_minutes
        lat_diff = (tdf['lat'].iloc[i+1] - tdf['lat'].iloc[i]) / time_times
        lng_diff = (tdf['lng'].iloc[i+1] - tdf['lng'].iloc[i]) / time_times
        
        curr_lat = tdf['lat'].iloc[i]
        curr_lng = tdf['lng'].iloc[i]
        
        random_walk = random_state.random(time_times)
        lat_noise = random_state.normal(lat_diff, np.abs(lat_diff/10), time_times+1)
        curr_lat += lat_noise[-1]
        lng_noise = random_state.normal(lng_diff, np.abs(lng_diff/10), time_times+1)
        curr_lng += lng_noise[-1]
        
        for j in range(time_times):
            tmp_dict["uid"].append(uid)
            tmp_dict["time"].append(curr_time)
            tmp_dict["lat"].append(curr_lat)
            tmp_dict["lng"].append(curr_lng)
            
            curr_time += unix_minutes
            if random_walk[j] < 0.5:
                curr_lat += lat_noise[j]
                curr_lng += lng_noise[j]

    df = pd.DataFrame.from_dict(tmp_dict)
    return df

def for_all_user(tdf, random_state, minutes=10):
    df_list = []
    for uid in tdf.groupby('uid').groups.keys():
        df_list.append(gauusian_bridge(tdf[tdf['uid'] == uid], uid, random_state, minutes))
    return pd.concat(df_list, axis=0)

## Server data

In [7]:
agents = 100
seed = 1
minutes = 10

state = np.random.RandomState(seed)
start_locations = list(state.choice(list(range(0,62)), agents, True))
depr = ConstantTimeDensityEPR()

# start the simulation
tdf = depr.generate(start_time, end_time, tessellation, relevance_column='population', n_agents=agents, random_state=seed, show_progress=True, starting_locations=start_locations)

tdf = tdf[tdf['datetime'] >= true_start_time]
tdf['time'] = tdf['datetime'].apply(lambda x: int(x.timestamp()))
tdf['time'] = apply_minute(tdf, minutes=minutes)
tdf = for_all_user(tdf, state, minutes)
tdf[['time', 'lat', 'lng']].to_csv(f'NY-DensityEPR-{minutes}-{seed}-{agents}.csv', index=False)
tdf

  return np.power(x, exponent)
100%|██████████| 100/100 [00:07<00:00, 13.83it/s]
  lat_diff = (tdf['lat'].iloc[i+1] - tdf['lat'].iloc[i]) / time_times
  lng_diff = (tdf['lng'].iloc[i+1] - tdf['lng'].iloc[i]) / time_times
  lat_diff = (tdf['lat'].iloc[i+1] - tdf['lat'].iloc[i]) / time_times
  lng_diff = (tdf['lng'].iloc[i+1] - tdf['lng'].iloc[i]) / time_times
  lat_diff = (tdf['lat'].iloc[i+1] - tdf['lat'].iloc[i]) / time_times
  lng_diff = (tdf['lng'].iloc[i+1] - tdf['lng'].iloc[i]) / time_times
  lat_diff = (tdf['lat'].iloc[i+1] - tdf['lat'].iloc[i]) / time_times
  lng_diff = (tdf['lng'].iloc[i+1] - tdf['lng'].iloc[i]) / time_times
  lat_diff = (tdf['lat'].iloc[i+1] - tdf['lat'].iloc[i]) / time_times
  lng_diff = (tdf['lng'].iloc[i+1] - tdf['lng'].iloc[i]) / time_times
  lat_diff = (tdf['lat'].iloc[i+1] - tdf['lat'].iloc[i]) / time_times
  lng_diff = (tdf['lng'].iloc[i+1] - tdf['lng'].iloc[i]) / time_times


Unnamed: 0,uid,time,lat,lng
0,1,1597881600,40.813568,-74.037325
1,1,1597882200,40.862907,-74.109727
2,1,1597882800,40.897742,-74.192340
3,1,1597883400,40.932384,-74.262690
4,1,1597884000,40.970831,-74.329183
...,...,...,...,...
2011,100,1599088200,40.732907,-74.005670
2012,100,1599088800,40.732907,-74.005670
2013,100,1599089400,41.772838,-75.035869
2014,100,1599090000,41.772838,-75.035869


## Client data

In [None]:
client_size = 1000
agents = 1
seed = 1
minutes = 10

state = np.random.RandomState(seed)

for i in tqdm(range(client_size)):
    start_locations = list(state.choice(list(range(0,62)), agents, True))
    depr = ConstantTimeDensityEPR()
    # start the simulation
    tdf = depr.generate(start_time, end_time, tessellation, relevance_column='population', n_agents=agents, random_state=seed, show_progress=True, starting_locations=start_locations)

    tdf = tdf[tdf['datetime'] >= true_start_time]
    tdf['time'] = tdf['datetime'].apply(lambda x: int(x.timestamp()))
    tdf['time'] = apply_minute(tdf, minutes=minutes)
    tdf = for_all_user(tdf, state, minutes)
    tdf[['time', 'lat', 'lng']].to_csv(f'client/client-{i}-NY-DensityEPR-{minutes}-{seed}-{agents}.csv', index=False)
tdf