In [1]:
import json
import os

import numpy as np
import pandas as pd


In [4]:
# Directory (and file-name prefix) under which every generated file is written.
outputdir = 'output/origin_data'

dataurl = 'input/origin_data.csv'
dataname = outputdir+'/origin_data'

# Create the output directory up front: the writes further down (to_csv / open)
# do not create missing parent directories, so a fresh checkout would fail.
os.makedirs(outputdir, exist_ok=True)

# Read origin data
data = pd.read_csv(dataurl)

# Group data by timestamp; the later stages iterate over these per-timestamp groups.
grouped = data.groupby(['timestamp'])

# Assign unique geo_ids to (grid_x, grid_y) pairs, numbered from 0 in the order
# the pairs are first seen while walking the timestamp groups.
# The two coordinate columns are iterated directly rather than via iterrows(),
# which constructs a full Series for every row just to read two fields.
geo_ids = {}
for _, group_data in grouped:
    for grid_x, grid_y in zip(group_data['grid_x'], group_data['grid_y']):
        if (grid_x, grid_y) not in geo_ids:
            # Ids are dense, so the next free id is the current dict size.
            geo_ids[(grid_x, grid_y)] = len(geo_ids)
# Next unused id; equals the number of distinct cells found.
current_geo_id = len(geo_ids)

# Create geo file: one 'Point' record per distinct grid cell, with the cell's
# (grid_x, grid_y) rendered as a bracketed, comma-separated coordinate string.
geo_rows = [
    [cell_id, 'Point', '[' + ', '.join(map(str, cell)) + ']']
    for cell, cell_id in geo_ids.items()
]
geo = pd.DataFrame(geo_rows, columns=['geo_id', 'type', 'coordinates'])
geo.to_csv(dataname+'.geo', index=False)

# Create rel file: one self-referencing 'geo' relation with cost 1 per timestamp
# group, using the geo_id of the first row in that group.
# NOTE(review): rel_id reuses that geo_id, so two timestamp groups whose first
# row falls in the same cell emit identical rows with the same rel_id --
# confirm whether this is intended.
first_rows = (group_data.iloc[0] for _, group_data in grouped)
origin_ids = [geo_ids[(first['grid_x'], first['grid_y'])] for first in first_rows]
rel = pd.DataFrame(
    [[cell_id, 'geo', cell_id, cell_id, 1] for cell_id in origin_ids],
    columns=['rel_id', 'type', 'origin_id', 'destination_id', 'cost'],
)
rel.to_csv(dataname+'.rel', index=False)

# Create dyna file: one 'state' record per input row, streamed straight to disk
# in timestamp-group order with a running dyna_id starting at 0.
dyna_id = 0
# The context manager closes the file even if a row fails part-way through.
with open(dataname+'.dyna', 'w') as dyna_file:
    dyna_file.write('dyna_id,type,time,entity_id,traffic_speed\n')
    for _, group_data in grouped:
        row_fields = zip(group_data['timestamp'], group_data['grid_x'],
                         group_data['grid_y'], group_data['num'])
        for timestamp, grid_x, grid_y, num in row_fields:
            # Each record carries the geo_id of its own grid cell. Reusing the
            # group's first row for every record would attribute all cells'
            # values at this timestamp to a single entity.
            entity_id = geo_ids[(grid_x, grid_y)]
            # Drop the last three characters of the timestamp and append 'Z'.
            # NOTE(review): presumably strips a trailing ':SS' -- confirm
            # against the input timestamp format.
            time_str = timestamp[:-3] + 'Z'
            dyna_file.write(','.join([str(dyna_id), 'state', str(time_str),
                                      str(entity_id), str(num)]) + '\n')
            dyna_id += 1

# Create config file: a JSON description of the generated .geo/.rel/.dyna files
# (record types and their columns) plus an 'info' section of dataset settings.
# Key order matches the original assignments so the emitted JSON is unchanged.
config = {
    'geo': {
        'including_types': ['Point'],
        'Point': {},
    },
    'rel': {
        'including_types': ['geo'],
        'geo': {'cost': 'num'},
    },
    'dyna': {
        'including_types': ['state'],
        'state': {'entity_id': 'geo_id', 'traffic_speed': 'num'},
    },
    'info': {
        'data_col': 'traffic_speed',
        'weight_col': 'cost',
        'data_files': ['origin_data'],
        'geo_file': 'origin_data',
        'rel_file': 'origin_data',
        # Restored from a garbled literal ("1(cont.)") that did not parse.
        'output_dim': 1,
        # NOTE(review): presumably seconds between consecutive records -- confirm.
        'time_intervals': 300,
        'init_weight_inf_or_zero': 'inf',
        'set_weight_link_or_dist': 'dist',
        'calculate_weight_adj': True,
        'weight_adj_epsilon': 0.1,
    },
}
# Write through a context manager so the handle is flushed and closed
# deterministically instead of being left to garbage collection.
with open(outputdir+'/config.json', 'w', encoding='utf-8') as config_file:
    json.dump(config, config_file, ensure_ascii=False)

Unnamed: 0,vID,tripDay,timestamp,lon,lat
0,13331,20211001,2021-10-01 01:32:29,134.127721,34.645477
1,13331,20211001,2021-10-01 11:23:06,133.582159,34.535361
2,15654,20211001,2021-10-01 05:46:38,133.468763,34.514123
3,15654,20211001,2021-10-01 13:46:32,133.794252,34.693720
4,15654,20211001,2021-10-01 16:13:16,133.794362,34.693800
...,...,...,...,...,...
7398465,5257041,20211031,2021-10-31 17:40:06,132.481984,34.395539
7398466,5257868,20211031,2021-10-31 15:25:15,132.397730,34.415830
7398467,5272247,20211031,2021-10-31 12:40:18,132.325772,34.353216
7398468,5282988,20211031,2021-10-31 10:41:05,132.088970,34.416080
