In [1]:
import numpy as np
import pandas as pd
import osmnx as ox
from Data import Data 

In [31]:
in_fname = 'sample.csv'

with open(in_fname, "r") as f:
    temp = f.readlines()
    
rows = temp[1:]

In [33]:
# header includes: id, type, traveled_d, avg_speed
H = 4 # header length

# data includes: lat, lon, speed, lon_acc, lat_acc, time
D = 6 # data length

idx_names = ['id', 'time']
col_names = ['lat', 'lon', 'speed', 'lon_acc', 'lat_acc']


graph = ox.graph_from_address('Athens, Municipality of Athens, Regional Unit of Central Athens, Attica, 10667, Greece')

def process(row_str):
    parts = row_str.strip().strip(";").split(";")
    header = parts[:H]
    data = np.array(parts[H:], dtype=np.float64)
    data = data.reshape(-1, D)
    
    nodes = ox.distance.get_nearest_nodes(graph, data[:, 0], data[:, 1], method=None)
    edges = ox.distance.get_nearest_edges(graph, data[:, 0], data[:, 1], method=None)

    # create MultiIndex from id and time
    timesteps = data[:,-1]
    id_arr = np.full(timesteps.shape, int(header[0].strip()))
    tups = list(zip(id_arr, timesteps))
    mul = pd.MultiIndex.from_tuples(tups, names=idx_names)

    data = data[:,:-1] # exclude time from data
    df = pd.DataFrame(data, columns=col_names, index=mul)
    df = df.assign(
        type=header[1].strip(),
        traveled_d=float(header[2]),
        avg_speed=float(header[3])
    )
    df['Nearest Node'] = nodes
    df['Nearest Edge First NodeID'] =  [edge[0] for edge in edges]
    df['Nearest Edge Second NodeID'] =  [edge[1] for edge in edges] 
    return df

process(rows[0])

#print(process(rows[0]))


[[ 3.7977482e+01  2.3735405e+01  3.4020600e+01 -1.6700000e-02
   7.4000000e-03  0.0000000e+00]
 [ 3.7977485e+01  2.3735402e+01  3.4019700e+01  4.0000000e-03
   8.6000000e-03  4.0000000e-02]
 [ 3.7977488e+01  2.3735400e+01  3.4021100e+01  1.6700000e-02
   9.7000000e-03  8.0000000e-02]
 ...
 [ 3.7979884e+01  2.3733393e+01  2.5743400e+01 -2.0500000e-02
  -2.4870000e-01  3.2520000e+01]
 [ 3.7979886e+01  2.3733391e+01  2.5741200e+01 -1.9300000e-02
  -2.3280000e-01  3.2560000e+01]
 [ 3.7979888e+01  2.3733389e+01  2.5739000e+01 -1.8600000e-02
  -2.1630000e-01  3.2600000e+01]]


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,speed,lon_acc,lat_acc,type,traveled_d,avg_speed,Nearest Node,Nearest Edge First NodeID,Nearest Edge Second NodeID
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,0.00,37.977482,23.735405,34.0206,-0.0167,0.0074,Taxi,320.26,35.366264,7813658489,7813658232,7813658489
1,0.04,37.977485,23.735402,34.0197,0.0040,0.0086,Taxi,320.26,35.366264,7813658489,7813658232,7813658489
1,0.08,37.977488,23.735400,34.0211,0.0167,0.0097,Taxi,320.26,35.366264,7813658489,7813658232,7813658489
1,0.12,37.977491,23.735398,34.0245,0.0294,0.0108,Taxi,320.26,35.366264,7813658489,7813658232,7813658489
1,0.16,37.977494,23.735395,34.0305,0.0540,0.0120,Taxi,320.26,35.366264,7813658489,7813658232,7813658489
1,...,...,...,...,...,...,...,...,...,...,...,...
1,32.44,37.979880,23.733397,25.7480,-0.0222,-0.2716,Taxi,320.26,35.366264,7813658489,7813658232,7813658489
1,32.48,37.979882,23.733395,25.7457,-0.0218,-0.2617,Taxi,320.26,35.366264,7813658489,7813658232,7813658489
1,32.52,37.979884,23.733393,25.7434,-0.0205,-0.2487,Taxi,320.26,35.366264,7813658489,7813658232,7813658489
1,32.56,37.979886,23.733391,25.7412,-0.0193,-0.2328,Taxi,320.26,35.366264,7813658489,7813658232,7813658489


In [24]:
multi_index_df = Data('sample_larger.csv').df

In [25]:
multi_index_df

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,speed,lon_acc,lat_acc,type,traveled_d,avg_speed
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0.00,37.977482,23.735405,34.0206,-0.0167,0.0074,Taxi,320.26,35.366264
1,0.04,37.977485,23.735402,34.0197,0.0040,0.0086,Taxi,320.26,35.366264
1,0.08,37.977488,23.735400,34.0211,0.0167,0.0097,Taxi,320.26,35.366264
1,0.12,37.977491,23.735398,34.0245,0.0294,0.0108,Taxi,320.26,35.366264
1,0.16,37.977494,23.735395,34.0305,0.0540,0.0120,Taxi,320.26,35.366264
...,...,...,...,...,...,...,...,...,...
498,117.24,37.976108,23.734443,15.3720,-1.6492,-0.9860,Car,141.47,8.872855
498,117.28,37.976107,23.734441,15.1933,-1.5682,-0.9499,Car,141.47,8.872855
498,117.32,37.976107,23.734440,15.0228,-1.4310,-0.9036,Car,141.47,8.872855
498,117.36,37.976107,23.734438,14.8672,-1.2553,-0.8471,Car,141.47,8.872855
