# Vehicle loader

In [108]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from more_itertools import chunked
import geopandas as gpd
from shapely.geometry import MultiPoint, Point
import json
import base64
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [109]:
import network as net
n = net.network()
n.set_path('../pop10k-eh4-qsim2-100it/output_network.xml.gz')

n.load_nodes()
n.load_links()
n.join_network()
n.status()
network = n.return_network()

Status:
path: ../pop10k-eh4-qsim2-100it/output_network.xml.gz
links: 72485
nodes: 36047
network: 72485


In [110]:
def return_coords(coords_x, coords_y, i_x, i_y):
    new_coords = [ [-float(x), - float(y)]
                            if not np.isnan(x)
                            else [-float(a), -float(b)] 
                            for x,y,a,b in zip(coords_x, coords_y, i_x, i_y)]
    return new_coords

In [111]:
vehicle_type = "subway"
## load events
vehicles = pd.read_json("./output/events/"+vehicle_type+".json")
print("#:", vehicles.shape[0])
display(vehicles.head(2))
vehicles.sort_values("id", kind="stable", inplace=True)
vehicles.set_index("id", inplace=True)

#: 1750


Unnamed: 0,id,events
0,veh_26306_subway,"[{'event_id': 58386, 'time': 38110.0, 'type': ..."
1,veh_26307_subway,"[{'event_id': 27926, 'time': 46510.0, 'type': ..."


In [112]:
def get_locations(locations, times, show_trip = False): 
    new_locations = []
    last_loc = [-1,-1]
    last_time = -1

    for l,t in zip(locations, times):
        is_close = np.isclose(l, last_loc, rtol=1e-05, atol=1e-08, equal_nan=False)
        #print(is_close)
        if(last_time == -1):
            new_locations.append(l)
        elif (np.isnan(l[0])):
            #print("is nan")
            continue
        elif last_time != -1 and not (is_close[0] and is_close[1]) and  not (t == last_time): # time changed and place changed
            t_diff = int(t - last_time)
            x_diff = l[0] - last_loc[0] #- l[0]
            y_diff = l[1] - last_loc[1] #- l[1]
            i_x = (x_diff/np.float64(t_diff))
            i_y = (y_diff/np.float64(t_diff))
            # for each second of difference
            new_locations.extend([ [last_loc[0] + t*i_x, last_loc[1]+ t*i_y] for t in range(1,t_diff)])

        elif (is_close[0] and is_close[1]) and  not (t == last_time): # time changed but not place
            new_locations.extend([ [l[0],l[1]] for t in range(int(t - last_time))  ])

        last_loc = l
        last_time = t

    if(show_trip):
        G = nx.DiGraph() 
        last_node = -1
        for loc in new_locations:
            G.add_node(str(loc), x=float(loc[0]), y=float(loc[1]))
            if last_node != -1:
                G.add_edge(str(last_node),str(loc))
            last_node = loc
    
        positions = {}
        for idx,node in G.nodes(data=True):
            positions[idx] = [node['x'],node['y']]
    
        nx.draw_networkx_nodes(G, positions, node_color='r', alpha = 0.1, node_size = 10)
        fig_size=[5,5]
        plt.rcParams["figure.figsize"] = fig_size
        plt.axis('equal')
        plt.show()
    return new_locations


In [114]:
def extract_agent(vehicle, verbal=False):
    #display(vehicle.type.unique())
    agent = {}
    trips =[]

    in_trip = False
    in_station = False
    passengers = set()

    times = []
    locations = []
    trip = {}
    last_valid_loc = [-1,-1]

    if("coords_to" in vehicle.columns):
        vehicle = vehicle.drop(columns=["coords_from", "coords_to"])

    vehicle["coords_from"] = return_coords(vehicle.coords_x,vehicle.coords_y, vehicle.x_from, vehicle.y_from)                 
    vehicle["coords_to"] = return_coords(vehicle.coords_x, vehicle.coords_y, vehicle.x_to, vehicle.y_to)

    if verbal:
        G = nx.DiGraph()

    ##sort vehicle by time and type
    vehicle['type'] = pd.Categorical(vehicle['type'], 
        ["VehicleArrivesAtFacility", "PersonLeavesVehicle","PersonEntersVehicle","VehicleDepartsAtFacility"])
    
    vehicle = vehicle.sort_values(["time","type"],kind="stable")

    for e, row in vehicle.iterrows():
        A = row.coords_from
        B = row.coords_to
        time = row.time

        #end trip
        if not in_station and (row.type == "VehicleArrivesAtFacility"):
            #print("arrives")
            in_station = True
            if(verbal):
                if(vehicle.iloc[np.where(vehicle.facility == row.facility)].shape[0] < 2):
                    print("vehicle does not depart facility:", row.faclity)
                lvl = last_valid_loc
                if(lvl[0] != -1):
                    G.add_node(str(lvl), x = float(lvl[0]), y = float(lvl[1]),label=row.link, act_type=150, act_color='y')
            #save trip
            if in_trip and (len(trip) > -1):
                locations.append(B)
                times.append(int(time))
                trip['destination'] = str(row.facility)
                trip['locations'] = locations
                trip['times'] = times
                if(np.isnan(B[0])):
                    trip['to'] = last_valid_loc
                else:
                    trip['to'] = B
                trips.append(trip)
                trip = {}

            locations = []
            times = []
            in_trip = False
            in_station = True

        if (row.type == "PersonEntersVehicle"):
             #check if passenger leaves vehicle
            if(vehicle.iloc[np.where(vehicle.person_id == row.person_id)].shape[0] > 1) and row.person_id.isnumeric():
                passengers.add(int(row.person_id))
            elif row.person_id.isnumeric():
                print("Passenger:", row.person_id,"does not leave vehicle.")

        if (row.type == "PersonLeavesVehicle"):
            if(row.person_id.isnumeric()):
                passengers.remove(int(row.person_id))

        #start trip
        if in_station and (row.type == "VehicleDepartsAtFacility"):
            #print("\tdeparts")
            in_station = False
            if(verbal):
                lvl = last_valid_loc
                if(lvl[0] != -1):
                    G.add_node(str(lvl), x = float(lvl[0]), y = float(lvl[1]), label=row.link, act_type=150, act_color='k')
            #print("trip begins")
            trip = {}
            if(np.isnan(A[0])):
                trip['from'] = last_valid_loc
            else:
                trip['from'] = A
            trip['start'] = int(time)
            locations = []
            times = []
            trip["from"] = str(row.facility)
            trip["passengers"] = list(passengers)
            in_trip = True

        if in_trip and not np.isnan(A[0]):
            times.append(int(time))
            locations.append(A)

        if(in_trip and not in_station and ((row.type == "PersonEntersVehicle") or (row.type == "PersonLeavesVehicle")) and row.person_id.isnumeric()):
            print("Vehicle not in station and there are changes in passenger list",
            row.type, row.person_id, row.time, trip["start"], row.person_id in passengers, passengers)
        
        if(not np.isnan(B[0])):
            last_valid_loc = B

    if(verbal):
        for trip in trips:
            trip['locations'] = get_locations(trip['locations'], trip['times'], False)
            if(len(trip["locations"]) > 2):
                first = trip["locations"][0]
                last = trip["locations"][-1]
                if( first[0] != np.nan):
                    G.add_node(str(first), x=float(first[0]), y=float(first[1]), label=row.link, act_type=80, act_color='blue')

                if( last[0] != np.nan):
                    G.add_node(str(last), x=float(last[0]), y=float(last[1]), label=row.link, act_type=50, act_color='red')

                if(last[0] != np.nan and last[1] != np.nan):
                    G.add_edge(str(first),str(last), mode='k')

                for loc in trip["locations"][1:-1]:
                    if( loc[0] != np.nan):
                        G.add_node(str(loc), x=float(loc[0]), y=float(loc[1]), label="", act_type=10, act_color='g')

        positions = {}
        labels = {}
        for idx,node in G.nodes(data=True):
            positions[idx] = [node['x'],node['y']]
            labels[idx] = node['label']

        nx.draw_networkx_nodes(G, positions, node_color=[u['act_color'] for i,u in G.nodes(data=True)], alpha = 0.5, node_size = [u['act_type'] for i,u in G.nodes(data=True)])
        nx.draw_networkx_labels(G,positions,labels)
        nx.draw_networkx_edges(G, positions, edge_color=[G[u][v]['mode'] for u,v in G.edges()],alpha=0.3, arrows = True)
        fig_size=[15,15]
        plt.rcParams["figure.figsize"] = fig_size
        plt.axis('equal')
        plt.show()
    
    else:
        for trip in trips:
            trip['locations'] = get_locations(trip['locations'], trip['times'], False)

    ## add passengers
    agent["trips"] = trips
    return agent


In [115]:
def prep_agent(row,id, verbal=False):
    #prep agent
    v = pd.DataFrame.from_dict(row["events"])
    #remove left links
    drop_idx = v[
        (v['type'] == "vehicle leaves traffic") | 
        (v['type'] == "vehicle enters traffic") |
        (v['type'] == "left link")
        ].index
    v.drop(drop_idx, inplace=True)
    
    #join links and coordinates
    v = v.join(network.set_index("link"), on='link').fillna(value=np.nan)

    if("coords_to" in v.columns):
        v = v.drop(columns=["coords_from", "coords_to"])
                 
    v["coords_to"] = return_coords(v.coords_x, v.coords_y, v.x_to, v.y_to)
    v['coords_from'] = return_coords(v.coords_x,v.coords_y, v.x_from, v.y_from) 
    v.drop(["from","to","length","event_id","permlanes",'capacity','link_modes','transitLine',
                    'transitRoute','atStop','destinationStop','departure','networkMode','legMode','relativePosition'], axis=1, inplace=True)
    agent = extract_agent(v, verbal)
    agent['id'] = str(id)
    return agent

In [116]:
#TEST
limit = 1
k = 0
for i, row in  vehicles.iterrows():
    print(i)
    agent = prep_agent(row,i,False)
    print("# trips:",len(agent['trips']))
    k+=1
    if(k > limit):
        break

veh_26306_subway
# trips: 16
veh_26307_subway
# trips: 16


In [117]:
def save_output(df, ids, path, verbal = False):
    agents = []
    
    for id in ids:
        #try:
        agent = prep_agent(df.loc[id],id) #extract_agent(pd.DataFrame(df.loc[id].events),verbal)
        if(len(agent)>0):
            out = agent #prepare_agent(agent, verbal)
            agents.append(out)

    with open(path, 'w') as f:
        json.dump(agents, f)
        f.close()

    print("Agents saved to:",path)
    return

In [118]:
# export
def npfloat32_to_buffer(data):
    return base64.b64encode(data.astype(np.float32)).decode('utf-8')


def npint32_to_buffer(data):
    return base64.b64encode(data.astype(np.int32)).decode('utf-8')


# import
def base64_to_type(b64data, type):
    bdata = base64.b64decode(b64data)
    data = np.frombuffer(bdata, dtype=type)
    return data


def base64_to_float32(b64data):
    return base64_to_type(b64data, np.float32)


def base64_to_int32(b64data):
    return base64_to_type(b64data, np.int32)

In [119]:
def prepare_geotrips(agent):
    start_times = []
    passengers = []
    geometries = []
    times = []

    #print("prepping geotrips")
    for trip in agent["trips"]:
        start_times.append(trip['start'])
        passengers.append(npint32_to_buffer(np.array(trip['passengers'])))
        times.append(npint32_to_buffer(np.array(trip['times'])))
        
        geometries.append(MultiPoint([(a[0],a[1]) for a in trip['locations']]))
    
    agent_geotrips = gpd.GeoDataFrame(data={
        'start': start_times,
        'passengers':passengers,
        'times':times,
        'geometry': geometries,
        'vehicle_id': agent['id']})

    return agent_geotrips

In [120]:
def save_output_shp(df, ids, path, verbal = False):
    gdf_trips = gpd.GeoDataFrame(columns=['start','passengers','times','geometry', 'vehicle_id'])
    for id in ids:
        #try:
        agent = prep_agent(df.loc[id],id)
        if(len(agent)>0):
            gdf_trips = gdf_trips.append(prepare_geotrips(agent))

    #reset trip_index
    gdf_trips = gdf_trips.reset_index()
    #save GeoDataFrame as .SHP
    gdf_trips.to_file(filename=path)

    print("Agents saved to:",path)
    return

In [121]:

CHUNK_SIZE = 500
chunk_i = 0

vehicle_types = ["subway","funicular","tram","bus"]

for vehicle_type in vehicle_types[:1]:
    ## load processed events json
    vehicles = pd.read_json("./output/events/"+vehicle_type+".json")
    print(vehicle_type,"#:", vehicles.shape[0])

    vehicles.sort_values("id", kind="stable", inplace=True)
    vehicles.set_index("id", inplace=True)

    index_chunks = chunked(vehicles.index, CHUNK_SIZE)

    for ii in index_chunks:
        #print(len(ii), vehicle_type, chunk_i)
        path =  './output/matsim_vehicles/chunks'+str(CHUNK_SIZE)+'/'+vehicle_type+'_sec_'+str(chunk_i)+'.json'
        path_shp = './output/matsim_vehicles_shp/chunks'+str(CHUNK_SIZE)+'/'+vehicle_type+'_sec_'+str(chunk_i)+'.shp'
        #save_output(vehicles, ii, path)
        save_output_shp(vehicles,ii, path_shp)
        
        chunk_i +=1
        print("chunk saved to:",path)
        if chunk_i > 1:
            break

subway #: 1750
Agents saved to: ./output/matsim_vehicles_shp/chunks500/subway_sec_0.shp
chunk saved to: ./output/matsim_vehicles/chunks500/subway_sec_0.json
Agents saved to: ./output/matsim_vehicles_shp/chunks500/subway_sec_1.shp
chunk saved to: ./output/matsim_vehicles/chunks500/subway_sec_1.json
Agents saved to: ./output/matsim_vehicles_shp/chunks500/subway_sec_2.shp
chunk saved to: ./output/matsim_vehicles/chunks500/subway_sec_2.json
Agents saved to: ./output/matsim_vehicles_shp/chunks500/subway_sec_3.shp
chunk saved to: ./output/matsim_vehicles/chunks500/subway_sec_3.json


In [122]:
data = gpd.GeoDataFrame.from_file('./output/matsim_vehicles_shp/chunks500/subway_sec_0.shp')

In [123]:
data.head()

Unnamed: 0,index,start,passengers,times,vehicle_id,geometry
0,0,38110,,3pQAAN+UAADhlAAA6JQAAO+UAAD0lAAA/JQAAP6UAAD/lA...,veh_26306_subway,"MULTIPOINT (-736300.004 -1045238.981, -736397...."
1,1,38230,,VpUAAFeVAABwlQAAkJUAAKGVAACjlQAAp5UAAK2VAAA=,veh_26306_subway,"MULTIPOINT (-736898.024 -1045942.400, -737024...."
2,2,38395,,+5UAAPyVAAAclgAAKpYAAC6WAAAvlgAANJYAAA==,veh_26306_subway,"MULTIPOINT (-738064.870 -1045302.640, -738163...."
3,3,38525,,fZYAAH6WAACNlgAAoZYAAKiWAACulgAA,veh_26306_subway,"MULTIPOINT (-739037.901 -1044588.514, -739141...."
4,4,38640,,8JYAAPGWAADylgAA9ZYAAAaXAAAVlwAAF5cAAB2XAAA=,veh_26306_subway,"MULTIPOINT (-739993.646 -1044428.301, -740102...."


In [125]:
data.times = data.times.apply(base64_to_int32)
data.head()

Unnamed: 0,index,start,passengers,times,vehicle_id,geometry
0,0,38110,,"[38110, 38111, 38113, 38120, 38127, 38132, 381...",veh_26306_subway,"MULTIPOINT (-736300.004 -1045238.981, -736397...."
1,1,38230,,"[38230, 38231, 38256, 38288, 38305, 38307, 383...",veh_26306_subway,"MULTIPOINT (-736898.024 -1045942.400, -737024...."
2,2,38395,,"[38395, 38396, 38428, 38442, 38446, 38447, 38452]",veh_26306_subway,"MULTIPOINT (-738064.870 -1045302.640, -738163...."
3,3,38525,,"[38525, 38526, 38541, 38561, 38568, 38574]",veh_26306_subway,"MULTIPOINT (-739037.901 -1044588.514, -739141...."
4,4,38640,,"[38640, 38641, 38642, 38645, 38662, 38677, 386...",veh_26306_subway,"MULTIPOINT (-739993.646 -1044428.301, -740102...."


In [3]:
from exporter import Exporter


exporter_m = Exporter("subway",'../pop10k-eh4-qsim2-100it/output_network.xml.gz')

Network status:
path: ../pop10k-eh4-qsim2-100it/output_network.xml.gz
links: 72485
nodes: 36047
network: 72485
Loaded agentssubway #: 1750
