In [None]:
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET

In [None]:
import os
import json
import warnings
from multiprocessing import Pool
from datetime import datetime
import gc

warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
output_path = "./../output/population/"
agents_path = "./../output/agents/"

In [None]:
dtypes = {
    "Unnamed: 0" : np.float64,
    "time": np.float64,
    "type": str,
    "driverId": str,
    "vehicleId": str,
    "transitLineId": str,
    "transitRouteId": str,
    "departureId": str,
    "person": str,
    "link": str,
    "legMode": 'category', #category
    "vehicle": str,
    "networkMode": str, #category
    "relativePosition": np.float64,
    "facility": str,
    "delay": np.float64,
    "x": np.float64,
    "y": np.float64,
    "actType": str,
    "computationalRoutingMode": str,
    "distance" : np.float64,
    "mode": str,
    "agent": str,
    "atStop": str
}


In [None]:


#reading all events at once, will be an issue for 100k+ population
def read_csv(path):
    #print("Reading", path)
    return pd.read_csv(path, dtype=dtypes)


args = list()

for csv in os.listdir(output_path):
    args.append(output_path+csv)

print("Files prepared:", len(args), "files")

In [None]:

def load_agent_events(row):
    event = {}
    event["event_id"] = row[0]
    event["time"] = row["time"]
    event["type"] = row["type"]
    event["link"] = row["link"]
    event["vehicle_id"] = row["vehicle"]
    event["delay"] = row["delay"]
    event["actType"] = row["actType"]
    event["legMode"] = row["legMode"]
    event["coords_x"] = row["x"]
    event["coords_y"] = row["y"]
    return event

def load_vehicle_events(row, vehicle_type):
    event = {}
    event["event_id"] = row[0]
    event["time"] = row["time"]
    event["type"] = row["type"]
    event["link"] = row["link"]
    event["person_id"] = row["person"]
    event["delay"] = row["delay"]
    event["facility"] = row['facility']
    if isinstance(row['facility'],str): 
        event['link'] = row['facility'].split(":")[-1]

    event["networkMode"] = row['networkMode']
    event["relativePosition"] = row['relativePosition']
    event["actType"] = row["actType"]
    event["legMode"] = row["legMode"]
    event["coords_x"] = row["x"]
    event["coords_y"] = row["y"]

    if(vehicle_type != "car"):
        if(event["type"] == "TransitDriverStarts"):
            event["transitLine"] = row['transitLineId']
            event["transitRoute"] = row['transitRouteId'] ## add to output
        event["departure"] = row['departureId']
        event["atStop"] = row["atStop"]
        event["destinationStop"] = row["destinationStop"]
    return event



In [64]:
def save_chunk(path,file, chunk):
    #check if exists
    if not os.path.exists(path):
        os.makedirs(path)
    #open load and append
    with open(path+file, 'w') as f:
        if(os.path.getsize(path+file) == 0):
            json.dump(chunk,f)
        else:
            saved = json.load(f)
            saved["events"].extend(chunk["events"])
            json.dump(saved,f)



def load_agents_from_population(path):
    
    events = pd.read_csv(path, dtype=dtypes)
    agents = {}
    vehicles = {}

    for i,event in events.iterrows():
        #print(event)
        #person event
        if event['person'] == event['person'] and event.person.isnumeric():
            #print(event.person)
            loaded = load_agent_events(event)
            agent_id = event["person"]

            if(agent_id in agents.keys()):
                agents[agent_id]["events"].append(loaded)
                if len(agents[agent_id]["events"]) >= 50:
                    #append to file
                    save_chunk(agents_path+"/agent","/"+str(agent_id)+".json", a )

                    #check if exists
                    if not os.path.exists(agents_path+"/agent"):
                        os.makedirs(agents_path+"/agent")

                    #open load and append
                    with open(agents_path+"/agent/"+str(agent_id)+".json", 'w') as f:
                        if(os.path.getsize(agents_path+"/agent/"+str(agent_id)+".json") == 0):
                            json.dump(agents[agent_id],f)
                        else:
                            saved = json.load(f)
                            saved["events"].extend(agents[agent_id]["events"])
                            json.dump(saved,f)

                    agents[agent_id]["events"] = []

            else:
                agents[agent_id] = { "id": agent_id, "events":[] }


        #vehicle event
        #print(event['vehicle'], event.person)
        if event.vehicle == event.vehicle: # or (event.person == event.person and not event.person.isnumeric()):
            #print(event)
            vehicle_type = ""
            if event.vehicle == event.vehicle  and (event.vehicle.isnumeric()):
                vehicle_type = "car"
            elif event.vehicle == event.vehicle :
                vehicle_type = event["vehicle"].split('_')[-1]
            else:
                pass

            vehicle_id = event["vehicle"]
            try:
                loaded = load_vehicle_events(event, vehicle_type)
            except KeyError:
                print(event)

            if(not vehicle_type in vehicles.keys()):
                vehicles[vehicle_type] = {}


            if(vehicle_id in vehicles[vehicle_type].keys()):
                vehicles[vehicle_type][vehicle_id]["events"].append(loaded)
                if len(vehicles[vehicle_type][vehicle_id]["events"]) >= 50:
                    #append to file

                    #check if exists
                    if not os.path.exists(agents_path+"/"+vehicle_type):
                        os.makedirs(agents_path+"/"+vehicle_type)

                    #open load and append
                    with open(agents_path+"/"+vehicle_type+"/"+str(vehicle_id)+".json", 'w') as f:
                        if(os.path.getsize(agents_path+"/"+vehicle_type+"/"+str(vehicle_id)+".json")== 0):
                            json.dump(vehicles[vehicle_type][vehicle_id],f)
                        else:
                            saved = json.load(f)
                            saved["events"].extend(vehicles[vehicle_type][vehicle_id]["events"])
                            json.dump(saved,f)

                    vehicles[vehicle_type][vehicle_id]["events"] = []

            else:
                vehicles[vehicle_type][vehicle_id] = { "id": vehicle_id, "events":[] }


    for agent_id in agents.keys():
        #check if exists
        if not os.path.exists(agents_path+"/agent"):
            os.makedirs(agents_path+"/agent")

        #open load and append
        with open(agents_path+"/agent/"+str(agent_id)+".json", 'w') as f:
            if(os.path.getsize(agents_path+"/agent/"+str(agent_id)+".json") == 0):
                json.dump(agents[agent_id],f)
            else:
                saved = json.load(f)
                saved["events"].extend(agents[agent_id]["events"])
                json.dump(saved,f)

    for vehicle_type in vehicles.keys():
        for vehicle_id in vehicles[vehicle_type].keys():
            #check if exists
            if not os.path.exists(agents_path+"/"+vehicle_type):
                os.makedirs(agents_path+"/"+vehicle_type)

            #open load and append
            with open(agents_path+"/"+vehicle_type+"/"+str(vehicle_id)+".json", 'w') as f:
                if(os.path.getsize(agents_path+"/"+vehicle_type+"/"+str(vehicle_id)+".json") == 0):
                    json.dump(vehicles[vehicle_type][vehicle_id],f)
                else:
                    saved = json.load(f)
                    saved["events"].extend(vehicles[vehicle_type][vehicle_id]["events"])
                    json.dump(saved,f)

    return
    

In [65]:
for csv in args[:]:
    load_agents_from_population(csv)
    

Unnamed: 0                             0.0
time                               76467.0
type                          entered link
link                                 16064
vehicle                     veh_17728_tram
facility                               NaN
delay                                  NaN
person                                 NaN
networkMode                            NaN
relativePosition                       NaN
legMode                                NaN
distance                               NaN
mode                                   NaN
x                                      NaN
y                                      NaN
actType                                NaN
driverId                               NaN
vehicleId                              NaN
transitLineId                          NaN
transitRouteId                         NaN
departureId                            NaN
computationalRoutingMode               NaN
Name: 0, dtype: object
Unnamed: 0                     

UnboundLocalError: local variable 'loaded' referenced before assignment