In [21]:
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET
import os
import shutil
import json
import warnings
from multiprocessing import Pool
from datetime import datetime
import gc

In [22]:
# Notebook-wide settings: silence all warnings and let pandas render every
# column of a DataFrame instead of truncating wide frames.
warnings.filterwarnings(action='ignore')
pd.set_option('display.max_columns', None)

In [23]:
# Data locations, given relative to the working directory of the notebook.
# `output_path` is not referenced in the cells visible here.
output_path = "./../output/population/"
# Input: every entry of this directory is listed and treated as one agent type.
agents_path = "./../output/agents/"
# Output: merged chunk files and the per-type `<agent_type>_map.json` files.
events_path = "./../output/events/"

def clear_directory(directory):
    """Leave ``directory`` existing and empty.

    An already existing directory is removed together with all of its
    contents and then created again; a missing one (including missing
    parents) is simply created.
    """
    already_there = os.path.exists(directory)
    if already_there:
        # Wipe the whole tree so nothing from an earlier run survives.
        shutil.rmtree(directory)
    os.makedirs(directory)

# Start from an empty events directory: anything written by an earlier run is
# deleted before new chunk files are produced.
clear_directory(events_path)

# Divisor used to derive how many chunks the files of one agent type are split
# into (roughly the number of agent files merged into one output file).
CHUNK_SIZE = 250

In [24]:
def concat_files(args):
    """Merge one chunk of line-delimited JSON agent files into a single file.

    Written as a ``Pool.map`` worker, hence the single packed argument.

    Parameters
    ----------
    args : sequence
        ``(path, files, chunk_i, agent_type)`` where ``path`` is the directory
        holding the input files, ``files`` is an iterable of file names inside
        ``path``, ``chunk_i`` is the index of this chunk and ``agent_type``
        names the sub-folder of ``events_path`` the result is written to.

    Returns
    -------
    dict
        For every agent type other than ``"agent"``: a mapping from each
        file's id (its name up to the first ``.``) to ``chunk_i``.
        Always empty for ``"agent"``.

    Side effects
    ------------
    Writes ``<events_path>/<agent_type>/<chunk_i>.json`` (JSON lines). An
    empty ``files`` produces an empty output instead of raising.
    """
    path, files, chunk_i, agent_type = args

    chunk_map = {}
    if agent_type != "agent":
        # Extract ids: drop any directory prefix and everything after the
        # first dot of the file name.
        for f in files:
            chunk_map[f.split('/')[-1].split('.')[0]] = chunk_i

    # Read every file first and concatenate once: DataFrame.append no longer
    # exists in pandas >= 2.0 and growing a frame inside a loop is quadratic.
    frames = [
        pd.read_json(os.path.join(path, file), lines=True, orient='records')
        for file in files
    ]
    # pd.concat rejects an empty list, so fall back to an empty frame.
    df = pd.concat(frames) if frames else pd.DataFrame()
    del frames  # release the per-file frames before serialising

    # exist_ok avoids the check-then-create race between parallel workers
    # that all target the same agent-type directory.
    out_dir = os.path.join(events_path, agent_type)
    os.makedirs(out_dir, exist_ok=True)

    df.to_json(os.path.join(out_dir, str(chunk_i)+".json"), lines=True, orient='records')
    return chunk_map
        

In [25]:
# Merge the per-agent files of every agent type into chunk files. The chunks
# of one agent type are processed in parallel by `concat_files`.
# NOTE(review): Pool workers need to be able to resolve `concat_files`; with
# the "spawn" start method a function defined in a notebook cell may not be
# importable by the workers - confirm on the target platform.
folders = sorted(os.listdir(agents_path))

for agent_type in folders:
    agent_dir = agents_path+agent_type
    # Only sub-directories hold agent files; listing a plain file would raise.
    if not os.path.isdir(agent_dir):
        continue

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # file -> chunk assignment reproducible between runs.
    files = np.array(sorted(os.listdir(agent_dir)))

    if len(files) > 0:
        # Integer ceiling division: the smallest number of chunks such that
        # no chunk holds more than CHUNK_SIZE files.
        chunks_num = (len(files) + CHUNK_SIZE - 1) // CHUNK_SIZE
        file_chunks = np.array_split(files, chunks_num)

        # One packed argument per chunk; plain Python lists/strings are
        # handed to the workers.
        args = [
            [agent_dir+"/", chunk.tolist(), i, agent_type]
            for i, chunk in enumerate(file_chunks)
        ]

        # os.cpu_count() may return None when the count cannot be determined.
        workers = min(chunks_num, os.cpu_count() or 1)
        # Leaving the `with` block shuts the pool down; no explicit
        # close()/join() is required after map() has returned.
        with Pool(workers) as pool:
            results = pool.map(concat_files, args)
    else:
        # Empty folder: np.array_split and Pool both reject a size of 0.
        results = []

    if agent_type != "agent":
        # Combine the per-chunk id -> chunk index maps of all workers.
        transport_map = {}
        for r in results:
            transport_map.update(r)
        # Save the combined map next to the chunk folders.
        with open(events_path+agent_type+'_map.json', 'w') as f:
            json.dump(transport_map, f)
