In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from collections import Counter
import os
import datetime
import warnings
warnings.filterwarnings("ignore")

In [2]:
def get_base_info(x):
    return [i.split(":")[-1] for i in x.split(" ")]

def get_road_speed(x):
    return np.array([i.split(",")[0] for i in x],dtype='float16')

def get_eta_speed(x):
    return np.array([i.split(",")[1] for i in x],dtype="float16")

def get_road_state(x):
    return np.array([i.split(",")[2] for i in x])

def get_cnt(x):
    return np.array([i.split(",")[3] for i in x],dtype="int16")

In [3]:
def get_features(input_file_path_his, input_file_path_attr,input_file_path_topo, mode):
    # his
    df = pd.read_csv(input_file_path_his, sep=";", header=None)
    df["link"] = df[0].apply(lambda x: x.split(" ")[0]).astype(int)
    df["label"] = df[0].apply(lambda x: x.split(" ") [1]).astype(int)
    df["current_slice_id"] = df[0].apply(lambda x: x.split(" ")[2]).astype(int)
    df["future_slice_id"] = df[0].apply(lambda x: x.split(" ")[3]).astype(int)
    df["time_diff"] = df["future_slice_id"] - df["current_slice_id"]
    df = df.drop([0], axis=1)

    if mode == "dataset":
        df["label"] = df["label"].map(lambda x: 3 if x >= 3 else x)
        df['label'] -= 1
    else:
        df = df.drop(["label"], axis=1)

    df["current_state_last"] = df[1].apply(lambda x: x.split(" ")[-1].split(":")[-1])
    df["current_speed"] = df["current_state_last"].apply(lambda x: x.split(",")[0])
    df["current_eat_speed"] = df["current_state_last"].apply(lambda x: x.split(",")[1])
    df["current_state"] = df["current_state_last"].apply(lambda x: x.split(",")[2])
    df["current_count"] = df["current_state_last"].apply(lambda x: x.split(",")[3])
    df = df.drop(["current_state_last"], axis=1)
    for i in tqdm(range(1, 6, 1)):
        flag = f"his_{(6-i)*7}"
        df["history_info"] = df[i].apply(get_base_info)

        # road speed
        df["his_speed"] = df["history_info"].apply(get_road_speed)
        df[f'{flag}_speed_mean'] = df["his_speed"].apply(lambda x: x.mean())

        # eta speed
        df["his_eta"] = df["history_info"].apply(get_eta_speed)
        df[f"{flag}_eta_mean"] = df["his_eta"].apply(lambda x: x.mean())

        # road state
        df["his_state"] = df["history_info"].apply(get_road_state)
        df[f"{flag}_state_max"] = df["his_state"].apply(lambda x: Counter(x).most_common()[0][0])
        df[f"{flag}_state_min"] = df["his_state"].apply(lambda x: Counter(x).most_common()[-1][0])

        # cnt: car number count
        df["his_cnt"] = df["history_info"].apply(get_cnt)
        df[f"{flag}_cnt_mean"] = df["his_cnt"].apply(lambda x: x.mean())
        df = df.drop([i, "history_info", "his_speed", "his_eta", "his_state", "his_cnt"], axis=1)
        # break

    df2 = pd.read_csv(input_file_path_attr, sep='\t',
                       names=['link', 'length', 'direction', 'path_class', 'speed_class',
                              'LaneNum', 'speed_limit', 'level', 'width'], header=None)
    df = df.merge(df2, on='link', how='left')

    if mode =="dataset":
        output_file_path =f"./input/{mode}_{input_file_path_his.split('/')[-1].split('.')[0]}" +".csv"
        df.to_csv(output_file_path,index =False,mode='w', header=True)

    else:
        output_file_path=f"./input/{input_file_path_his.split('/')[-1].split('.')[0]}" +".csv"
        df.to_csv(output_file_path,index = False,mode='w', header=True)

In [4]:
if __name__ =="__main__":
    print(datetime.datetime.now())
    # dataset
    get_features(input_file_path_his="input/traffic/merge_1/merged_2d.txt",\
                input_file_path_attr="input/attr.txt",\
                input_file_path_topo="input/topo.txt",mode="dataset")
    # test set
    get_features(input_file_path_his="input/test/test.txt",\
                #input_file_path_attr="input/attr.txt",\
                #input_file_path_topo="input/topo.txt",mode="test")
    print(datetime.datetime.now())

2020-12-14 13:23:07.584665


100%|██████████| 5/5 [43:03<00:00, 516.63s/it]


2020-12-14 14:14:27.581585
