In [1]:
import pandas as pd
import pickle
import networkx as nx
import numpy as np

from tqdm import tqdm

## 1. Get Upstream and Downstream TMC Segments

In [2]:
# Load the TMC attribute table and switch to the column names used in the rest of the notebook.
tmc_attr = pd.read_csv("../../data/shapefiles/tmc_attr.csv").rename(
    columns={"Tmc": "id_tmc", "Miles": "miles_tmc"}
)

In [3]:
# Length lookup keyed by TMC id: {id_tmc: {"miles_tmc": <value of the "Miles" column>}}.
df_tmc_miles = tmc_attr.loc[:, ["id_tmc", "miles_tmc"]]
miles_tmc = df_tmc_miles.set_index("id_tmc").to_dict(orient="index")

# Persist the lookup so it can be reloaded without the attribute CSV.
with open("../../data/tmc_miles.pkl", "wb") as f:
    pickle.dump(miles_tmc, f)

In [4]:
def read_shp(path):
    """Load the shapefile at ``path`` as a networkx graph.

    Thin wrapper around ``nx.read_shp`` that always passes ``simplify=True``.
    """
    return nx.read_shp(path, simplify=True)

def build_tmc_dict(net):
    """Group the edges of ``net`` by the TMC id stored in each edge's ``'id'`` attribute.

    Returns a dict mapping every TMC id to the list of edge keys carrying that id,
    in the order ``net.edges`` yields them.
    """
    edges_by_tmc = {}
    for edge in net.edges:
        edges_by_tmc.setdefault(net.edges[edge]['id'], []).append(edge)
    return edges_by_tmc

def get_upstream_tmc(tmc2edge, tmc_id, miles_tmc, net, n=1, d=5):
    """Collect the TMC segments found by a breadth-first search starting at ``tmc_id``.

    For every edge ``(end_node, bgn_node)`` of ``tmc_id`` the search starts at
    ``bgn_node`` (``nx.bfs_edges`` with ``depth_limit=n``). Newly seen TMC ids are
    collected until their accumulated length has used up the budget ``d``.

    Parameters
    ----------
    tmc2edge : dict
        TMC id -> list of edge keys of ``net`` (the mapping ``build_tmc_dict`` returns).
    tmc_id : hashable
        TMC whose neighbours are wanted; must be a key of ``tmc2edge``.
    miles_tmc : dict
        TMC id -> ``{"miles_tmc": length}``. Segments missing from it fall back to a
        length derived from their edge coordinates.
    net : graph
        Graph whose edges carry the TMC id in their ``'id'`` attribute.
    n : int
        Depth limit handed to ``nx.bfs_edges``.
    d : number
        Length budget. It applies separately to each edge of ``tmc_id``.

    Returns
    -------
    list
        The collected TMC ids (built from a set, so the order is unspecified).
    """
    out = set()
    for (_, bgn_node) in tmc2edge[tmc_id]:
        # Every edge starts with the full budget. The caller's ``d`` must never be
        # overwritten below, otherwise later edges would start from a segment length.
        dist = d
        for e in nx.bfs_edges(net, bgn_node, depth_limit=n):
            prev_tmc = net.edges[e]['id']

            # avoid repeating segments or tmc_id itself
            if prev_tmc == tmc_id or prev_tmc in out:
                continue
            out.add(prev_tmc)

            # length of the prev_tmc segment
            if prev_tmc in miles_tmc:
                seg_len = miles_tmc[prev_tmc]["miles_tmc"]
            else:
                # No recorded length: sum the straight-line distances between the end
                # points of prev_tmc's edges.
                # NOTE(review): this is in the coordinate units of the graph nodes,
                # which may not be miles - confirm.
                seg_len = 0
                for end, begin in tmc2edge[prev_tmc]:
                    seg_len += ((begin[0]-end[0])**2 + (begin[1]-end[1])**2)**(0.5)
                # NOTE(review): the divisor is the edge count of tmc_id, not of
                # prev_tmc; kept as in the original - confirm which was intended.
                seg_len /= len(tmc2edge[tmc_id])
            dist -= seg_len

            # The segment that exhausts the budget is still included.
            if dist <= 0:
                break
    return list(out)

def get_downstream_tmc(tmc2edge, tmc_id, miles_tmc, net, n=1, d=5):
    """Collect the TMC segments found by a reversed breadth-first search from ``tmc_id``.

    For every edge ``(end_node, bgn_node)`` of ``tmc_id`` the search starts at
    ``end_node`` (``nx.bfs_edges`` with ``depth_limit=n`` and ``reverse=True``). Newly
    seen TMC ids are collected until their accumulated length has used up the
    budget ``d``.

    Parameters
    ----------
    tmc2edge : dict
        TMC id -> list of edge keys of ``net`` (the mapping ``build_tmc_dict`` returns).
    tmc_id : hashable
        TMC whose neighbours are wanted; must be a key of ``tmc2edge``.
    miles_tmc : dict
        TMC id -> ``{"miles_tmc": length}``. Segments missing from it fall back to a
        length derived from their edge coordinates.
    net : graph
        Graph whose edges carry the TMC id in their ``'id'`` attribute.
    n : int
        Depth limit handed to ``nx.bfs_edges``.
    d : number
        Length budget. It applies separately to each edge of ``tmc_id``.

    Returns
    -------
    list
        The collected TMC ids (built from a set, so the order is unspecified).
    """
    out = set()
    for (end_node, _) in tmc2edge[tmc_id]:
        # Every edge starts with the full budget. The caller's ``d`` must never be
        # overwritten below, otherwise later edges would start from a segment length.
        dist = d
        for e in nx.bfs_edges(net, end_node, depth_limit=n, reverse=True):
            # The reversed search yields edges flipped; flip them back to look them up.
            r_e = (e[1], e[0])
            nxt_tmc = net.edges[r_e]['id']

            # avoid repeating segments or tmc_id itself
            if nxt_tmc == tmc_id or nxt_tmc in out:
                continue
            out.add(nxt_tmc)

            # length of the nxt_tmc segment
            if nxt_tmc in miles_tmc:
                seg_len = miles_tmc[nxt_tmc]["miles_tmc"]
            else:
                # No recorded length: sum the straight-line distances between the end
                # points of nxt_tmc's edges.
                # NOTE(review): this is in the coordinate units of the graph nodes,
                # which may not be miles - confirm.
                seg_len = 0
                for end, begin in tmc2edge[nxt_tmc]:
                    seg_len += ((begin[0]-end[0])**2 + (begin[1]-end[1])**2)**(0.5)
                # NOTE(review): the divisor is the edge count of tmc_id, not of
                # nxt_tmc; kept as in the original - confirm which was intended.
                seg_len /= len(tmc2edge[tmc_id])
            dist -= seg_len

            # The segment that exhausts the budget is still included.
            if dist <= 0:
                break
    return list(out)

def get_neighbor_tmc(tmc_id, net, n=5, miles_tmc=None, d=5):
    """Return the upstream and downstream neighbours of ``tmc_id`` as one array.

    Parameters
    ----------
    tmc_id : hashable
        TMC whose neighbours are wanted.
    net : graph
        Graph whose edges carry the TMC id in their ``'id'`` attribute.
    n : int
        Depth limit forwarded to both searches.
    miles_tmc : dict, optional
        TMC id -> ``{"miles_tmc": length}``. When omitted, an empty mapping is used,
        so every segment length comes from the coordinate-based fallback of
        ``get_upstream_tmc`` / ``get_downstream_tmc``.
    d : number
        Length budget forwarded to both searches.

    Returns
    -------
    numpy.ndarray
        Upstream ids followed by downstream ids.
    """
    if miles_tmc is None:
        miles_tmc = {}
    tmc2edge = build_tmc_dict(net)
    # miles_tmc must sit in the third positional slot; leaving it out shifts ``net``
    # and ``n`` into the wrong parameters of the search helpers.
    up = get_upstream_tmc(tmc2edge, tmc_id, miles_tmc, net, n=n, d=d)
    dn = get_downstream_tmc(tmc2edge, tmc_id, miles_tmc, net, n=n, d=d)
    return np.concatenate((up, dn))

In [5]:
# The paths below are relative, so mind the working directory.
# If importing osgeo/gdal fails, cd to pipeline_v2/data_preprocessing inside Traffic-Prediction:
# gdal was installed with pip from inside that directory.
net = read_shp(path='../../data/shapefiles/tmc_shape_cranberry/cranberry.shp')
tmc2edge = build_tmc_dict(net=net)

  net = nx.read_shp(path, simplify=True)


In [6]:
# Neighbour lists for every TMC in the network, searched with depth limit n=1 and budget d=5.
prev_tmc, next_tmc = {}, {}
for tmc in tqdm(tmc2edge):
    prev_tmc[tmc] = get_upstream_tmc(
        tmc2edge=tmc2edge, tmc_id=tmc, miles_tmc=miles_tmc, net=net, n=1, d=5
    )
    next_tmc[tmc] = get_downstream_tmc(
        tmc2edge=tmc2edge, tmc_id=tmc, miles_tmc=miles_tmc, net=net, n=1, d=5
    )

100%|██████████| 315/315 [00:00<00:00, 132292.56it/s]


In [44]:
# Persist both TMC neighbour lookups, one pickle file each.
for pkl_path, tmc_neighbors in (
    ("../../data/next_tmc_5_miles.pkl", next_tmc),
    ("../../data/prev_tmc_5_miles.pkl", prev_tmc),
):
    with open(pkl_path, "wb") as f:
        pickle.dump(tmc_neighbors, f)

## 2. Get Upstream and Downstream XD Segments

In [None]:
# NOTE: preprocessing kept commented out for reference; it is not executed by this notebook.
# It converts the XD id columns to strings, renames the columns, and writes
# xd_attr.csv and core_xd_attr.csv. Its paths start at ./data/, unlike the ../../data/
# paths used by the other cells.
# xd_attr = pd.read_csv("./data/shapefiles/xd_attr.csv")
# xd_attr.XDSegID = xd_attr.XDSegID.apply(int).apply(str)
# xd_attr.PreviousXD = xd_attr.PreviousXD.apply(lambda x: str(int(x)) if not np.isnan(x) else x)
# xd_attr.NextXDSegI = xd_attr.NextXDSegI.apply(lambda x: str(int(x)) if not np.isnan(x) else x)
# xd_attr = xd_attr.rename(columns={"XDSegID":"id_xd", "PreviousXD":"id_xd_prev", "NextXDSegI":"id_xd_next", "Miles":"miles_xd"})
# xd_attr.to_csv("./data/shapefiles/xd_attr.csv", index=False)

# core_xd_attr = xd_attr.loc[:,["id_xd", "id_xd_prev", "id_xd_next", "miles_xd"]]
# core_xd_attr = core_xd_attr.set_index(keys="id_xd")
# core_xd_attr.to_csv("./data/shapefiles/core_xd_attr.csv")

In [7]:
# Core XD attributes, indexed by segment id.
core_xd_attr = pd.read_csv("../../data/shapefiles/core_xd_attr.csv").set_index(keys="id_xd")

# Row-wise lookup - key: <id_xd>; value: dict() with keys 'id_xd_prev', 'id_xd_next', 'miles_xd'.
dict_xd_attr = core_xd_attr.to_dict(orient="index")

# Filled by the neighbour search in the next cell.
next_xd, prev_xd = {}, {}

In [10]:
def _collect_xd_chain(xd_attr, start_id, link_key, max_miles=5):
    """Follow ``link_key`` pointers from ``start_id`` and list the segments visited.

    Parameters
    ----------
    xd_attr : dict
        Segment id -> dict holding at least ``"miles_xd"`` and ``link_key``.
    start_id
        First segment of the chain (already a neighbour of the segment of interest).
    link_key : str
        ``"id_xd_next"`` to walk forward, ``"id_xd_prev"`` to walk backward.
    max_miles : number
        Length budget. A segment is appended while the remaining budget is still >= 0,
        so the segment that crosses the limit is included.

    Returns
    -------
    list
        Visited segment ids in walking order. An id that is absent from ``xd_attr``
        is still appended, and the walk ends there.
    """
    chain = []
    remaining = max_miles
    seg_id = start_id
    # A missing neighbour shows up as NaN, whose string form is "nan".
    while remaining >= 0 and str(seg_id) != "nan":
        chain.append(seg_id)
        if seg_id not in xd_attr:
            break
        remaining -= xd_attr[seg_id]["miles_xd"]
        seg_id = xd_attr[seg_id][link_key]
    return chain


for xd in tqdm(dict_xd_attr):
    # next XD segments within 5 miles: keep following 'id_xd_next'
    next_xd[xd] = _collect_xd_chain(
        dict_xd_attr, dict_xd_attr[xd]["id_xd_next"], "id_xd_next", max_miles=5
    )
    # previous XD segments within 5 miles: keep following 'id_xd_prev', so the walk
    # moves away from xd instead of stepping back through it
    prev_xd[xd] = _collect_xd_chain(
        dict_xd_attr, dict_xd_attr[xd]["id_xd_prev"], "id_xd_prev", max_miles=5
    )

100%|██████████| 117679/117679 [00:01<00:00, 105897.01it/s]


In [11]:
# Persist both XD neighbour lookups, one pickle file each.
for pkl_path, xd_neighbors in (
    ("../../data/next_xd_5_miles.pkl", next_xd),
    ("../../data/prev_xd_5_miles.pkl", prev_xd),
):
    with open(pkl_path, "wb") as f:
        pickle.dump(xd_neighbors, f)