In [None]:
import shapefile
import pandas as pd
from pyproj import Transformer

In [None]:
class standard_nodelink:
    """Node and link tables of a road network, loaded from two shapefiles.

    After construction:

    * ``self.node`` is a DataFrame with the columns ``NODE_ID``,
      ``NODE_NAME``, ``latitude`` and ``longitude``.
    * ``self.link`` is a DataFrame with the columns ``LINK_ID``, ``F_NODE``,
      ``T_NODE``, ``MAX_SPD`` and ``LENGTH``.
    """

    def __init__(self, node_path, link_path):
        """Read both shapefiles and build the node / link tables.

        Args:
            node_path: Path to the node shapefile (read with cp949 encoding).
            link_path: Path to the link shapefile (read with cp949 encoding).
        """
        self.node_path = node_path
        self.link_path = link_path

        # The readers are used as context managers so the underlying file
        # handles are closed as soon as the data has been copied out.
        with shapefile.Reader(node_path, encoding='cp949') as node_shp:
            # The first entry of ``fields`` is skipped: it has no matching
            # value in the rows returned by ``records()``.
            node_col = [field[0] for field in node_shp.fields][1:]
            node_df = pd.DataFrame(node_shp.records(), columns=node_col)
            # Only the first point of every shape is used as its coordinate.
            node_coord = [shape.points[0] for shape in node_shp.shapes()]

        with shapefile.Reader(link_path, encoding='cp949') as link_shp:
            link_col = [field[0] for field in link_shp.fields][1:]
            link_df = pd.DataFrame(link_shp.records(), columns=link_col)

        # ``always_xy`` is not set, so pyproj uses each CRS's own axis order.
        # As in the per-point version of this code, the second component of a
        # shapefile point is passed first and the first component second; the
        # two results are stored as (latitude, longitude).
        # NOTE(review): assumes the shapefile really is in EPSG:5186 -- confirm.
        transformer = Transformer.from_crs('EPSG:5186', 'EPSG:4326')
        first_axis = [point[1] for point in node_coord]
        second_axis = [point[0] for point in node_coord]
        # One vectorised call instead of one transform call per node.
        latitudes, longitudes = transformer.transform(first_axis, second_axis)
        node_df = node_df.assign(latitude=latitudes, longitude=longitudes)

        self.node = node_df[['NODE_ID', 'NODE_NAME', 'latitude', 'longitude']]
        self.link = link_df[['LINK_ID', 'F_NODE', 'T_NODE', 'MAX_SPD', 'LENGTH']]

    def search_nodelink(self, id_list=None, lati_range=None, long_range=None):
        """Select the sub-network matching the given filters.

        Args:
            id_list: Optional iterable of 3-character prefixes. A node / link
                is kept only if the first three characters of its
                ``NODE_ID`` / ``LINK_ID`` are in this collection. Any
                iterable works, including pandas Series and numpy arrays.
            lati_range: Optional ``(low, high)`` pair; nodes must satisfy
                ``low < latitude < high`` (both bounds exclusive).
            long_range: Optional ``(low, high)`` pair; nodes must satisfy
                ``low < longitude < high`` (both bounds exclusive).

        Returns:
            ``(nodes, links)``: two new DataFrames sorted by ``NODE_ID`` and
            ``LINK_ID`` respectively, each with a fresh 0..n-1 index. A link
            is kept only when both ``F_NODE`` and ``T_NODE`` passed the node
            filters; a node is kept only when at least one kept link uses it.
        """
        node = self.node
        link = self.link

        node_mask = pd.Series(True, index=node.index)
        link_mask = pd.Series(True, index=link.index)

        # ``is not None`` (rather than ``!= None``) keeps these checks valid
        # for array-like arguments, whose ``!=`` is element-wise.
        if id_list is not None:
            id_set = set(id_list)
            node_mask = node_mask & node['NODE_ID'].str[:3].isin(id_set)
            link_mask = link_mask & link['LINK_ID'].str[:3].isin(id_set)
        if lati_range is not None:
            lat_low, lat_high = lati_range[0], lati_range[1]
            node_mask = node_mask & (node['latitude'] > lat_low) & (node['latitude'] < lat_high)
        if long_range is not None:
            lon_low, lon_high = long_range[0], long_range[1]
            node_mask = node_mask & (node['longitude'] > lon_low) & (node['longitude'] < lon_high)

        # Links must start and end at nodes that passed the filters above.
        candidate_ids = set(node.loc[node_mask, 'NODE_ID'])
        link_mask = link_mask & link['F_NODE'].isin(candidate_ids) & link['T_NODE'].isin(candidate_ids)
        selected_links = link[link_mask]

        # Nodes that are not an endpoint of any selected link are dropped.
        connected_ids = set(selected_links['F_NODE']) | set(selected_links['T_NODE'])
        node_mask = node_mask & node['NODE_ID'].isin(connected_ids)
        selected_nodes = node[node_mask]

        sorted_nodes = selected_nodes.sort_values(by=['NODE_ID']).reset_index(drop=True)
        sorted_links = selected_links.sort_values(by=['LINK_ID']).reset_index(drop=True)
        return sorted_nodes, sorted_links

In [None]:
# Shapefiles holding the network's nodes and links (paths relative to the notebook).
node_path = '../data/raw_data/node_link/MOCT_NODE.shp'
link_path = '../data/raw_data/node_link/MOCT_LINK.shp'

# Load both shapefiles into node / link tables.
t = standard_nodelink(node_path=node_path, link_path=link_path)

In [None]:
# ID prefixes '150' .. '157' used to pre-filter nodes and links.
s = list(map(str, range(150, 158)))
# (low, high) bounds; search_nodelink treats both ends as exclusive.
lati = (35.8603352683553, 35.8784099937842)
long = (128.578048176449, 128.618447013494)
t.search_nodelink(id_list=s, lati_range=lati, long_range=long)

In [None]:
class its_preprocesser:
    """Turn raw per-link traffic CSV files into time x link speed tables.

    The set of links to keep is chosen through a ``standard_nodelink``
    instance built from the given node / link shapefiles.
    """

    def __init__(self, node_path, link_path):
        """Load the node/link network and set the traffic CSV schema.

        Args:
            node_path: Path to the node shapefile.
            link_path: Path to the link shapefile.
        """
        self.node_path = node_path
        self.link_path = link_path

        self.standard_nodelink = standard_nodelink(node_path, link_path)

        # Names and dtypes given to CSV columns 0, 1, 2 and 4 when a traffic
        # file is read in process_traffic_data.
        self.traffic_data_col = ['Date', 'Time', 'LINK_ID', 'Speed']
        self.traffic_data_type = {'Date':'Int32', 'Time':'Int32', 'LINK_ID':'Int64','Speed':'Float32'}

    @staticmethod
    def _link_id_or_zero(raw):
        """Parse a raw LINK_ID cell as an int; unparseable text becomes 0."""
        try:
            return int(raw)
        except ValueError:
            return 0

    @staticmethod
    def _fill_time_gaps(traffic_data):
        """Insert linearly interpolated rows so timestamps are 5 minutes apart.

        The index is treated as HHMM integers (hour * 100 + minute). For each
        pair of consecutive rows more than 5 minutes apart, one row is added
        per missing 5-minute step, weighted between the two neighbours.
        Returns the input unchanged when it is empty or has no gaps.
        """
        n_rows = len(traffic_data.index)
        if n_rows == 0:
            # Nothing to interpolate (and index[0] below would raise).
            return traffic_data

        new_rows = []
        new_times = []
        prev = int(traffic_data.index[0])
        for pos in range(1, n_rows):
            cur = int(traffic_data.index[pos])
            # Minutes elapsed between the two HHMM stamps.
            gap = (cur // 100 - prev // 100) * 60 + cur % 100 - prev % 100
            for i in range(5, gap, 5):
                prev += 5
                # Roll the minute part over into the next hour. ``>=`` also
                # covers stamps whose minutes are not multiples of five.
                if prev % 100 >= 60:
                    prev += 40
                new_rows.append(((gap - i) * traffic_data.iloc[pos - 1] + i * traffic_data.iloc[pos]) / gap)
                new_times.append(prev)
            prev = cur

        if not new_rows:
            return traffic_data
        return pd.concat([traffic_data, pd.DataFrame(new_rows, index=new_times)]).sort_index()

    def process_traffic_data(self, traffic_path_list, save_path_list, id_list=None, lati_range=None, long_range=None):
        """Filter, pivot, fill and save each traffic CSV.

        For every input file: keep only rows whose LINK_ID belongs to the
        selected sub-network, pivot to one row per Time and one column per
        LINK_ID holding Speed, replace missing values with 0, interpolate
        missing 5-minute steps, and write the table to the matching path in
        ``save_path_list``. The file is written with ``index=False``, so the
        Time index is not part of the output.

        Args:
            traffic_path_list: Paths of header-less input CSV files.
            save_path_list: Output paths, matched to the inputs by position.
            id_list, lati_range, long_range: Passed unchanged to
                ``standard_nodelink.search_nodelink`` to choose the links.

        Prints ``'Fill na to 0'`` when a file had missing values, and then
        whether the table's columns equal the selected link IDs in order.
        """
        _, link = self.standard_nodelink.search_nodelink(id_list, lati_range, long_range)
        link_ids = link['LINK_ID'].map(int)
        # Built once: the selected links do not change between files.
        link_set = set(link_ids)

        for fidx, traffic_path in enumerate(traffic_path_list):
            # First pass: read only the LINK_ID column to find unwanted rows.
            traffic_index = pd.read_csv(traffic_path, usecols=[2], names=['LINK_ID'], converters={2: self._link_id_or_zero}, on_bad_lines='warn')
            rows_to_skip = traffic_index.index[~traffic_index['LINK_ID'].isin(link_set)].tolist()

            # Second pass: read the needed columns, skipping those rows.
            # NOTE(review): this reuses first-pass row positions as file line
            # numbers; lines dropped by on_bad_lines='warn' could shift them
            # -- confirm the inputs contain no malformed lines.
            traffic_data = pd.read_csv(traffic_path, usecols=[0,1,2,4], names=self.traffic_data_col, dtype=self.traffic_data_type, skiprows=rows_to_skip)

            traffic_data = traffic_data[['Time', 'LINK_ID', 'Speed']].set_index(['LINK_ID', 'Time']).unstack(level=0)
            # Drop the 'Speed' level so the columns are plain link IDs.
            traffic_data.columns = [label[1] for label in traffic_data.columns.values]
            traffic_data = traffic_data.sort_index()

            if traffic_data.isna().sum().sum() > 0:
                print('Fill na to 0')
            # fillna returns a new frame; the result has to be kept.
            traffic_data = traffic_data.fillna(value=0)
            # List comparison also copes with differing lengths (prints False).
            print(link_ids.tolist() == traffic_data.columns.tolist())

            traffic_data = self._fill_time_gaps(traffic_data)
            traffic_data.to_csv(save_path_list[fidx], index=False)

In [None]:
# Rebinds `t`: from here on it is the traffic preprocessor, not the node/link table.
t = its_preprocesser(node_path=node_path, link_path=link_path)

In [None]:
# Raw input files and the output file for each, matched by position.
traffic_path_list = [
    '../data/raw_data/traffic/20231124_5Min.csv',
    '../data/raw_data/traffic/20231123_5Min.csv',
]
save_path_list = [
    '../data/processed_data/traffic/p_20231124_5Min.csv',
    '../data/processed_data/traffic/p_20231123_5Min.csv',
]
t.process_traffic_data(
    traffic_path_list,
    save_path_list,
    id_list=s,
    lati_range=lati,
    long_range=long,
)