In [4]:
import pandas as pd
import requests 
import os
from xml.etree import ElementTree
from datetime import datetime

In [5]:
# ---------------------------------------------------------------------------
# Notebook configuration: every value a reader might want to tune lives here.
# ---------------------------------------------------------------------------

# Number of CSV rows pandas loads per chunk while streaming an input file.
chunk_size = 1000
# Waypoints whose reported distance is not below this value are rejected.
max_distance_from_node = 50  # Meter
# How many nearest waypoints are requested from OSRM for each location.
number_of_nearest_node = 5  # count

# Base address of the OSRM service; endpoint paths are appended to it.
OSRM_BASIC_URL = "https://usaeta.bluebitsoft.com/"

# Description of the traffic-event dataset and where its part files live.
TrafficEvents = dict(
    name="TrafficEvents",
    # Alternative input kept for reference:
    # "file_name": "TrafficEvents_Aug16_Dec19_Publish_Road_Construction.csv",
    file_name="All_Constructions_June2020_Combined_TO_StartEnd_30_100.csv",
    path="/home/amin/CETI/RoadConstruction/TrafficEventData/",
    part_path="part",
)

# Sub-folder (under path/part_path) holding the part files to process.
index_folder = 0
# Half-open range [start, end) of part-file indices to process.
index_start_point_file, index_end_point_file = 0, 411

# Column/value pairs a row must match to be kept (paired by position).
traffic_filters = dict(keys=["Type"], values=["Construction"])



In [6]:
def read_big_data_by_filter_with_key_values(path, filters, chunksize=None):
    """Stream a CSV file in chunks and keep only the rows matching every filter.

    Parameters
    ----------
    path : str
        Location of the CSV file, passed straight to ``pandas.read_csv``.
    filters : dict
        Mapping with two parallel lists: ``filters['keys']`` holds column
        names and ``filters['values']`` the value each column must equal.
        A row is kept only if it satisfies all pairs.
    chunksize : int, optional
        Rows to load per chunk. When omitted, the notebook-level
        ``chunk_size`` setting is used.

    Returns
    -------
    pandas.DataFrame
        The matching rows in file order, keeping the row labels assigned by
        ``read_csv``. An empty DataFrame is returned if no chunk was read.

    Raises
    ------
    KeyError
        If a filter column is missing from the file.
    """
    # Only look up the global setting when the caller did not choose a size.
    rows_per_chunk = chunk_size if chunksize is None else chunksize
    criteria = list(zip(filters['keys'], filters['values']))

    print("method {} is started ".format("read_big_data_filter_with_key_values"))
    print("read csv data is started with file {}".format(path))
    print("chunk size is {}".format(rows_per_chunk))
    for key, value in criteria:
        # Report the individual pair, not the whole lists.
        print("the key is {} and the value is {}".format(key, value))

    # Collect the filtered chunks and concatenate once at the end:
    # DataFrame.append no longer exists in pandas 2.x, and a single concat
    # avoids re-copying the accumulated rows for every chunk.
    kept_chunks = []
    for num, df in enumerate(pd.read_csv(path, chunksize=rows_per_chunk), start=1):
        print("continue reading file page num is {}".format(num))
        for key, value in criteria:
            df = df[df[key] == value]
        kept_chunks.append(df)

    print("method {} finished ".format("read_big_data_filter_with_key_values"))
    if not kept_chunks:
        # pd.concat refuses an empty list; hand back an empty frame instead.
        return pd.DataFrame()
    return pd.concat(kept_chunks)

In [7]:
class Location(object):
    """A geographic point stored as separate longitude and latitude attributes."""

    def __init__(self, osrm_data):
        """Build the point from an indexable ``[longitude, latitude]`` pair.

        Only positions 0 and 1 of ``osrm_data`` are read; any further items
        are ignored.
        """
        lng, lat = osrm_data[0], osrm_data[1]
        self.longitude = lng
        self.latitude = lat


In [8]:
class WayPoint(object):
    """One waypoint entry taken from an OSRM response.

    The constructor copies the ``nodes``, ``hint``, ``distance`` and ``name``
    entries of the given mapping onto the instance and wraps its ``location``
    entry in a ``Location``.
    """

    # Entries copied verbatim from the response mapping, in read order.
    _PLAIN_FIELDS = ("nodes", "hint", "distance", "name")

    def __init__(self, json_data):
        """Populate the waypoint from ``json_data``; a missing key raises ``KeyError``."""
        for field in self._PLAIN_FIELDS:
            setattr(self, field, json_data[field])
        self.location = Location(json_data["location"])

    def validate_way_point(self):
        """Return True when ``distance`` is strictly below ``max_distance_from_node``."""
        return bool(self.distance < max_distance_from_node)


In [9]:
class OSRM(object):
    """Namespace for calls to the OSRM HTTP service rooted at ``OSRM_BASIC_URL``."""

    @staticmethod
    def get_all_nearest_nodes_of_location(location, number=1, timeout=30):
        """Query the ``nearest/v1/driving`` endpoint for a location.

        Parameters
        ----------
        location : Location
            Object exposing ``longitude`` and ``latitude``; they are placed in
            the URL in that order.
        number : int, optional
            Sent as the ``number`` query parameter (default 1).
        timeout : float or tuple, optional
            Passed to ``requests.get``. Without it a stalled connection would
            block the caller indefinitely. Defaults to 30 seconds.

        Returns
        -------
        The decoded JSON body of the response. The HTTP status code is not
        checked here, so error payloads are returned to the caller as-is.

        Raises
        ------
        requests.exceptions.RequestException
            On connection problems or when ``timeout`` is exceeded.
        """
        params = {
            "number": number
        }
        URL = OSRM_BASIC_URL + "nearest/v1/driving/{},{}".format(location.longitude, location.latitude)
        # An explicit timeout keeps one hung request from freezing a long batch run.
        response = requests.get(url=URL, params=params, timeout=timeout)
        return response.json()


In [None]:
# For every part file in the configured folder: load the filtered traffic
# events, ask OSRM for the nearest waypoints of each event's start and end
# point, and write the events back out with four extra columns (the raw OSRM
# responses and the node ids of the waypoints that pass validation).


def _valid_node_ids(osrm_response):
    """Return the node ids of all waypoints in the response that pass validate_way_point."""
    node_ids = []
    for point in osrm_response["waypoints"]:
        way_point = WayPoint(point)
        if way_point.validate_way_point():
            node_ids.extend(way_point.nodes)
    return node_ids


# Named part_dir rather than dir so the builtin dir() is not shadowed.
part_dir = os.path.join(TrafficEvents["path"], TrafficEvents["part_path"], str(index_folder))
print(part_dir)
if not os.path.exists(part_dir):
    print("does not exists {}".format(part_dir))
    # Raising a plain string is itself a TypeError; use a real exception type.
    raise FileNotFoundError("does not exists")

for file_index in range(index_start_point_file, index_end_point_file):
    print("file_index: {}".format(file_index))
    file_url = os.path.join(part_dir, "file_{}.csv".format(file_index))

    traffic_events_data = read_big_data_by_filter_with_key_values(file_url, traffic_filters)
    if traffic_events_data is None:
        # The reader produced nothing for this file; there is nothing to enrich.
        print("no data read from {}".format(file_url))
        continue

    # Whether the file carries end-point columns is the same for every row,
    # so it is decided once per file instead of once per row.
    has_end_point = (
        "EndPoint_Lng" in traffic_events_data.columns
        and "EndPoint_Lat" in traffic_events_data.columns
    )

    nearst_nodes_of_start_point = []
    nearst_nodes_of_end_point = []
    nearst_node_ids_of_start_validate_point = []
    nearst_node_ids_of_end_validate_point = []

    for index, data in traffic_events_data.iterrows():
        start_location = Location([data["StartPoint_Lng"], data["StartPoint_Lat"]])
        if has_end_point:
            end_location = Location([data["EndPoint_Lng"], data["EndPoint_Lat"]])
        else:
            # No end-point columns: fall back to the start point.
            end_location = Location([data["StartPoint_Lng"], data["StartPoint_Lat"]])

        nearst_nodes_start_location = OSRM.get_all_nearest_nodes_of_location(start_location, number_of_nearest_node)
        nearst_nodes_end_location = OSRM.get_all_nearest_nodes_of_location(end_location, number_of_nearest_node)

        nearst_nodes_of_start_point.append(nearst_nodes_start_location)
        nearst_nodes_of_end_point.append(nearst_nodes_end_location)

        nearst_node_ids_of_start_validate_point.append(_valid_node_ids(nearst_nodes_start_location))
        nearst_node_ids_of_end_validate_point.append(_valid_node_ids(nearst_nodes_end_location))

        # Progress heartbeat on rows 1, 11, 21, ...
        if len(nearst_node_ids_of_start_validate_point) % 10 == 1:
            print(datetime.now())
            print(len(nearst_nodes_of_start_point))
            print("=======")

    # One list entry was collected per row, in iteration order.
    traffic_events_data["nearst_nodes_of_start_point"] = nearst_nodes_of_start_point
    traffic_events_data["nearst_nodes_of_end_point"] = nearst_nodes_of_end_point
    traffic_events_data["nearst_nodes_ids_of_start_point"] = nearst_node_ids_of_start_validate_point
    traffic_events_data["nearst_nodes_ids_of_end_point"] = nearst_node_ids_of_end_validate_point

    output_file_url = os.path.join(part_dir, "{}_file_{}.csv".format(file_index, file_index))
    traffic_events_data.to_csv(output_file_url, index=False, header=True)
    # Report the number of rows written (the name df is not defined in this cell).
    print(len(traffic_events_data))

print("end")

/home/amin/CETI/RoadConstruction/TrafficEventData/part/0
file_index: 0
method read_big_data_filter_with_key_values is started 
read csv data is started with file /home/amin/CETI/RoadConstruction/TrafficEventData/part/0/file_0.csv
chunk size is 1000
the key is ['Type'] and the value is ['Construction']
continue reading file page num is 1
method read_big_data_filter_with_key_values finished 
2020-08-21 13:10:27.320799
1
2020-08-21 13:10:39.858994
11
2020-08-21 13:10:52.245179
21
2020-08-21 13:11:04.857048
31
2020-08-21 13:11:17.124013
41
2020-08-21 13:11:29.785906
51
2020-08-21 13:11:41.641976
61
2020-08-21 13:11:53.790476
71
2020-08-21 13:12:06.595979
81
2020-08-21 13:12:18.897941
91
2020-08-21 13:12:31.597489
101
2020-08-21 13:12:43.933338
111
2020-08-21 13:12:56.257050
121
2020-08-21 13:13:09.116031
131
2020-08-21 13:13:21.544528
141
2020-08-21 13:13:34.008289
151
2020-08-21 13:13:46.516194
161
2020-08-21 13:13:59.550217
171
2020-08-21 13:14:11.925958
181
2020-08-21 13:14:24.426023
19