In [1]:
import os
import geopandas as gpd
import pandas as pd
import numpy as np
import math
import scipy
from shapely.geometry import Point
import time

In [2]:
# Get script and dataset file paths.
SCRIPT_PATH = "/Users/Syrin/Documents/GitHub/AI4PublicHealth/"

"""# Read the node intersection geojson file via geopandas and store as a pandas dataframe.
NODE_INTERSECTION_PATH = os.path.join(SCRIPT_PATH, "Datasets/Clipped_Mzuzu_Road_Intersections.geojson")
NODE_INTERSECTION_DATA = gpd.read_file(NODE_INTERSECTION_PATH)
NODE_INTERSECTION_DATA = pd.DataFrame(NODE_INTERSECTION_DATA)"""

# Read the road points w/ elevation .shp file via geopandas and store as a pandas dataframe.
ROAD_POINT_WITH_ELEVATION_PATH = os.path.join(SCRIPT_PATH, "Datasets/MZUZU_roads_pointdata_with_elevation.shp")
ROAD_POINT_WITH_ELEVATION_DATA = gpd.read_file(ROAD_POINT_WITH_ELEVATION_PATH)
ROAD_POINT_WITH_ELEVATION_DATA = pd.DataFrame(ROAD_POINT_WITH_ELEVATION_DATA)

"""# Read the road line data .shp file via geopandas and store as a pandas dataframe.
ROAD_LINE_PATH = os.path.join(SCRIPT_PATH, "Datasets/MZUZU_roads_lines_CORRECT.shp")
ROAD_LINE_DATA = gpd.read_file(ROAD_LINE_PATH)
ROAD_LINE_DATA = pd.DataFrame(ROAD_LINE_DATA)
INCLUDE_MULTILINE = False"""

'# Read the road line data .shp file via geopandas and store as a pandas dataframe.\nROAD_LINE_PATH = os.path.join(SCRIPT_PATH, "Datasets/MZUZU_roads_lines_CORRECT.shp")\nROAD_LINE_DATA = gpd.read_file(ROAD_LINE_PATH)\nROAD_LINE_DATA = pd.DataFrame(ROAD_LINE_DATA)\nINCLUDE_MULTILINE = False'

In [3]:
radius_earth = 6378.1

def road_elevation_processing(road_elevation_df):
    """Clean the .shp file that contains the route data. Create a second pandas data frame to store a processed
        version of the original data from the .shp file. """

    # Create a secondary pandas data frame that contains the index of nodes, start/end longitude and latitude,
    # elevation, road condition, and road type.
    processed_data = []

    # TODO: Maybe there's a more efficient way to do this than to loop through the entire unprocessed data set
    for rows in range(len(road_elevation_df.index)):
        
        coordinates = list(road_elevation_df.iloc[rows, 22].coords)
        start_longitude = round(coordinates[0][0], 10)
        start_latitude = round(coordinates[0][1], 10)

        elevation = road_elevation_df.iloc[rows, 17]
        road_condition = road_elevation_df.iloc[rows, 10]
        road_type = road_elevation_df.iloc[rows, 9]
        
        # Convert distance and angle to connecting node coords
        distance = round(road_elevation_df.iloc[rows, 15], 10)
        angle = round(road_elevation_df.iloc[rows, 16], 10)
        radians = round(math.radians(angle), 10)
        
        from_long = round(math.radians(start_longitude), 10)
        from_lat = round(math.radians(start_latitude), 10)
        
        to_lat = round(math.asin( math.sin(from_lat)*math.cos(distance / radius_earth) + math.cos(from_lat)*math.sin(distance / radius_earth)*math.cos(radians)), 10)
        to_long = round((from_long + math.atan2( math.sin(radians) * math.sin(distance / radius_earth) * math.cos(from_lat), math.cos(distance / radius_earth) - math.sin(from_lat) * math.sin(to_lat))), 10)
        
        start_longitude = round(coordinates[0][0], 8)
        start_latitude = round(coordinates[0][1], 8)
        to_lat = round(math.degrees(to_lat), 8)
        to_long = round( math.degrees(to_long), 8)
        
        processed_data.append((start_longitude, start_latitude, elevation, distance, angle, to_long, to_lat, road_condition, road_type, [], []))

    processed_data = pd.DataFrame(processed_data)

    processed_data = processed_data.rename(
        columns={0: "Longitude", 1: "Latitude", 2: "Elevation", 3: "Distance", 4: "Angle", 5: "To Longitude", 6: "To Latitude", 7: "Road Condition", 8: "Road Type", 9: "Connection(s)"})

    return processed_data

road_elevation_nodes = road_elevation_processing(ROAD_POINT_WITH_ELEVATION_DATA)
road_elevation_nodes

Unnamed: 0,Longitude,Latitude,Elevation,Distance,Angle,To Longitude,To Latitude,Road Condition,Road Type,Outgoing Connection(s),Incoming Connection(s)
0,34.029856,-11.458530,1286,0.000623,143.665285,34.029860,-11.458534,,path,[],[]
1,34.028073,-11.458322,1279,0.000674,68.613613,34.028078,-11.458320,,track,[],[]
2,34.028159,-11.458254,1279,0.000784,68.912626,34.028165,-11.458251,,track,[],[]
3,34.028325,-11.458243,1277,0.000950,92.006825,34.028334,-11.458243,,track,[],[]
4,34.028443,-11.458259,1277,0.001069,116.018906,34.028452,-11.458263,,track,[],[]
...,...,...,...,...,...,...,...,...,...,...,...
58127,34.034627,-11.458959,1270,0.002183,78.742212,34.034646,-11.458956,,path,[],[]
58128,34.034729,-11.458953,1270,0.002285,94.031740,34.034749,-11.458954,,path,[],[]
58129,34.034887,-11.458986,1275,0.002447,100.120003,34.034909,-11.458990,,path,[],[]
58130,34.029513,-11.458301,1282,0.000201,102.616360,34.029515,-11.458301,,path,[],[]


In [10]:
start_time = time.time()

# road_elevation_long_list = road_elevation_nodes["Longitude"].values
road_elevation_nodes.sort_values(by=["Longitude"], ascending=False)

# road_elevation_nodes.sort_values(by=["Latitude"], ascending=False)
# road_elevation_lat_list = road_elevation_nodes["To Latitude"].values
# print(road_elevation_lat_list)


exclude_long = []
exclude_lat = []

exclude_zipong_sub = []
exclude_lat_sub = []

counter = 0

for index, row in road_elevation_nodes.iterrows():
    from_long = row[0]
    from_lat = row[1]

    if from_long not in exclude_long and from_lat not in exclude_lat:
        for sub_index, sub_row in road_elevation_nodes.iterrows():
            to_long = sub_row[5]
            to_lat = sub_row[6]

            if to_long not in exclude_long_sub and to_lat not in exclude_lat_sub:
                if from_long == to_long and from_lat == to_lat:
                    curr_connect = row[9]
                    curr_connect_sub = sub_row[9]

                    road_elevation_nodes.loc[index, "Connection(s)"] = curr_connect.append(sub_index)
                    road_elevation_nodes.loc[sub_index, "Connection(s)"] = curr_connect_sub.append(index)
                    counter += 1

    print(counter)

print("Minutes since execution:", (time.time() - start_time) / 60)

road_elevation_nodes

0
0
0
0
0
0
0
0


KeyboardInterrupt: 

In [None]:
road_elevation_nodes.to_csv("Processed_elevation_nodes_w_connection", header=["Longitude", "Latitude", "Elevation", "Distance", "Angle", "To Longitude", "To Latitude", "Road Condition", "Road Type", "Connection(s)"], index=False)

In [6]:
# Other Processing Code
---

SyntaxError: invalid syntax (161335075.py, line 2)

In [None]:
def intersection_processing(intersection_df):
    """Clean the .shp file that contains the route data. Create a second pandas data frame to store a processed
        version of the original data from the .shp file. """

    # Create a secondary pandas data frame that contains the index of nodes, start/end longitude and latitude,
    # elevation, road condition, and road type.
    processed_data = []

    # TODO: Maybe there's a more efficient way to do this than to loop through the entire unprocessed data set
    for rows in range(len(intersection_df.index)):
        coordinates_line = intersection_df.iloc[rows, 2]
        # TODO: Check what the team meant by this comment
        # Maybe take out the start lat and long here if we combine the dataframes for the line and point data
        for item in coordinates_line:
            coordinates_line = item

            coordinates_line = list(coordinates_line.coords)

            start_longitude = coordinates_line[0][0]
            start_latitude = coordinates_line[0][1]

            processed_data.append((start_longitude, start_latitude))

    processed_data = pd.DataFrame(processed_data)
    processed_data = processed_data.rename(
        columns={0: "Longitude", 1: "Latitude"})

    return processed_data

intersection_nodes = intersection_processing(NODE_INTERSECTION_DATA)
intersection_nodes.sort_values(by=["Longitude"], ascending=False)
intersection_nodes

In [None]:
def road_line_processing(road_line_df):
    """Clean the .shp file that contains the route data. Create a second pandas data frame to store a processed
        version of the original data from the .shp file. """

    processed_data_line = []

    for rows in range(len(road_line_df.index)):
        coordinates_line = road_line_df.iloc[rows, 11]
        string_type = (type(coordinates_line))

        if INCLUDE_MULTILINE:
            if str(string_type) == "<class 'shapely.geometry.linestring.LineString'>":
                coordinates_line = list(coordinates_line.coords)

                start_longitude_line = coordinates_line[0][0]
                start_latitude_line = coordinates_line[0][1]
                end_longitude_line = coordinates_line[-1][0]
                end_latitude_line = coordinates_line[-1][1]

                processed_data_line.append(
                    (start_longitude_line, start_latitude_line, end_longitude_line, end_latitude_line))

            elif str(string_type) != "<class 'shapely.geometry.linestring.MultiLineString'>":
                for item in coordinates_line:
                    coordinates_line = item

                    coordinates_line = list(coordinates_line.coords)

                    start_longitude_line = coordinates_line[0][0]
                    start_latitude_line = coordinates_line[0][1]
                    end_longitude_line = coordinates_line[-1][0]
                    end_latitude_line = coordinates_line[-1][1]

                    processed_data_line.append(
                        (start_longitude_line, start_latitude_line, end_longitude_line, end_latitude_line))

            else:
                print("There is a unique string type that is neither LineString or MultiString:")
                print("    ", string_type)

        else:
            if str(string_type) == "<class 'shapely.geometry.linestring.LineString'>":
                coordinates_line = list(coordinates_line.coords)

                start_longitude_line = coordinates_line[0][0]
                start_latitude_line = coordinates_line[0][1]
                end_longitude_line = coordinates_line[-1][0]
                end_latitude_line = coordinates_line[-1][1]

                processed_data_line.append(
                    (start_longitude_line, start_latitude_line, end_longitude_line, end_latitude_line))

            else:
                continue

    processed_data_line = pd.DataFrame(processed_data_line)
    processed_data_line = processed_data_line.rename(
        columns={0: "Start Longitude", 1: "Start Latitude", 2: "End Longitude", 3: "End Latitude"})

    return processed_data_line

road_line_nodes = road_line_processing(ROAD_LINE_DATA)
road_line_nodes

In [None]:
def road_elevation_processing(road_elevation_df, intersection_df):
    """Clean the .shp file that contains the route data. Create a second pandas data frame to store a processed
        version of the original data from the .shp file. """

    # Create a secondary pandas data frame that contains the index of nodes, start/end longitude and latitude,
    # elevation, road condition, and road type.
    processed_data = []

    intersection_longitude_list = intersection_df["Longitude"].values
    intersection_latitude_list = intersection_df["Latitude"].values

    # TODO: Maybe there's a more efficient way to do this than to loop through the entire unprocessed data set
    for rows in range(len(road_elevation_df.index)):
        fid = road_elevation_df.iloc[rows, 0]
        
        # TODO: Check what the team meant by this comment
        # Maybe take out the start lat and long here if we combine the dataframes for the line and point data
        coordinates = list(road_elevation_df.iloc[rows, 22].coords)
        start_longitude = coordinates[0][0]
        start_latitude = coordinates[0][1]

        elevation = road_elevation_df.iloc[rows, 17]
        distance = road_elevation_df.iloc[rows, 15]
        road_condition = road_elevation_df.iloc[rows, 10]
        road_type = road_elevation_df.iloc[rows, 9]
        

        if start_longitude in intersection_longitude_list:
            if start_latitude in intersection_latitude_list:
                processed_data.append((fid, start_longitude, start_latitude, elevation, distance, road_condition, road_type, True, False, False))
            
        else:
            processed_data.append((fid, start_longitude, start_latitude, elevation, distance, road_condition, road_type, False, False, False))

    processed_data = pd.DataFrame(processed_data)

    processed_data = processed_data.rename(
        columns={0: "FID", 1: "Longitude", 2: "Latitude", 3: "Elevation", 4: "Distance", 5: "Road Condition", 6: "Road Type",
                 7: "Intersection Node", 8: "Road Start", 9: "Road End"})

    return processed_data

road_elevation_nodes = road_elevation_processing(ROAD_POINT_WITH_ELEVATION_DATA, intersection_nodes)
road_elevation_nodes

In [None]:
final_list = []
# road_elevation_nodes.sort_values(by=["FID"])

road_start_long = road_line_nodes["Start Longitude"].values
road_start_lat = road_line_nodes["Start Latitude"].values
road_end_long = road_line_nodes["End Longitude"].values
road_end_lat = road_line_nodes["End Latitude"].values

for rows in range(len(road_elevation_nodes.index)):
    longitude = road_elevation_nodes.iloc[rows, 1]
    latitude = road_elevation_nodes.iloc[rows, 2]
    
    if longitude in road_start_long:
        if latitude in road_start_lat:
            road_elevation_nodes.iloc[rows, 8] = True
    if longitude in road_start_long:
        if latitude in road_start_lat:
            road_elevation_nodes.iloc[rows, 9] = True
    else:
        continue
    
road_elevation_nodes

In [None]:
start_list = []
end_list = [] 

for row in range(len(road_elevation_nodes.index)):
    if road_elevation_nodes.iloc[row, 8]:
        start_list.append(road_elevation_nodes.iloc[row])
    elif road_elevation_nodes.iloc[row, 9]:
        end_list.append(road_elevation_nodes.iloc[row])

start_list = pd.DataFrame(start_list)
end_list = pd.DataFrame(end_list)

print(len(start_list), len(end_list))
start_list

In [None]:
print(start_list['FID'].nunique())
print(end_list['FID'].nunique())

In [None]:
distance_check_list = []

start_list.sort_values(by="FID")
end_list.sort_values(by="FID")

for start_list_row in range(len(start_list.index)):
    fid_reoccurance = []
    store_fid_1 = start_list.iloc[start_list_row, 0]
    
    
    if store_fid_1 not in fid_reoccurance:
        for end_list_row in range(len(end_list.index)):
            store_fid_2 = end_list.iloc[end_list_row, 0]

            if store_fid_1 == store_fid_2 and store_fid_1 not in fid_reoccurance:
                distance_check_list.append([start_list.iloc[start_list_row, 1], start_list.iloc[start_list_row, 2],
                                            end_list.iloc[end_list_row, 1], end_list.iloc[end_list_row, 2]])
                fid_reoccurance.append(store_fid_2)
    
distance_check_list = pd.DataFrame(distance_check_list)
distance_check_list

In [None]:
intersection_confirm = []

for row in range(len(road_elevation_nodes.index)):
    if road_elevation_nodes.iloc[row][5]:
        lat = road_elevation_nodes.iloc[row, 1]
        long = road_elevation_nodes.iloc[row, 2]
        intersection_confirm.append([lat, long])
    else:
        continue

intersection_confirm = pd.DataFrame(intersection_confirm)
intersection_confirm = intersection_confirm.rename(
        columns={0: "Longitude", 1: "Latitude"})
intersection_confirm = intersection_confirm.reset_index()
intersection_confirm = intersection_confirm.drop("index", axis = 1)

intersection_confirm.sort_values(by=["Longitude"], ascending=False)
intersection_confirm

In [None]:
not_equal = []
compare_val = len(intersection_nodes.index) - len(intersection_confirm.index)

intersection_confirm_longitude_list = intersection_confirm["Longitude"].values
intersection_confirm_latitude_list = intersection_confirm["Latitude"].values

for row in range(len(intersection_nodes.index) - compare_val):
    if intersection_nodes.iloc[row, 0] in intersection_confirm_longitude_list:
        if intersection_nodes.iloc[row, 1] in intersection_confirm_latitude_list:
            continue
        else:
            not_equal.append(intersection_nodes.iloc[row])
    else:
        not_equal.append(intersection_nodes.iloc[row])

print(compare_val)
not_equal = pd.DataFrame(not_equal)
not_equal = not_equal.rename(
        columns={0: "Longitude", 1: "Latitude"})

not_equal = not_equal.reset_index()
not_equal = not_equal.drop("index", axis = 1)
not_equal.sort_values(by=["Longitude"], ascending=False)
not_equal