# General Stuff

---

In [61]:
import os
import geopandas as gpd
import pandas as pd

import numpy as np
import math
from math import sin, cos, radians, pi

from pyproj import Geod

In [62]:
SCRIPT_PATH = "/Users/rachelralph/Downloads/AI4PublicHealth-main 3"

# Read the road points w/ elevation .shp file via geopandas and store as a pandas dataframe.
ROAD_POINT_WITH_ELEVATION_PATH = os.path.join(SCRIPT_PATH, "Datasets/MZUZU_roads_pointdata_with_elevation.shp")
ROAD_POINT_WITH_ELEVATION_DATA = gpd.read_file(ROAD_POINT_WITH_ELEVATION_PATH)
ROAD_POINT_WITH_ELEVATION_DATA = pd.DataFrame(ROAD_POINT_WITH_ELEVATION_DATA)

extra_data = os.path.join(SCRIPT_PATH, "Datasets/MZUZU_roads_lines_CORRECT.shp")
extra_data = gpd.read_file(extra_data)
extra_data = pd.DataFrame(extra_data)
extra_data['geometry']

0       LINESTRING (33.98535 -11.39052, 33.98531 -11.3...
1       LINESTRING (33.95740 -11.39722, 33.95749 -11.3...
2       LINESTRING (33.96021 -11.39077, 33.96037 -11.3...
3       LINESTRING (33.98934 -11.39285, 33.98935 -11.3...
4       LINESTRING (34.04635 -11.38784, 34.04626 -11.3...
                              ...                        
5464    LINESTRING (33.97063 -11.44921, 33.97054 -11.4...
5465    LINESTRING (33.96980 -11.45031, 33.96959 -11.4...
5466    LINESTRING (33.96447 -11.43679, 33.96440 -11.4...
5467    LINESTRING (33.96737 -11.44572, 33.96719 -11.4...
5468    LINESTRING (33.96876 -11.45078, 33.96893 -11.4...
Name: geometry, Length: 5469, dtype: geometry

In [63]:
INCLUDE_MULTILINE = False

def road_line_processing(road_line_df):
    """Clean the .shp file that contains the route data. Create a second pandas data frame to store a processed
        version of the original data from the .shp file. """

    processed_data_line = []

    for rows in range(len(road_line_df.index)):
        coordinates_line = road_line_df.iloc[rows, 11]
        string_type = (type(coordinates_line))

        if INCLUDE_MULTILINE:
            if str(string_type) == "<class 'shapely.geometry.linestring.LineString'>":
                coordinates_line = list(coordinates_line.coords)

                start_longitude_line = coordinates_line[0][0]
                start_latitude_line = coordinates_line[0][1]
                end_longitude_line = coordinates_line[-1][0]
                end_latitude_line = coordinates_line[-1][1]

                processed_data_line.append(
                    (start_longitude_line, start_latitude_line, end_longitude_line, end_latitude_line))

            elif str(string_type) != "<class 'shapely.geometry.linestring.MultiLineString'>":
                for item in coordinates_line:
                    coordinates_line = item

                    coordinates_line = list(coordinates_line.coords)

                    start_longitude_line = coordinates_line[0][0]
                    start_latitude_line = coordinates_line[0][1]
                    end_longitude_line = coordinates_line[-1][0]
                    end_latitude_line = coordinates_line[-1][1]

                    processed_data_line.append(
                        (start_longitude_line, start_latitude_line, end_longitude_line, end_latitude_line))

            else:
                print("There is a unique string type that is neither LineString or MultiString:")
                print("    ", string_type)

        else:
            if str(string_type) == "<class 'shapely.geometry.linestring.LineString'>":
                coordinates_line = list(coordinates_line.coords)

                start_longitude_line = round(coordinates_line[0][0], 6)
                start_latitude_line = round(coordinates_line[0][1], 6)
                end_longitude_line = round(coordinates_line[-1][0], 6)
                end_latitude_line = round(coordinates_line[-1][1], 6)

                processed_data_line.append(
                    (start_longitude_line, start_latitude_line, end_longitude_line, end_latitude_line))

            else:
                continue

    processed_data_line = pd.DataFrame(processed_data_line)
    processed_data_line = processed_data_line.rename(
        columns={0: "Start Longitude", 1: "Start Latitude", 2: "End Longitude", 3: "End Latitude"})

    return processed_data_line

road_line_nodes = road_line_processing(extra_data)
road_line_nodes

Unnamed: 0,Start Longitude,Start Latitude,End Longitude,End Latitude
0,33.985349,-11.390521,33.984215,-11.369368
1,33.957398,-11.397216,34.048045,-11.394335
2,33.960214,-11.390768,33.962719,-11.399591
3,33.989335,-11.392850,33.992700,-11.409748
4,34.046345,-11.387836,34.044863,-11.388096
...,...,...,...,...
5458,33.970628,-11.449207,33.970089,-11.448832
5459,33.969799,-11.450311,33.969593,-11.450297
5460,33.964466,-11.436791,33.963564,-11.436120
5461,33.967368,-11.445715,33.964825,-11.444966


In [64]:
for index, row in road_line_nodes.iterrows(): 
    if row["Start Latitude"] == -11.464411:
        print(index)
        print(row)

# True values for 37757 <- From point data
# Longitude           33.978497
# Latitude           -11.464378
        
# Predicted values of 37758 from (long, lat, dist, angle) of 37757
# To Longitude        33.978497
# To Latitude        -11.464378
    
# True values for 37758 <- From point data
# Longitude           33.978517
# Latitude           -11.464411

5386
Start Longitude    33.978517
Start Latitude    -11.464411
End Longitude      33.977777
End Latitude      -11.464624
Name: 5386, dtype: float64


# Create DataFrame

---

In [65]:
test = 1

radius_earth = 6378.137

def road_elevation_processing(road_elevation_df):
    """Clean the .shp file that contains the route data. Create a second pandas data frame to store a processed
        version of the original data from the .shp file. """

    # Create a secondary pandas data frame that contains the index of nodes, start/end longitude and latitude,
    # elevation, road condition, and road type.
    processed_data = []

    
    for rows in range(len(road_elevation_df.index)):
        
        # Get the coordinates.
        coordinates = list(road_elevation_df.iloc[rows, 22].coords)
        start_latitude = coordinates[0][1] # Contains max 7 decimal points
        start_longitude = coordinates[0][0] # Contains max 7 decimal points

        # Get the elevation, road condition, and road type.
        elevation = road_elevation_df.iloc[rows, 17]
        road_condition = road_elevation_df.iloc[rows, 10]
        road_type = road_elevation_df.iloc[rows, 9]
        
        # Convert distance and angle to connecting node coords. Note the rounding is to 7 decimal points.
        distance = road_elevation_df.iloc[rows, 15] # Contains max 14 decimal points
        angle = road_elevation_df.iloc[rows, 16]# Contains max 14 decimal points
            
        if test == 1:
            radians = math.radians(angle) # Contains max 16 decimal points


            # Convert the start_longitude and latitude to radians.
            from_long = math.radians(start_longitude) # Contains max 16 data points
            from_lat = math.radians(start_latitude)# Contains 17 data points

            # print(len(str(distance)), len(str(angle)), len(str(radians)), len(str(from_long)), len(str(from_lat)))

            # to_lat has 17 decimal points. to_long has 16 decimal points
            to_lat = math.asin(sin(from_lat) * 
                               cos(distance / radius_earth) + 
                               cos(from_lat) * 
                               sin(distance / radius_earth) * 
                               math.cos(radians))
            to_long = from_long + math.atan2(sin(radians) * sin(distance / radius_earth) * cos(from_lat), 
                                             cos(distance / radius_earth) - sin(from_lat) * sin(to_lat))

            to_lat = math.degrees(to_lat) # + 0.0000428
            to_long = math.degrees(to_long) # + (-0.0000254)
    
        else:
            geod = Geod(ellps="WGS84")
            
            to_long, to_lat, _ = geod.fwd(start_longitude, start_latitude, angle, distance, radians = False)
        
        start_longitude = round(start_longitude, 7)
        start_latitude = round(start_latitude, 7)
        to_lat = round((to_lat), 7)
        to_long = round(to_long, 7)

        processed_data.append((start_longitude, start_latitude, elevation, distance, angle, to_long, to_lat, road_condition, road_type, [], []))

    processed_data = pd.DataFrame(processed_data)

    processed_data = processed_data.rename(
        columns={0: "Longitude", 1: "Latitude", 2: "Elevation", 3: "Distance", 4: "Angle", 5: "To Longitude", 6: "To Latitude", 7: "Road Condition", 8: "Road Type", 9: "Connection(s)"})

    return processed_data


road_elevation_nodes = road_elevation_processing(ROAD_POINT_WITH_ELEVATION_DATA)

road_elevation_nodes

Unnamed: 0,Longitude,Latitude,Elevation,Distance,Angle,To Longitude,To Latitude,Road Condition,Road Type,Connection(s),10
0,34.029856,-11.458530,1286,0.000623,143.665285,34.029860,-11.458534,,path,[],[]
1,34.028073,-11.458322,1279,0.000674,68.613613,34.028078,-11.458320,,track,[],[]
2,34.028159,-11.458254,1279,0.000784,68.912626,34.028165,-11.458251,,track,[],[]
3,34.028325,-11.458243,1277,0.000950,92.006825,34.028334,-11.458243,,track,[],[]
4,34.028443,-11.458259,1277,0.001069,116.018906,34.028452,-11.458263,,track,[],[]
...,...,...,...,...,...,...,...,...,...,...,...
58127,34.034627,-11.458959,1270,0.002183,78.742212,34.034646,-11.458956,,path,[],[]
58128,34.034729,-11.458953,1270,0.002285,94.031740,34.034749,-11.458954,,path,[],[]
58129,34.034887,-11.458986,1275,0.002447,100.120003,34.034909,-11.458990,,path,[],[]
58130,34.029513,-11.458301,1282,0.000201,102.616360,34.029515,-11.458301,,path,[],[]


# Check that the math is correct

---

In [66]:
# Test = 1, dp = 7
lat1 = road_elevation_nodes.loc[37758, "Latitude"]
long1 = road_elevation_nodes.loc[37758, "Longitude"]
lat2 = road_elevation_nodes.loc[37757, "To Latitude"]
long2 = road_elevation_nodes.loc[37757, "To Longitude"]

print(lat1, long1)
print(lat2, long2)
print("")
lat_diff = lat1 - lat2
long_diff = long1 - long2
print("Lat diff", f'{lat_diff:.7f}')
print("Long diff", f'{long_diff:.7f}')

-11.4644109 33.9785172
-11.4643877 33.9785039

Lat diff -0.0000232
Long diff 0.0000133


In [67]:
# Test = 2, dp = 7, 37757-37758
lat1 = road_elevation_nodes.loc[37758, "Latitude"]
long1 = road_elevation_nodes.loc[37758, "Longitude"]
lat2 = road_elevation_nodes.loc[37757, "To Latitude"]
long2 = road_elevation_nodes.loc[37757, "To Longitude"]

print(lat1, long1)
print(lat2, long2)
print("")
lat_diff = lat1 - lat2
long_diff = long1 - long2
print("Lat diff", f'{lat_diff:.7f}')
print("Long diff", f'{long_diff:.7f}')

-11.4644109 33.9785172
-11.4643877 33.9785039

Lat diff -0.0000232
Long diff 0.0000133


In [68]:
# Test = 1, dp = 7, 51658-51659
lat1 = road_elevation_nodes.loc[51659, "Latitude"]
long1 = road_elevation_nodes.loc[51659, "Longitude"]
lat2 = road_elevation_nodes.loc[51658, "To Latitude"]
long2 = road_elevation_nodes.loc[51658, "To Longitude"]

print(lat1, long1)
print(lat2, long2)
print("")
lat_diff = lat1 - lat2
long_diff = long1 - long2
print("Lat diff", f'{lat_diff:.7f}')
print("Long diff", f'{long_diff:.7f}')

-11.4712136 34.0540908
-11.4712806 34.0540154

Lat diff 0.0000670
Long diff 0.0000754


In [69]:
# Test = 2, dp = 7, 51658-51659
lat1 = road_elevation_nodes.loc[51659, "Latitude"]
long1 = road_elevation_nodes.loc[51659, "Longitude"]
lat2 = road_elevation_nodes.loc[51658, "To Latitude"]
long2 = road_elevation_nodes.loc[51658, "To Longitude"]

print(lat1, long1)
print(lat2, long2)
print("")
lat_diff = lat1 - lat2
long_diff = long1 - long2
print("Lat diff", f'{lat_diff:.7f}')
print("Long diff", f'{long_diff:.7f}')

-11.4712136 34.0540908
-11.4712806 34.0540154

Lat diff 0.0000670
Long diff 0.0000754


In [70]:
road_elevation_long_list = road_elevation_nodes["Longitude"].values
print(road_elevation_long_list)

road_elevation_lat_list = road_elevation_nodes["Latitude"].values
print(road_elevation_lat_list)

if 34.029860 in road_elevation_long_list:
    print("Is here")
if -11.4585340 in road_elevation_lat_list:
    print("Is here too")

[34.0298563 34.0280727 34.0281585 ... 34.0348868 34.029513  34.0296807]
[-11.4585297 -11.458322  -11.4582536 ... -11.4589857 -11.4583009
 -11.4583602]
Is here


In [71]:
long_unique_arr = road_elevation_nodes["Longitude"].unique()
lat_unique_arr = road_elevation_nodes["Latitude"].unique()

print(len(long_unique_arr))
print(len(lat_unique_arr))

52609
53771


# Create Dict

---

In [72]:
road_elevation_dict = {}

for index, row, in road_elevation_nodes.iterrows():
    lat_long_pair = (round(row["To Latitude"], 3), round(row["To Longitude"], 3))
    if lat_long_pair not in road_elevation_dict:
        road_elevation_dict[lat_long_pair] = [index]
    else: 
        road_elevation_dict[lat_long_pair].append(index)

road_elevation_dict

{(-11.459, 34.03): [0, 58105, 58106, 58107, 58108, 58109],
 (-11.458, 34.028): [1, 2, 3, 4, 29, 30, 517, 518],
 (-11.458, 34.029): [5, 6, 7, 8, 9, 10, 11, 57390, 57391, 57392, 57393, 57394],
 (-11.459, 34.033): [12, 13, 14, 506, 507],
 (-11.459, 34.034): [15, 58119, 58120, 58121, 58122, 58123, 58124, 58125],
 (-11.457, 34.046): [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26],
 (-11.457, 34.047): [27, 28, 57435, 57436, 57437],
 (-11.46, 34.038): [31, 32, 33, 34, 42, 43, 44, 45, 46, 57832, 57833, 57834],
 (-11.46, 34.037): [35,
  57831,
  57986,
  57987,
  57988,
  57989,
  57990,
  57991,
  57992],
 (-11.46, 34.036): [36,
  37,
  57828,
  57829,
  57830,
  57856,
  58091,
  58092,
  58113,
  58114,
  58115,
  58116,
  58117,
  58118],
 (-11.458, 34.045): [38,
  55035,
  55055,
  55056,
  55057,
  55058,
  55059,
  55060,
  55061,
  55062,
  55063,
  55064,
  55065,
  55066,
  55325,
  55370],
 (-11.459, 34.037): [39,
  40,
  41,
  54867,
  54868,
  55394,
  55395,
  55424,
  57993,
  57994

In [78]:
for index, row in road_elevation_nodes.iterrows():
    lat_long_pair = (round(row["Latitude"], 3), round(row["Longitude"], 3))
    
    if lat_long_pair in road_elevation_dict:
        fill_dataframe = road_elevation_dict[lat_long_pair]
        data_list = road_elevation_nodes.iat[index, 9]
        data_list = np.append(data_list, fill_dataframe)
        road_elevation_nodes.iat[index, 9] = data_list
        
    

road_elevation_nodes

Unnamed: 0,Longitude,Latitude,Elevation,Distance,Angle,To Longitude,To Latitude,Road Condition,Road Type,Connection(s),10
0,34.029856,-11.458530,1286,0.000623,143.665285,34.029860,-11.458534,,path,"[0.0, 58105.0, 58106.0, 58107.0, 58108.0, 5810...",[]
1,34.028073,-11.458322,1279,0.000674,68.613613,34.028078,-11.458320,,track,"[1.0, 2.0, 3.0, 4.0, 29.0, 30.0, 517.0, 518.0]",[]
2,34.028159,-11.458254,1279,0.000784,68.912626,34.028165,-11.458251,,track,"[1.0, 2.0, 3.0, 4.0, 29.0, 30.0, 517.0, 518.0]",[]
3,34.028325,-11.458243,1277,0.000950,92.006825,34.028334,-11.458243,,track,"[1.0, 2.0, 3.0, 4.0, 29.0, 30.0, 517.0, 518.0]",[]
4,34.028443,-11.458259,1277,0.001069,116.018906,34.028452,-11.458263,,track,"[1.0, 2.0, 3.0, 4.0, 29.0, 30.0, 517.0, 518.0]",[]
...,...,...,...,...,...,...,...,...,...,...,...
58127,34.034627,-11.458959,1270,0.002183,78.742212,34.034646,-11.458956,,path,"[54991.0, 54992.0, 54993.0, 54994.0, 54995.0, ...",[]
58128,34.034729,-11.458953,1270,0.002285,94.031740,34.034749,-11.458954,,path,"[54991.0, 54992.0, 54993.0, 54994.0, 54995.0, ...",[]
58129,34.034887,-11.458986,1275,0.002447,100.120003,34.034909,-11.458990,,path,"[54991.0, 54992.0, 54993.0, 54994.0, 54995.0, ...",[]
58130,34.029513,-11.458301,1282,0.000201,102.616360,34.029515,-11.458301,,path,"[54565.0, 58130.0, 58131.0]",[]


In [82]:
def calculate_euclidean_dist(lat1, long1, lat2, long2): 
    return math.sqrt((lat2 - lat1)**2 + (long2 - long1)**2)
    
    

In [97]:
class Node: 

    def __init__(self, id):
        self.id = id
        self.connections = {}

    def fill_connections(self, dataframe):
        connections_list = dataframe.iat[self.id, 9]
        self_lat, self_long = dataframe.iat[index, 0], dataframe.iat[index, 1]
        for connection in connections_list: 
            connection = int(connection)
            connect_lat, connect_long = dataframe.iat[connection, 0], dataframe.iat[connection, 1]
            dist = calculate_euclidean_dist(self_lat, self_long, connect_lat, connect_long)
            self.connections[connection] = dist

In [100]:
class Graph:
    def __init__(self, dataframe):
        self.lat_long_nodes = {}
        for index, row in dataframe.iterrows():
            node_lat = row["Latitude"]
            node_long = row["Longitude"]
            node = Node(index)
            node.fill_connections(dataframe)
            self.lat_long_nodes[(node_lat, node_long)] = node
            

In [None]:
class state_node():
    def __init__(self, id, node, curr_dist, remain_dist, total_dist)
        self.id = id
        self.graph_node = node
        self.curr_dist = curr_dist
        self.remain_dist = remain_dist
        self.total_dist = total_dist

In [None]:
class priority_queue():
    def __init__(self):
        queue = []
    
    def pop(self):
        
    

In [101]:
graph = Graph(road_elevation_nodes)

In [None]:
def a_star_search(graph, start_node, end_node):
    
    
    

# More Testing

---

In [74]:
for index, row in road_elevation_nodes.iterrows(): 
    if row["Distance"] == 0.00125705485326:
        print(index)
    if row["Distance"] == 0.00129511125876:
        print(index)
    if row["Distance"] == 0.00035137504829:
        print(index)
    if row["Distance"] == 0.00024823242858:
        print(index)


37757
37758
51658
51659


In [75]:
road_elevation_nodes.iloc[37757]
road_elevation_nodes.iloc[37757, 4]

145.56988247303306

In [76]:
road_elevation_nodes.iloc[37758]
road_elevation_nodes.iloc[37758, 3]

0.00129511125876

In [77]:
road_elevation_nodes.iloc[51658]
road_elevation_nodes.iloc[51658, 4]

51.59403101345294