# General Stuff
---

In [1]:
import os
import geopandas as gpd
import pandas as pd

import numpy as np
import math
from math import sin, cos, radians, pi

from pyproj import Geod

In [2]:
# Get script and dataset file paths.
SCRIPT_PATH = "/Users/Syrin/Documents/GitHub/AI4PublicHealth/"

# Read the road line data .shp file via geopandas and store as a pandas dataframe.
ROAD_LINE_PATH = os.path.join(SCRIPT_PATH, "Datasets/MZUZU_roads_lines_CORRECT.shp")
ROAD_LINE_DATA = gpd.read_file(ROAD_LINE_PATH)
ROAD_LINE_DATA = pd.DataFrame(ROAD_LINE_DATA)
INCLUDE_MULTILINE = False

0       LINESTRING (33.98535 -11.39052, 33.98531 -11.3...
1       LINESTRING (33.95740 -11.39722, 33.95749 -11.3...
2       LINESTRING (33.96021 -11.39077, 33.96037 -11.3...
3       LINESTRING (33.98934 -11.39285, 33.98935 -11.3...
4       LINESTRING (34.04635 -11.38784, 34.04626 -11.3...
                              ...                        
5464    LINESTRING (33.97063 -11.44921, 33.97054 -11.4...
5465    LINESTRING (33.96980 -11.45031, 33.96959 -11.4...
5466    LINESTRING (33.96447 -11.43679, 33.96440 -11.4...
5467    LINESTRING (33.96737 -11.44572, 33.96719 -11.4...
5468    LINESTRING (33.96876 -11.45078, 33.96893 -11.4...
Name: geometry, Length: 5469, dtype: geometry

# Create DataFrame
---

In [50]:
def road_line_processing(road_line_df):
    """Clean the .shp file that contains the route data. Create a second pandas data frame to store a processed
        version of the original data from the .shp file. """

    processed_data_line = []

    for rows in range(len(road_line_df.index)):
        coordinates_line = road_line_df.iloc[rows, 11]
        string_type = (type(coordinates_line))

        if INCLUDE_MULTILINE:
            if str(string_type) == "<class 'shapely.geometry.linestring.LineString'>":
                coordinates_line = list(coordinates_line.coords)

                start_longitude_line = coordinates_line[0][0]
                start_latitude_line = coordinates_line[0][1]
                end_longitude_line = coordinates_line[-1][0]
                end_latitude_line = coordinates_line[-1][1]

                processed_data_line.append(
                    (start_longitude_line, start_latitude_line, end_longitude_line, end_latitude_line))

            elif str(string_type) != "<class 'shapely.geometry.linestring.MultiLineString'>":
                for item in coordinates_line:
                    coordinates_line = item

                    coordinates_line = list(coordinates_line.coords)

                    start_longitude_line = coordinates_line[0][0]
                    start_latitude_line = coordinates_line[0][1]
                    end_longitude_line = coordinates_line[-1][0]
                    end_latitude_line = coordinates_line[-1][1]

                    processed_data_line.append(
                        (start_longitude_line, start_latitude_line, end_longitude_line, end_latitude_line))

            else:
                print("There is a unique string type that is neither LineString or MultiString:")
                print("    ", string_type)

        else:
            if str(string_type) == "<class 'shapely.geometry.linestring.LineString'>":
                coordinates_line = list(coordinates_line.coords)

                start_longitude_line = round(coordinates_line[0][0], 5)
                start_latitude_line = round(coordinates_line[0][1], 5)
                end_longitude_line = round(coordinates_line[-1][0], 5)
                end_latitude_line = round(coordinates_line[-1][1], 5)

                processed_data_line.append(
                    (start_latitude_line, start_longitude_line, end_latitude_line, end_longitude_line, []))

            else:
                continue

    processed_data_line = pd.DataFrame(processed_data_line)
    processed_data_line = processed_data_line.rename(
        columns={0: "Start Latitude", 1: "Start Longitude", 2: "End Latitude", 3: "End Longitude", 4: "Connection(s)"})

    return processed_data_line

road_line_nodes = road_line_processing(extra_data)
road_line_nodes

Unnamed: 0,Start Latitude,Start Longitude,End Latitude,End Longitude,Connection(s)
0,-11.39052,33.98535,-11.36937,33.98421,[]
1,-11.39722,33.95740,-11.39434,34.04805,[]
2,-11.39077,33.96021,-11.39959,33.96272,[]
3,-11.39285,33.98934,-11.40975,33.99270,[]
4,-11.38784,34.04635,-11.38810,34.04486,[]
...,...,...,...,...,...
5458,-11.44921,33.97063,-11.44883,33.97009,[]
5459,-11.45031,33.96980,-11.45030,33.96959,[]
5460,-11.43679,33.96447,-11.43612,33.96356,[]
5461,-11.44572,33.96737,-11.44497,33.96482,[]


# More Testing
---

In [52]:
road_line_nodes.iloc[442]

Start Latitude    -11.40911
Start Longitude    34.00283
End Latitude      -11.39923
End Longitude      33.99528
Connection(s)            []
Name: 442, dtype: object

In [45]:
while index <= 5:
    for index, row in road_line_nodes.iterrows():
        start_lat = row["Start Latitude"]
        start_long = row["Start Longitude"]
        end_lat = row["End Latitude"]
        end_long = row["End Longitude"]
        for sub_index, sub_row in road_line_nodes.iterrows(): 
            if sub_row["Start Latitude"] == start_lat:
                print(index)
            if sub_row["End Latitude"] == end_lat:
                print(index)
            if sub_row["Start Longitude"] == start:
                print(index)
            if sub_row["End Longitude"] == end_long:
                print(index)


0
0
1
1
1
2
2
3
3
3
4
4
5
5
5
6
6
7
7
7
8
8
9
9
9
10
10
11
11
12
12
12
13
13
13
13
13
14
14
14
14
14
15
15
15
15
16
16
17
17
18
18
19
19
19
20
20
21
21
22
22
22
23
23
24
24
24
25
25
26
26
27
27
28
28


KeyboardInterrupt: 

# Create Dict
---

In [53]:
road_line_dict = {}

for index, row, in road_line_nodes.iterrows():
    lat_long_pair = (row["End Latitude"], row["End Longitude"])
    
    if lat_long_pair not in road_line_dict:
        road_line_dict[lat_long_pair] = [index]
        
    else: 
        road_line_dict[lat_long_pair].append(index)

road_line_dict

{(-11.36937, 33.98421): [0],
 (-11.39434, 34.04805): [1, 60],
 (-11.39959, 33.96272): [2],
 (-11.40975, 33.9927): [3, 991],
 (-11.3881, 34.04486): [4],
 (-11.39259, 33.97742): [5],
 (-11.39088, 33.98669): [6],
 (-11.39119, 33.98829): [7],
 (-11.3918, 33.97158): [8],
 (-11.39186, 33.96226): [9],
 (-11.38065, 34.04293): [10],
 (-11.42763, 34.00329): [11],
 (-11.37965, 33.98807): [12, 19],
 (-11.38019, 33.98932): [13, 105],
 (-11.38402, 33.98802): [14, 39],
 (-11.38176, 33.98776): [15],
 (-11.38178, 33.98295): [16],
 (-11.38161, 33.98377): [17],
 (-11.38189, 33.98652): [18],
 (-11.37604, 33.985): [20],
 (-11.3786, 33.98519): [21],
 (-11.37699, 33.98608): [22],
 (-11.37579, 33.99223): [23],
 (-11.37429, 33.98969): [24, 29],
 (-11.37397, 33.98811): [25],
 (-11.3764, 33.98073): [26],
 (-11.37333, 33.9874): [27],
 (-11.37153, 33.98322): [28],
 (-11.3698, 33.98943): [30, 237],
 (-11.38345, 33.97469): [31],
 (-11.39208, 34.02455): [32],
 (-11.38937, 33.99061): [33],
 (-11.38839, 33.98581): [34]

In [64]:
for index, row in road_line_nodes.iterrows():
    lat_long_pair = (row["Start Latitude"], row["Start Longitude"])
    
    if lat_long_pair in road_line_dict:
        
        fill_dataframe = road_line_dict[lat_long_pair]
        fill_dataframe_list = []
        fill_dataframe_list.append(fill_dataframe[x] for x in range(len()))
        
        print(fill_dataframe, fill_dataframe_list)
        
        road_line_nodes.loc[index, "Connection(s)"] += fill_dataframe_list

[442] [[442]]


  return array(a, dtype, copy=False, order=order)


ValueError: Must have equal len keys and value when setting with an iterable

In [61]:
road_line_nodes

Unnamed: 0,Start Latitude,Start Longitude,End Latitude,End Longitude,Connection(s)
0,-11.39052,33.98535,-11.36937,33.98421,[]
1,-11.39722,33.95740,-11.39434,34.04805,[]
2,-11.39077,33.96021,-11.39959,33.96272,[]
3,-11.39285,33.98934,-11.40975,33.99270,[]
4,-11.38784,34.04635,-11.38810,34.04486,[]
...,...,...,...,...,...
5458,-11.44921,33.97063,-11.44883,33.97009,4735
5459,-11.45031,33.96980,-11.45030,33.96959,5125
5460,-11.43679,33.96447,-11.43612,33.96356,[]
5461,-11.44572,33.96737,-11.44497,33.96482,4318
