# General Stuff
---

In [1]:
import os
import geopandas as gpd
import pandas as pd

import numpy as np

In [2]:
# Get script and dataset file paths.
SCRIPT_PATH = "/Users/Syrin/Documents/GitHub/AI4PublicHealth/"

# Read the road line data .shp file via geopandas and store as a pandas dataframe.
ROAD_LINE_PATH = os.path.join(SCRIPT_PATH, "Datasets/MZUZU_roads_lines_CORRECT.shp")
ROAD_LINE_DATA = gpd.read_file(ROAD_LINE_PATH)
ROAD_LINE_DATA = pd.DataFrame(ROAD_LINE_DATA)
INCLUDE_MULTILINE = False

# Create DataFrame
---

In [3]:
def road_line_processing(road_line_df):
    """Clean the .shp file that contains the route data. Create a second pandas data frame to store a processed
        version of the original data from the .shp file. """

    processed_data_line = []

    for rows in range(len(road_line_df.index)):
        coordinates_line = road_line_df.iloc[rows, 11]
        string_type = (type(coordinates_line))

        if INCLUDE_MULTILINE:
            if str(string_type) == "<class 'shapely.geometry.linestring.LineString'>":
                coordinates_line = list(coordinates_line.coords)

                start_longitude_line = coordinates_line[0][0]
                start_latitude_line = coordinates_line[0][1]
                end_longitude_line = coordinates_line[-1][0]
                end_latitude_line = coordinates_line[-1][1]

                processed_data_line.append(
                    (start_longitude_line, start_latitude_line, end_longitude_line, end_latitude_line))

            elif str(string_type) != "<class 'shapely.geometry.linestring.MultiLineString'>":
                for item in coordinates_line:
                    coordinates_line = item

                    coordinates_line = list(coordinates_line.coords)

                    start_longitude_line = coordinates_line[0][0]
                    start_latitude_line = coordinates_line[0][1]
                    end_longitude_line = coordinates_line[-1][0]
                    end_latitude_line = coordinates_line[-1][1]

                    processed_data_line.append(
                        (start_longitude_line, start_latitude_line, end_longitude_line, end_latitude_line))

            else:
                print("There is a unique string type that is neither LineString or MultiString:")
                print("    ", string_type)

        else:
            if str(string_type) == "<class 'shapely.geometry.linestring.LineString'>":
                coordinates_line = list(coordinates_line.coords)

                start_longitude_line = round(coordinates_line[0][0], 5)
                start_latitude_line = round(coordinates_line[0][1], 5)
                end_longitude_line = round(coordinates_line[-1][0], 5)
                end_latitude_line = round(coordinates_line[-1][1], 5)

                processed_data_line.append(
                    (start_latitude_line, start_longitude_line, end_latitude_line, end_longitude_line, np.array([])))

            else:
                continue

    processed_data_line = pd.DataFrame(processed_data_line)
    processed_data_line = processed_data_line.rename(
        columns={0: "Start Latitude", 1: "Start Longitude", 2: "End Latitude", 3: "End Longitude", 4: "Connection(s)"})

    return processed_data_line

road_line_nodes = road_line_processing(ROAD_LINE_DATA)
road_line_nodes

Unnamed: 0,Start Latitude,Start Longitude,End Latitude,End Longitude,Connection(s)
0,-11.39052,33.98535,-11.36937,33.98421,[]
1,-11.39722,33.95740,-11.39434,34.04805,[]
2,-11.39077,33.96021,-11.39959,33.96272,[]
3,-11.39285,33.98934,-11.40975,33.99270,[]
4,-11.38784,34.04635,-11.38810,34.04486,[]
...,...,...,...,...,...
5458,-11.44921,33.97063,-11.44883,33.97009,[]
5459,-11.45031,33.96980,-11.45030,33.96959,[]
5460,-11.43679,33.96447,-11.43612,33.96356,[]
5461,-11.44572,33.96737,-11.44497,33.96482,[]


# Testing
---

In [4]:
road_line_nodes.iloc[442]

Start Latitude    -11.40911
Start Longitude    34.00283
End Latitude      -11.39923
End Longitude      33.99528
Connection(s)            []
Name: 442, dtype: object

# Create Dicts
---

In [5]:
start_road_line_dict = {}
end_road_line_dict = {}

for index, row, in road_line_nodes.iterrows():
    start_lat_long_pair = (row["Start Latitude"], row["Start Longitude"])
    end_lat_long_pair = (row["End Latitude"], row["End Longitude"])
    
    if start_lat_long_pair not in start_road_line_dict:
        start_road_line_dict[start_lat_long_pair] = [index]
    else: 
        start_road_line_dict[start_lat_long_pair].append(index)
        
    if end_lat_long_pair not in end_road_line_dict:
        end_road_line_dict[end_lat_long_pair] = [index]
    else: 
        end_road_line_dict[end_lat_long_pair].append(index)

In [6]:
start_road_line_dict

{(-11.39052, 33.98535): [0],
 (-11.39722, 33.9574): [1],
 (-11.39077, 33.96021): [2],
 (-11.39285, 33.98934): [3],
 (-11.38784, 34.04635): [4],
 (-11.4081, 33.98253): [5],
 (-11.40163, 33.98339): [6],
 (-11.39923, 33.99528): [7, 849],
 (-11.38448, 33.96634): [8],
 (-11.38983, 33.96197): [9],
 (-11.39434, 34.04805): [10],
 (-11.39366, 33.99819): [11],
 (-11.38019, 33.98932): [12],
 (-11.38202, 33.98909): [13],
 (-11.38202, 33.98892): [14, 15],
 (-11.38219, 33.98436): [16],
 (-11.38247, 33.98474): [17],
 (-11.38153, 33.98467): [18],
 (-11.38026, 33.98462): [19],
 (-11.37605, 33.98665): [20],
 (-11.37913, 33.98489): [21],
 (-11.37971, 33.98464): [22],
 (-11.37437, 33.98924): [23],
 (-11.37468, 33.98748): [24],
 (-11.37445, 33.98864): [25],
 (-11.37321, 33.98731): [26],
 (-11.3745, 33.98821): [27],
 (-11.3708, 33.9854): [28],
 (-11.36974, 33.98882): [29],
 (-11.37312, 33.98725): [30],
 (-11.37874, 33.97298): [31],
 (-11.39792, 34.01995): [32],
 (-11.39069, 33.98842): [33],
 (-11.39113, 33.

In [7]:
end_road_line_dict

{(-11.36937, 33.98421): [0],
 (-11.39434, 34.04805): [1, 60],
 (-11.39959, 33.96272): [2],
 (-11.40975, 33.9927): [3, 991],
 (-11.3881, 34.04486): [4],
 (-11.39259, 33.97742): [5],
 (-11.39088, 33.98669): [6],
 (-11.39119, 33.98829): [7],
 (-11.3918, 33.97158): [8],
 (-11.39186, 33.96226): [9],
 (-11.38065, 34.04293): [10],
 (-11.42763, 34.00329): [11],
 (-11.37965, 33.98807): [12, 19],
 (-11.38019, 33.98932): [13, 105],
 (-11.38402, 33.98802): [14, 39],
 (-11.38176, 33.98776): [15],
 (-11.38178, 33.98295): [16],
 (-11.38161, 33.98377): [17],
 (-11.38189, 33.98652): [18],
 (-11.37604, 33.985): [20],
 (-11.3786, 33.98519): [21],
 (-11.37699, 33.98608): [22],
 (-11.37579, 33.99223): [23],
 (-11.37429, 33.98969): [24, 29],
 (-11.37397, 33.98811): [25],
 (-11.3764, 33.98073): [26],
 (-11.37333, 33.9874): [27],
 (-11.37153, 33.98322): [28],
 (-11.3698, 33.98943): [30, 237],
 (-11.38345, 33.97469): [31],
 (-11.39208, 34.02455): [32],
 (-11.38937, 33.99061): [33],
 (-11.38839, 33.98581): [34]

In [8]:
for index, row in road_line_nodes.iterrows():
    start_lat_long_pair = (row["Start Latitude"], row["Start Longitude"])
    end_lat_long_pair = (row["End Latitude"], row["End Longitude"])
    
    current_indexs = road_line_nodes.iat[index, 4]
    start_index_number = []
    end_index_number = []
    
    if start_lat_long_pair in start_road_line_dict:
        start_index_number = start_road_line_dict[start_lat_long_pair]


    if end_lat_long_pair in end_road_line_dict:
        end_index_number = end_road_line_dict[end_lat_long_pair]
        
    for indexs in np.append(start_index_number, end_index_number):
        if indexs != index and index not in current_indexs:
            print(index, start_index_number, end_index_number, current_indexs)
            current_indexs = np.append(current_indexs, indexs)
            road_line_nodes.iat[index, 4] = current_indexs

1 [1] [1, 60] []
3 [3] [3, 991] []
7 [7, 849] [7] []
12 [12] [12, 19] []
13 [13] [13, 105] []
14 [14, 15] [14, 39] []
14 [14, 15] [14, 39] [15.]
15 [14, 15] [15] []
19 [19] [12, 19] []
24 [24] [24, 29] []
29 [29] [24, 29] []
30 [30] [30, 237] []
39 [39] [14, 39] []
49 [49, 65] [49, 66] []
49 [49, 65] [49, 66] [65.]
50 [50, 51, 53] [50] []
50 [50, 51, 53] [50] [51.]
51 [50, 51, 53] [51, 940] []
51 [50, 51, 53] [51, 940] [50.]
51 [50, 51, 53] [51, 940] [50. 53.]
53 [50, 51, 53] [53] []
53 [50, 51, 53] [53] [50.]
54 [54] [54, 205] []
55 [55] [55, 171] []
60 [60, 1171] [1, 60] []
60 [60, 1171] [1, 60] [1171.]
61 [61, 771] [61] []
62 [62] [62, 69] []
65 [49, 65] [65] []
66 [66] [49, 66] []
67 [67, 154] [67] []
69 [69] [62, 69] []
70 [70] [70, 175] []
89 [89, 91] [89] []
91 [89, 91] [91] []
105 [105] [13, 105] []
107 [107] [107, 270] []
132 [132] [132, 137] []
133 [133] [133, 134] []
134 [134] [133, 134] []
136 [136, 142] [136] []
137 [137] [132, 137] []
141 [141, 144] [141] []
142 [136, 142

627 [627, 784] [627, 779] []
627 [627, 784] [627, 779] [784.]
628 [628] [622, 628, 1048] []
628 [628] [622, 628, 1048] [622.]
637 [637] [637, 1677] []
641 [641, 4773, 4774] [542, 641] []
641 [641, 4773, 4774] [542, 641] [4773.]
641 [641, 4773, 4774] [542, 641] [4773. 4774.]
642 [642, 4772] [642, 652] []
642 [642, 4772] [642, 652] [4772.]
646 [646, 647] [646] []
647 [646, 647] [647] []
649 [649, 718] [649] []
652 [652] [642, 652] []
654 [654, 778] [654] []
655 [655, 656] [655] []
656 [655, 656] [656, 775] []
656 [655, 656] [656, 775] [655.]
659 [659] [659, 973] []
662 [662] [592, 662] []
663 [300, 663, 1053] [663] []
663 [300, 663, 1053] [663] [300.]
664 [664, 1700, 1701] [664, 669] []
664 [664, 1700, 1701] [664, 669] [1700.]
664 [664, 1700, 1701] [664, 669] [1700. 1701.]
669 [669, 1058] [664, 669] []
669 [669, 1058] [664, 669] [1058.]
671 [315, 671, 1049] [671] []
671 [315, 671, 1049] [671] [315.]
676 [676, 783] [676] []
677 [677, 779] [677, 783] []
677 [677, 779] [677, 783] [779.]
687

1288 [1288] [1288, 1513] []
1295 [1295, 1515] [1295] []
1298 [1298, 1303, 1521] [1298, 1520] []
1298 [1298, 1303, 1521] [1298, 1520] [1303.]
1298 [1298, 1303, 1521] [1298, 1520] [1303. 1521.]
1302 [1302, 1479] [1302] []
1303 [1298, 1303, 1521] [1303, 1512] []
1303 [1298, 1303, 1521] [1303, 1512] [1298.]
1303 [1298, 1303, 1521] [1303, 1512] [1298. 1521.]
1306 [1306] [1306, 1484] []
1317 [291, 1155, 1317] [1317] []
1317 [291, 1155, 1317] [1317] [291.]
1318 [1318] [1318, 1319] []
1319 [1319] [1318, 1319] []
1321 [1321] [1321, 1322] []
1322 [1322] [1321, 1322] []
1324 [1324] [1324, 1325] []
1325 [1325] [1324, 1325] []
1326 [1326, 1329] [1326] []
1329 [1326, 1329] [1329, 1330] []
1329 [1326, 1329] [1329, 1330] [1326.]
1330 [1330] [1329, 1330] []
1336 [1336, 1481] [1336, 1341] []
1336 [1336, 1481] [1336, 1341] [1481.]
1338 [1234, 1235, 1338] [1338] []
1338 [1234, 1235, 1338] [1338] [1234.]
1340 [1340, 1482] [1340] []
1341 [1341] [1336, 1341] []
1344 [623, 1344] [1344, 1359] []
1344 [623, 134

2139 [2139, 2141] [2139, 2147] []
2139 [2139, 2141] [2139, 2147] [2141.]
2141 [2139, 2141] [2141] []
2143 [2136, 2143] [2143, 2884] []
2143 [2136, 2143] [2143, 2884] [2136.]
2144 [2144] [2144, 2164] []
2147 [2147, 2433] [2139, 2147] []
2147 [2147, 2433] [2139, 2147] [2433.]
2148 [2148, 2154] [2148] []
2154 [2148, 2154] [2154] []
2157 [2157] [2157, 2174, 2313] []
2157 [2157] [2157, 2174, 2313] [2174.]
2158 [2158, 2161, 2942] [2158] []
2158 [2158, 2161, 2942] [2158] [2161.]
2161 [2158, 2161, 2942] [2161, 2896] []
2161 [2158, 2161, 2942] [2161, 2896] [2158.]
2161 [2158, 2161, 2942] [2161, 2896] [2158. 2942.]
2162 [2162] [2162, 2939] []
2164 [2164] [2144, 2164] []
2165 [2165] [2136, 2165] []
2170 [2170, 2317] [2170] []
2171 [2171, 2518] [2171] []
2174 [2174] [2157, 2174, 2313] []
2174 [2174] [2157, 2174, 2313] [2157.]
2175 [2175, 2177] [2175, 3065] []
2175 [2175, 2177] [2175, 3065] [2177.]
2177 [2175, 2177] [2177, 2674] []
2177 [2175, 2177] [2177, 2674] [2175.]
2179 [2179] [2179, 2431] []


2744 [2744] [2101, 2744] []
2746 [2746, 4872] [2746] []
2749 [2749, 3074] [2749] []
2751 [2751, 3025] [2751] []
2753 [2753, 3027] [2753, 2968] []
2753 [2753, 3027] [2753, 2968] [3027.]
2755 [2755] [2755, 2969] []
2773 [2067, 2773] [2773] []
2775 [2775, 3119] [2775] []
2779 [2779, 3109, 3110] [2779] []
2779 [2779, 3109, 3110] [2779] [3109.]
2780 [2014, 2780] [2780] []
2785 [2785, 2787] [2785] []
2787 [2785, 2787] [2787] []
2795 [2795] [2625, 2795] []
2799 [2799] [2429, 2799] []
2801 [2801] [2801, 2828] []
2809 [2809, 3040] [2809] []
2821 [2821] [1744, 2821] []
2823 [2823, 2959] [2823, 2958, 2960, 2961] []
2823 [2823, 2959] [2823, 2958, 2960, 2961] [2959.]
2823 [2823, 2959] [2823, 2958, 2960, 2961] [2959. 2958.]
2823 [2823, 2959] [2823, 2958, 2960, 2961] [2959. 2958. 2960.]
2824 [2824] [2824, 2963] []
2826 [2826] [2185, 2826] []
2827 [2827] [2827, 3085] []
2828 [2828] [2801, 2828] []
2835 [2465, 2835] [2835] []
2840 [2840] [2840, 2844] []
2844 [2844] [2840, 2844] []
2847 [2585, 2847] [28

3263 [3263] [3209, 3263] []
3266 [3266] [2997, 3266] []
3268 [3268] [3268, 3527] []
3273 [3273, 3280] [3273] []
3279 [3279, 3524] [3279] []
3280 [3273, 3280] [3280, 3282] []
3280 [3273, 3280] [3280, 3282] [3273.]
3282 [3282] [3280, 3282] []
3284 [3284, 3551] [3284] []
3297 [3297] [3297, 3321] []
3317 [3317] [3317, 3388] []
3321 [3321] [3297, 3321] []
3322 [3322, 3381] [3322, 3325] []
3322 [3322, 3381] [3322, 3325] [3381.]
3325 [3325] [3322, 3325] []
3331 [3331] [3331, 3340] []
3340 [3340] [3331, 3340] []
3341 [3341, 3344] [3341, 3550] []
3341 [3341, 3344] [3341, 3550] [3344.]
3342 [3342, 3568] [3342, 3343] []
3342 [3342, 3568] [3342, 3343] [3568.]
3343 [3343] [3342, 3343] []
3344 [3341, 3344] [3344] []
3348 [3348, 3470] [3348] []
3349 [3349, 3562] [3349] []
3355 [3355] [3355, 4028] []
3358 [3358] [3358, 3404] []
3361 [3361, 3423] [3361] []
3363 [3363, 3444] [3363] []
3365 [3365] [3365, 3446] []
3367 [3367] [3367, 3386] []
3368 [3368] [3368, 3538] []
3374 [3374, 3464] [3374, 3497] []
33

4231 [2029, 4231] [4231] []
4232 [4232] [4232, 5264] []
4233 [4233, 5193] [4233, 5191] []
4233 [4233, 5193] [4233, 5191] [5193.]
4234 [4234] [4234, 4258] []
4237 [4237] [4237, 5379] []
4239 [4226, 4239, 5354] [4239] []
4239 [4226, 4239, 5354] [4239] [4226.]
4240 [4240, 5200] [4240] []
4243 [4243] [4243, 5267] []
4245 [4245, 5444] [4245] []
4248 [4248, 4497] [4248] []
4249 [4249, 5220] [4249] []
4250 [4250, 5120] [4250] []
4252 [4252, 5163] [4252, 4254] []
4252 [4252, 5163] [4252, 4254] [5163.]
4253 [4253, 5121] [4253] []
4254 [4254] [4252, 4254] []
4255 [4255, 5156] [4255] []
4257 [4257, 5192] [4257] []
4258 [4258] [4234, 4258] []
4259 [4259, 5264] [4259] []
4261 [4261, 5106] [4261] []
4263 [4263, 5368] [4263] []
4267 [4267] [4267, 5230] []
4277 [4277, 4281, 4285] [4277] []
4277 [4277, 4281, 4285] [4277] [4281.]
4279 [4279, 5357] [4279] []
4281 [4277, 4281, 4285] [4281, 4364] []
4281 [4277, 4281, 4285] [4281, 4364] [4277.]
4281 [4277, 4281, 4285] [4281, 4364] [4277. 4285.]
4282 [4282, 

4728 [4728, 5411] [4417, 4728] []
4728 [4728, 5411] [4417, 4728] [5411.]
4729 [4729] [4461, 4729] []
4732 [4455, 4732] [4732] []
4733 [4733, 5009] [4733] []
4736 [4736] [4552, 4736] []
4737 [4737, 5458] [4737, 4740] []
4737 [4737, 5458] [4737, 4740] [5458.]
4738 [4738] [4421, 4738] []
4739 [4739] [4739, 4746] []
4740 [4549, 4740] [4737, 4740] []
4740 [4549, 4740] [4737, 4740] [4549.]
4744 [4744] [4544, 4744] []
4746 [4746] [4739, 4746] []
4748 [4748, 5125] [4748] []
4750 [4750] [4455, 4750] []
4752 [4752, 5459] [4752] []
4755 [4755] [4755, 4757] []
4757 [4757] [4755, 4757] []
4758 [4456, 4758] [4758, 4759] []
4758 [4456, 4758] [4758, 4759] [4456.]
4759 [4759] [4758, 4759] []
4760 [4760] [4318, 4760] []
4761 [4353, 4761] [4761] []
4763 [4763, 4764] [4763] []
4764 [4763, 4764] [4764] []
4765 [4765, 5437] [4765] []
4768 [2680, 4768, 4819] [4768] []
4768 [2680, 4768, 4819] [4768] [2680.]
4769 [4603, 4769] [4769, 4771] []
4769 [4603, 4769] [4769, 4771] [4603.]
4770 [4770] [4770, 4774] []
47

5342 [5342] [4873, 5342] []
5343 [2593, 5343] [5343] []
5344 [4919, 5344] [5337, 5344] []
5344 [4919, 5344] [5337, 5344] [4919.]
5346 [5048, 5346] [5346] []
5353 [2007, 5206, 5353] [2008, 5132, 5353] []
5353 [2007, 5206, 5353] [2008, 5132, 5353] [2007.]
5353 [2007, 5206, 5353] [2008, 5132, 5353] [2007. 5206.]
5353 [2007, 5206, 5353] [2008, 5132, 5353] [2007. 5206. 2008.]
5354 [4226, 4239, 5354] [5354] []
5354 [4226, 4239, 5354] [5354] [4226.]
5357 [4279, 5357] [5357] []
5358 [4282, 5358] [5358] []
5360 [5360, 5431] [4394, 5360] []
5360 [5360, 5431] [4394, 5360] [5431.]
5362 [5362] [4860, 5362] []
5363 [5363] [4859, 5363] []
5365 [4801, 5365] [5365] []
5368 [4263, 5368] [5368] []
5369 [5369, 5373] [5369] []
5372 [5372, 5374] [4689, 5372] []
5372 [5372, 5374] [4689, 5372] [5374.]
5373 [5369, 5373] [5373] []
5374 [5372, 5374] [5374] []
5375 [5375] [2042, 5375, 5415] []
5375 [5375] [2042, 5375, 5415] [2042.]
5376 [5054, 5376] [5030, 5376] []
5376 [5054, 5376] [5030, 5376] [5054.]
5379 [537

In [9]:
road_line_nodes.iloc[269]

Start Latitude    -11.39642
Start Longitude    34.03312
End Latitude      -11.39334
End Longitude      34.03339
Connection(s)       [262.0]
Name: 269, dtype: object

In [10]:
road_line_nodes

Unnamed: 0,Start Latitude,Start Longitude,End Latitude,End Longitude,Connection(s)
0,-11.39052,33.98535,-11.36937,33.98421,[]
1,-11.39722,33.95740,-11.39434,34.04805,[60.0]
2,-11.39077,33.96021,-11.39959,33.96272,[]
3,-11.39285,33.98934,-11.40975,33.99270,[991.0]
4,-11.38784,34.04635,-11.38810,34.04486,[]
...,...,...,...,...,...
5458,-11.44921,33.97063,-11.44883,33.97009,[4737.0]
5459,-11.45031,33.96980,-11.45030,33.96959,"[4752.0, 4550.0, 5462.0]"
5460,-11.43679,33.96447,-11.43612,33.96356,"[4355.0, 4357.0]"
5461,-11.44572,33.96737,-11.44497,33.96482,"[4931.0, 4317.0]"
