**Imports**

In [1]:
import time
start_time = time.time()

import itertools
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
from haversine import haversine


**loading data**

In [2]:
# Load the road points and derive two helper columns that the rest of the notebook relies on.
roads = pd.read_csv('../data/_roads3.csv')

# point label "<road>_<lrp>"; vectorised string concatenation instead of a Python-level join per row
roads['road_lrp'] = roads['road'].str.cat(roads['lrp'], sep='_')

# (lat, lon) tuple for every point
roads['coordinates'] = list(zip(roads['lat'], roads['lon']))

roads.head()

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name,road_lrp,coordinates
0,N1,0.0,LRPS,23.706028,90.443333,,Others,Start of Road after Jatrabari Flyover infront of filling station.,N1_LRPS,"(23.7060278, 90.443333)"
1,N1,0.814,LRPSa,23.702917,90.450417,,Culvert,Box Culvert,N1_LRPSa,"(23.7029167, 90.4504167)"
2,N1,0.822,LRPSb,23.702778,90.450472,,CrossRoad,Intersection with Z1101,N1_LRPSb,"(23.7027778, 90.4504722)"
3,N1,1.0,LRP001,23.702139,90.451972,,KmPost,Km post missing,N1_LRP001,"(23.7021389, 90.4519722)"
4,N1,2.0,LRP002,23.697889,90.460583,,KmPost,Km post missing,N1_LRP002,"(23.6978886, 90.4605833)"


In [3]:
# Temporary: hard-coded copy of the similar road pairs produced by the previous notebook -- TODO: adapt later.
# Each entry is [from_road, to_road]; the intersection search below runs once per entry.

similar_pairs = [['N1', 'N2'],
 ['N1', 'N102'],
 ['N2', 'N102'],
 ['N1', 'N104'],
 ['N2', 'N104'],
 ['N1', 'N105'],
 ['N2', 'N105'],
 ['N1', 'N106'],
 ['N2', 'N106'],
 ['N1', 'N110'],
 ['N2', 'N110'],
 ['N1', 'N119'],
 ['N2', 'N119'],
 ['N2', 'N204'],
 ['N2', 'N207'],
 ['N2', 'N208'],
 ['N1', 'N8'],
 ['N2', 'N8'],
 ['N1', 'N809'],
 ['N2', 'N809']]

**Computing distances**

The distance **between each point from road i to road j** was calculated using scipy.spatial.distance and haversine metric. After performing the calculations, the matrix was converted into a dataframe similar to the one depicted below, in which every row concerns a point from road i and every column concerns a point from road j

![distance matrix](https://46gyn61z4i0t1u1pnq2bbk2e-wpengine.netdna-ssl.com/wp-content/uploads/2018/04/Distance-Matrix.png)

We created a function that returns the filtered dataframes from the roads dataset, so that we can calculate the distance between each pair of roads listed in `similar_pairs` above.

In [4]:
def _points_of_road(road_name, roads_df):
    """Return the rows of roads_df belonging to road_name, ordered by chainage, with a fresh 0..n-1 index."""
    return (
        roads_df.loc[roads_df['road'] == road_name]
        .sort_values(by='chainage', ascending=True)
        .reset_index(drop=True)  # positional index, so row i of the frame matches row/column i of a distance matrix
    )


def filter_dataframes(from_road, to_road, roads_df=None):
    """
    Return the points of the two roads of interest as separate dataframes.

    Parameters
    ----------
    from_road : str
        Name of the first road (matched against the 'road' column).
    to_road : str
        Name of the second road (matched against the 'road' column).
    roads_df : pandas.DataFrame, optional
        Table to filter; must contain 'road' and 'chainage' columns.
        Defaults to the notebook-level `roads` dataframe.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        (df_from, df_to): new dataframes (the input is not modified), each sorted by
        ascending chainage and re-indexed from 0. A road with no rows yields an empty dataframe.
    """
    if roads_df is None:
        roads_df = roads  # fall back to the dataframe loaded at the top of the notebook

    return (_points_of_road(from_road, roads_df), _points_of_road(to_road, roads_df))

The next step is to compute the distance between every point from road i to every point in road j, according to the similar_pairs list above


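For every pair in `similar_pairs`, each point of road i is compared with each point of road j. Whenever two points are closer than `threshold` (30, in the unit passed to the haversine function, i.e. metres), that pair of points is recorded as a candidate intersection, together with the LRP names, coordinates and chainages of both points.

*TODO: add a figure illustrating this matching step.*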

In [5]:
# Two points (one on each road) closer than this are treated as an intersection of the two roads.
# Distances are computed by haversine with unit='m', so the threshold is in metres.
threshold = 30

# distance -> [from point, to point, from coords, to coords, from chainage, to chainage,
#              from lat, from lon, to lat, to lon]
# NOTE(review): the distance is used as the key because the following cell builds its dataframe from this
# layout; two hits with exactly the same distance would overwrite each other.
intersections_dict_aggregated = {}

# iterate through the list of road pairs for which intersections are to be identified
for from_road, to_road in similar_pairs:
    from_road_df, to_road_df = filter_dataframes(from_road, to_road)

    # nothing to compare if one of the roads has no points in the dataset
    if from_road_df.empty or to_road_df.empty:
        continue

    # labels ("<road>_<lrp>") of the points on each road, in the same order as the rows of the filtered frames
    from_road_list = from_road_df['road_lrp'].tolist()
    to_road_list = to_road_df['road_lrp'].tolist()

    # (lat, lon) arrays of every point on each road, as needed by cdist
    from_road_locations = from_road_df[['lat', 'lon']].to_numpy()
    to_road_locations = to_road_df[['lat', 'lon']].to_numpy()

    # distance between each point of the first road (rows) and each point of the second road (columns);
    # the haversine formula gives the great-circle distance between two (lat, lon) points on a sphere
    distance_array = cdist(from_road_locations, to_road_locations, metric=haversine, unit='m')

    # labelled view of the distance matrix: index = points of road i ('FROM'), columns = points of road j
    distance_df = pd.DataFrame(
        distance_array,
        index=pd.Index(from_road_list, name='FROM'),
        columns=to_road_list,
    )

    # attributes of each point, pulled once per road pair straight from the filtered frames.
    # Row r / column c of the distance matrix is row r / c of these frames, so positional access returns
    # exactly the point that was measured (no repeated label searches through the full roads table).
    from_coordinates = from_road_df['coordinates'].to_numpy()
    from_chainage = from_road_df['chainage'].to_numpy()
    from_lat = from_road_df['lat'].to_numpy()
    from_lon = from_road_df['lon'].to_numpy()
    to_coordinates = to_road_df['coordinates'].to_numpy()
    to_chainage = to_road_df['chainage'].to_numpy()
    to_lat = to_road_df['lat'].to_numpy()
    to_lon = to_road_df['lon'].to_numpy()

    # (row, column) positions of every distance below the threshold, in row-major order
    hit_rows, hit_cols = np.where(distance_array < threshold)

    for row, col in zip(hit_rows, hit_cols):
        intersections_dict_aggregated[distance_array[row, col]] = [
            from_road_list[row],
            to_road_list[col],
            from_coordinates[row],
            to_coordinates[col],
            from_chainage[row],
            to_chainage[col],
            from_lat[row],
            from_lon[row],
            to_lat[col],
            to_lon[col],
        ]


In [6]:
# consolidated dictionary with all intersections found within the specified threshold
# key: distance between the two points
# value: [from point, to point, from coords, to coords, from chainage, to chainage, from lat, from lon, to lat, to lon]
intersections_dict_aggregated

{20.37415958669164: ['N1_LRP009a',
  'N2_LRPS',
  (23.7060833, 90.5215271),
  (23.7059167, 90.5214438),
  8.763,
  0.0,
  23.7060833,
  90.5215271,
  23.7059167,
  90.5214438],
 19.836828355177154: ['N1_LRP084',
  'N102_LRPS',
  (23.4789716, 91.1179993),
  (23.4789716, 91.1181938),
  81.829,
  0.0,
  23.4789716,
  91.1179993,
  23.4789716,
  91.1181938],
 2.835289605490322: ['N1_LRP084a',
  'N102_LRPS',
  (23.4789716, 91.118166),
  (23.4789716, 91.1181938),
  81.846,
  0.0,
  23.4789716,
  91.118166,
  23.4789716,
  91.1181938],
 9.078351803300341: ['N1_LRP148a',
  'N104_LRP001a',
  (23.0095556, 91.3813604),
  (23.0095278, 91.3814438),
  145.409,
  1.878,
  23.0095556,
  91.3813604,
  23.0095278,
  91.3814438],
 2.8307252713980935: ['N1_LRP012c',
  'N105_LRPS',
  (23.6904163, 90.546583),
  (23.6904163, 90.5466108),
  11.936,
  0.0,
  23.6904163,
  90.546583,
  23.6904163,
  90.5466108],
 29.759390807779774: ['N2_LRP012a',
  'N105_LRP012a',
  (23.785333, 90.5685549),
  (23.7851941, 90.5

In [7]:
# column names for the dictionary key (distance) followed by the ten values stored for each hit
intersection_columns = [
    'distance',
    'from_road_point', 'to_road_point',
    'from_road_point_coords', 'to_road_point_coords',
    'from_road_point_chainage', 'to_road_point_chainage',
    'from_road_point_lat', 'from_road_point_lon',
    'to_road_point_lat', 'to_road_point_lon',
]

# one row per hit; the dictionary key is moved into the first column and the columns are named
intersection_df_full = (
    pd.DataFrame.from_dict(intersections_dict_aggregated, orient='index')
    .reset_index()
    .set_axis(intersection_columns, axis=1)
)

# the intersection is placed at the midpoint between the two matched points
intersection_df_full["intersection_lat"] = (
    intersection_df_full["from_road_point_lat"].add(intersection_df_full["to_road_point_lat"]).div(2)
)
intersection_df_full["intersection_lon"] = (
    intersection_df_full["from_road_point_lon"].add(intersection_df_full["to_road_point_lon"]).div(2)
)

# road name = the part of the point label before the first underscore
intersection_df_full["from_road"] = intersection_df_full['from_road_point'].str.split('_').str[0]
intersection_df_full["to_road"] = intersection_df_full['to_road_point'].str.split('_').str[0]

# identifiers: one per pair of points, one per pair of roads
intersection_df_full['id'] = intersection_df_full['from_road_point'].str.cat(
    intersection_df_full['to_road_point'], sep='-'
)
intersection_df_full['from_road_to_road'] = intersection_df_full['from_road'].str.cat(
    intersection_df_full['to_road'], sep='-'
)

# number of hits recorded for each pair of roads
intersection_df_full['Counts'] = (
    intersection_df_full.groupby(['from_road_to_road'])['id'].transform('count')
)


intersection_df_full

Unnamed: 0,distance,from_road_point,to_road_point,from_road_point_coords,to_road_point_coords,from_road_point_chainage,to_road_point_chainage,from_road_point_lat,from_road_point_lon,to_road_point_lat,to_road_point_lon,intersection_lat,intersection_lon,from_road,to_road,id,from_road_to_road,Counts
0,20.374160,N1_LRP009a,N2_LRPS,"(23.7060833, 90.5215271)","(23.7059167, 90.5214438)",8.763,0.000,23.706083,90.521527,23.705917,90.521444,23.706000,90.521485,N1,N2,N1_LRP009a-N2_LRPS,N1-N2,1
1,19.836828,N1_LRP084,N102_LRPS,"(23.4789716, 91.1179993)","(23.4789716, 91.1181938)",81.829,0.000,23.478972,91.117999,23.478972,91.118194,23.478972,91.118097,N1,N102,N1_LRP084-N102_LRPS,N1-N102,2
2,2.835290,N1_LRP084a,N102_LRPS,"(23.4789716, 91.118166)","(23.4789716, 91.1181938)",81.846,0.000,23.478972,91.118166,23.478972,91.118194,23.478972,91.118180,N1,N102,N1_LRP084a-N102_LRPS,N1-N102,2
3,9.078352,N1_LRP148a,N104_LRP001a,"(23.0095556, 91.3813604)","(23.0095278, 91.3814438)",145.409,1.878,23.009556,91.381360,23.009528,91.381444,23.009542,91.381402,N1,N104,N1_LRP148a-N104_LRP001a,N1-N104,1
4,2.830725,N1_LRP012c,N105_LRPS,"(23.6904163, 90.546583)","(23.6904163, 90.5466108)",11.936,0.000,23.690416,90.546583,23.690416,90.546611,23.690416,90.546597,N1,N105,N1_LRP012c-N105_LRPS,N1-N105,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8,15.444997,N1_LRP387,N110_LRPS,"(21.4258049, 92.0232493)","(21.4259438, 92.0232493)",381.494,0.000,21.425805,92.023249,21.425944,92.023249,21.425874,92.023249,N1,N110,N1_LRP387-N110_LRPS,N1-N110,2
9,18.004838,N2_LRP117b,N204_LRPS,"(24.1478608, 91.3464441)","(24.1479163, 91.3466108)",116.294,0.000,24.147861,91.346444,24.147916,91.346611,24.147889,91.346527,N2,N204,N2_LRP117b-N204_LRPS,N2-N204,1
10,22.890558,N2_LRP146b,N207_LRPS,"(24.2947219, 91.5100833)","(24.2948608, 91.51025)",145.961,0.000,24.294722,91.510083,24.294861,91.510250,24.294791,91.510167,N2,N207,N2_LRP146b-N207_LRPS,N2-N207,2
11,18.737016,N2_LRP191b,N207_LRPE,"(24.6264993, 91.6774993)","(24.6263327, 91.6775271)",190.892,67.985,24.626499,91.677499,24.626333,91.677527,24.626416,91.677513,N2,N207,N2_LRP191b-N207_LRPE,N2-N207,2


In [8]:
# Split the hits in two groups: road pairs with exactly one hit, and road pairs with several hits.
# Pairs with a single hit need no further treatment; pairs with several hits are analysed below.
intersection_df_full_single = intersection_df_full.loc[intersection_df_full.Counts == 1]

intersection_df_full_multiple = (
    intersection_df_full.loc[intersection_df_full.Counts > 1]
    .sort_values(by='from_road_to_road', ascending=True)
)

# For every road pair with more than one hit:
# 1) order its hits by the chainage of the point on the from_road;
# 2) hits whose chainage gap to the previous hit is <= threshold_between_chainages are considered part of
#    the same physical intersection and are grouped together (any number of hits, not just two);
# 3) from each group of several hits only the smallest distance is kept (otherwise we would create
#    several intersections right next to each other);
# 4) a hit that is further than the threshold from its neighbours is kept as it is (two roads may
#    legitimately cross more than once).

# maximum chainage gap between hits of the same intersection, in the unit of the chainage column
# (presumably km -- TODO confirm)
threshold_between_chainages = 1
distance_to_consider = []      # smallest distance of every group that contained several hits
distance_above_threshold = []  # distance of every hit that stands alone

for pair_label, pair_hits in intersection_df_full_multiple.groupby('from_road_to_road'):
    pair_hits = pair_hits.sort_values(by='from_road_point_chainage')

    # a new group starts whenever the gap to the previous hit exceeds the threshold
    # (the first hit has no predecessor: its gap is NaN, which compares as "not greater")
    group_number = (
        pair_hits['from_road_point_chainage'].diff().gt(threshold_between_chainages).cumsum().to_numpy()
    )

    for group_label, group_hits in pair_hits.groupby(group_number):
        if len(group_hits) > 1:
            distance_to_consider.append(group_hits['distance'].min())
        else:
            distance_above_threshold.append(group_hits['distance'].iloc[0])


# the final list is composed of the distances of the stand-alone hits (distance_above_threshold)
# and the smallest distance of each group of nearby hits (distance_to_consider)
final_list_multiple_intersections = distance_above_threshold + distance_to_consider


In [9]:
# corrected multi-intersection dataframe: only the rows whose distance was selected in the previous cell

selected_distance_mask = intersection_df_full_multiple['distance'].isin(final_list_multiple_intersections)
intersection_df_full_multiple_adj = intersection_df_full_multiple.loc[selected_distance_mask]
intersection_df_full_multiple_adj

Unnamed: 0,distance,from_road_point,to_road_point,from_road_point_coords,to_road_point_coords,from_road_point_chainage,to_road_point_chainage,from_road_point_lat,from_road_point_lon,to_road_point_lat,to_road_point_lon,intersection_lat,intersection_lon,from_road,to_road,id,from_road_to_road,Counts
2,2.83529,N1_LRP084a,N102_LRPS,"(23.4789716, 91.118166)","(23.4789716, 91.1181938)",81.846,0.0,23.478972,91.118166,23.478972,91.118194,23.478972,91.11818,N1,N102,N1_LRP084a-N102_LRPS,N1-N102,2
7,12.662824,N1_LRP386c,N110_LRPS,"(21.4258604, 92.023166)","(21.4259438, 92.0232493)",381.481,0.0,21.42586,92.023166,21.425944,92.023249,21.425902,92.023208,N1,N110,N1_LRP386c-N110_LRPS,N1-N110,2
6,23.228986,N2_LRP012b,N105_LRP012a,"(23.7853886, 90.5688882)","(23.7851941, 90.5688049)",11.476,12.803,23.785389,90.568888,23.785194,90.568805,23.785291,90.568847,N2,N105,N2_LRP012b-N105_LRP012a,N2-N105,2
10,22.890558,N2_LRP146b,N207_LRPS,"(24.2947219, 91.5100833)","(24.2948608, 91.51025)",145.961,0.0,24.294722,91.510083,24.294861,91.51025,24.294791,91.510167,N2,N207,N2_LRP146b-N207_LRPS,N2-N207,2
11,18.737016,N2_LRP191b,N207_LRPE,"(24.6264993, 91.6774993)","(24.6263327, 91.6775271)",190.892,67.985,24.626499,91.677499,24.626333,91.677527,24.626416,91.677513,N2,N207,N2_LRP191b-N207_LRPE,N2-N207,2


In [10]:
# The final dataframe is composed of the road pairs with just one intersection (intersection_df_full_single),
# which were already correct, and the adjusted multi-intersection rows (intersection_df_full_multiple_adj).
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# like append, it keeps the original row labels.

intersection_df_full_adj = pd.concat([intersection_df_full_single, intersection_df_full_multiple_adj])
intersection_df_full_adj.shape

(10, 18)

In [11]:
# Keep only the columns needed for the final table and give them their final names.
# Every intersection has to appear on both roads, so two views are built: one from the perspective of the
# from_road and one from the perspective of the to_road. They are stacked together in the next cell.

final_columns = ['chainage','lat','lon','road','id']
cols_interest_from = ["from_road_point_chainage","intersection_lat", "intersection_lon", "from_road","id"]
cols_interest_to = ["to_road_point_chainage", "intersection_lat", "intersection_lon","to_road","id"]

# selecting with a list of columns yields a new dataframe; rename maps each source column to its final name
intersection_df_from = intersection_df_full_adj[cols_interest_from].rename(
    columns=dict(zip(cols_interest_from, final_columns))
)
intersection_df_to = intersection_df_full_adj[cols_interest_to].rename(
    columns=dict(zip(cols_interest_to, final_columns))
)



In [46]:
# putting together the final dataframe of intersections

columns_right_order = ['road','id','model_type','condition','name','lat','lon','length','chainage']

intersection_df_tiny = (
    # stack the from_road rows on top of the to_road rows
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
    pd.concat([intersection_df_from, intersection_df_to])
    # constant / placeholder columns of the final table
    .assign(model_type='intersection', condition='', name='', length='')
    .reindex(columns=columns_right_order)
    # stable sort: for every id the from_road row always comes before the to_road row
    .sort_values(by='id', ascending=True, kind='mergesort')
    # fresh 0..n-1 row labels
    .reset_index(drop=True)
)
intersection_df_tiny.head(30)


Unnamed: 0,road,id,model_type,condition,name,lat,lon,length,chainage
0,N1,N1_LRP009a-N2_LRPS,intersection,,,23.706000,90.521485,,8.763
1,N2,N1_LRP009a-N2_LRPS,intersection,,,23.706000,90.521485,,0.000
2,N1,N1_LRP012c-N105_LRPS,intersection,,,23.690416,90.546597,,11.936
3,N105,N1_LRP012c-N105_LRPS,intersection,,,23.690416,90.546597,,0.000
4,N1,N1_LRP084-N102_LRPS,intersection,,,23.478972,91.118097,,81.829
...,...,...,...,...,...,...,...,...,...
21,N2,N2_LRP146b-N207_LRPS,intersection,,,24.294791,91.510167,,145.961
22,N207,N2_LRP191b-N207_LRPE,intersection,,,24.626416,91.677513,,67.985
23,N2,N2_LRP191b-N207_LRPE,intersection,,,24.626416,91.677513,,190.892
24,N2,N2_LRP228c-N208_LRPE,intersection,,,24.877624,91.875319,,227.756


In [45]:
# Optional: open the final intersections table in an interactive mitosheet widget for manual inspection.
# NOTE(review): this import sits in the middle of the notebook and the cell's execution count is out of
# order; consider moving the import to the imports cell at the top, or removing this cell.
import mitosheet
mitosheet.sheet(intersection_df_tiny, view_df=True)

MitoWidget(analysis_data_json='{"analysisName": "UUID-58bd4ff2-570d-4570-8fb1-515db5ccddd4", "code": {"imports…

In [13]:
# sanity check: (rows, columns) of the final intersections table
intersection_df_tiny.shape

(20, 9)

In [14]:
# wall-clock time elapsed since start_time was recorded in the imports cell
print("Process finished --- %s seconds ---" % (time.time() - start_time))

Process finished --- 33.59984278678894 seconds ---
