This notebook creates a csv file in the necessary format.

In [1]:
import pandas as pd
from scipy.spatial import distance

# load raw datafiles into DataFrames
roads = pd.read_csv("../data/raw/_roads3.csv")
bridges = pd.read_excel('../data/raw/BMMS_overview.xlsx', engine='openpyxl')

In [55]:
#--- Identifying side roads of N1 and N2

# Only keep N1 and N2
df = roads.loc[(roads["road"] == "N1") | (roads["road"] == "N2")]

# List of all roads
Roads = roads['road'].unique().tolist()

# List of all crossroads and side roads (in ugly format)
names = df['name'].tolist()

# Find roads that appear in the crossroads / sideroads 
side_roads = [road for road in Roads if any(road in name for name in names)]
print("there are", len(Roads), " roads, and ", len(side_roads), "side roads to N1 and N2")

there are 847  roads, and  103 side roads to N1 and N2


In [56]:
#--- Filtering for side roads over 25km long

# Take all side roads
sideroads_df = roads[roads['road'].isin(side_roads)]

# Filter for those over 25km long (aka LRPE had chainage >25)
sideroads_ends = sideroads_df.loc[(sideroads_df["lrp"] == "LRPE") & (sideroads_df["chainage"] > 25)]
sideroads_tokeep = sideroads_ends['road'].tolist()

# Apply filter
sideroads_df = sideroads_df.loc[(sideroads_df['road'].isin(sideroads_tokeep))]

In [57]:
#--- Adding a length to each road segment

# Create the length column
sideroads_df["length"] = sideroads_df["chainage"]*1000 
sideroads_df = sideroads_df.reset_index(drop=True)

# Change the chainage to a length
for i in range((len(sideroads_df)-1),0,-1):
    if sideroads_df["road"][i] == sideroads_df["road"][i-1]:
        sideroads_df.loc[i,"length"] = sideroads_df.loc[i,"length"]-sideroads_df.loc[i-1,"length"]

In [58]:
#--- Moving on to bridges!

# Filter for relevant columns and relevant roads
bridges_relevant = bridges[["road", "LRPName", "condition", "length", "chainage", "lat", "lon", 'name', 'km','constructionYear']]
bridges_relevant = bridges_relevant.loc[bridges['road'].isin(side_roads)]
bridges_relevant = bridges_relevant.reset_index(drop = True)

bridgestemp = bridges_relevant # useful in a second

# Only keep right side of each bridge
for i in range(1,len(bridges_relevant)):
    if len(str(bridges_relevant["name"][i])) > 4:
        if bridges_relevant["name"][i][-2:] == 'L)' or bridges_relevant["name"][i][-4:] == 'eft)' or bridges_relevant["name"][i][-3:] == 'L )' or bridges_relevant["name"][i][-4:] == 'EFT)':
            bridgestemp = bridgestemp.drop(i,axis = 0)

# Delete depulicates by removing older information
#  assumption: no 2 lrps in the same road have the exact same km
bridges_relevant = bridgestemp \
    .sort_values(by=['road','km','constructionYear'], ascending=False) \
    .drop_duplicates(subset=['road', 'km'], keep='first')


In [93]:
#--- Bringing relevant roads and bridges together 

# Prepare merge
bridges_relevant = bridges_relevant.rename(columns={"LRPName": "lrp"})

# Merge
merged = pd.merge(sideroads_df, bridges_relevant, how="outer", on=["road", "lrp"])
merged = merged.reset_index(drop = True)

# Add model_type
merged["model_type"] = merged["lrp"].apply(lambda x: "sourcesink" if x == "LRPS" else ("sourcesink" if x == "LRPE" else "link"))
merged.loc[merged["condition"].notnull(), "model_type"] = "bridge"

# Fill in missing data
merged["chainage_x"] = merged["chainage_x"].fillna(value=merged["chainage_y"])
merged["lat_x"] = merged["lat_x"].fillna(value=merged["lat_y"])
merged["lon_x"] = merged["lon_x"].fillna(value=merged["lon_y"])
merged["name_y"] = merged["name_y"].fillna(value=merged["name_x"])
merged["length_x"] = merged["length_x"].fillna(value=merged["length_y"])

# Keep and rename useful columns only
merged = merged.sort_values(by=['road','chainage_x'],ascending = True)
col_tokeep = ["road", "model_type", "lrp", "name_y", "lat_x", "lon_x", "length_x", "condition","type"]
merged = merged[col_tokeep]
merged = merged.rename(columns={"name_y": "name","lat_x" : "lat", "lon_x" : "lon", "length_x" : "length"})
merged = merged.reset_index(drop = True)

# Add ids
merged["id"] =  range(1000000, len(merged) +1000000 )

In [94]:
cross_sideroads = merged.loc[(merged['type'].str.contains("CrossRoad"))| (merged['type'].str.contains("SideRoad"))]
cross_sideroads = cross_sideroads.reset_index()
cross_sideroads.iloc[60:70,:]

Unnamed: 0,index,road,model_type,lrp,name,lat,lon,length,condition,type,id
60,936,N1,link,LRP337a,Road to Badarkhali (R172),21.759639,92.076499,575.0,,"SideRoad,Right",1000936
61,950,N1,link,LRP341b,Road to Alikadam(Z1005),21.721555,92.083083,163.0,,"SideRoad,Left",1000950
62,965,N1,bridge,LRP348a,FULCHARI CULVERT,21.668833,92.075944,96.0,A,"SideRoad,Left",1000965
63,985,N1,link,LRP354c,"Z1131, Kutakhali-Maheshkhali",21.615056,92.066777,59.0,,"SideRoad,Right",1000985
64,1056,N1,bridge,LRP376a,MUKTAR KUL,21.449472,92.099527,57.0,A,"SideRoad,Left",1001056
65,1070,N1,link,LRP378c,Ramu old Section (N109),21.428499,92.095194,79.0,,"SideRoad,Left",1001070
66,1112,N1,bridge,LRP386c,DALA-2 SLAB CULVERT,21.42586,92.023166,65.0,A,"SideRoad,Right",1001112
67,1213,N1,link,LRP403c,Road to Ramu(Z1009),21.317666,92.093027,82.0,,"SideRoad,Left",1001213
68,1217,N1,bridge,LRP404a,UKLIA-SHAHABMARKAT,21.310167,92.0975,220.0,C,"SideRoad,Left",1001217
69,1255,N1,bridge,LRP413a,BALUKHALI,21.24475,92.138944,300.0,B,"SideRoad,Left",1001255


In [95]:
#--- Defining the intersections

# subset the dataframe to only the rows that indicate an intersection
cross_sideroads = merged.loc[(merged['type'].str.contains("CrossRoad"))| (merged['type'].str.contains("SideRoad"))]
cross_sideroads = cross_sideroads.reset_index()

roads_done = []
# iterate over all road rows that are crossroads or sideroads
for i,crossrow in cross_sideroads.iterrows():
    # check if any of those roads indicate in their name that they intersect with a road that crosses the N1 and/or N2
    for j in sideroads_tokeep:
        if j in str(crossrow["name"]) and j != crossrow["road"]:
            # if you are not trying one intersection that you already did the other way around
            if i not in roads_done:
                found = False
                # iterate over all points in that intersecting road to see if any of those indicate to be an intersection with the road at hand
                # since a road can cross another road on multiple occasions, we only look as far as 1 lat and 1 lon
                for k,roadrow in merged[merged['road'] == j].iterrows():
                    if crossrow["name"] in roadrow["name"] and (roadrow["lat"] - crossrow["lat"] < 1) and (roadrow["lon"] - crossrow["lon"] < 1) and crossrow["road"] != roadrow["road"]:
                        # if this finds the intersection point, make its id, lat and lon the same and make the model_type "intersection"
                        merged.iloc[k,9] = cross_sideroads.iloc[i,10]
                        merged.iloc[k,4] = cross_sideroads.iloc[i,5]
                        merged.iloc[k,5] = cross_sideroads.iloc[i,6]
                        merged.iloc[k,1] = "intersection"
                        merged.iloc[cross_sideroads.iloc[i,0],1] = "intersection"
                        #print("made indersection here1:",merged.iloc[k,:],merged.iloc[cross_sideroads.iloc[i,0],1])
                        # save the fact that you have handled this intersection
                        found = True
                        roads_done.append(i)
                # if you didnt find the intersecting road through the name, assign the closest road point as the intersection
                # assumption: projection is so small that it will not distort the distance to much so we can use euclidean distance to determine the closest point
                if found == False:
                    closestrow = merged.iloc[[0]]
                    closestindex = 0
                    closestdistance = 1000
                    p1 = (crossrow["lat"],crossrow["lon"])
                    # calculate for each road point the distance to the intersection, save it if it is closer than what you found before
                    for k,roadrow in merged[merged['road'] == j].iterrows():
                        p2 = (roadrow["lat"],roadrow["lon"])
                        if distance.euclidean(p1, p2) < closestdistance:
                            closestrow = roadrow
                            closestindex = k
                            closestdistance = distance.euclidean(p1, p2)
                    # of the closest point, make the id, lat and lon the same and make the model_type "intersection"
                    merged.iloc[closestindex,9] = cross_sideroads.iloc[i,10]
                    merged.iloc[closestindex,4] = cross_sideroads.iloc[i,5]
                    merged.iloc[closestindex,5] = cross_sideroads.iloc[i,6]
                    merged.iloc[closestindex,1] = "intersection"
                    merged.iloc[cross_sideroads.iloc[i,0],1] = "intersection"
                    #print("made indersection here2:",merged.iloc[k,:],"other roadpoint:",merged.iloc[cross_sideroads.iloc[i,0],:])
                    # save the fact that you have handled this intersection
                    roads_done.append(i)         

made indersection here2: road                            N2
model_type              sourcesink
lrp                           LRPE
name          Road End at Jaflong.
lat                        25.1571
lon                        92.0176
length                         256
condition                      NaN
type                        Others
id                         1002995
Name: 2995, dtype: object other roadpoint: road                           N1
model_type           intersection
lrp                       LRP009a
name          Road to Sylhet (N2)
lat                       23.7061
lon                       90.5215
length                        260
condition                     NaN
type                SideRoad,Left
id                        1000019
Name: 19, dtype: object
made indersection here2: road                                Z1042
model_type                     sourcesink
lrp                                  LRPE
name          Fathepur Connect with R203.
lat                      

made indersection here2: road                                                  R203
model_type                                    intersection
lrp                                                   LRPE
name          Road Connect with N-102 at Radhika ,B.Baria.
lat                                                23.9033
lon                                                91.1266
length                                                 245
condition                                              NaN
type                                                Others
id                                                 1001738
Name: 4239, dtype: object other roadpoint: road                               N102
model_type                 intersection
lrp                             LRP064a
name          R203 on left to Nabinagar
lat                             23.9033
lon                             91.1266
length                              285
condition                           NaN
type                  

made indersection here2: road                                                       N207
model_type                                           sourcesink
lrp                                                        LRPE
name          Road end with N2 at Mukti joddha Chattor Sherpur.
lat                                                     24.6263
lon                                                     91.6775
length                                                      350
condition                                                   NaN
type                                                     Others
id                                                      1003289
Name: 3289, dtype: object other roadpoint: road                               N2
model_type               intersection
lrp                           LRP146b
name          Road to Srimongal(N207)
lat                           24.2947
lon                           91.5101
length                            639
condition                 

made indersection here2: road                                                         N1
model_type                                           sourcesink
lrp                                                        LRPE
name          End of Road at Shapla Chattar ,Teknaf Meet wit...
lat                                                     20.8629
lon                                                     92.2981
length                                                      130
condition                                                   NaN
type                                                     Others
id                                                      1001554
Name: 1554, dtype: object other roadpoint: road                           Z1031
model_type              intersection
lrp                          LRP007a
name          Side Road, Right, N104
lat                          23.0101
lon                           91.399
length                           495
condition                        

In [96]:
# show it worked
merged.iloc[690:700,:]

Unnamed: 0,road,model_type,lrp,name,lat,lon,length,condition,type,id
690,N1,link,LRP259b,Road to Kaliganj (Z1070),22.309694,91.91575,424.0,,CrossRoad,1000690
691,N1,link,LRP260,Box culvert,22.311417,91.917638,273.0,,KmPost,1000691
692,N1,bridge,LRP260a,PALYA BUS STATION,22.312472,91.918833,168.0,A,Culvert,1000692
693,N1,link,LRP260b,Box culvert,22.315639,91.922499,521.0,,Culvert,1000693
694,N1,intersection,LRP260c,Road to Kalurghat (N107),22.374638,91.906278,121.0,,"SideRoad,Left",1006716
695,N1,bridge,LRP252c,UTTOR SONAPUR,22.315747,91.924352,3.0,A,,1000695
696,N1,link,LRP261,Km post missing,22.315556,91.925388,190.0,,KmPost,1000696
697,N1,bridge,LRP261a,SRIMI BRIDGE,22.313778,91.932222,750.0,C,Culvert,1000697
698,N1,link,LRP262,Km post missing,22.31375,91.934527,250.0,,KmPost,1000698
699,N1,link,LRP263,Km post missing,22.313639,91.943889,1000.0,,KmPost,1000699


In [98]:
merged.loc[merged["id"] == 1006716]

Unnamed: 0,road,model_type,lrp,name,lat,lon,length,condition,type,id
694,N1,intersection,LRP260c,Road to Kalurghat (N107),22.374638,91.906278,121.0,,"SideRoad,Left",1006716
6716,Z1065,intersection,LRP013e,N107,22.374638,91.906278,20.0,,CrossRoad,1006716


In [22]:
# as the above code does not find all intersections, here is a more crude way to find them anyhow based on the closeness of the points in different roads
# warning: takes a very long time to run
#for i, row in merged.iterrows():
#    if row['model_type'] != 'bridge':
#        for j in range(i,len(merged)):
#            if merged['model_type'][j] != 'bridge':
#                if row['id'] != merged['id'][j] and row['road'] != merged['road'][j] and distance.euclidean((row['lat'],row['lon']), (merged['lat'][j],merged['lon'][j])) < 0.0001:
#                    merged.iloc[j,9] = merged.iloc[i,9]
#                    merged.iloc[j,4] = merged.iloc[i,4]
#                    merged.iloc[j,5] = merged.iloc[i,5]
#                    merged.iloc[j,1] = "intersection"
#                    print('new intersection made at id',row['id'],'between', row['road'], 'and', merged['road'][j])

new intersection made at id 1000210 between N1 and N102


KeyboardInterrupt: 

In [99]:
# as the above code does not find all intersections, here is a more crude way to find them anyhow based on the closeness of the points in different roads
# warning: takes a very long time to run
#for i, row in merged.iterrows():
#    if row['model_type'] != 'bridge':
#        for j, newrow in merged.iterrows():
#            if i < j and newrow['model_type'] != 'bridge':
#                if row['id'] != newrow['id'] and row['road'] != newrow['road'] and distance.euclidean((row['lat'],row['lon']), (newrow['lat'],newrow['lon'])) < 0.0001:
#                    merged.iloc[j,9] = merged.iloc[i,9]
#                    merged.iloc[j,4] = merged.iloc[i,4]
#                    merged.iloc[j,5] = merged.iloc[i,5]
#                    merged.iloc[j,1] = "intersection"
#                    print('new intersection made at id',row['id'],'between', row['road'], 'and', newrow['road'])

KeyboardInterrupt: 

In [100]:
#--- Adapting to the newest csv guidelines

# Adapting names
#move bridge names to a new column 
merged["bridge_name"] = merged["name"].loc[merged['model_type'] == "bridge"]
#delete names for everything and replace that of SourceSinks according to convention
i = 1 # useful in a second
for index, row in merged.iterrows():
    if not row['model_type'] == "sourcesink":
        merged["name"][index] = ""
        
    elif row['model_type'] == "sourcesink":
        merged["name"][index] = "SoSi" + str(i)
        merged["condition"][index] = ""
        i += 1

# Put columns in right order
merged = merged[["road", "id", "model_type", "condition", "name", "lat", "lon", "length", "bridge_name"]]
merged.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged["name"][index] = "SoSi" + str(i)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged["condition"][index] = ""
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged["name"][index] = ""


Unnamed: 0,road,id,model_type,condition,name,lat,lon,length,bridge_name
0,N1,1000000,sourcesink,,SoSi1,23.706028,90.443333,0.0,
1,N1,1000001,link,,,23.702917,90.450417,814.0,
2,N1,1000002,link,,,23.702778,90.450472,8.0,
3,N1,1000003,link,,,23.702139,90.451972,178.0,
4,N1,1000004,bridge,A,,23.698739,90.458861,11.3,.


In [101]:
# Save to csv 
merged.to_csv("../data/processed/N1_N2_plus_sideroads.csv", index=None)

In [None]:
roads.head(20)