In [61]:
import pandas as pd

# load raw datafiles into DataFrames
roads = pd.read_csv("../data/raw/_roads3.csv")
bridges = pd.read_excel('../data/raw/BMMS_overview.xlsx')
from scipy.spatial import distance

In [62]:
roads_dict = dict(zip(roads.road, roads.road))
'N1' in roads_dict

True

In [63]:
# remove unneeded data
#roads = roads[["road", "chainage", "lrp", "lat", "lon"]]

# convert chainage length to meters (1 chain = 60 feet = 20.11680 meters)
roads["chainage"] = roads["chainage"].astype(float)
roads["length"] = roads["chainage"]*1000 # 20.11680

In [64]:
# calculate length of segment in stead of cumulative length of the road
for i in range((len(roads["length"])-1),0,-1):
    roads.loc[i,"length"] = roads.loc[i,"length"]-roads.loc[i-1,"length"]

bridges = bridges[["road", "LRPName", "condition", "length", "chainage", "lat", "lon", 'name', 'km','constructionYear']]


In [65]:
# Only keep N1 and N2
df = roads.loc[(roads["road"] == "N1") | (roads["road"] == "N2")]
 
# List of all roads
Roads = roads['road'].unique().tolist()
 
# List of all crossroads and side roads (in ugly format)
names = df['name'].tolist()
 
# Find roads that appear in the crossroads / sideroads 
side_roads = [road for road in Roads if any(road in name for name in names)]

In [66]:
#--- Filtering for side roads over 25km long
 
# Take all side roads
sideroads_df = roads[roads['road'].isin(side_roads)]
 
# Filter for those over 25km long (aka LRPE had chainage >25)
sideroads_ends = sideroads_df.loc[(sideroads_df["lrp"] == "LRPE") & (sideroads_df["chainage"] > 25)]
sideroads_tokeep = sideroads_ends['road'].tolist()
sideroads_tokeep

['N1',
 'N102',
 'N104',
 'N2',
 'N204',
 'N207',
 'N208',
 'R151',
 'R170',
 'R203',
 'R211',
 'R220',
 'R240',
 'R241',
 'R250',
 'R301',
 'R310',
 'R360',
 'Z1005',
 'Z1031',
 'Z1034',
 'Z1042',
 'Z1044',
 'Z1048',
 'Z1065',
 'Z1098',
 'Z1124',
 'Z1402',
 'Z2013']

In [67]:
roads = roads[roads['road'].isin(sideroads_tokeep)]

In [68]:
roads.head(20)

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name,length
0,N1,0.0,LRPS,23.706028,90.443333,,Others,Start of Road after Jatrabari Flyover infront...,0.0
1,N1,0.814,LRPSa,23.702917,90.450417,,Culvert,Box Culvert,814.0
2,N1,0.822,LRPSb,23.702778,90.450472,,CrossRoad,Intersection with Z1101,8.0
3,N1,1.0,LRP001,23.702139,90.451972,,KmPost,Km post missing,178.0
4,N1,2.0,LRP002,23.697889,90.460583,,KmPost,Km post missing,1000.0
5,N1,2.13,LRP002a,23.697361,90.461667,,Culvert,Box culvert,130.0
6,N1,3.0,LRP003,23.693833,90.469138,,KmPost,Km post missing,870.0
7,N1,4.0,LRP004,23.693611,90.478777,,KmPost,Km post missing,1000.0
8,N1,4.175,LRP004a,23.693805,90.480527,,"SideRoad,Right",Road to Narayanganj(R111),175.0
9,N1,5.0,LRP005,23.69475,90.4885,,KmPost,Km post missing,825.0


In [80]:
roads["id"] = range(len(roads))
roads["id"] = roads["id"] + 1000000

In [81]:
cross_sideroads = roads[(roads['type'].str.contains("CrossRoad"))| (roads['type'].str.contains("SideRoad"))]

print(cross_sideroads.shape)
cross_sideroads

(236, 10)


Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name,length,id
2,N1,0.822,LRPSb,23.702778,90.450472,,CrossRoad,Intersection with Z1101,8.0,1000002
8,N1,4.175,LRP004a,23.693805,90.480527,,"SideRoad,Right",Road to Narayanganj(R111),175.0,1000008
12,N1,7.181,LRP007a,23.697916,90.509278,,CrossRoad,"R110,Left to Demra, Right to Narayanganj",181.0,1000012
17,N1,8.763,LRP009a,23.706083,90.521527,,"SideRoad,Left",Road to Sylhet (N2),260.0,1000017
28,N1,11.936,LRP012c,23.690416,90.546583,,"SideRoad,Right",Right to Syedpur (R113) left to Joydebpur N105),165.0,1000028
...,...,...,...,...,...,...,...,...,...,...
22189,Z1065,13.720,LRP013e,22.374638,91.906278,,CrossRoad,N107,20.0,1005408
22219,Z1065,20.850,LRP020c,22.385000,91.967166,,"SideRoad,Right","Z1059, Patya-Kanongopara road",350.0,1005438
24031,Z1402,10.879,LRP010c,23.344555,90.706889,,"SideRoad,Left",Road to Matlab Ferry ghat.,282.0,1005648
24042,Z1402,13.632,LRP013a,23.346333,90.730444,,"SideRoad,Right",Road to Kachua (Z 1445),425.0,1005659


Make a loop that goes through the name column, when it finds an intersecting road, loop through this road to first check if there is a location that says it is this intersection (maybe within a reasonable distance), if not, find the lat&lon that is closest to this point and give that the same id as the calling road.
Check if it already did this. 

In [83]:
for i,crossrow in cross_sideroads.iterrows():
    for j in sideroads_tokeep:
        if j in str(crossrow["name"]):
            found = False
            for k,roadrow in roads[roads['road'] == j].iterrows():
                if crossrow["name"] in roadrow["name"] and (roadrow["lat"] - crossrow["lat"] < 1) and (roadrow["lon"] - crossrow["lon"] < 1):
                    print("found specified crossroad at",crossrow,roadrow)
                    # found the crossroad, make id and lat and lon the same
                    roads[roadrow]["id"] = cross_sideroads[crossrow]["id"]
                    roads[roadrow]["lat"] = cross_sideroads[crossrow]["lat"]
                    roads[roadrow]["lon"] = cross_sideroads[crossrow]["lon"]
                    found = True
            if found == False:
                # assumption: projection is so small that it will not distort the distance to much
                closestrow = roads.iloc[[0]]
                closestdistance = 1000
                p1 = (crossrow["lat"],crossrow["lon"])
                for k,roadrow in roads[roads['road']==j].iterrows():
                    p2 = (roadrow["lat"],roadrow["lon"])
                    if distance.euclidean(p1, p2) < closestdistance:
                        closestrow = roadrow
                        closestdistance = distance.euclidean(p1, p2)
                print(closestrow,crossrow)
                roads[closestrow]["id"] = cross_sideroads[crossrow]["id"]
                roads[closestrow]["lat"] = cross_sideroads[crossrow]["lat"]
                roads[closestrow]["lon"] = cross_sideroads[crossrow]["lon"]

road                                    N2
chainage                                 0
lrp                                   LRPS
lat                                23.7059
lon                                90.5214
gap                                    NaN
type                                Others
name        Road Start from N1 at Katchpur
length                               -2790
id                                 1001681
Name: 2346, dtype: object road                         N1
chainage                  8.763
lrp                     LRP009a
lat                     23.7061
lon                     90.5215
gap                         NaN
type              SideRoad,Left
name        Road to Sylhet (N2)
length                      260
id                      1000017
Name: 17, dtype: object


ValueError: Cannot mask with non-boolean array containing NA / NaN values

In [72]:
p1 = (23.69, 90.5466)
p2 = (23.5, 92)
d = distance.euclidean(p1, p2)
print("Euclidean distance: ",d)

Euclidean distance:  1.4657665434850142


In [73]:
p1 = (23.69, 90.5466)
p2 = (23.5, 92.5)
d = distance.euclidean(p1, p2)
print("Euclidean distance: ",d)

Euclidean distance:  1.9626185467380075


In [74]:
p1 = (23.69, 90.5466)
p2 = (23.5, 91.5)
d = distance.euclidean(p1, p2)
print("Euclidean distance: ",d)

Euclidean distance:  0.9721479105568268


In [20]:
"N1" in roadnames

True

In [22]:
roadnames = roads['road'].unique().tolist()
names = roads['name'].unique().tolist()

test = [road for road in roadnames if any(road in name for name in names)]

In [23]:
len(roadnames)

847

In [24]:
len(test)

673

In [None]:
df['code'] = df[[x[0] in x[1] for x in zip(df['road'], df['name'])]]['road']

In [8]:
# the N1 between Dhaka and Chittagong are the first 164 entries
BridgesN1 = bridges.loc[(bridges["road"] == "N1") & (bridges["chainage"] < 241)]
BridgesN1 = BridgesN1.reset_index()

bridgestemp = BridgesN1

# drop all left side bridges, we checked that there is no condition data lost
for i in range(1,len(BridgesN1["road"])):
    if len(BridgesN1["name"][i]) > 4:
        if BridgesN1["name"][i][-2:] == 'L)' or BridgesN1["name"][i][-4:] == 'eft)' or BridgesN1["name"][i][-3:] == 'L )' or BridgesN1["name"][i][-4:] == 'EFT)':
            bridgestemp = bridgestemp.drop(i,axis = 0)


In [9]:
# for all remaining duplicates: drop the least recent one or the one with the missing value
# assumption: no 2 lrps in the same road have the exact same km
BridgesN1 = bridgestemp \
    .sort_values(by=['road','km','constructionYear'], ascending=False) \
    .drop_duplicates(subset=['road', 'km'], keep='first')

BridgesN1 = BridgesN1.rename(columns={"LRPName": "lrp"})

In [10]:
# merge the roads and bridges datafile
merged = pd.merge(roads, BridgesN1, how="outer", on=["road", "lrp", "length", "lat", "lon", "chainage"])

# add type of road/bridge
merged["model_type"] = merged["lrp"].apply(lambda x: "sink" if x == "LRPS" else ("source" if x == "LRPE" else "link"))
merged.loc[merged["condition"].notnull(), "model_type"] = "bridge"

In [11]:
merged = merged.sort_values(by=['road','chainage'],ascending = False)
merged = merged[2:]
merged = merged.reset_index()

In [12]:
merged["bridge_name"] = merged["name"]
merged["name"] = merged["model_type"]

# create the index as a column
merged["id"] = range(len(merged))
merged["id"] = merged["id"]+1000000