## Bridges EDA

In [16]:
import pandas as pd
import math

In [17]:
# Read the Bridges file
bridges = pd.read_excel("../data/BMMS_overview.xlsx")
bridges.head()

Unnamed: 0,road,km,type,LRPName,name,length,condition,structureNr,roadName,chainage,width,constructionYear,spans,zone,circle,division,sub-division,lat,lon,EstimatedLoc
0,N1,1.8,Box Culvert,LRP001a,.,11.3,A,117861,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,1.8,19.5,2005.0,2.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.698739,90.458861,interpolate
1,N1,4.925,Box Culvert,LRP004b,.,6.6,A,117862,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,4.925,35.4,2006.0,1.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.694664,90.487775,interpolate
2,N1,8.976,PC Girder Bridge,LRP008b,Kanch pur Bridge.,394.23,A,119889,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,8.976,,,,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.70506,90.523214,interpolate
3,N1,10.88,Box Culvert,LRP010b,NOYAPARA CULVERT,6.3,A,112531,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.88,12.2,1992.0,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.694391,90.537574,interpolate
4,N1,10.897,Box Culvert,LRP010c,ADUPUR CULVERT,6.3,A,112532,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.897,12.2,1984.0,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.694302,90.537707,interpolate


In [18]:
# Keep only bridges on the N1 road
bridges = bridges.loc[bridges["road"] == "N1"]

# Define bright length bins and labels
length_bins = [0, 10, 50, 200, math.inf]
length_labels = ['S', 'M', 'L', 'XL']

# Categorize bridges per length class
bridges["length_class"] = pd.cut(bridges["length"], bins=length_bins, include_lowest=False, right=False, labels=length_labels)

# Keep only useful columns
keep_columns = ['km', 'type', 'LRPName', 'name', 'length', 'condition', 'chainage', 'lat', 'lon', 'length_class']
bridges_clean = bridges[keep_columns]
bridges_clean

Unnamed: 0,km,type,LRPName,name,length,condition,chainage,lat,lon,length_class
0,1.800,Box Culvert,LRP001a,.,11.30,A,1.800,23.698739,90.458861,M
1,4.925,Box Culvert,LRP004b,.,6.60,A,4.925,23.694664,90.487775,S
2,8.976,PC Girder Bridge,LRP008b,Kanch pur Bridge.,394.23,A,8.976,23.705060,90.523214,XL
3,10.880,Box Culvert,LRP010b,NOYAPARA CULVERT,6.30,A,10.880,23.694391,90.537574,S
4,10.897,Box Culvert,LRP010c,ADUPUR CULVERT,6.30,A,10.897,23.694302,90.537707,S
...,...,...,...,...,...,...,...,...,...,...
19378,426.915,RCC Girder Bridge,LRP423b,Balu Khali Bridge,7.90,D,426.915,21.129768,92.187047,S
19379,427.622,RCC Girder Bridge,LRP424a,Wheke Kang Bridge,6.40,D,427.622,21.125104,92.191367,S
19380,429.081,RCC Girder Bridge,LRP425c,Tasse Bridge,12.80,D,429.081,21.114558,92.198021,M
19381,436.789,RCC Girder Bridge,LRP433b,Naya Bazar(2) Bridge,9.40,D,436.789,21.056294,92.226645,S


In [19]:
# Filters out all variations of (R), (Right), (  R  ), etc.
patternDel = "\(\s*[a-zA-Z]*(R|r)[a-zA-Z]*\s*\)"
filter = bridges_clean['name'].str.contains(patternDel)
bridges_clean2 = bridges_clean[~filter]

# Sort on name ascending, then condition descending (taking the worst) and km ascending
bridges_clean2 = bridges_clean2.sort_values(["km", "condition", "name"], ascending=[True, False, True])

# Calculate distance between bridges
bridges_clean2["dist"] = bridges_clean2["km"].diff()
bridges_clean2

  return func(self, *args, **kwargs)


Unnamed: 0,km,type,LRPName,name,length,condition,chainage,lat,lon,length_class,dist
0,1.800,Box Culvert,LRP001a,.,11.30,A,1.800,23.698739,90.458861,M,
1,4.925,Box Culvert,LRP004b,.,6.60,A,4.925,23.694664,90.487775,S,3.125
12706,8.976,PC Girder Bridge,LRP008b,KANCHPUR PC GIRDER BRIDGE,397.00,C,8.976,23.705060,90.523214,XL,4.051
2,8.976,PC Girder Bridge,LRP008b,Kanch pur Bridge.,394.23,A,8.976,23.705060,90.523214,XL,0.000
14585,10.543,Box Culvert,LRP010a,KATCHPUR BOX CULVERT,8.00,B,10.543,23.696400,90.535099,S,1.567
...,...,...,...,...,...,...,...,...,...,...,...
14739,458.799,RCC Girder Bridge,LRP455a,TEKNAF NAITANG RCC GIDER BRIDGE,37.30,B,458.799,20.890158,92.295718,M,0.000
465,459.681,Box Culvert,LRP456a,BUS STAND TEKNAF BOX CULVERT,1.50,A,459.681,20.884567,92.298716,S,0.882
15733,459.866,Box Culvert,LRP456b,BUS STAND TEKNAF BOX CULVERT,1.50,C,459.866,20.883070,92.298163,S,0.185
15734,460.113,RCC Girder Bridge,LRP456c,YAKNAT BRIDGE,35.90,C,460.113,20.880985,92.297777,M,0.247


In [20]:
# TODO: Don't import first bridge!

# Drop all bridges that are closer to the previous bridge then their own length
bridges_clean3 = bridges_clean2.loc[bridges_clean2["dist"]*1000 >= bridges_clean2["length"]]

## Convert to example data format

TODO:
 - Calculate length between each bridge (without the bridge length itself)
 - Convert column names to be identical to example data

In [21]:
# bridges_clean3.to_csv('bridges.csv')

In [22]:
bridges_clean3["model_type"] = 'bridge'
bridges_clean3.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bridges_clean3["model_type"] = 'bridge'


Unnamed: 0,km,type,LRPName,name,length,condition,chainage,lat,lon,length_class,dist,model_type
1,4.925,Box Culvert,LRP004b,.,6.6,A,4.925,23.694664,90.487775,S,3.125,bridge
12706,8.976,PC Girder Bridge,LRP008b,KANCHPUR PC GIRDER BRIDGE,397.0,C,8.976,23.70506,90.523214,XL,4.051,bridge
14585,10.543,Box Culvert,LRP010a,KATCHPUR BOX CULVERT,8.0,B,10.543,23.6964,90.535099,S,1.567,bridge
3,10.88,Box Culvert,LRP010b,NOYAPARA CULVERT,6.3,A,10.88,23.694391,90.537574,S,0.337,bridge
4,10.897,Box Culvert,LRP010c,ADUPUR CULVERT,6.3,A,10.897,23.694302,90.537707,S,0.017,bridge


Todo:

** Add the starting and ending point of the N1
- Coordinates
- Give them  model_type sourcesink
-
** Add new rows after all bridges
- give them model_type link
- dist = dist - length / 1000
- give them coordinates of the end point

It can also be an idea to look for the road information in _roads3.csv

In [23]:
bridges1 = bridges_clean3.copy()
bridges1.tail()

Unnamed: 0,km,type,LRPName,name,length,condition,chainage,lat,lon,length_class,dist,model_type
14738,458.213,RCC Girder Bridge,LRP454b,NAITOM PARA RCC GIDER BRIDGE,9.0,B,458.213,20.892271,92.290984,S,0.462,bridge
15732,458.799,RCC Girder Bridge,LRP455a,Eskahal Bridge,37.3,C,458.799,20.890158,92.295718,M,0.586,bridge
465,459.681,Box Culvert,LRP456a,BUS STAND TEKNAF BOX CULVERT,1.5,A,459.681,20.884567,92.298716,S,0.882,bridge
15733,459.866,Box Culvert,LRP456b,BUS STAND TEKNAF BOX CULVERT,1.5,C,459.866,20.88307,92.298163,S,0.185,bridge
15734,460.113,RCC Girder Bridge,LRP456c,YAKNAT BRIDGE,35.9,C,460.113,20.880985,92.297777,M,0.247,bridge


In [24]:
# bridges[['LRPName', 'model_type', 'name', 'lat', 'lon', 'length']]
bridges1 = bridges1.reset_index()
bridges1 = bridges1[bridges1.LRPName != 'LRP018a']
bridges1

Unnamed: 0,index,km,type,LRPName,name,length,condition,chainage,lat,lon,length_class,dist,model_type
0,1,4.925,Box Culvert,LRP004b,.,6.6,A,4.925,23.694664,90.487775,S,3.125,bridge
1,12706,8.976,PC Girder Bridge,LRP008b,KANCHPUR PC GIRDER BRIDGE,397.0,C,8.976,23.705060,90.523214,XL,4.051,bridge
2,14585,10.543,Box Culvert,LRP010a,KATCHPUR BOX CULVERT,8.0,B,10.543,23.696400,90.535099,S,1.567,bridge
3,3,10.880,Box Culvert,LRP010b,NOYAPARA CULVERT,6.3,A,10.880,23.694391,90.537574,S,0.337,bridge
4,4,10.897,Box Culvert,LRP010c,ADUPUR CULVERT,6.3,A,10.897,23.694302,90.537707,S,0.017,bridge
...,...,...,...,...,...,...,...,...,...,...,...,...,...
612,14738,458.213,RCC Girder Bridge,LRP454b,NAITOM PARA RCC GIDER BRIDGE,9.0,B,458.213,20.892271,92.290984,S,0.462,bridge
613,15732,458.799,RCC Girder Bridge,LRP455a,Eskahal Bridge,37.3,C,458.799,20.890158,92.295718,M,0.586,bridge
614,465,459.681,Box Culvert,LRP456a,BUS STAND TEKNAF BOX CULVERT,1.5,A,459.681,20.884567,92.298716,S,0.882,bridge
615,15733,459.866,Box Culvert,LRP456b,BUS STAND TEKNAF BOX CULVERT,1.5,C,459.866,20.883070,92.298163,S,0.185,bridge


In [52]:
cols = ['LRPName', 'model_type', 'name', 'lat', 'lon', 'length', 'condition'] # Later column 'road' toevoegen.
df = pd.DataFrame(columns=cols)
for index, row in bridges1.iterrows():
    df = df.append(row[['LRPName', 'model_type', 'name', 'lat', 'lon', 'length', 'condition']], ignore_index=True)
    road_id = row['LRPName'] + 'r'
    road_index = f'road{index}'
    if index >= 616:
        break
    road_lat = (row['lat'] + bridges.iloc[index+1]['lat']) / 2
    road_lon = (row['lon'] + bridges.iloc[index+1]['lon']) / 2
    road_length = row['dist']*1000 - row['length']
    road_condition = '-'
    new_row = {'LRPName' : road_id, 'model_type' : 'link', 'name' : road_index,
               'lat' : road_lat, 'lon' : road_lon, 'length' : road_length, 'condition' : road_condition}
    df = df.append(new_row, ignore_index=True)
df.head()  

Unnamed: 0,LRPName,model_type,name,lat,lon,length,condition
0,LRP004b,bridge,.,23.694664,90.487775,6.6,A
1,LRP004br,link,road0,23.694664,90.487775,3118.4,-
2,LRP008b,bridge,KANCHPUR PC GIRDER BRIDGE,23.70506,90.523214,397.0,C
3,LRP008br,link,road1,23.70506,90.523214,3654.0,-
4,LRP010a,bridge,KATCHPUR BOX CULVERT,23.6964,90.535099,8.0,B


In [53]:
result = df.copy()
result.rename(columns={'LRPName' : 'id'}, inplace=True)
result['road'] = 'N1'
result1 = result[['road', 'id', 'model_type', 'name', 'lat', 'lon', 'length', 'condition']]
result1.head()

Unnamed: 0,road,id,model_type,name,lat,lon,length,condition
0,N1,LRP004b,bridge,.,23.694664,90.487775,6.6,A
1,N1,LRP004br,link,road0,23.694664,90.487775,3118.4,-
2,N1,LRP008b,bridge,KANCHPUR PC GIRDER BRIDGE,23.70506,90.523214,397.0,C
3,N1,LRP008br,link,road1,23.70506,90.523214,3654.0,-
4,N1,LRP010a,bridge,KATCHPUR BOX CULVERT,23.6964,90.535099,8.0,B


In [54]:
result1['model_type'].iloc[0] = 'source'
result1['model_type'].iloc[-1] = 'sink'
result1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


Unnamed: 0,road,id,model_type,name,lat,lon,length,condition
0,N1,LRP004b,source,.,23.694664,90.487775,6.6,A
1,N1,LRP004br,link,road0,23.694664,90.487775,3118.4,-
2,N1,LRP008b,bridge,KANCHPUR PC GIRDER BRIDGE,23.705060,90.523214,397.0,C
3,N1,LRP008br,link,road1,23.705060,90.523214,3654.0,-
4,N1,LRP010a,bridge,KATCHPUR BOX CULVERT,23.696400,90.535099,8.0,B
...,...,...,...,...,...,...,...,...
1224,N1,LRP456a,bridge,BUS STAND TEKNAF BOX CULVERT,20.884567,92.298716,1.5,A
1225,N1,LRP456ar,link,road614,21.037670,92.233393,880.5,-
1226,N1,LRP456b,bridge,BUS STAND TEKNAF BOX CULVERT,20.883070,92.298163,1.5,C
1227,N1,LRP456br,link,road615,21.030244,92.227600,183.5,-


In [55]:
result1.to_csv('../data/simulation_file_N1.csv')