## Bridges EDA

In [24]:
import pandas as pd
import math

In [76]:
# Read the Bridges file
bridges = pd.read_excel("../data/BMMS_overview.xlsx")
bridges

Unnamed: 0,road,km,type,LRPName,name,length,condition,structureNr,roadName,chainage,width,constructionYear,spans,zone,circle,division,sub-division,lat,lon,EstimatedLoc
0,N1,1.800,Box Culvert,LRP001a,.,11.30,A,117861,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,1.800,19.5,2005.0,2.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.698739,90.458861,interpolate
1,N1,4.925,Box Culvert,LRP004b,.,6.60,A,117862,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,4.925,35.4,2006.0,1.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.694664,90.487775,interpolate
2,N1,8.976,PC Girder Bridge,LRP008b,Kanch pur Bridge.,394.23,A,119889,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,8.976,,,,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.705060,90.523214,interpolate
3,N1,10.880,Box Culvert,LRP010b,NOYAPARA CULVERT,6.30,A,112531,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.880,12.2,1992.0,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.694391,90.537574,interpolate
4,N1,10.897,Box Culvert,LRP010c,ADUPUR CULVERT,6.30,A,112532,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.897,12.2,1984.0,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.694302,90.537707,interpolate
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20410,Z8806,83.728,Box Culvert,LRP083a,.,1.78,D,110470,Lebukhali-Dumki-Boga-Bauphal-Kalaiya-Dashmina-...,83.728,7.6,1980.0,1.0,Barisal,Patuakhali,Barguna,Barguna,22.183448,90.299962,interpolate
20411,Z8810,3.981,Steel Beam & RCC Slab,LRP003b,.,27.20,D,113458,Bakerganj-Padrishibpur-Kathaltali-Subidkhali-B...,3.981,3.2,1979.0,5.0,Barisal,Barisal,Barisal,Barisal-1,22.519353,90.310063,interpolate
20412,Z8814,20.730,Baily with Steel Deck,LRP020b,Kalbari Bridge,30.50,D,121147,Barguna (Dakhin Ramna Kheya Ghat)-Dewatala-Mat...,20.730,,,,Barisal,Barisal,Pirojpur,Kawkhali,22.282704,89.968512,interpolate
20413,Z8910,28.880,RCC Girder Bridge,LRP028a,Narainpur Bridge,9.50,D,119249,Barisal (Bairagirpool)-Tumchar-Bauphaul Road,28.880,,,,Barisal,Patuakhali,Patuakhali,Patuakhali,22.510650,90.535314,interpolate


In [79]:
# Keep only bridges on the N1 and N2 roads
#bridges = bridges.loc[bridges["road"] == "N1"]
bridges = bridges[(bridges["road"].str.contains(pat = ("N1"), regex = False)) | (bridges["road"].str.contains(pat = ("N2"), regex = False))]

# Drop roads smaller than 25 km. 
smallroads = bridges[(bridges['length'] < 25) & (bridges['road'] != 'N1') & (bridges['road'] != 'N2')].index
bridges = bridges.drop(smallroads)

# Define bright length bins and labels
length_bins = [0, 10, 50, 200, math.inf]
length_labels = ['S', 'M', 'L', 'XL']

# Categorize bridges per length class
bridges["length_class"] = pd.cut(bridges["length"], bins=length_bins, include_lowest=False, right=False, labels=length_labels)

# Keep only useful columns
keep_columns = ['road', 'km', 'type', 'LRPName', 'name', 'length', 'condition', 'chainage', 'lat', 'lon', 'length_class']
bridges_clean = bridges[keep_columns]
bridges_clean

Unnamed: 0,road,km,type,LRPName,name,length,condition,chainage,lat,lon,length_class
0,N1,1.800,Box Culvert,LRP001a,.,11.30,A,1.800,23.698739,90.458861,M
1,N1,4.925,Box Culvert,LRP004b,.,6.60,A,4.925,23.694664,90.487775,S
2,N1,8.976,PC Girder Bridge,LRP008b,Kanch pur Bridge.,394.23,A,8.976,23.705060,90.523214,XL
3,N1,10.880,Box Culvert,LRP010b,NOYAPARA CULVERT,6.30,A,10.880,23.694391,90.537574,S
4,N1,10.897,Box Culvert,LRP010c,ADUPUR CULVERT,6.30,A,10.897,23.694302,90.537707,S
...,...,...,...,...,...,...,...,...,...,...,...
19424,N102,68.007,RCC Girder Bridge,LRP067c,RAMRAIL BRIDGE.,48.40,D,68.007,23.932813,91.116812,M
19431,N106,40.007,Steel Beam & RCC Slab,LRP042a,RANIRHAT BRIDGE,95.00,D,40.007,22.563635,92.037963,L
19433,N107,7.288,RCC Girder Bridge,LRP007a,Military pool,73.00,D,7.288,22.340325,91.917067,L
19506,N208,24.890,RCC Girder Bridge,LRP025b,Damai Bridge,41.55,D,24.890,24.610259,91.904383,M


In [63]:
# Filters out all variations of (R), (Right), (  R  ), etc.
patternDel = "\(\s*[a-zA-Z]*(R|r)[a-zA-Z]*\s*\)"
filter = bridges_clean['name'].str.contains(patternDel)
bridges_clean2 = bridges_clean[~filter]

# Sort on name ascending, then condition descending (taking the worst) and km ascending
bridges_clean2 = bridges_clean2.sort_values(["km", "condition", "name"], ascending=[True, False, True])

# Calculate distance between bridges
bridges_clean2["dist"] = bridges_clean2["km"].diff()

# We save the first bridge because it needs to be added later.
first_bridge = bridges_clean2.iloc[0]

bridges_clean2.head()

  return func(self, *args, **kwargs)


Unnamed: 0,road,km,type,LRPName,name,length,condition,chainage,lat,lon,length_class,dist
2622,N205,0.022,Box Culvert,LRPSa,CHONDRI BOX CULVERT,5.6,A,0.022,24.86791,91.857191,S,
16049,N105,0.024,Box Culvert,LRPSa,MODONPUR,12.8,C,0.024,23.690608,90.546717,M,0.002
2048,N107,0.052,Baily with Steel Deck,LRPSa,KALURGHAT BRIDGE,23.8,A,0.052,22.394042,91.892057,M,0.028
2750,N209,0.156,Box Culvert,LRPSa,MENIKHOLA CULVERT,2.45,A,0.156,24.878769,91.87531,S,0.104
1921,N104,0.349,Slab Culvert,LRPSa,Shasaugtachha culvert,3.65,A,0.349,23.009362,91.396149,S,0.193


In [64]:
# Drop all bridges that are closer to the previous bridge then their own length
bridges_clean3 = bridges_clean2.loc[bridges_clean2["dist"]*1000 >= bridges_clean2["length"]]

## Convert to example data format

In [65]:
# bridges_clean3.to_csv('bridges.csv')

In [66]:
# The bridges get a column 'model_type' which is named to bridge for every bridge.
bridges_clean3["model_type"] = 'bridge'
bridges_clean3.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bridges_clean3["model_type"] = 'bridge'


Unnamed: 0,road,km,type,LRPName,name,length,condition,chainage,lat,lon,length_class,dist,model_type
2048,N107,0.052,Baily with Steel Deck,LRPSa,KALURGHAT BRIDGE,23.8,A,0.052,22.394042,91.892057,M,0.028,bridge
2750,N209,0.156,Box Culvert,LRPSa,MENIKHOLA CULVERT,2.45,A,0.156,24.878769,91.87531,S,0.104,bridge
1921,N104,0.349,Slab Culvert,LRPSa,Shasaugtachha culvert,3.65,A,0.349,23.009362,91.396149,S,0.193,bridge
2093,N110,0.371,Box Culvert,LRPSa,zilinza kali,8.3,A,0.371,21.424832,92.020022,S,0.022,bridge
15060,N104,0.417,Slab Culvert,LRPSb,S.S.SK ROAD SLAB CULVERT,2.7,B,0.417,23.009302,91.395512,S,0.035,bridge


In [67]:
bridges1 = bridges_clean3.copy()

# To drop the duplicate Darikandi bridge we delete the value from the column
bridges1['LRPName'] = bridges1['LRPName'].drop_duplicates()
# And delete the rows with a NaN value. (There were no other NaN values in the DataFrame).
bridges1 = bridges1.dropna()

# The previously saved bridge is appended to the end of the flipped DataFrame
bridges1 = bridges1.append(first_bridge)
bridges1 = bridges1.reset_index()
bridges1

Unnamed: 0,index,road,km,type,LRPName,name,length,condition,chainage,lat,lon,length_class,dist,model_type
0,2048,N107,0.052,Baily with Steel Deck,LRPSa,KALURGHAT BRIDGE,23.8,A,0.052,22.394042,91.892057,M,0.028,bridge
1,15060,N104,0.417,Slab Culvert,LRPSb,S.S.SK ROAD SLAB CULVERT,2.7,B,0.417,23.009302,91.395512,S,0.035,bridge
2,15279,N204,0.806,Truss with RCC Slab,LRP001a,TELARIA CHARA,40.0,B,0.806,24.146027,91.351306,M,0.156,bridge
3,16051,N106,0.818,Box Culvert,LRPSc,TANARI BOX CULVERT,2.1,C,0.818,22.400440,91.819371,S,0.012,bridge
4,2095,N110,0.934,Box Culvert,LRPsd,DANARGONA,3.0,A,0.934,21.424587,92.014666,S,0.006,bridge
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,15732,N1,458.799,RCC Girder Bridge,LRP455a,Eskahal Bridge,37.3,C,458.799,20.890158,92.295718,M,0.586,bridge
1021,465,N1,459.681,Box Culvert,LRP456a,BUS STAND TEKNAF BOX CULVERT,1.5,A,459.681,20.884567,92.298716,S,0.882,bridge
1022,15733,N1,459.866,Box Culvert,LRP456b,BUS STAND TEKNAF BOX CULVERT,1.5,C,459.866,20.883070,92.298163,S,0.185,bridge
1023,15734,N1,460.113,RCC Girder Bridge,LRP456c,YAKNAT BRIDGE,35.9,C,460.113,20.880985,92.297777,M,0.247,bridge


In [69]:
# Now we add the rows in between the bridges with the correct data.
cols = ['road', 'LRPName', 'model_type', 'name', 'lat', 'lon', 'length', 'condition', 'length_class']

#Create an empty DataFrame
df = pd.DataFrame(columns=cols)

#Iterate over the rows with an index to refer to the index within the loop.
for index, row in bridges1.iterrows():
    #Add the bridge
    df = df.append(row[cols], ignore_index=True)
    #stop at the last bridge because that will become the sink.
    if index >= bridges1.shape[0]-1:
        break
    # Save all the road information that is needed.
    road_id = row['LRPName'] + 'r'
    road_index = f'road{index}'
    # The location of the road will be in the middle of the two bridges it connects.
    road_lat = (row['lat'] + bridges1.iloc[index+1]['lat']) / 2
    road_lon = (row['lon'] + bridges1.iloc[index+1]['lon']) / 2
    road_length = bridges1.iloc[index+1]['dist']*1000 - row['length']
    # Roads don't have a condition or a length class
    road_condition = '-'
    road_length_class = '-'
    # The new row is created and appended after the preceding bridge.
    new_row = {'LRPName' : road_id, 'model_type' : 'link', 'name' : road_index,
               'lat' : road_lat, 'lon' : road_lon, 'length' : road_length, 'condition' : road_condition, 'length_class' : road_length_class}
    df = df.append(new_row, ignore_index=True)
df.head()  

Unnamed: 0,road,LRPName,model_type,name,lat,lon,length,condition,length_class
0,N107,LRPSa,bridge,KALURGHAT BRIDGE,22.394042,91.892057,23.8,A,M
1,,LRPSar,link,road0,22.701672,91.643785,11.2,-,-
2,N104,LRPSb,bridge,S.S.SK ROAD SLAB CULVERT,23.009302,91.395512,2.7,B,S
3,,LRPSbr,link,road1,23.577665,91.373409,153.3,-,-
4,N204,LRP001a,bridge,TELARIA CHARA,24.146027,91.351306,40.0,B,M


In [71]:
# With the finished DataFrame it is made ready to be exported in the correct format which the model can read.
result = df.copy()
result.rename(columns={'LRPName' : 'id'}, inplace=True)

result1 = result[['road', 'id', 'model_type', 'name', 'lat', 'lon', 'length', 'condition', 'length_class']]
result1.head()

Unnamed: 0,road,id,model_type,name,lat,lon,length,condition,length_class
0,N107,LRPSa,bridge,KALURGHAT BRIDGE,22.394042,91.892057,23.8,A,M
1,,LRPSar,link,road0,22.701672,91.643785,11.2,-,-
2,N104,LRPSb,bridge,S.S.SK ROAD SLAB CULVERT,23.009302,91.395512,2.7,B,S
3,,LRPSbr,link,road1,23.577665,91.373409,153.3,-,-
4,N204,LRP001a,bridge,TELARIA CHARA,24.146027,91.351306,40.0,B,M


In [84]:
# The first bridge will be the source and the last bridge will be the sink.
result1['model_type'].iloc[0] = 'SourceSink'
result1['model_type'].iloc[-1] = 'SourceSink'
result1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0,road,id,model_type,name,lat,lon,length,condition,length_class
0,N107,LRPSa,SourceSink,KALURGHAT BRIDGE,22.394042,91.892057,23.8,A,M
1,,LRPSar,link,road0,22.701672,91.643785,11.2,-,-
2,N104,LRPSb,bridge,S.S.SK ROAD SLAB CULVERT,23.009302,91.395512,2.7,B,S
3,,LRPSbr,link,road1,23.577665,91.373409,153.3,-,-
4,N204,LRP001a,bridge,TELARIA CHARA,24.146027,91.351306,40.0,B,M
...,...,...,...,...,...,...,...,...,...
2044,N1,LRP456b,bridge,BUS STAND TEKNAF BOX CULVERT,20.883070,92.298163,1.5,C,S
2045,,LRP456br,link,road1022,20.882028,92.297970,245.5,-,-
2046,N1,LRP456c,bridge,YAKNAT BRIDGE,20.880985,92.297777,35.9,C,M
2047,,LRP456cr,link,road1023,22.874448,92.077484,,-,-


In [12]:
# Export the finished file to a CSV file in the data folder.
result1.to_csv('../data/simulation_file_N1_copy.csv')