In [1]:
import pandas as pd

In [2]:
demo1 = pd.read_csv('../data/demo-1.csv')

In [3]:
demo1

Unnamed: 0,road,id,model_type,name,lat,lon,length
0,N1,1000000,source,source,0,0,4
1,N1,1000001,link,link 1,1,1,1800
2,N1,1000002,bridge,bridge 1,2,2,200
3,N1,1000003,link,link 2,3,3,1500
4,N1,1000004,bridge,bridge 2,4,4,8
5,N1,1000005,link,link 3,5,5,2000
6,N1,1000006,bridge,bridge 3,6,6,4
7,N1,1000007,link,link 4,7,7,2500
8,N1,1000008,bridge,bridge 4,8,8,500
9,N1,1000009,link,link 5,9,9,800


In [4]:
roads = pd.read_csv('../data/_roads3.csv')
bmms = pd.read_excel('../data/BMMS_overview.xlsx')

In [5]:
# Filter the 'roads' DataFrame for rows where the 'road' column is 'N1'
n1_roads = roads[roads['road'] == 'N1']
n1_roads = n1_roads[(n1_roads['lon'] >= 90.44) & (n1_roads['lon'] <= 91.851) &
                 (n1_roads['lat'] >= 22.363) & (n1_roads['lat'] <= 23.711)]

# Generate an ID sequence starting from 1
n1_roads['id'] = range(1, len(n1_roads) + 1)

# Set model_type to 'link' for all rows
n1_roads['model_type'] = 'link'

# Generate 'name' as "Link" + id as string
n1_roads['name'] = ['Link ' + str(id) for id in n1_roads['id']]

# Calculate 'length' as difference between this row's 'chainage' and the next row's 'chainage'
# Shift(-1) moves the chainage up by one row to subtract, fillna(0) to handle the last item
n1_roads['length'] = (n1_roads['chainage'].shift(-1) - n1_roads['chainage']).fillna(0)

# Selecting the columns needed for the empty DataFrame
n1_roads_final = n1_roads[['road', 'id', 'model_type', 'name', 'lat', 'lon', 'length', 'chainage']]

# Display the first few rows of the final DataFrame
n1_roads_final


Unnamed: 0,road,id,model_type,name,lat,lon,length,chainage
0,N1,1,link,Link 1,23.706028,90.443333,0.814,0.000
1,N1,2,link,Link 2,23.702917,90.450417,0.008,0.814
2,N1,3,link,Link 3,23.702778,90.450472,0.178,0.822
3,N1,4,link,Link 4,23.702139,90.451972,1.000,1.000
4,N1,5,link,Link 5,23.697889,90.460583,0.130,2.000
...,...,...,...,...,...,...,...,...
560,N1,561,link,Link 561,22.376472,91.770416,0.019,231.442
561,N1,562,link,Link 562,22.376225,91.770543,0.545,231.461
562,N1,563,link,Link 563,22.369166,91.774194,0.555,232.006
563,N1,564,link,Link 564,22.369138,91.777666,8.895,232.561


In [6]:
# Filter BMMS data for road 'N1'
bmms_n1 = bmms[bmms['road'] == 'N1'].copy()
bmms_n1 = bmms_n1[(bmms_n1['lon'] >= 90.44) & (bmms_n1['lon'] <= 91.851) &
                 (bmms_n1['lat'] >= 22.363) & (bmms_n1['lat'] <= 23.711)]

bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains(r"\(R\)", na=False)]
bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains("right", case=False, na=False)]
bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains("RIGHT", case=False, na=False)]
bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains(r"\( R \)", na=False)]

# Set up for new entries
bmms_n1['model_type'] = 'bridge'
bmms_n1['name'] = ['Bridge ' + str(i+1) for i in range(bmms_n1.shape[0])]
bmms_n1['id'] = range(n1_roads_final['id'].max() + 1, n1_roads_final['id'].max() + 1 + bmms_n1.shape[0])
bmms_n1['chainage'] = bmms_n1['km']  # Use 'km' as 'chainage'
bmms_n1['length'] = bmms_n1['length'] / 1000

# Select and rename columns to match the format of `n1_roads_final_with_chainage`
bmms_n1_formatted = bmms_n1[['road', 'id', 'model_type', 'name', 'lat', 'lon', 'chainage', 'length', 'condition']]

# Combine the dataframes and sort by chainage
combined_df = pd.concat([n1_roads_final, bmms_n1_formatted], ignore_index=True).sort_values(by='chainage')

# Display the first few rows of the combined and sorted DataFrame
combined_df


Unnamed: 0,road,id,model_type,name,lat,lon,length,chainage,condition
0,N1,1,link,Link 1,23.706028,90.443333,0.8140,0.000,
1,N1,2,link,Link 2,23.702917,90.450417,0.0080,0.814,
2,N1,3,link,Link 3,23.702778,90.450472,0.1780,0.822,
3,N1,4,link,Link 4,23.702139,90.451972,1.0000,1.000,
565,N1,566,bridge,Bridge 1,23.698739,90.458861,0.0113,1.800,A
...,...,...,...,...,...,...,...,...,...
561,N1,562,link,Link 562,22.376225,91.770543,0.5450,231.461,
706,N1,707,bridge,Bridge 142,22.369684,91.773926,0.0030,231.966,A
562,N1,563,link,Link 563,22.369166,91.774194,0.5550,232.006,
563,N1,564,link,Link 564,22.369138,91.777666,8.8950,232.561,


In [7]:
combined_df.iloc[0, 2] = 'source'
combined_df.iloc[-1, combined_df.columns.get_loc('model_type')] = 'sink'
combined_df.reset_index(drop=True, inplace=True)
count = 1
for index, row in combined_df.iterrows():
    combined_df.iloc[index, 1] = count
    count += 1


In [8]:
#add together the links
length = 0
rows_to_add = []  # List to accumulate rows
last_row = None

# Initialize the DataFrame
n1_combined = pd.DataFrame(columns=['road', "id", 'model_type', 'name', 'length', 'condition'])

for index, row in combined_df.iterrows():
    if row['model_type'] == 'source':
        rows_to_add.append({'road': row['road'], 'id': row['id'], 'model_type': row['model_type'],
                            'name': row['name'], 'length': row['length'], 'condition': row['condition']})
        length = 0  # Reset length after adding the combined link
    elif row['model_type'] == 'link':
        length += row['length']
    elif row['model_type'] in ['bridge', 'sink']:
        if last_row is not None and last_row['model_type'] == 'link':
            # Add the previous link with the new length
            rows_to_add.append({'road': last_row['road'], 'id': last_row['id'], 'model_type': last_row['model_type'],
                                'name': last_row['name'], 'length': length, 'condition': last_row['condition']})
        # Now add the current row (bridge or sink)
        rows_to_add.append({'road': row['road'], 'id': row['id'], 'model_type': row['model_type'],
                            'name': row['name'], 'length': row['length'], 'condition': row['condition']})
        length = 0  # Reset length after adding the combined link
    last_row = row

# Once the loop is complete, add all accumulated rows to n1_combined
n1_combined = pd.concat([n1_combined, pd.DataFrame(rows_to_add)], ignore_index=True)
n1_combined

  n1_combined = pd.concat([n1_combined, pd.DataFrame(rows_to_add)], ignore_index=True)


Unnamed: 0,road,id,model_type,name,length,condition
0,N1,1,source,Link 1,0.8140,
1,N1,4,link,Link 4,1.1860,
2,N1,5,bridge,Bridge 1,0.0113,A
3,N1,10,link,Link 9,3.0000,
4,N1,11,bridge,Bridge 2,0.0066,A
...,...,...,...,...,...,...
432,N1,801,bridge,Bridge 141,0.0083,A
433,N1,804,link,Link 562,1.0000,
434,N1,805,bridge,Bridge 142,0.0030,A
435,N1,807,link,Link 564,9.4500,


In [9]:
combined_df.to_csv('../data/n1_model.csv', index=False)

In [10]:
n1 = pd.read_csv('../data/n1_model.csv')

In [11]:
n1

Unnamed: 0,road,id,model_type,name,lat,lon,length,chainage,condition
0,N1,1,source,Link 1,23.706028,90.443333,0.8140,0.000,
1,N1,2,link,Link 2,23.702917,90.450417,0.0080,0.814,
2,N1,3,link,Link 3,23.702778,90.450472,0.1780,0.822,
3,N1,4,link,Link 4,23.702139,90.451972,1.0000,1.000,
4,N1,5,bridge,Bridge 1,23.698739,90.458861,0.0113,1.800,A
...,...,...,...,...,...,...,...,...,...
803,N1,804,link,Link 562,22.376225,91.770543,0.5450,231.461,
804,N1,805,bridge,Bridge 142,22.369684,91.773926,0.0030,231.966,A
805,N1,806,link,Link 563,22.369166,91.774194,0.5550,232.006,
806,N1,807,link,Link 564,22.369138,91.777666,8.8950,232.561,


In [12]:
#add together the links
length = 0
rows_to_add = []  # List to accumulate rows
last_row = None

# Initialize the DataFrame
n1_combined = pd.DataFrame(columns=['road', "id", 'model_type', 'name', 'length', 'condition'])

for index, row in n1.iterrows():
    if row['model_type'] == 'source':
        rows_to_add.append({'road': row['road'], 'id': row['id'], 'model_type': row['model_type'],
                            'name': row['name'], 'length': row['length'], 'condition': row['condition']})
        length = 0  # Reset length after adding the combined link
    elif row['model_type'] == 'link':
        length += row['length']
    elif row['model_type'] in ['bridge', 'sink']:
        if last_row is not None and last_row['model_type'] == 'link':
            # Add the previous link with the new length
            rows_to_add.append({'road': last_row['road'], 'id': last_row['id'], 'model_type': last_row['model_type'],
                                'name': last_row['name'], 'length': length, 'condition': last_row['condition']})
        # Now add the current row (bridge or sink)
        rows_to_add.append({'road': row['road'], 'id': row['id'], 'model_type': row['model_type'],
                            'name': row['name'], 'length': row['length'], 'condition': row['condition']})
        length = 0  # Reset length after adding the combined link
    last_row = row

# Once the loop is complete, add all accumulated rows to n1_combined
n1_combined = pd.concat([n1_combined, pd.DataFrame(rows_to_add)], ignore_index=True)
n1_combined

  n1_combined = pd.concat([n1_combined, pd.DataFrame(rows_to_add)], ignore_index=True)


Unnamed: 0,road,id,model_type,name,length,condition
0,N1,1,source,Link 1,0.8140,
1,N1,4,link,Link 4,1.1860,
2,N1,5,bridge,Bridge 1,0.0113,A
3,N1,10,link,Link 9,3.0000,
4,N1,11,bridge,Bridge 2,0.0066,A
...,...,...,...,...,...,...
432,N1,801,bridge,Bridge 141,0.0083,A
433,N1,804,link,Link 562,1.0000,
434,N1,805,bridge,Bridge 142,0.0030,A
435,N1,807,link,Link 564,9.4500,


In [26]:
bmms_n1

Unnamed: 0,road,km,type,LRPName,name,length,condition,structureNr,roadName,chainage,...,spans,zone,circle,division,sub-division,lat,lon,EstimatedLoc,model_type,id
0,N1,1.800,Box Culvert,LRP001a,Bridge 1,0.01130,A,117861,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,1.800,...,2.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.698739,90.458861,interpolate,bridge,566
1,N1,4.925,Box Culvert,LRP004b,Bridge 2,0.00660,A,117862,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,4.925,...,1.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.694664,90.487775,interpolate,bridge,567
2,N1,8.976,PC Girder Bridge,LRP008b,Bridge 3,0.39423,A,119889,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,8.976,...,,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.705060,90.523214,interpolate,bridge,568
3,N1,10.880,Box Culvert,LRP010b,Bridge 4,0.00630,A,112531,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.880,...,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.694391,90.537574,interpolate,bridge,569
4,N1,10.897,Box Culvert,LRP010c,Bridge 5,0.00630,A,112532,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.897,...,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.694302,90.537707,interpolate,bridge,570
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14659,N1,220.725,RCC Girder Bridge,LRP223a,Bridge 267,0.02320,B,100611,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,220.725,...,3.0,Chittagong,Chittagong,Chittagong,Sitakunda,22.462439,91.728531,interpolate,bridge,832
14660,N1,225.702,PC Girder Bridge,LRP228a,Bridge 268,0.03510,B,100616,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,225.702,...,3.0,Chittagong,Chittagong,Chittagong,Sitakunda,22.422042,91.748021,interpolate,bridge,833
15617,N1,222.687,RCC Girder Bridge,LRP225a,Bridge 269,0.02630,C,100613,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,222.687,...,3.0,Chittagong,Chittagong,Chittagong,Sitakunda,22.446079,91.735367,interpolate,bridge,834
19366,N1,218.441,RCC Girder Bridge,LRP220b,Bridge 270,0.02433,D,121352,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,218.441,...,,Chittagong,Chittagong,Chittagong,Sitakunda,22.481164,91.720363,interpolate,bridge,835


In [32]:
bmms_n1 = bmms[bmms['road'] == 'N1'].copy()
bmms_n1 = bmms_n1[(bmms_n1['lon'] >= 90.44) & (bmms_n1['lon'] <= 91.851) &
                 (bmms_n1['lat'] >= 22.363) & (bmms_n1['lat'] <= 23.711)]
bmms_n1 

Unnamed: 0,road,km,type,LRPName,name,length,condition,structureNr,roadName,chainage,width,constructionYear,spans,zone,circle,division,sub-division,lat,lon,EstimatedLoc
0,N1,1.800,Box Culvert,LRP001a,.,11.30,A,117861,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,1.800,19.5,2005.0,2.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.698739,90.458861,interpolate
1,N1,4.925,Box Culvert,LRP004b,.,6.60,A,117862,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,4.925,35.4,2006.0,1.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.694664,90.487775,interpolate
2,N1,8.976,PC Girder Bridge,LRP008b,Kanch pur Bridge.,394.23,A,119889,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,8.976,,,,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.705060,90.523214,interpolate
3,N1,10.880,Box Culvert,LRP010b,NOYAPARA CULVERT,6.30,A,112531,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.880,12.2,1992.0,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.694391,90.537574,interpolate
4,N1,10.897,Box Culvert,LRP010c,ADUPUR CULVERT,6.30,A,112532,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.897,12.2,1984.0,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.694302,90.537707,interpolate
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14659,N1,220.725,RCC Girder Bridge,LRP223a,SHLAKHAR PAD BRIDGE,23.20,B,100611,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,220.725,12.4,1986.0,3.0,Chittagong,Chittagong,Chittagong,Sitakunda,22.462439,91.728531,interpolate
14660,N1,225.702,PC Girder Bridge,LRP228a,DHAMAKHALI BRIDGE,35.10,B,100616,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,225.702,12.6,1990.0,3.0,Chittagong,Chittagong,Chittagong,Sitakunda,22.422042,91.748021,interpolate
15617,N1,222.687,RCC Girder Bridge,LRP225a,SALAKHARPAD BRIDGE,26.30,C,100613,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,222.687,12.7,1984.0,3.0,Chittagong,Chittagong,Chittagong,Sitakunda,22.446079,91.735367,interpolate
19366,N1,218.441,RCC Girder Bridge,LRP220b,Baro Aulia,24.33,D,121352,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,218.441,,,,Chittagong,Chittagong,Chittagong,Sitakunda,22.481164,91.720363,interpolate


In [43]:
bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains(r"\(R\)", na=False)]
bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains("right", case=False, na=False)]
bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains("RIGHT", case=False, na=False)]
bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains(r"\( R \)", na=False)]


In [44]:
duplicates_df = bmms_n1[bmms_n1.duplicated('km', keep=False)]

In [49]:
# Assuming duplicates_df is your DataFrame
# Convert 'condition' to a numerical value for averaging
condition_mapping = {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5}
duplicates_df['condition_num'] = duplicates_df['condition'].map(condition_mapping)
duplicates_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  duplicates_df['condition_num'] = duplicates_df['condition'].map(condition_mapping)


Unnamed: 0,road,km,type,LRPName,name,length,condition,structureNr,roadName,chainage,...,constructionYear,spans,zone,circle,division,sub-division,lat,lon,EstimatedLoc,condition_num
2,N1,8.976,PC Girder Bridge,LRP008b,Kanch pur Bridge.,394.23,A,119889,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,8.976,...,,,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.705060,90.523214,interpolate,1
8,N1,12.660,PC Girder Bridge,LRP013a,Madanpur Bridge.(L),27.50,A,119897,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,12.660,...,,,Dhaka,Dhaka,Narayanganj,Vitikandi,23.685583,90.551208,interpolate,1
10,N1,12.660,PC Girder Bridge,LRP013a,MADANPUR BRIDGE(L),26.30,A,109838,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,12.660,...,2003.0,1.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.685583,90.551208,interpolate,1
15,N1,18.093,PC Girder Bridge,LRP018c,Sonakhal Bridge(L),25.50,A,119933,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,18.093,...,,,Dhaka,Dhaka,Narayanganj,Vitikandi,23.649754,90.584549,interpolate,1
17,N1,21.184,PC Girder Bridge,LRP021c,Marikhali Bridge(L),86.92,A,119944,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,21.184,...,,,Dhaka,Dhaka,Narayanganj,Vitikandi,23.628467,90.602302,interpolate,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14648,N1,165.183,PC Girder Bridge,LRP166a,DHOOM GHAT PC GIRDER,220.60,B,101258,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,165.183,...,1985.0,7.0,Comilla,Noakhali,Feni,Feni-1,22.914965,91.526290,interpolate,2
14658,N1,218.441,RCC Girder Bridge,LRP220b,MUDDO SONAISORI BRIDGE,23.20,B,100608,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,218.441,...,1987.0,3.0,Chittagong,Chittagong,Chittagong,Sitakunda,22.481164,91.720363,interpolate,2
14659,N1,220.725,RCC Girder Bridge,LRP223a,SHLAKHAR PAD BRIDGE,23.20,B,100611,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,220.725,...,1986.0,3.0,Chittagong,Chittagong,Chittagong,Sitakunda,22.462439,91.728531,interpolate,2
19366,N1,218.441,RCC Girder Bridge,LRP220b,Baro Aulia,24.33,D,121352,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,218.441,...,,,Chittagong,Chittagong,Chittagong,Sitakunda,22.481164,91.720363,interpolate,4


In [45]:
duplicates_df.sort_values(by='km', ascending=False, inplace=True)
final_bmms = pd.DataFrame(columns = ['road', "id", 'model_type', 'name', 'length', 'condition', 'condition_num'])
rows_to_add = []
last_row = None
count = 0
sum_condition = 0
for index, row in duplicates_df.iterrows():
    if last_row is not None and last_row['km'] == row['km']:
        count += 1
        sum_condition += row['condition_num']
    else:
            rows_to_add.append({'road': last_row['road'], 'id': last_row['id'], 'model_type': last_row['model_type'],
                            'name': last_row['name'], 'length': last_row['length'], 'condition': last_row['condition'], 'condition_num': sum_condition/count})
           
    