In [32]:
import pandas as pd
import math

In [33]:
file_name = '../data/demo-4.csv'
file_name_bmms = '../data/BMMS_overview.xlsx'
file_name_roads = '../data/_roads3.csv'
demo = pd.read_csv(file_name)
bmms = pd.read_excel(file_name_bmms)
roads = pd.read_csv(file_name_roads)

Roads required: N1 N106 N105 N102 N120 N104 N2 N204 N207 N208

In [34]:
def clean_file(roads, bmms, road_name):
    # Filter the 'roads' DataFrame for rows where the 'road' column is 'N1'
    n1_roads = roads[roads['road'] == road_name]
    # if road_name == 'N1':
    #     n1_roads = n1_roads[(n1_roads['lon'] <= 91.851) &
    #                      (n1_roads['lat'] >= 22.36)]
    
    # Generate an ID sequence starting from 1
    n1_roads['id'] = range(1, len(n1_roads) + 1)
    
    # Set model_type to 'link' for all rows
    n1_roads['model_type'] = 'link'
    
    # Generate 'name' as "Link" + id as string
    #n1_roads['name'] = ['Link ' + str(id) for id in n1_roads['id']]
    n1_roads['name'] = 'Link'
    # Calculate 'length' as difference between this row's 'chainage' and the next row's 'chainage'
    # Shift(-1) moves the chainage up by one row to subtract, fillna(0) to handle the last item
    n1_roads['length'] = (n1_roads['chainage'].shift(-1) - n1_roads['chainage']).fillna(0)
    
    # Selecting the columns needed for the empty DataFrame
    n1_roads_final = n1_roads[['road', 'id', 'model_type', 'name', 'lat', 'lon', 'length', 'chainage']]
    
    # Filter BMMS data for road 'N1'
    bmms_n1 = bmms[bmms['road'] == road_name].copy()
    # bmms_n1 = bmms_n1[(bmms_n1['lon'] <= 91.851) &
    #                  (bmms_n1['lat'] >= 22.363)]
    
    bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains(r"\(R\)", na=False)]
    bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains("right", case=False, na=False)]
    bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains("RIGHT", case=False, na=False)]
    bmms_n1 = bmms_n1[~bmms_n1['name'].str.contains(r"\( R \)", na=False)]
    
    # Set up for new entries
    bmms_n1['model_type'] = 'bridge'
    #bmms_n1['name'] = ['Bridge ' + str(i+1) for i in range(bmms_n1.shape[0])]
    bmms_n1['id'] = range(n1_roads_final['id'].max() + 1, n1_roads_final['id'].max() + 1 + bmms_n1.shape[0])
    bmms_n1['chainage'] = bmms_n1['km']  # Use 'km' as 'chainage'
    bmms_n1['length'] = bmms_n1['length'] / 1000
    
    # Select and rename columns to match the format of n1_roads_final_with_chainage
    bmms_n1_formatted = bmms_n1[['road', 'id', 'model_type', 'name', 'lat', 'lon', 'chainage', 'length', 'condition']]
    
    # Combine the dataframes and sort by chainage
    combined_df = pd.concat([n1_roads_final, bmms_n1_formatted], ignore_index=True).sort_values(by='chainage')
    
    combined_df.iloc[0, 2] = 'sourcesink'
    combined_df.iloc[-1, combined_df.columns.get_loc('model_type')] = 'sourcesink'
    combined_df.reset_index(drop=True, inplace=True)
    count = 1
    for index, row in combined_df.iterrows():
        combined_df.iloc[index, 1] = count
        count += 1
    
    n1 = combined_df
    
    duplicates_df = bmms_n1[bmms_n1.duplicated('km', keep=False)]

    # Assuming duplicates_df is your DataFrame


    # Convert 'condition' to a numerical value for averaging
    condition_mapping = {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5}
    n1['condition_num'] = n1['condition'].map(condition_mapping)
    
    #add together the links
    amount = 0
    condition_sum = 0
    rows_to_add = []  # List to accumulate rows
    last_row = None
    
    # Initialize the DataFrame
    n1_new = pd.DataFrame(columns=['road', "id", 'model_type', 'name', 'lon', 'lat', 'length', 'condition', 'condition_num'])
    for index, row in n1.iterrows():
        if last_row is not None:
            if row['model_type'] == 'bridge':
                amount += 1
                condition_sum += row['condition_num']
            if row['model_type'] != 'bridge' and last_row['model_type'] != 'bridge':
                rows_to_add.append({'road': row['road'], 'id': row['id'], 'model_type': row['model_type'],
                                    'name': row['name'],'lon': row['lon'], 'lat':row['lat'], 'length': row['length'], 'condition': row['condition'],
                                    'condition_num': row['condition_num']})
                amount = 0
                condition_sum = 0
            if row['model_type'] != 'bridge' and last_row['model_type'] == 'bridge':
                rows_to_add.append({'road': last_row['road'], 'id': last_row['id'], 'model_type': last_row['model_type'],
                                    'name': last_row['name'],'lon': row['lon'], 'lat':row['lat'], 'length': last_row['length'],
                                    'condition': last_row['condition'], 'condition_num': condition_sum / amount})
                amount = 0
                condition_sum = 0
                rows_to_add.append({'road': row['road'], 'id': row['id'], 'model_type': row['model_type'],
                                    'name': row['name'],'lon': row['lon'], 'lat':row['lat'], 'length': row['length'], 'condition': row['condition'],
                                    'condition_num': row['condition_num']})
        else:
            rows_to_add.append({'road': row['road'], 'id': row['id'], 'model_type': row['model_type'],
                                'name': row['name'],'lon': row['lon'], 'lat':row['lat'], 'length': row['length'], 'condition': row['condition'],
                                'condition_num': row['condition_num']})
        last_row = row
    
    n1_new = pd.concat([n1_new, pd.DataFrame(rows_to_add)], ignore_index=True)

    for index, row in n1_new.iterrows():
        if pd.isna(row['condition_num']) == False:
            n1_new.iloc[index, 8] = math.ceil(n1_new.iloc[index, 8])
    condition_mapping = {1: 'A', 2: 'B', 3: 'C', 4: 'D', 5: 'E'}
    n1_new['condition'] = n1_new['condition_num'].map(condition_mapping)
    # n1_new
    # n1_new['id'] = range(1, len(n1_new) + 1)
    # for index, row in n1_new.iterrows():
    #     if row['model_type'] != 'bridge':
    #         n1_new.iloc[index, 3] = row['model_type'] + ' ' + str(row['id'])
    #     else:
    #         n1_new.iloc[index, 3] = n1_new.iloc[index, 3] + ' ' + str(row['id'])
    # print(n1_new)
    return n1_new

In [35]:
all_roads = []
for road in ['N1', 'N105','N106', 'N102', 'N120', 'N104', 'N2', 'N204', 'N207', 'N208']:
    all_roads.append(clean_file(roads, bmms, road))
merged_dataframe = pd.concat(all_roads, ignore_index=True)
desired_order = ['road', 'id', 'model_type', 'condition', 'name', 'lat', 'lon', 'length']
merged_dataframe = merged_dataframe[desired_order]
count = 1
for index, row in merged_dataframe.iterrows():
    merged_dataframe.loc[index, 'id'] = str(count)
    count+=1
    if row.model_type == 'link':
        merged_dataframe.loc[index, 'name'] = 'link ' + merged_dataframe.loc[index, 'id']
    elif row.model_type == 'source':
        merged_dataframe.loc[index, 'name'] = 'source ' + merged_dataframe.loc[index, 'id']
    elif row.model_type == 'sink':
        merged_dataframe.loc[index, 'name'] = 'sink ' + merged_dataframe.loc[index, 'id']
    elif row.model_type == 'bridge':
        merged_dataframe.loc[index, 'name'] = merged_dataframe.loc[index, 'name'] + ' id ' + merged_dataframe.loc[index, 'id']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  n1_roads['id'] = range(1, len(n1_roads) + 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  n1_roads['model_type'] = 'link'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  n1_roads['name'] = 'Link'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_inde

In [36]:
merged_dataframe

Unnamed: 0,road,id,model_type,condition,name,lat,lon,length
0,N1,1,sourcesink,,Link,23.706028,90.443333,0.8140
1,N1,2,link,,link 2,23.702917,90.450417,0.0080
2,N1,3,link,,link 3,23.702778,90.450472,0.1780
3,N1,4,link,,link 4,23.702139,90.451972,1.0000
4,N1,5,bridge,A,. id 5,23.697889,90.460583,0.0113
...,...,...,...,...,...,...,...,...
4757,N208,4758,link,,link 4758,24.861528,91.881860,0.0850
4758,N208,4759,link,,link 4759,24.861889,91.881027,1.0000
4759,N208,4760,link,,link 4760,24.869499,91.876027,0.9000
4760,N208,4761,bridge,B,KADAM TALI BOX CULVERT id 4761,24.877555,91.875360,0.0056


In [37]:
demo = pd.read_csv('../data/demo-4.csv')
demo

Unnamed: 0,road,id,model_type,condition,name,lat,lon,length
0,N1,1000000,sourcesink,,SoSi1,0,0,4
1,N1,1000001,link,,,1,1,1800
2,N1,1000002,bridge,A,,2,2,200
3,N1,1000003,link,,,3,3,1500
4,N1,1000004,bridge,A,,4,4,8
5,N1,1000005,link,,,5,5,2000
6,N1,1000006,intersection,,,6,6,20
7,N1,1000007,link,,,7,7,1400
8,N1,1000008,bridge,A,,8,8,4
9,N1,1000009,link,,,9,9,2500


In [38]:
for index, row_ss in merged_dataframe[merged_dataframe['model_type'] == 'sourcesink'].iterrows():
    closest_lon = 100
    closest_lat = 100
    closest_diff = 100
    closest_id = None
    closest_row = None
    for index2, row in merged_dataframe.iterrows():
        if row.road != row_ss.road and row.model_type != 'bridge':
            diff_lat = abs(row_ss.lat-row.lat)
            diff_lon = abs(row_ss.lon-row.lon)
            diff = diff_lat + diff_lon
            if diff < closest_diff:
                closest_lon = row.lon 
                closest_lat = row.lat
                closest_id = row.id
                closest_diff = diff
                closest_row = row
    if closest_diff < 0.02:
        merged_dataframe.loc[merged_dataframe['id'] == row_ss.id, 'model_type'] = 'intersection'
        merged_dataframe.loc[merged_dataframe['id'] == row_ss.id, 'name'] = f"intersection of {row_ss.road} with {closest_row.road}"
        merged_dataframe.loc[merged_dataframe['id'] == closest_id, 'model_type'] = 'intersection'
        merged_dataframe.loc[merged_dataframe['id'] == closest_id, 'name'] = f"intersection of {closest_row.road} with {row_ss.road}"
        print('\n Correct!\ndifference is:', closest_diff)
        print(f"intersection of {row_ss.road} with {closest_row.road}")
    else:
        print('\n NOPE!\ndifference is:', closest_diff)
        print(f"intersection of {row_ss.road} with {closest_row.road}")
    


 NOPE!
difference is: 0.0779996999999959
intersection of N1 with N2

 NOPE!
difference is: 1.8818604000000008
intersection of N1 with N106

 Correct!
difference is: 2.779999999802385e-05
intersection of N105 with N1

 NOPE!
difference is: 0.36861039999999434
intersection of N105 with N1

 NOPE!
difference is: 0.058194700000012034
intersection of N106 with N1

 NOPE!
difference is: 0.5611947999999991
intersection of N106 with N1

 Correct!
difference is: 2.779999999802385e-05
intersection of N102 with N1

 Correct!
difference is: 0.0004445000000039556
intersection of N102 with N2

 Correct!
difference is: 0.000888800000005574
intersection of N120 with N1

 Correct!
difference is: 0.0004166000000083159
intersection of N120 with N1

 Correct!
difference is: 0.013667000000005203
intersection of N104 with N1

 NOPE!
difference is: 0.45319439999999744
intersection of N104 with N1

 Correct!
difference is: 0.0002499000000000251
intersection of N2 with N1

 NOPE!
difference is: 0.421778499999

In [39]:
merged_dataframe[merged_dataframe['model_type'] == 'intersection']

Unnamed: 0,road,id,model_type,condition,name,lat,lon,length
19,N1,20,intersection,,intersection of N1 with N2,23.706083,90.521527,0.74
34,N1,35,intersection,,intersection of N1 with N105,23.690416,90.546583,0.588
243,N1,244,intersection,,intersection of N1 with N102,23.478972,91.118166,0.547
248,N1,249,intersection,,intersection of N1 with N120,23.476722,91.136444,1.0
293,N1,294,intersection,,intersection of N1 with N120,23.358444,91.253333,0.61
448,N1,449,intersection,,intersection of N1 with N104,22.996389,91.399805,0.085
1884,N105,1885,intersection,,intersection of N105 with N1,23.690416,90.546611,1.0
2332,N102,2333,intersection,,intersection of N102 with N1,23.478972,91.118194,0.329
2638,N102,2639,intersection,,intersection of N102 with N2,24.050611,91.114667,0.0
2639,N120,2640,intersection,,intersection of N120 with N1,23.477277,91.136111,0.281


In [40]:
merged_dataframe.to_csv('../data/merged_data.csv', index=False)

In [41]:
merged_dataframe

Unnamed: 0,road,id,model_type,condition,name,lat,lon,length
0,N1,1,sourcesink,,Link,23.706028,90.443333,0.8140
1,N1,2,link,,link 2,23.702917,90.450417,0.0080
2,N1,3,link,,link 3,23.702778,90.450472,0.1780
3,N1,4,link,,link 4,23.702139,90.451972,1.0000
4,N1,5,bridge,A,. id 5,23.697889,90.460583,0.0113
...,...,...,...,...,...,...,...,...
4757,N208,4758,link,,link 4758,24.861528,91.881860,0.0850
4758,N208,4759,link,,link 4759,24.861889,91.881027,1.0000
4759,N208,4760,link,,link 4760,24.869499,91.876027,0.9000
4760,N208,4761,bridge,B,KADAM TALI BOX CULVERT id 4761,24.877555,91.875360,0.0056
