In [102]:
import pandas as pd
import numpy as np
import math

## Bridges

In [103]:
bridges = pd.read_excel('../data/BMMS_overview.xlsx')

In [104]:
bridges = bridges[(bridges['road'].str.contains(pat = ('N1'), regex = False)) | (bridges['road'].str.contains(pat = ('N2'), regex = False))]

In [105]:
# Define bridge length bins and labels
length_bins = [0, 10, 50, 200, math.inf]
length_labels = ['S', 'M', 'L', 'XL']

# Categorize bridges per length class
bridges["length_class"] = pd.cut(bridges["length"], bins=length_bins, include_lowest=False, right=False, labels=length_labels)

In [106]:
# Keep only useful columns
keep_columns = ['road', 'lat', 'lon', 'length', 'condition', 'name', 'LRPName', 'chainage', 'km']
bridges = bridges[keep_columns]

In [107]:
# Filters out all variations of (R), (Right), (  R  ), etc.
patternDel = "\(\s*[a-zA-Z]*(R|r)[a-zA-Z]*\s*\)"
filter = bridges['name'].str.contains(patternDel)
bridges = bridges[~filter]

  return func(self, *args, **kwargs)


In [108]:
# Calculate distance between bridges
bridges1 = bridges.copy()
bridges1["dist"] = bridges1["km"].diff()

# Drop all bridges that are closer to the previous bridge then their own length
bridges = bridges.loc[bridges1["dist"]*1000 >= bridges["length"]]

In [109]:
bridges['model_type'] = 'bridge'

In [110]:
bridges.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1763 entries, 1 to 19507
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   road        1763 non-null   object 
 1   lat         1763 non-null   float64
 2   lon         1763 non-null   float64
 3   length      1763 non-null   float64
 4   condition   1763 non-null   object 
 5   name        1763 non-null   object 
 6   LRPName     1763 non-null   object 
 7   chainage    1763 non-null   float64
 8   km          1763 non-null   float64
 9   model_type  1763 non-null   object 
dtypes: float64(5), object(5)
memory usage: 151.5+ KB


## Roads

In [111]:
roads = pd.read_csv('../data/_roads3.csv')
roads = roads.rename(columns = {'lrp':'LRPName'})
roads = roads[["road", "chainage", "LRPName", "lat", "lon", "name"]]
length = (roads['chainage'] - roads['chainage'].shift(-1)).abs()
roads['length'] = (length * 1000)
roads.drop(roads.tail(1).index, inplace=True)
roads.reset_index()
roads['model_type'] = 'link'

In [112]:
roads = roads[(roads['road'].str.contains(pat = ('N1'), regex = False)) | (roads['road'].str.contains(pat = ('N2'), regex = False))]

In [113]:
short_roads = roads[(roads['length'] < 25) & (roads['road'] != 'N1') & (roads['road'] != 'N2')].index

In [114]:
roads = roads.drop(short_roads)

In [115]:
# Delete N1 and N2 from the list
roadslist = roads['road'].unique()
roadslist = np.delete(roadslist, 0)
roadslist = np.delete(roadslist, 17)
roadslist

array(['N101', 'N102', 'N103', 'N104', 'N105', 'N106', 'N107', 'N108',
       'N109', 'N110', 'N111', 'N112', 'N119', 'N120', 'N123', 'N128',
       'N129', 'N203', 'N204', 'N205', 'N206', 'N207', 'N208', 'N209',
       'N210'], dtype=object)

In [116]:
firstroad = []
for i in roadslist:
    firstroad.append(roads[roads['road'] == i].iloc[:1].index.values.tolist())

In [117]:
for y in firstroad:
    roads.at[y,'model_type'] = 'intersection'

In [118]:
lastroad = []
for z in roadslist:
    lastroad.append(roads[roads['road'] == z].iloc[-1:].index.values.tolist())

In [119]:
for w in lastroad:
    roads.at[w,'model_type'] = 'sourcesink'

In [120]:
roads.loc[(roads.road == 'N1') & (roads.LRPName == 'LRPS'), ['model_type']] = 'sourcesink'
roads.loc[(roads.road == 'N1') & (roads.LRPName == 'LRPE'), ['model_type']] = 'sourcesink'

In [121]:
roads.loc[(roads.road == 'N2') & (roads.LRPName == 'LRPS'), ['model_type']] = 'intersection'
roads.loc[(roads.road == 'N2') & (roads.LRPName == 'LRPE'), ['model_type']] = 'sourcesink'

In [122]:
Changelist = ['N109','N129','N123','N120','N102','N104','N207','N208','N206','N103','N210','N205','N107','N204','N103','N209', 'N205']
for i in Changelist:
    roads.loc[(roads.road == i) & (roads.LRPName == 'LRPE'), ['model_type']] = 'intersection'

In [123]:
roads.loc[(roads.road == 'N119') & (roads.LRPName == 'LRP002a'), ['model_type']] = 'intersection'
roads.loc[(roads.road == 'N104') & (roads.LRPName == 'LRPE'), ['model_type']] = 'sourcesink'
roads.loc[(roads.road == 'N107') & (roads.LRPName == 'LRPS'), ['model_type']] = 'sourcesink'

## Mergen

In [124]:
merged = pd.merge(roads, bridges, how = 'outer', on = ["road", "LRPName", "length", "lat", "lon", "model_type", "chainage", "name"])
merged = merged.sort_values(by = ['lat', 'lon'])
merged.reset_index()
merged.insert(0, 'id', range(0, len(merged)))
merged = merged[["id", "road", "name", "lat", "lon", "model_type", "chainage", "LRPName", "length", "condition"]]

# merged.to_csv("../data/A3_data_clean.csv", index = False)

In [125]:
intersection = merged[merged['model_type'] == 'intersection']

In [126]:
intersectlist = intersection[(intersection['name'].str.contains(pat = ('N1'), regex = False))].index
intersectlist

Int64Index([2099, 2098, 2089, 2017, 2016, 2130, 2267, 2259, 2253, 2252, 2234,
            2233, 1339, 2187, 1353, 2183, 1668, 2268, 1548, 1558],
           dtype='int64')

In [127]:
for y in intersectlist:
    intersection.at[y,'road'] = 'N1'

In [128]:
intersectlistN2 = intersection[(intersection['name'].str.contains(pat = ('N2'), regex = False))].index
intersectlistN2

Int64Index([3154, 3159, 3247, 3262, 3417, 3416, 3630, 3586, 3587, 3254, 3593,
            3253, 3261, 3594],
           dtype='int64')

In [129]:
for y in intersectlistN2:
    intersection.at[y,'road'] = 'N2'

In [130]:
intersection['road'][2176] = 'N1'
intersection['road'][1547] = 'N2'
intersection['road'][1559] = 'N1'
intersection['road'][1784] = 'N1'
intersection['road'][3418] = 'N207'
intersection['road'][1558] = 'N102'
intersection['road'][1548] = 'N102'
intersection['road'][3254] = 'N208'
intersection['road'][3631] = 'N208'
intersection['road'][3249] = 'N2'
intersection['road'][2187] = 'N1'
intersection['road'][1339] = 'N120'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  intersection['road'][2176] = 'N1'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  intersection['road'][1547] = '

In [131]:
full_merged = pd.merge(merged, intersection, how = 'outer', on = ["id", "road", "LRPName", "length", "lat", "lon", "model_type", "chainage", "name", "condition"])
full_merged = full_merged.sort_values(by = ['lat', 'lon'])
full_merged.reset_index()
full_merged = full_merged[["id", "road", "name", "lat", "lon", "model_type", "chainage", "LRPName", "length", "condition"]]

In [132]:
full_merged.to_csv("../data/A3_data_clean.csv", index = False)

In [None]:
roads.info()