In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import timeit

# Path of the road data, read as a CSV file (_roads3).
# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as '\J' or '\_'.
road_path = r'D:\Jupyter_File\Advanced_Discrete_Cleaning\WBSIM_Lab2\infrastructure\_roads3.csv'

# Path of the bridge data, read as an Excel file (BMMS_overview)
bridge_path = r'D:\Jupyter_File\Advanced_Discrete_Cleaning\WBSIM_Lab2\infrastructure\BMMS_overview.xlsx'

# Directory that holds the RMMS files (<road>.traffic.htm, <road>.widths.processed.txt)
path = r'D:\Jupyter_File\Advanced_Discrete_Cleaning\RMMS'

rname = 'N2'  # name of the road to load

## Preprocessing data from _roads and BMMS_overview data

In [2]:
# Load the raw road table (CSV) and the raw bridge table (Excel);
# the first row of each file is used as the header.
df_orig = pd.read_csv(road_path, header=0, index_col=None)
bdf_orig = pd.read_excel(bridge_path, header=0, index_col=None)

# Work on copies so the frames as read from disk stay available
df, bdf = df_orig.copy(), bdf_orig.copy()

# Report the number of rows in each source, each preceded by a blank line
print()
print('Total Road Data :', len(df), 'Data')
print()
print('Total Bridge Data :', len(bdf), 'Data')


Total Road Data : 51348 Data

Total Bridge Data : 20415 Data


In [3]:
# Keep only the rows that belong to the selected road (rname), in both the
# road table and the bridge table, ordered by chainage.
on_selected_road = df['road'] == rname
df_selected = df.loc[on_selected_road].sort_values(by='chainage')

bridge_on_selected_road = bdf['road'] == rname
bdf_selected = bdf.loc[bridge_on_selected_road].sort_values(by='chainage')

# Report how many rows were kept from each source
print('Total', rname, 'Roads data:', len(df_selected))
print('Total', rname, 'Bridges data:', len(bdf_selected))

Total N1 Roads data: 1339
Total N1 Bridges data: 787


In [4]:

# Road rows: add empty (NaN) columns that are renamed into existence on the
# bridge side below, tag the rows with their source, and drop the 'gap'
# column, which is not needed in the output.
road_only_blanks = ['width', 'Length', 'Quality', 'LinkName', 'constructionYear']
df_selected = (
    df_selected
    .assign(**dict.fromkeys(road_only_blanks, np.nan))
    .assign(Data_source='road3')
    .drop(columns=['gap'])
)

# Bridge rows: tag the source, drop the columns that are not needed, and
# rename the remaining ones to the names used on the road side.
# rename(index=str) also converts the row labels to strings.
bdf_selected = (
    bdf_selected
    .assign(Data_source='BMMS')
    .drop(columns=['km', 'structureNr', 'spans',
                   'zone', 'circle', 'division',
                   'sub-division', 'EstimatedLoc'])
    .rename(index=str, columns={'LRPName': 'lrp', 'condition': 'Quality',
                                'length': 'Length', 'roadName': 'LinkName'})
)


In [5]:
# Stack the road rows and the bridge rows into one table ordered by chainage
df1_combined = pd.concat([df_selected, bdf_selected]).sort_values(by='chainage')

# Keep only the first row for every LRP, then restore the chainage order
df1_combined = df1_combined.drop_duplicates(subset=['lrp'], keep='first', inplace=False)
df1_combined = df1_combined.sort_values(by='chainage')

# Use the column names of the RMMS traffic table so both can be combined later
df1_combined = df1_combined.rename(index=str, columns={'chainage': 'ChainageS', 'lrp': 'LRPStart'})

# Empty (NaN) placeholders for the columns that only the traffic/width data provide
traffic_placeholders = [
    'OffsetStart', 'LRPEnd', 'OffsetEnd', 'ChainageE', 'Distance',
    'Heavy Truck', 'Medium Truck', 'Small Truck',
    'Large Bus', 'Medium Bus', 'Micro Bus',
    'Utility', 'Car', 'Auto Rickshaw', 'Motor Cycle',
    'Bi-Cycle', 'Cycle Rickshaw', 'Cart',
    'Motorized', 'Non Motorized', 'AADT', 'nrLanes',
]
df1_combined = df1_combined.assign(**dict.fromkeys(traffic_placeholders, np.nan))

# # Change the order of the dataframe columns name
# df1_combined = df1_combined[['road', 'lrp','chainage','lat','lon', 'width','Quality','Length',
#                              'constructionYear','point_type','type','LinkName','name']]

## Get data from RMMS html

In [6]:
# Read the RMMS traffic page of the selected road; the table at position 4
# of the parsed page is the one used here.
htmlPath = path + '\\' + rname + ".traffic.htm"
rawHtml  = pd.read_html(htmlPath)[4]

# Row 2 is used as the header row: label the cells that become column names
rawHtml.loc[2, 0] = 'LinkNo'
rawHtml.loc[2, 1] = 'LinkName'
rawHtml.loc[2, 4] = 'ChainageS'
rawHtml.loc[2, 7] = 'ChainageE'

# Rows from 3 onward are the table body. Take an explicit copy so the edits
# below act on an independent frame rather than on a slice of rawHtml.
tf = rawHtml.loc[3:len(rawHtml), :].copy()
tf.columns = rawHtml.loc[2, :]
tf = tf.reset_index(drop=True)
tf['width'] = np.nan
tf['nrLanes'] = np.nan
tf = tf.rename(index=str, columns={"(Km)": "Distance", "(AADT)": "AADT"})
tf = tf.drop('Total AADT', axis=1)

# Rename a few columns by position. The names are rebuilt as a list and
# assigned back as a new Index; writing into `tf.columns.values` would mutate
# the (immutable) Index in place and can leave its cached lookups stale.
tf_names = tf.columns.tolist()
for position, new_name in ((0, 'road'), (2, 'LRPStart'), (3, 'OffsetStart'),
                           (5, 'LRPEnd'), (6, 'OffsetEnd')):
    tf_names[position] = new_name
tf.columns = pd.Index(tf_names, name=tf.columns.name)

# Read the tab-delimited lanes/widths file of the same road
texPath = path + '\\' + rname + '.widths.processed.txt'
ln      = pd.read_table(texPath)
ln = ln.rename(index=str, columns={"startChainage": "ChainageS", "endChainage": "ChainageE"})

# The first column becomes 'road' (same reasoning as above: assign a new Index)
ln.columns = pd.Index(['road'] + ln.columns.tolist()[1:], name=ln.columns.name)

# Drop the unnecessary column
ln = ln.drop(['roadId'], axis=1)

# Empty (NaN) columns, to be filled from the traffic data later on
for blank_column in ['LinkName', 'LRPStart', 'OffsetStart', 'LRPEnd', 'OffsetEnd',
                     'Distance', 'Heavy Truck', 'Medium Truck', 'Small Truck',
                     'Large Bus', 'Medium Bus', 'Micro Bus', 'Utility', 'Car',
                     'Auto Rickshaw', 'Motor Cycle', 'Bi-Cycle', 'Cycle Rickshaw',
                     'Cart', 'Motorized', 'Non Motorized', 'AADT']:
    ln[blank_column] = np.nan

In [7]:
# Number of columns in the traffic table
tf.shape[1]

27

In [8]:
# Stack the traffic links (tf) and the width/lane segments (ln) into one table
tf_combined = pd.concat([tf, ln])

# Several columns hold numbers stored as strings. Convert every non-numeric
# column with pd.to_numeric (unparseable entries become NaN), but leave a
# column untouched when nothing in it parses as a number, so text columns
# keep their values. This mirrors
# `DataFrame.convert_objects(convert_numeric=True)`, which was removed in
# pandas 1.0.
for text_column in tf_combined.select_dtypes(exclude='number').columns:
    as_number = pd.to_numeric(tf_combined[text_column], errors='coerce')
    if as_number.notna().any():
        # assign by position: the concatenated index contains repeated labels
        tf_combined[text_column] = as_number.to_numpy()

# Order the rows by the end chainage of each segment
tf_combined = tf_combined.sort_values(by='ChainageE')

# Put the columns in the same order as in the traffic table
tf_combined = tf_combined[tf.columns]

  after removing the cwd from sys.path.


In [9]:
# Split the combined table by the last letter of the 'road' code:
# roadL keeps every row whose code does not end in 'R',
# roadR keeps every row whose code does not end in 'L'.
# Rows whose code ends in neither letter therefore appear in both tables.
ends_in_R = tf_combined['road'].str.endswith('R')
ends_in_L = tf_combined['road'].str.endswith('L')
roadL = tf_combined.loc[~ends_in_R].reset_index(drop=True)
roadR = tf_combined.loc[~ends_in_L].reset_index(drop=True)

# Row count of each side
print(len(roadL), len(roadR), sep='\n')

133
133


In [10]:
# First five rows of the right-hand table
roadR.head()

Unnamed: 0,road,LinkName,LRPStart,OffsetStart,ChainageS,LRPEnd,OffsetEnd,ChainageE,Distance,Heavy Truck,...,Auto Rickshaw,Motor Cycle,Bi-Cycle,Cycle Rickshaw,Cart,Motorized,Non Motorized,AADT,width,nrLanes
0,N1,,,,0.0,,,0.13,,,...,,,,,,,,,29.7,8.0
1,N1,,,,0.13,,,0.4,,,...,,,,,,,,,14.0,4.0
2,N1-1R,Jatrabari - Int.with Z1101 (Left) (Right),LRPS,0.0,0.0,LRPS,822.0,0.822,0.822,660.0,...,2508.0,436.0,213.0,1088.0,0.0,20236.0,1301.0,21537.0,,
3,N1,,,,0.4,,,1.05,,,...,,,,,,,,,28.3,8.0
4,N1-2R,Int.with Z1101 - Signboard (Left) R111 (Right),LRPS,822.0,0.822,LRPS,4175.0,4.175,3.353,402.0,...,2980.0,398.0,232.0,889.0,0.0,18236.0,1121.0,19357.0,,


In [11]:
# First five rows of the left-hand table
roadL.head()

Unnamed: 0,road,LinkName,LRPStart,OffsetStart,ChainageS,LRPEnd,OffsetEnd,ChainageE,Distance,Heavy Truck,...,Auto Rickshaw,Motor Cycle,Bi-Cycle,Cycle Rickshaw,Cart,Motorized,Non Motorized,AADT,width,nrLanes
0,N1,,,,0.0,,,0.13,,,...,,,,,,,,,29.7,8.0
1,N1,,,,0.13,,,0.4,,,...,,,,,,,,,14.0,4.0
2,N1-1L,Jatrabari - Int.with Z1101 (Left) (Left),LRPS,0.0,0.0,LRPS,822.0,0.822,0.822,402.0,...,2980.0,398.0,232.0,889.0,0.0,18236.0,1121.0,19357.0,,
3,N1,,,,0.4,,,1.05,,,...,,,,,,,,,28.3,8.0
4,N1-2L,Int.with Z1101 - Signboard (Left) R111 (Left),LRPS,822.0,0.822,LRPS,4175.0,4.175,3.353,660.0,...,2508.0,436.0,213.0,1088.0,0.0,20236.0,1301.0,21537.0,,


In [12]:
# Make the start chainage of each segment match the end chainage of the
# previous one wherever the two overlap, so the traffic links and the width
# segments connect.

def close_chainage_overlaps(segments):
    """Raise overlapping start chainages to the previous row's end chainage.

    For every row after the first, if ``ChainageS`` is smaller than the
    ``ChainageE`` of the row directly above it, ``ChainageS`` is set to that
    ``ChainageE``. Rows where either value is NaN are left unchanged.

    The write goes through a single ``.loc`` call; the chained form
    ``frame['ChainageS'].iloc[i] = ...`` triggers SettingWithCopyWarning and
    does not update the frame when copy-on-write is active.

    Parameters
    ----------
    segments : pandas.DataFrame
        Table with ``ChainageS`` and ``ChainageE`` columns and a unique index,
        ordered the way the segments follow each other. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``segments`` object, for convenience.
    """
    previous_end = segments['ChainageE'].shift(1)
    overlaps = (segments['ChainageS'] - previous_end) < 0
    segments.loc[overlaps, 'ChainageS'] = previous_end[overlaps]
    return segments


roadL = close_chainage_overlaps(roadL)
roadR = close_chainage_overlaps(roadR)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [13]:
# Fill each NaN with the next available value in its column, then fill what
# is still missing with the previous available value
# (this copies the traffic figures onto the width/lane rows).
# The right-hand table is filled from roadR; it was previously built from
# roadL by mistake, which made both filled tables identical.
roadL_filled = roadL.bfill().ffill()
roadR_filled = roadR.bfill().ffill()

In [14]:
# Last five rows of the filled right-hand table
roadR_filled.tail(5)

Unnamed: 0,road,LinkName,LRPStart,OffsetStart,ChainageS,LRPEnd,OffsetEnd,ChainageE,Distance,Heavy Truck,...,Auto Rickshaw,Motor Cycle,Bi-Cycle,Cycle Rickshaw,Cart,Motorized,Non Motorized,AADT,width,nrLanes
128,N1,Whykong Z1133 - Teknaf,LRP433,521.0,437.968,LRP467,130.0,444.511,33.346,24.0,...,2476.0,419.0,97.0,1042.0,0.0,4681.0,1139.0,5820.0,5.7,2.0
129,N1,Whykong Z1133 - Teknaf,LRP433,521.0,444.511,LRP467,130.0,451.519,33.346,24.0,...,2476.0,419.0,97.0,1042.0,0.0,4681.0,1139.0,5820.0,5.6,2.0
130,N1,Whykong Z1133 - Teknaf,LRP433,521.0,451.519,LRP467,130.0,459.326,33.346,24.0,...,2476.0,419.0,97.0,1042.0,0.0,4681.0,1139.0,5820.0,5.7,2.0
131,N1-69,Whykong Z1133 - Teknaf,LRP433,521.0,459.326,LRP467,130.0,462.254,33.346,24.0,...,2476.0,419.0,97.0,1042.0,0.0,4681.0,1139.0,5820.0,6.3,2.0
132,N1,Whykong Z1133 - Teknaf,LRP433,521.0,462.254,LRP467,130.0,462.254,33.346,24.0,...,2476.0,419.0,97.0,1042.0,0.0,4681.0,1139.0,5820.0,6.3,2.0


In [15]:
# #### THE CODE BELOW IS PSEUDO-CODE: IT IS A SKETCH AND DOES NOT RUN

# # Combining the lane info with the traffic info:

# # Create new dataframe
# LeftR = pd.dataframe()

# # add start chainage and endchainage
# leftR['StartChain']        = roadL['ChainageS']
# leftR['StartChain'].append = ln['startchainage']
# # repeat for Endchainage

# # order data frame on chainage (start or end)
# leftR.order('by':'Startchain')

# # remove all duplicates in chainage
# leftR.removeduplicates('StartChain')

# # reindex (important to prevent errors in .loc)

# # loop for every chainage and add the correct ADDT, lanes, etc.
# for i in range(len(leftR)):
    
#     # search the correct roadsegment for this chainage (endchainage<= endchain and startchain >=startchain)
#     leftR.loc[i,'lanes'] = ln.loc[idx_lanes,'lanes']
    
#     # repeat for traffic dataframe
#     lefR.loc[i,'ADDT'] = roadL.loc[idx_traffic,'ADDT']
    
#     # add amount of bridges between this start and end chainage
#     Search in BMMS for amount of bridges and quality
    

# #Notes: it may be possible to also create a dataframe with lat and lon in the same loop.
# #       Inside the loop, search for that particular point in the roads dataframe.
# #       These lat and lon values can go in a different dataframe because it contains points instead of links.

## Combine all dataframes (BMMS, _roads3, traffic.html, widths.processed) (not yet done)

In [16]:
# still need to add some additional blank columns to combine them together