In [1]:
import pandas as pd
import os
import math

In [2]:
Roads_df = pd.read_csv("../data/processed/test.csv")

In [3]:
Roads_df.head()

Unnamed: 0,road,id,model_type,condition,name,lat,lon,length,bridge_name,chainage
0,N1,1000000,sourcesink,,SoSi1,23.706028,90.443333,0.0,,0.0
1,N1,1000001,link,,,23.702917,90.450417,814.0,,0.814
2,N1,1000002,link,,,23.702778,90.450472,8.0,,0.822
3,N1,1000003,link,,,23.702139,90.451972,178.0,,1.0
4,N1,1000004,bridge,A,,23.698739,90.458861,11.3,bridge at id 1000004,1.8


In [4]:
# create two new columns, one for the ingoing traffic and one for the outgoing traffic

Roads_df['in'] = pd.Series()
Roads_df['out'] = pd.Series()

  Roads_df['in'] = pd.Series()
  Roads_df['out'] = pd.Series()


In [5]:
# For each sourcesink, find the closest point in the traffic data by comparing chainages.

for index, row in Roads_df.iterrows():
    if row['model_type'] == "sourcesink":
        path = '../data/raw/traffic/'+row['road']+'.traffic.htm'
        trafficdata = pd.read_html(path)[2]
        trafficdata = trafficdata.iloc[4:-3,[0,1,4,5,25]]
        trafficdata.columns = trafficdata.iloc[0]
        trafficdata = trafficdata[2:]
        trafficdata["Start location"] = trafficdata["Start location"].astype(float)*1000
        trafficdata['Traffic'] = trafficdata['Traffic'].astype(float)
        closestrow = trafficdata.iloc[[0]]
        closestdistance = 1000
        # For some html files, it can occur that traffic is saved separately for ingoing traffic and for outgoing traffic
        # If that is the case, we save those separately, otherwise, we divide the traffic equally over the 'in' and 'out' columns
        closestrowright = trafficdata.iloc[[0]]
        twowaydata = False        
        for i,r in trafficdata.iterrows():
            if abs(row['chainage']-r['Start location']) == closestdistance:
                closestrowright = r
                twowaydata = True
            elif abs(row['chainage']-r['Start location']) < closestdistance:
                closestrow = r
                closestdistance = abs(row['chainage']-r['Start location'])
        if twowaydata == True:
            Roads_df.iloc[index,10] = closestrowright['Traffic']
            Roads_df.iloc[index,11] = closestrow['Traffic']
        else:
            Roads_df.iloc[index,10] = closestrow['Traffic']/2
            Roads_df.iloc[index,11] = closestrow['Traffic']/2

In [6]:
# Show it worked

Roads_df.head(20)

Unnamed: 0,road,id,model_type,condition,name,lat,lon,length,bridge_name,chainage,in,out
0,N1,1000000,sourcesink,,SoSi1,23.706028,90.443333,0.0,,0.0,21537.0,19357.0
1,N1,1000001,link,,,23.702917,90.450417,814.0,,0.814,,
2,N1,1000002,link,,,23.702778,90.450472,8.0,,0.822,,
3,N1,1000003,link,,,23.702139,90.451972,178.0,,1.0,,
4,N1,1000004,bridge,A,,23.698739,90.458861,11.3,bridge at id 1000004,1.8,,
5,N1,1000005,link,,,23.697889,90.460583,1000.0,,2.0,,
6,N1,1000006,link,,,23.697361,90.461667,130.0,,2.13,,
7,N1,1000007,link,,,23.693833,90.469138,870.0,,3.0,,
8,N1,1000008,link,,,23.693611,90.478777,1000.0,,4.0,,
9,N1,1000009,link,,,23.693805,90.480527,175.0,,4.175,,


In [7]:
# Save the traffic as a fraction/probability of trucks generation

total_in = Roads_df['in'].sum()
total_out = Roads_df['out'].sum()
for index, row in Roads_df.iterrows():
    if row['model_type'] == "sourcesink":
        Roads_df.iloc[index,10] = row['in']/total_in
        Roads_df.iloc[index,11] = row['out']/total_out   

In [8]:
# Show it worked

print('sum of in column is', Roads_df['in'].sum())
print('sum of out column is', Roads_df['out'].sum())
Roads_df.loc[Roads_df['in'].notnull()] 

sum of in column is 1.0
sum of out column is 1.0


Unnamed: 0,road,id,model_type,condition,name,lat,lon,length,bridge_name,chainage,in,out
0,N1,1000000,sourcesink,,SoSi1,23.706028,90.443333,0.0,,0.0,0.285204,0.256335
1554,N1,1001554,sourcesink,,SoSi2,20.862917,92.298083,130.0,,462.254,0.256335,0.285204
1782,N102,1001782,sourcesink,,SoSi3,24.050611,91.114667,549.0,,82.682,0.064279,0.064279
1783,N104,1001783,sourcesink,,SoSi4,23.009667,91.399416,0.0,,0.0,0.06214,0.06214
1913,N104,1001913,sourcesink,,SoSi5,22.825749,91.101444,120.0,,49.63,0.06214,0.06214
2876,N2,1002876,sourcesink,,SoSi6,25.157056,92.017638,256.0,,286.516,0.163273,0.163273
2877,N204,1002877,sourcesink,,SoSi7,24.147916,91.346611,0.0,,0.0,0.017401,0.017401
3168,N207,1003168,sourcesink,,SoSi8,24.626333,91.677527,350.0,,67.985,0.035285,0.035285
3169,N208,1003169,sourcesink,,SoSi9,24.47136,91.765556,0.0,,0.0,0.053943,0.053943


In [9]:
# Drop the chainage column

Roads_df = Roads_df[["road", "id", "model_type", "condition", "name", "lat", "lon", "length", "bridge_name","in","out"]]

In [10]:
d = Roads_df.iloc[0,:]
Sparse_df = pd.DataFrame(data = d)
Sparse_df = Sparse_df.transpose()

In [11]:
# Merge al consecutive links together to one big link

chainage_build_up = 0
number_of_links = 0

for index, row in Roads_df.iterrows():
    if row["model_type"] == 'link':
        chainage_build_up += row['length']
        number_of_links += 1
    elif number_of_links > 0:
        if number_of_links == 1:
            Sparse_df = Sparse_df.append(Roads_df.iloc[index-1,:])
        else:
            Sparse_df = Sparse_df.append(Roads_df.iloc[index-math.floor(number_of_links/2)-1,:])
            Sparse_df.iloc[-1,7] = chainage_build_up
        chainage_build_up = 0
        number_of_links = 0
        Sparse_df = Sparse_df.append(row)
    elif index > 0:
        Sparse_df = Sparse_df.append(row)

In [12]:
# Show it worked

Sparse_df.head(10)

Unnamed: 0,road,id,model_type,condition,name,lat,lon,length,bridge_name,in,out
0,N1,1000000,sourcesink,,SoSi1,23.706,90.4433,0.0,,0.285204,0.256335
2,N1,1000002,link,,,23.7028,90.4505,1000.0,,,
4,N1,1000004,bridge,A,,23.6987,90.4589,11.3,bridge at id 1000004,,
7,N1,1000007,link,,,23.6938,90.4691,3175.0,,,
10,N1,1000010,bridge,A,,23.6947,90.4878,6.6,bridge at id 1000010,,
13,N1,1000013,link,,,23.6973,90.5078,3836.0,,,
17,N1,1000017,bridge,C,,23.7046,90.5188,418.0,KANCHPUR PC GIRDER BRIDGE,,
18,N1,1000018,link,,,23.705,90.5193,74.0,,,
19,N1,1001932,intersection,,,23.7853,90.5686,260.0,,,
20,N1,1000020,link,,,23.7025,90.5274,740.0,,,


In [13]:
Sparse_df = Sparse_df.reset_index()

In [14]:
# Save to csv 

Sparse_df.to_csv("../data/processed/test_input.csv", index=None)