## encode traffic features

data source: Verkehrsmengen DTV 2019 (Umweltatlas)

features:

`tvi_x`: traffic volume index (tvi) inside a circular buffer of radius x around each site, x = [25, 50, 75, 100, 200], normalized by the radius

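`prop_main_tvi_200`: proportion of heavy vehicles (`lkw` + `linienbuss` + `reisebusse`) in the daily traffic volume (`dtv`) within a radius of 200, weighted by street length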

`nearest_street_meter`: distance to nearest street

`nearest_intersect_meter`: distance to nearest intersection


In [1]:
# load data
import geopandas

# monitoring stations: read the layer, then keep only id, station type and location
sites = geopandas.read_file('data/monitoring_station/monitoring_station.shp')
sites = sites[['id','stattyp', 'geometry']]

# road network with the traffic counts (all columns are kept)
traffic = geopandas.read_file('data/traffic/traffic_volume_dtv_2019/traffic_volume_dtv_2019.shp')

# 1.2 print the reference system of both layers; the buffers and overlays below
# combine the two layers, so they have to share one CRS
print(f'Coordinate Reference System monitoring sites: {sites.crs}')
print(f'Coordinate Reference System traffic count data: {traffic.crs}')

Coordinate Reference System monitoring sites: EPSG:25833
Coordinate Reference System traffic count data: EPSG:25833


In [8]:
# extract the traffic volume index (tvi) and the proportion of main emitters per site
import geopandas 
import matplotlib.pyplot as plt


def calc_tvi(coordinate, radius, id, plot = False):
    """Return the traffic volume index and heavy-vehicle share around a point.

    Road segments of the module-level ``traffic`` layer are clipped to a
    circular buffer around ``coordinate``; every clipped segment contributes
    its remaining length multiplied by its ``dtv`` value.

    Args:
        coordinate: geometry of the monitoring site (must offer ``buffer``).
        radius: buffer radius, in units of the ``traffic`` CRS.
        id: site label; only used in the plot title.
        plot: pass ``True`` to draw the buffer and the clipped roads.

    Returns:
        Tuple ``(tvi, prop_main_emitter)``: the summed length-times-volume
        divided by ``10000 * radius``, and the length-weighted mean share of
        ``lkw + linienbuss + reisebusse`` in ``dtv``.
    """
    # circular search area around the site, in the CRS of the road layer
    search_area = geopandas.GeoDataFrame(geometry=[coordinate.buffer(radius)], crs=traffic.crs)
    # cut every road linestring down to the part lying inside the search area
    clipped_roads = geopandas.overlay(traffic, search_area, how='intersection')

    # per-segment quantities (length is the length remaining inside the buffer)
    segment_len = clipped_roads['geometry'].length
    heavy_vehicles = clipped_roads['lkw'] + clipped_roads['linienbuss'] + clipped_roads['reisebusse']
    heavy_share = heavy_vehicles / clipped_roads['dtv']
    length_weight = segment_len / segment_len.sum()

    # traffic volume index, normalized by the radius
    tvi = (segment_len * clipped_roads['dtv']).sum() / (10000 * radius)
    prop_main_emitter = (heavy_share * length_weight).sum()

    # equality test (not truthiness) kept so that only True/1 trigger plotting
    if plot == True:
        # draw each road as a polygon buffered by radius/40 instead of a bare line
        clipped_roads['geometry'] = clipped_roads.buffer(radius/40)
        ax = search_area.plot(alpha=0.1, edgecolor='black')
        clipped_roads.plot(ax=ax, column='dtv', cmap='Reds', legend=True)
        ax.set_title(f'{id} with radius of {radius}')
        plt.show()

    return tvi, prop_main_emitter

# One `tvi_<radius>` column per buffer radius. calc_tvi returns
# (tvi, prop_main_emitter); both values are taken from the same call, so the
# overlay for radius 200 is computed once instead of twice.
for radius in [25,50,75,100,200]:
    results = [
        calc_tvi(coordinate=point, radius=radius, id=site_id, plot=False)
        for point, site_id in zip(sites['geometry'], sites['id'])
    ]
    sites[f'tvi_{radius}'] = [tvi for tvi, _ in results]
    if radius == 200:
        # the heavy-vehicle proportion is only kept for the largest radius
        sites[f'prop_main_tvi_{200}'] = [prop for _, prop in results]

sites.head(4)

Unnamed: 0,id,stattyp,geometry,tvi_25,tvi_50,tvi_75,tvi_100,tvi_200,prop_main_tvi_200
0,MC 042,Wohngebiet,POINT (393459.020 5816635.250),0.0,0.0,0.0,0.0,0.989238,0.113111
1,MC 124,Verkehr,POINT (390406.146 5810991.699),7.356094,7.878518,7.976514,8.777857,10.390357,0.080445
2,MC 143,Verkehr,POINT (394135.247 5814178.610),2.272461,4.782303,5.207626,5.316963,5.356686,0.039749
3,MC 171,Wohngebiet,POINT (392699.560 5819341.461),0.0,0.0,0.377908,2.104888,6.336192,0.032525


###

In [None]:
# plot differing traffic volume index at sites and changing radius  
import geopandas 
import matplotlib.pyplot as plt 

# Buffer radii compared per site: one subplot each, laid out two per row.
radii = [25,50,75,100,200,300]

# Iterate sites by position (id and geometry in lockstep) rather than by index
# label, and keep the outer and inner loop variables distinct.
for name, coordinate in zip(sites['id'], sites['geometry']):

    fig, axs = plt.subplots((len(radii)//2), 2, figsize=(9, 6), dpi=150)
    fig.suptitle(f'Traffic Volume Index inside radius at {name}:', fontsize =15)

    for position, radius in enumerate(radii):

        # polygon with the buffer region around the monitoring point
        buffer = geopandas.GeoDataFrame(geometry=[coordinate.buffer(radius)], crs=traffic.crs)
        # clip the road linestrings to the part inside the buffer
        in_buffer_one = geopandas.overlay(traffic, buffer, how='intersection')

        # traffic volume index shown in the subplot title
        # NOTE(review): this value is divided by a fixed 100000 and rounded,
        # i.e. it is not normalized by the radius - confirm this is intended.
        in_buffer_one['new_length'] = in_buffer_one['geometry'].length
        in_buffer_one['volume_x_len'] = in_buffer_one['new_length'] * in_buffer_one['dtv']
        tvi = round(in_buffer_one['volume_x_len'].sum()/ 100000)

        # subplots are filled row by row: even positions left, odd positions right
        ax = axs[position // 2, position % 2]

        # draw each road as a polygon buffered by radius/40 instead of a bare line
        in_buffer_one['geometry'] = in_buffer_one.buffer(radius/40)

        buffer.plot(ax=ax, alpha=0.1, edgecolor='black')
        in_buffer_one.plot(ax=ax, column='dtv', cmap='Reds', legend=True, vmin= 0, vmax = 90000,)

        ax.set_title(f'tvi= {tvi} @ r={radius}m', {'fontsize':11})
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    # lay out the figure once, after all subplots have been drawn
    plt.tight_layout()

    plt.savefig(f"data/traffic/traffic_volume_dtv_2019/output/compare_{name}.png", dpi = 450)
    plt.show()

        

In [143]:
# calculate distance between monitoring site and nearest street + intersection 

from shapely import geometry 

# 1. extract intersection points (as beginning & end of each street linestring)
import pandas as pd

# First and last vertex of every street linestring, used as intersection points.
# NOTE(review): reading `.coords` assumes every geometry is a single LineString - confirm.
frist_coord = traffic['geometry'].apply(lambda x: geometry.Point(x.coords[0]))
last_coord = traffic['geometry'].apply(lambda x: geometry.Point(x.coords[-1]))
# pd.concat replaces Series.append, which is deprecated and removed in pandas 2.0
intersection_points = pd.concat([frist_coord, last_coord])

# 2. calculate nearest point / street intersection
def distance_nearest_intersection(point):
    """Return the smallest distance from ``point`` to any intersection point.

    Args:
        point: geometry of the monitoring site.

    Returns:
        Minimum distance (in CRS units) between ``point`` and the entries of
        the module-level ``intersection_points`` series.
    """
    # vectorised distance of all intersection points to the site at once
    return intersection_points.distance(point).min()

# 3. calculate distance between monitoring site and nearest street
def distance_nearest_street(point):
    """Return the smallest distance from ``point`` to any street geometry.

    Args:
        point: geometry of the monitoring site.

    Returns:
        Minimum distance (in CRS units) between ``point`` and the geometries
        of the module-level ``traffic`` layer.
    """
    # vectorised distance of all street geometries to the site at once
    return traffic['geometry'].distance(point).min()

# 4. Apply to all sites
# distance of every site to its closest intersection point
sites['nearest_intersect_meter'] = sites['geometry'].apply(distance_nearest_intersection)
# distance of every site to its closest street geometry
sites['nearest_street_meter'] = sites['geometry'].apply(distance_nearest_street)

# preview of the two new feature columns
sites[['id', 'nearest_street_meter', 'nearest_intersect_meter']].head(5)

  val = getattr(super(), mtd)(*args, **kwargs)


Unnamed: 0,id,nearest_street_meter,nearest_intersect_meter
0,MC 042,154.096745,155.00739
1,MC 124,10.254152,74.625936
2,MC 143,7.490619,33.830426
3,MC 171,74.313229,76.895299
4,MC 174,16.057956,36.321913


In [146]:
# Write the site table, including the feature columns added to `sites` in the
# cells above, to a shapefile.
# NOTE(review): the ESRI Shapefile format limits field names to 10 characters,
# so longer column names (e.g. 'nearest_street_meter') are presumably truncated
# on write - confirm the stored column names before reading this file back.
sites.to_file('data/traffic/buffer_values/sites_traffic.shp')

  sites.to_file('data/traffic/buffer_values/sites_traffic.shp')


### Encode Time Manually:

The time features are oriented on typical traffic behavior:
- holidays
- weekend vs. non weekend
- peak traffic hours (6 - 20)

In [1]:
# holidays
# public holidays in Berlin 2023, written as month + day (MMDD)
holidays_berlin_2023 = ['0101', '0308', '0407', '0410', '0501', '0518', '0529', '1003', '1225', '1226']

# one integer timestamp (YYYYMMDDHH) per hour of every holiday;
# the hour is zero-padded to two digits
holiday_timestemps = [
    int(f'2023{date}{hour:02d}')
    for date in holidays_berlin_2023
    for hour in range(24)
]


In [3]:
# encode workday (boolean) & peak-hour (boolean) for each time step
import pandas as pd
from datetime import datetime

def return_if_weekend(date):
    """Return True when the timestamp lies on a weekend or a listed holiday.

    Args:
        date: timestamp in the form YYYYMMDDHH (int or str), e.g. 2023010100.

    Returns:
        ``True`` for Saturdays and Sundays and for timestamps contained in
        the module-level ``holiday_timestemps`` list, otherwise ``False``.
    """
    # only the first eight characters (YYYYMMDD) carry the calendar date
    weekday = datetime.strptime(str(date)[:8], '%Y%m%d').weekday()
    # weekday() counts Monday as 0 up to Sunday as 6, so the weekend is 5 and 6
    # (the former test `> 6` could never be true)
    if weekday >= 5:
        return True
    return int(date) in holiday_timestemps

def return_if_rushhour(date):
    """Return True when the hour of the timestamp lies between 6 and 20 inclusive.

    Args:
        date: timestamp whose last two characters are the hour (YYYYMMDDHH).

    Returns:
        ``True`` for hours 6 to 20 (both included), otherwise ``False``.
    """
    hour = int(str(date)[-2:])
    return 6 <= hour <= 20
        
# Take the timestamp column of the weather table and derive both boolean
# time features from it, then store the result as CSV.
timesteps = (
    pd.read_csv('data/weather/df_weather_cleaned.csv')
    .filter(['MESS_DATUM'])
    .assign(
        weekend=lambda frame: frame['MESS_DATUM'].apply(return_if_weekend),
        rushhour=lambda frame: frame['MESS_DATUM'].apply(return_if_rushhour),
    )
)
timesteps.to_csv('data/date/date.csv')
timesteps.head(5)


Unnamed: 0,MESS_DATUM,weekend,rushhour
0,2023010100,True,False
1,2023010101,True,False
2,2023010102,True,False
3,2023010103,True,False
4,2023010104,True,False


### encode population density
data source: fis broker

features:


`pop_x`: number of registered inhabitants within radius x of each site, x = [200, 500]

In [10]:
# read data
import geopandas as gpd

# monitoring stations (id, station type, location) and population polygons
sites = gpd.read_file('data/monitoring_station/monitoring_station.shp')[['id','stattyp', 'geometry']]
popu = gpd.read_file('data/population/population.shp')[['ew2022', 'geometry']]
# remember the full area of every population polygon, so that pieces clipped
# later can be weighted by their share of the original area
popu['original_area'] = popu['geometry'].area

# 1.2 check reference system - no adjustment needed
print(f'Coordinate Reference System monitoring sites: {sites.crs}')
# label corrected: this layer is the population data, not the green volume
print(f'Coordinate Reference System population data: {popu.crs}')

Coordinate Reference System monitoring sites: EPSG:25833
Coordinate Reference System green volume: EPSG:25833


In [11]:
# find total population per radius
def get_popu_per_radius(coordinate, radius):
    """Return the area-weighted population inside a circle around a site.

    Each polygon of the module-level ``popu`` layer is clipped to the circle;
    its ``ew2022`` value is scaled by the share of its original area that lies
    inside the circle, rounded, and the rounded values are summed.

    Args:
        coordinate: geometry of the monitoring site (must offer ``buffer``).
        radius: circle radius, in units of the ``popu`` CRS.

    Returns:
        Sum of the rounded, area-weighted population values.
    """
    # circle around the sensing site, in the CRS of the population layer
    circle = gpd.GeoDataFrame(geometry=[coordinate.buffer(radius)], crs=popu.crs)
    # keep only the parts of the population polygons inside the circle
    clipped = gpd.overlay(popu, circle, how='intersection')

    # share of each original polygon that remains inside the circle
    area_share = clipped['geometry'].area / clipped['original_area']
    weighted_pop = (area_share * clipped['ew2022']).round()

    return weighted_pop.sum()

# one population column per radius, filled site by site
for column, radius_m in (('pop_200', 200), ('pop_500', 500)):
    sites[column] = [
        get_popu_per_radius(coordinate=point, radius=radius_m)
        for point in sites['geometry']
    ]

# store the result and show the first rows
sites.to_file('data/population/buffer_radius/population_per_site.shp')
sites.head(5)

Unnamed: 0,id,stattyp,geometry,pop_200,pop_500
0,MC 042,Wohngebiet,POINT (393459.020 5816635.250),2984.0,19158.0
1,MC 124,Verkehr,POINT (390406.146 5810991.699),702.0,6332.0
2,MC 143,Verkehr,POINT (394135.247 5814178.610),1995.0,12259.0
3,MC 171,Wohngebiet,POINT (392699.560 5819341.461),787.0,7917.0
4,MC 174,Verkehr,POINT (396182.715 5819313.198),3698.0,21292.0
