## Encoding street canyons using Berlin's building heights

Main extracted features:
- ``prop_intercept_50``: proportion (of 360°) of directions with an intersection (building) within a 50 meter radius
- ``prop_intercept_200``: the same proportion within a 200 meter radius
- ``border_values``: beginning and ending degree [1,360] of each open window without an obstacle (200 meter radius)
- ``free_wind``: boolean, whether the wind at a given site and timestamp can reach the site unobstructed
 
Encoding Steps:
1. read dataset of building heights and geometry
2. exclude buildings with a height of X meters or less
3. Main Functions:

``object_intersect_scan``: create a spatial echo with lines distributed in all directions and cut them where they intersect a building polygon

``create_border_values_open_direction``: return beginning and ending degree [1,360] of open window of no obstacle 

``calc_valley``: combines previous functions and plots each location

``no_building_in_wind_degree``: combines the meteorological df with data from create_border_values_open_direction


In [120]:
# 1 load data and select features
import geopandas

# monitoring sites: keep only the identifier, the station type and the location
sites = geopandas.read_file('data/monitoring_station/monitoring_station.shp')[['id','stattyp', 'geometry']]
# building footprints with their heights
building = geopandas.read_file('data/buildings_height/buildings_height.shp')

# 1.2 check reference system - (no adjustment needed)
# both layers must share one CRS, otherwise buffers and rays built later would not line up
print(f'Coordinate Reference System monitoring sites: {sites.crs}')
print(f'Coordinate Reference System building heights: {building.crs}')

Coordinate Reference System monitoring sites: EPSG:25833


In [124]:
# minimum building height (column 'hoehe', germ. Höhe) for a building to count as an obstacle
# adjust parameter in reference to literature
relevant_height = 20

# keep height + footprint of every building that is strictly taller than the threshold
building1 = building.loc[building['hoehe'] > relevant_height, ['hoehe', 'geometry']]

In [15]:
from shapely import geometry
import numpy

def object_intersect_scan(center_point, radius, intersect_polygon):
    '''
    Cast 360 rays from a centre point and record where each ray first meets an obstacle
    --
    Input:
        center_point: geometry - coordinate (geo point) the rays start from
        radius: int - length of every ray
        intersect_polygon: iterable of geometries - surrounding obstacles (buildings)

    Return:
        full_rays: list of LineString - rays that meet no obstacle and keep their full length
        distances: list (len=360) - per ray the distance from the centre to the nearest
                   obstacle, or 0 if the ray meets no obstacle
    '''
    # one ray per degree: index 0 points north, higher indices turn counterclockwise
    angles = numpy.linspace(0, 2 * numpy.pi, 360, endpoint=False)

    full_rays = []  # rays that reach the border of the scan radius
    distances = []  # nearest-obstacle distance for every ray (0 = nothing hit)

    for angle in angles:
        ray_end = geometry.Point(
            center_point.x + numpy.sin(-angle) * radius,
            center_point.y + numpy.cos(-angle) * radius,
        )
        ray = geometry.LineString([center_point, ray_end])

        # distance to the closest obstacle crossed by this ray
        nearest = numpy.inf
        for polygon in intersect_polygon:
            if not ray.intersects(polygon):
                continue
            hit_distance = center_point.distance(ray.intersection(polygon))
            nearest = min(nearest, hit_distance)

        if nearest == numpy.inf:
            # nothing in the way: the ray counts as open and is stored with distance 0
            full_rays.append(ray)
            distances.append(0)
        else:
            distances.append(nearest)

    return full_rays, distances


In [103]:
def create_border_values_open_direction(distances, degree_threshold = 5):
    '''
    converts ordered sequence of distance length to nested list with beginning and ending degree of open window
    Input:
        distances - list of len(360) with length of 360 degree lines; a value of 0 marks
                    a line that met no building
        degree_threshold - int, windows spanning fewer degrees than this are dropped
    Output: nested list of beginning and end degree of each window of no building,
            e.g. [[41, 105], [304, 12]]; a window that runs across 360 -> 1 is reported
            once (start > end) and listed last; empty list if no window qualifies
    '''
    # index i of distances is stored as degree 360 - i, so index 0 becomes 360
    open_degrees = sorted(360 - index for index, length in enumerate(distances) if length == 0)

    # no line reaches the border: there is no open window at all
    if not open_degrees:
        return []

    # group consecutive degrees into windows
    windows = [[open_degrees[0]]]
    for degree in open_degrees[1:]:
        if degree - 1 == windows[-1][-1]:
            windows[-1].append(degree)
        else:
            windows.append([degree])

    # a window ending at 360 and one starting at 1 are the same window across north;
    # a single window covering the full circle must not be merged with itself
    if len(windows) > 1 and windows[0][0] == 1 and windows[-1][-1] == 360:
        first_window = windows.pop(0)
        last_window = windows.pop()
        windows.append(last_window + first_window)

    # keep only sufficiently wide windows and report their first and last degree
    # (built as a new list, since removing while iterating would skip neighbours)
    return [[window[0], window[-1]] for window in windows if len(window) >= degree_threshold]

In [97]:
import geopandas 
import matplotlib.pyplot as plt


def calc_valley(coordinate, radius, id = None,  plot = False):
    '''
    SPATIAL ECHO obstacle detection around one monitoring site
    ---
    Input:
        coordinate: geopoint (monitoring site)
        radius: int - radius of the outer scan around the site
        id: str - name of site, only used in the plot title and the saved file name
        plot: boolean - if set, print both proportions and draw, save and show a map of the scan

    Return:
        prop_intersect_radius: share of the 360 rays that hit a building within radius
        prop_intersect_50: share of the 360 rays that hit a building within 50
        border_values: nested list with first and last degree of every open window

    Note: reads the module-level building1 layer. The buildings are clipped to the
    radius buffer before both scans, so the 50 scan only sees buildings inside radius.
    '''

    # polygon with buffer region around monitoring point
    buffer = geopandas.GeoDataFrame(geometry=[coordinate.buffer(radius)], crs=building1.crs)
    # intersect with radius polygon to reduce the buildings to the part inside the radius
    in_buffer_one = geopandas.overlay(building1, buffer, how='intersection')
    obstacles = in_buffer_one['geometry']

    # proportion of intersection in 50 meter range on 360 rays
    rays_50, _ = object_intersect_scan(center_point=coordinate, radius=50, intersect_polygon=obstacles)
    prop_intersect_50 = round(1 - len(rays_50)/360, 3)

    # unobstructed rays and per-ray distances for the full radius
    full_rays, len_rays = object_intersect_scan(center_point=coordinate, radius=radius, intersect_polygon=obstacles)

    # proportion of intersection in the full radius on 360 rays
    prop_intersect_radius = round(1 - len(full_rays)/360, 3)

    # beginning and ending degree [1,360] of open window of no obstacle
    border_values = create_border_values_open_direction(distances=len_rays, degree_threshold=5)

    if plot:

        print(f'proportion of buildings (360°) in r=50m: {prop_intersect_50}')
        print(f'proportion of buildings (360°) in r={radius}m: {prop_intersect_radius}')

        # additional buffer zone for orientation & lines which reach border;
        # every layer takes its CRS from building1 so that all of them share one reference system
        buffer_50 = geopandas.GeoDataFrame(geometry=[coordinate.buffer(50)], crs=building1.crs)
        full_lines = geopandas.GeoDataFrame({'geometry': full_rays}, crs=building1.crs)

        ax = buffer.plot(alpha=0.1, edgecolor='black') # plot buffer radius outer
        buffer_50.plot(ax=ax, alpha=0.05, edgecolor='black') # plot buffer radius inner
        full_lines.plot(ax=ax, alpha=0.1, edgecolor='purple') # plot lines which reach buffer border
        in_buffer_one.plot(ax=ax, column='hoehe', cmap='Blues', legend=True, vmin=10, vmax=30) # plot building + height
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        plt.title(f'{id} with radius of {radius}', size = 15, loc = "left")
        plt.suptitle(f'prop. of buildings (360°): {prop_intersect_50} (r=50m), {prop_intersect_radius} (r={radius}m)', size = 10)
        plt.savefig(f"data/buildings_height/output/buffer_{radius}_{id}.png", dpi = 300) # save plot
        plt.show()

    return prop_intersect_radius, prop_intersect_50, border_values

# notebooks_feature_engineering
    

In [121]:
import pandas as pd

# names for the three values returned by calc_valley, in return order
feature_names = {0: 'prop_intercept_200', 1: 'prop_intercept_50', 2: 'border_values'}

# run the canyon scan (r=200) for every site and spread the result tuples into columns
scan_results = sites['geometry'].apply(lambda point: calc_valley(coordinate=point, radius=200))
temp_df = scan_results.apply(pd.Series).rename(columns=feature_names)

# attach the extracted features to the site table
sites = pd.concat([sites, temp_df[list(feature_names.values())]], axis=1)

# save new df
sites.to_csv('data/buildings_height/buffer_values/buffer_values.csv')
sites.head(5)

Unnamed: 0,id,stattyp,geometry,prop_intercept_200,prop_intercept_50,border_values
0,MC 042,Wohngebiet,POINT (393459.020 5816635.250),0.894,0.3,"[[129, 143], [215, 225], [230, 234]]"
1,MC 124,Verkehr,POINT (390406.146 5810991.699),0.172,0.0,"[[41, 105], [112, 140], [155, 288], [304, 12]]"
2,MC 143,Verkehr,POINT (394135.247 5814178.610),0.15,0.0,"[[65, 160], [171, 181], [191, 226], [257, 59]]"
3,MC 171,Wohngebiet,POINT (392699.560 5819341.461),0.531,0.442,"[[88, 123], [291, 62]]"
4,MC 174,Verkehr,POINT (396182.715 5819313.198),0.936,0.694,"[[91, 102], [280, 290]]"


In [21]:
import pandas as pd

# reload the site features saved by the feature-extraction step above
# NOTE(review): border_values comes back from the CSV as text, which is why it is parsed with ast.literal_eval further down
sites = pd.read_csv('data/buildings_height/buffer_values/buffer_values.csv')

### combine meteorological dataset with street valley analysis
source features: 
1. wind_degree from `df_weather_cleaned.csv`
2. border_values: angles without a surrounding obstacle, which allow free wind access

In [22]:
# encode wind degree
import pandas as pd
import ast

# hourly pollution data and weather data, each without the index column stored in the csv
pollution_pd = pd.read_csv('data/pollution_data/hourly/hourly_emission_impute.csv').drop(columns='Unnamed: 0')
weather_df = pd.read_csv('data/weather/df_weather_cleaned.csv').drop(columns='Unnamed: 0')

# keep only timestamps that are present in both tables
main_df = pollution_pd.merge(weather_df, on='MESS_DATUM', how='inner')

# canyon data: bring the site ids into the short form, e.g. 'MC 042' -> 'mc042'
sites['id'] = [site_id.lower().replace(' ', '')[:5] for site_id in sites['id']]

# drop the two excluded sites and keep only the columns needed for the wind check
sites = sites.loc[~sites['id'].isin(['mc014', 'mc085']), ['id', 'border_values']]

In [23]:
def no_building_in_wind_degree(border_values, current_wind_degree, margin=3):
    '''
    Check if no building in direction of wind degree
    ------------
    The wind direction counts as free when the wind degree itself and the degrees
    `margin` to either side all lie inside the same open window.

    Input:
        border_values: nested-list with beginning and end of open degree window;
                       a window with beginning > end runs across 360 -> 1
        current_wind_degree: int - degree of wind, expected in [1, 360]
        margin: int - degrees checked on both sides of the wind degree (default 3)
    Return: 1 if the wind direction is free, otherwise 0
    '''
    # neighbouring degrees, wrapped around so that they stay inside [1, 360]
    ahead = (current_wind_degree + margin - 1) % 360 + 1
    behind = (current_wind_degree - margin - 1) % 360 + 1
    needed_degrees = {current_wind_degree, ahead, behind}

    for open_gap in border_values:
        start, end = open_gap[0], open_gap[1]

        if start > end:
            # window across north: both border degrees belong to the window
            open_degrees = set(range(start, 361)) | set(range(1, end + 1))
        else:
            open_degrees = set(range(start, end + 1))

        if needed_degrees <= open_degrees:
            return 1

    return 0
   

In [24]:
# check if free direction for wind
# parse the open windows of every site once, instead of filtering `sites` and
# re-parsing the stored text for every single row; the first entry per id is used
border_values_by_id = {
    site.id: ast.literal_eval(site.border_values)
    for site in sites.drop_duplicates(subset='id').itertuples(index=False)
}

# look the windows up by column name rather than by position;
# an id without canyon data raises a KeyError here
main_df['free_wind'] = main_df.apply(
    lambda row: no_building_in_wind_degree(
        border_values=border_values_by_id[row['id']],
        current_wind_degree=row['wind_degree']),
    axis=1)

  border_values= ast.literal_eval(sites[sites['id'] == row['id']].iloc[0][1]),


In [26]:
# save the merged pollution/weather table together with the new free_wind column
main_df.to_csv('data/datasets/poll_metre_wind.csv')