In [1]:
# import data
import pandas as pd
import geopandas as gpd

# Monitoring sites that are removed from every per-site layer before joining.
EXCLUDED_SITES = ('mc014', 'mc085')


def _prepare_site_layer(layer):
    '''Normalise the ``id`` column of a per-site table and drop excluded sites.

    Ids are lower-cased, stripped of spaces and cut to their first five
    characters so that they match the ``id`` values used in ``main``.
    Returns a new frame; the input is not modified.
    '''
    site_ids = layer['id'].str.lower().str.replace(' ', '', regex=False).str[:5]
    layer = layer.assign(id=site_ids)
    return layer[~layer['id'].isin(EXCLUDED_SITES)]


# Combined (spatial-temporal) dataset with NO2, time id, meteorological and
# wind-access features. This is the base table everything else is joined onto.
main = pd.read_csv('data/datasets/poll_metre_wind.csv').drop(columns='Unnamed: 0')

# Street canyon (spatial only)
canyon = pd.read_csv('data/datasets/canyon.csv').drop(
    columns=['Unnamed: 0', 'stattyp', 'geometry', 'border_values']
)
canyon = _prepare_site_layer(canyon)
main = pd.merge(main, canyon, on='id', how='outer')

# Greenery
greenery_gpd = gpd.read_file('data/green_volume/buffer_values/buffer_values.shp').drop(
    columns=['geometry', 'stattyp']
)
greenery_gpd = _prepare_site_layer(greenery_gpd)
main = pd.merge(main, greenery_gpd, on='id', how='outer')

# Traffic
traffic_gpd = gpd.read_file('data/traffic/buffer_values/sites_traffic.shp').drop(
    columns=['geometry', 'stattyp']
)
traffic_gpd = _prepare_site_layer(traffic_gpd)
main = pd.merge(main, traffic_gpd, on='id', how='outer')

# Population
popu_gpd = gpd.read_file('data/population/buffer_radius/population_per_site.shp').drop(
    columns=['geometry', 'stattyp']
)
popu_gpd = _prepare_site_layer(popu_gpd)
main = pd.merge(main, popu_gpd, on='id', how='outer')

# Time features, joined on the measurement timestamp. The inner join drops
# any row whose MESS_DATUM has no match in the date table.
date = pd.read_csv('data/datasets/date.csv').drop(columns='Unnamed: 0')
main = pd.merge(main, date, on='MESS_DATUM', how='inner')

# Leaf area index factor, also keyed on the measurement timestamp.
lai = pd.read_csv('data/datasets/lai_factor.csv').drop(columns='Unnamed: 0')
main = pd.merge(main, lai, on='MESS_DATUM', how='inner')

# The distance-to-city-centre feature is added in the following cell.

main.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 140144 entries, 0 to 140143
Data columns (total 32 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   MESS_DATUM          140144 non-null  int64  
 1   id                  140144 non-null  object 
 2   Stickstoffdioxid    140144 non-null  float64
 3   prec_mm             140144 non-null  float64
 4   prec_bool           140144 non-null  float64
 5   humidity            140144 non-null  float64
 6   temp                140144 non-null  float64
 7   radiation           140144 non-null  float64
 8   wind_degree         140144 non-null  float64
 9   wind_speed          140144 non-null  float64
 10  air_pressure        140144 non-null  float64
 11  free_wind           140144 non-null  int64  
 12  prop_intercept_200  140144 non-null  float64
 13  prop_intercept_50   140144 non-null  float64
 14  GVI_25              140144 non-null  float64
 15  GVI_50              140144 non-nul

In [2]:
import geopandas 
from shapely.geometry import Point

# Load the monitoring stations, keep only the id and geometry columns and
# reproject them to EPSG:4326 so they share a CRS with the centre point
# used by distance_center.
sites = (
    geopandas
    .read_file('data/monitoring_station/monitoring_station.shp')[['id', 'geometry']]
    .to_crs('EPSG:4326')
)
print(f'Coordinate Reference System monitoring sites: {sites.crs}')

def distance_center(point1, longitude=13.40945, latitude=52.520803):
    '''Planar distance between a site and the city-centre reference point.

    Both locations are interpreted as EPSG:4326 coordinates and reprojected
    to EPSG:25833 before the distance is measured.

    NOTE: EPSG:25833 is a metric projection, so the result is in METRES.
    Earlier documentation of this function said km; the returned value is
    unchanged, only the description has been corrected. Divide by 1000 at
    the call site if kilometres are needed.

    Parameters
    ----------
    point1 : shapely geometry
        Site location in EPSG:4326, e.g. ``Point(longitude, latitude)``.
    longitude, latitude : float, optional
        EPSG:4326 coordinates of the reference centre point. The defaults
        are the values that were previously hard-coded.

    Returns
    -------
    float
        Distance between the two projected points, in metres.
    '''
    # Reproject both points in one go instead of building two one-row
    # GeoDataFrames per call.
    projected = gpd.GeoSeries(
        [Point(longitude, latitude), point1], crs='EPSG:4326'
    ).to_crs('EPSG:25833')
    center_point, point = projected.iloc[0], projected.iloc[1]

    return center_point.distance(point)
# Distance of every monitoring site to the city-centre reference point.
sites['distance_city'] = sites['geometry'].apply(distance_center)

# Normalise ids exactly like the other per-site layers (lower-case, no spaces,
# first five characters) and drop the same excluded stations. Without this the
# outer merge below can append rows that carry only `id` and `distance_city`
# and NaN in every other column.
sites['id'] = sites['id'].apply(lambda x: x.lower().replace(' ', '')[:5])
sites = sites[(sites['id'] != 'mc014') & (sites['id'] != 'mc085')]

# Geometry is no longer needed once the distance has been derived.
sites = sites.drop(['geometry'], axis = 1)
main = pd.merge(main, sites, on = 'id', how= 'outer')


Coordinate Reference System monitoring sites: EPSG:4326
