In [1]:
import pandas as pd
import numpy as np

### 1. Open Noise Pollution Dataset

In [4]:
# Load the 2019 noise measurements; the file uses ';' as its field separator.
df_noise = pd.read_csv(
    '../noise2019/noise2019.csv',
    sep=";",
)
df_noise.head()

Unnamed: 0,station,year,month,day,period,LAEQ,LAS01,LAS10,LAS50,LAS90,LAS99
0,2,2019,12,31,D,74.7,84.1,79.9,68.8,63.2,59.3
1,2,2019,12,31,E,77.1,85.7,82.4,67.8,61.5,58.8
2,2,2019,12,31,N,67.1,72.7,69.8,65.2,59.5,56.1
3,2,2019,12,31,T,74.0,84.4,78.0,67.3,61.0,57.0
4,3,2019,12,31,D,61.4,71.4,64.2,57.7,53.3,49.8


In [3]:
# Inspect the dtype pandas inferred for each column of the noise table.
df_noise.dtypes

station      int64
year         int64
month        int64
day          int64
period      object
LAEQ       float64
LAS01      float64
LAS10      float64
LAS50      float64
LAS90      float64
LAS99      float64
dtype: object

### 2. Open Monitoring Stations Dataset

In [8]:
# Load the monitoring-station catalogue. The file is ';'-separated and read as
# ISO-8859-1 (its headers, e.g. 'Nº' and 'Dirección', contain non-ASCII characters).
df_stations = pd.read_csv(
    '../stations/stations.csv',
    sep=";",
    encoding="ISO-8859-1",
)
df_stations.head(3)

Unnamed: 0,Nº,Nombre,COD_VIA,VIA_CLASE,VIA_PAR,VIA_NOMBRE,Dirección,Longitud_gms,Latitud_gms,LATITUD_ED50,LONGITUD_ED50,Alt.(m),Fecha alta,Coordenada_X_ETRS89,Coordenada_Y_ETRS89,LONGITUD_WGS84,LATITUD_WGS84
0,1,Pº Recoletos,633005,PASEO,DE,RECOLETOS,Frente calle Almirante,3º41'27'' Oº,40º25'24'' N,40.423333,-3.690833,648,40609,4413028676,4474895436,-3.691926,40.42262
1,2,Carlos V,264800,PLAZA,DEL,EMPERADOR CARLOS V,Pza del Emperador Carlos V/ Infanta Isabel,3º41'25 Oº,"40º24'36"" N",40.41,-3.690278,629,36130,441328186,4473395505,-3.69149,40.409109
2,3,Plaza del Carmen,145800,PLAZA,DEL,CARMEN,Plaza del Carmen,"3º42'17"" Oº","40º25'16"" N",40.421111,-3.704722,657,36465,4403463619,4474524357,-3.703166,40.419209


In [9]:
# Keep only the station number and its WGS84 longitude/latitude columns.
df_stations_locations = df_stations.loc[:, ['Nº', 'LONGITUD_WGS84', 'LATITUD_WGS84']]
df_stations_locations.head()

Unnamed: 0,Nº,LONGITUD_WGS84,LATITUD_WGS84
0,1,-3.691926,40.42262
1,2,-3.69149,40.409109
2,3,-3.703166,40.419209
3,4,-3.712257,40.423882
4,5,-3.711536,40.478232


In [23]:
# Give the selected columns short English names for the later merge and export.
stations = df_stations_locations.rename(
    {
        'Nº': 'monitoring_station',
        'LONGITUD_WGS84': 'longitude',
        'LATITUD_WGS84': 'latitude',
    },
    axis='columns',
)
stations.head()

Unnamed: 0,monitoring_station,longitude,latitude
0,1,-3.691926,40.42262
1,2,-3.69149,40.409109
2,3,-3.703166,40.419209
3,4,-3.712257,40.423882
4,5,-3.711536,40.478232


### 3. Extract Noise Pollution

In [15]:
# Keep only the rows whose period code is 'T'.
# NOTE(review): 'T' presumably denotes the whole-day aggregate (next to D/E/N) — confirm
# against the dataset documentation.
laeq_onlyT = df_noise.loc[df_noise['period'] == 'T']
# Replace all zeros with NaN so they are skipped when the mean is calculated later.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0 and
# raises AttributeError there.
laeq_onlyT = laeq_onlyT.replace(0, np.nan)
laeq_onlyT.head()

Unnamed: 0,station,year,month,day,period,LAEQ,LAS01,LAS10,LAS50,LAS90,LAS99
3,2,2019,12,31,T,74.0,84.4,78.0,67.3,61.0,57.0
7,3,2019,12,31,T,61.0,71.0,63.8,57.1,52.4,47.3
11,5,2019,12,31,T,59.9,66.9,62.5,57.2,45.7,41.6
15,6,2019,12,31,T,69.0,76.4,72.8,66.0,59.0,51.3
19,8,2019,12,31,T,64.5,71.9,67.7,62.2,55.3,47.8


In [20]:
# Discard the date parts, the period code and the LAS percentile columns;
# only the station id and LAEQ are used from here on.
laeq_onlyT_reduced = laeq_onlyT.drop(
    [
        'year', 'month', 'day', 'period',
        'LAS01', 'LAS10', 'LAS50', 'LAS90', 'LAS99',
    ],
    axis='columns',
)
laeq_onlyT_reduced.head()

Unnamed: 0,station,LAEQ
3,2,74.0
7,3,61.0
11,5,59.9
15,6,69.0
19,8,64.5


In [22]:
# Average LAEQ per station (NaN values are skipped by mean) and return a flat
# two-column frame: station, noise_pollution_mean2019.
# NOTE(review): this is an arithmetic mean; if LAEQ is expressed in decibels an
# energetic (logarithmic) average may be more appropriate — confirm.
noise_2019 = (
    laeq_onlyT_reduced
    .groupby('station')['LAEQ']
    .mean()
    .rename('noise_pollution_mean2019')
    .reset_index()
)

noise_2019.head()

Unnamed: 0,station,noise_pollution_mean2019
0,1,67.019697
1,2,68.774576
2,3,62.006268
3,4,63.18284
4,5,60.166854


In [29]:
# Attach coordinates to each station's mean noise level. merge() defaults to an
# inner join, so only stations present in both tables are kept. The duplicated
# left-hand key is then dropped and the remaining columns get their final names.
noise_mean_2019 = (
    noise_2019
    .merge(stations, left_on='station', right_on='monitoring_station')
    .drop(columns=['station'])
    .rename(columns={
        'monitoring_station': 'station',
        'noise_pollution_mean2019': 'noise',
    })
)
noise_mean_2019.head(30)

Unnamed: 0,noise,station,longitude,latitude
0,67.019697,1,-3.691926,40.42262
1,68.774576,2,-3.69149,40.409109
2,62.006268,3,-3.703166,40.419209
3,63.18284,4,-3.712257,40.423882
4,60.166854,5,-3.711536,40.478232
5,69.953314,6,-3.690785,40.437568
6,65.741617,8,-3.682316,40.421553
7,63.996726,10,-3.70713,40.445544
8,66.732394,11,-3.677349,40.451473
9,62.774085,12,-3.668571,40.428811


In [30]:
# Persist the per-station table; index=False leaves the row index out of the file.
noise_mean_2019.to_csv(
    '../noise2019/noisedata.csv',
    index=False,
)