In [2]:
import pandas as pd
import numpy as np

### 1. Open Weather Dataset

In [3]:
df_weather = pd.read_csv('../climate2019/meteo19.csv', delimiter = ";")
df_weather.head(3)

Unnamed: 0,PROVINCIA,MUNICIPIO,ESTACION,MAGNITUD,PUNTO_MUESTREO,ANO,MES,D01,V01,D02,...,D27,V27,D28,V28,D29,V29,D30,V30,D31,V31
0,28,79,102,81,28079102_81_98,2019,1,0.66,V,1.16,...,2.57,V,2.93,V,3.23,V,3.18,V,4.72,V
1,28,79,102,81,28079102_81_98,2019,2,4.32,V,2.98,...,1.36,V,0.97,V,0.0,N,0.0,N,0.0,N
2,28,79,102,81,28079102_81_98,2019,3,1.57,V,1.13,...,1.48,V,1.69,V,2.89,V,1.99,V,1.72,V


In [4]:
df_weather.dtypes

PROVINCIA           int64
MUNICIPIO           int64
ESTACION            int64
MAGNITUD            int64
PUNTO_MUESTREO     object
                   ...   
V29                object
D30               float64
V30                object
D31               float64
V31                object
Length: 69, dtype: object

### 2. Open Monitoring Stations Dataset

In [5]:
df_stations = pd.read_csv('../monitoring_stations/stations.csv', delimiter = ";", encoding = "ISO-8859-1")
# extract columns needed 
df_stations_locations = df_stations[['CÓDIGO_CORTO', 'LONGITUD', 'LATITUD']]
# df_stations_locations = df_stations_locations.drop(columns = [1])
stations_locations = df_stations_locations.loc[: , "CÓDIGO_CORTO":"LATITUD"]
stations = stations_locations.rename(columns={'CÓDIGO_CORTO':'station', 'LONGITUD':'longitude', 'LATITUD':'latitude'})
stations.head()

Unnamed: 0,station,longitude,latitude
0,4.0,-3.712257,40.423882
1,8.0,-3.682316,40.421553
2,16.0,-3.639242,40.440046
3,18.0,-3.731836,40.394782
4,24.0,-3.747345,40.419358


### 3. Extract Temperature

In [6]:
# extract magnitude 83 which corresponds to temperature
magnitude_83 = df_weather.loc[df_weather['MAGNITUD'] == 83]
# replace all zeros with nan (this is useful to calculate the mean later)
magnitude_83 = magnitude_83.replace(0, np.NaN)
magnitude_83.head(3)

Unnamed: 0,PROVINCIA,MUNICIPIO,ESTACION,MAGNITUD,PUNTO_MUESTREO,ANO,MES,D01,V01,D02,...,D27,V27,D28,V28,D29,V29,D30,V30,D31,V31
24,28,79,102,83,28079102_83_98,2019,1,4.8,V,4.2,...,5.2,V,5.7,V,4.9,V,3.9,V,6.5,V
25,28,79,102,83,28079102_83_98,2019,2,3.5,V,1.9,...,10.7,V,10.1,V,,N,,N,,N
26,28,79,102,83,28079102_83_98,2019,3,10.4,V,11.1,...,9.2,V,9.5,V,8.8,V,9.0,V,7.3,V


In [7]:
# remove validation columns (with V as first letter)
magnitude_83_noVColumns = magnitude_83.loc[:, ~magnitude_83.columns.str.startswith('V')]
# drop columns not needed 
magnitude_83_reduced = magnitude_83_noVColumns.drop(columns=['PROVINCIA', 'MUNICIPIO','PUNTO_MUESTREO','MES'])
magnitude_83_reduced.head(3)

Unnamed: 0,ESTACION,MAGNITUD,ANO,D01,D02,D03,D04,D05,D06,D07,...,D22,D23,D24,D25,D26,D27,D28,D29,D30,D31
24,102,83,2019,4.8,4.2,4.7,4.1,3.8,5.3,7.0,...,1.6,6.9,8.6,7.8,5.2,5.2,5.7,4.9,3.9,6.5
25,102,83,2019,3.5,1.9,1.7,2.4,6.0,6.7,6.7,...,9.0,8.9,10.4,10.3,10.5,10.7,10.1,,,
26,102,83,2019,10.4,11.1,10.3,8.6,8.4,7.0,4.9,...,8.3,10.6,11.9,11.7,9.4,9.2,9.5,8.8,9.0,7.3


In [8]:
# extract only days from the dataframe
days = magnitude_83_reduced.loc[: , "D01":"D31"]
# calculate new column with mean aggregating all columns extracted above
magnitude_83_reduced['temperature'] = days.mean(axis=1)
# drop all columns with day values (days)
magnitude_83_dropColumns = magnitude_83_reduced.drop(columns = days)
magnitude_83_dropColumns.head(3)

Unnamed: 0,ESTACION,MAGNITUD,ANO,temperature
24,102,83,2019,4.254839
25,102,83,2019,6.635714
26,102,83,2019,9.258065


In [9]:
temperature_2019 = magnitude_83_dropColumns.groupby('ESTACION') \
                .agg({'temperature':'mean'}) \
                .rename(columns={'temperature':'temperature_mean2019'}) \
                .reset_index()
              
temperature_2019 = temperature_2019.rename(columns={'ESTACION':'monitoring_station'})
temperature_2019

Unnamed: 0,monitoring_station,temperature_mean2019
0,4,16.112382
1,8,16.322819
2,18,16.78297
3,24,14.861672
4,35,16.940645
5,36,15.7074
6,38,16.31278
7,54,16.167682
8,56,16.456592
9,58,14.57736


In [31]:
temp_mean_2019 = temperature_2019.merge(stations, left_on='monitoring_station', right_on='station')
temp_mean_2019 = temp_mean_2019.drop(columns = ['station'])
temp_mean_2019 = temp_mean_2019.rename(columns={'monitoring_station': 'station','temperature_mean2019': 'temp'})
temp_mean_2019.head(30)

Unnamed: 0,station,temp,longitude,latitude
0,4,16.112382,-3.712257,40.423882
1,8,16.322819,-3.682316,40.421553
2,18,16.78297,-3.731836,40.394782
3,24,14.861672,-3.747345,40.419358
4,35,16.940645,-3.703166,40.419209
5,36,15.7074,-3.64531,40.407952
6,38,16.31278,-3.70713,40.445544
7,54,16.167682,-3.612139,40.373012
8,56,16.456592,-3.718768,40.385034
9,58,14.57736,-3.77461,40.51807


In [33]:
temp_mean_2019.to_csv('../climate2019/tempdata.csv', index=False)

### 4. Extract Precipitation

In [10]:
# extract magnitude 89 which corresponds to precipitation
magnitude_89 = df_weather.loc[df_weather['MAGNITUD'] == 89]
# replace all zeros with nan (this is useful to calculate the mean later)
magnitude_89 = magnitude_89.replace(0, np.NaN)
magnitude_89.head(3)

Unnamed: 0,PROVINCIA,MUNICIPIO,ESTACION,MAGNITUD,PUNTO_MUESTREO,ANO,MES,D01,V01,D02,...,D27,V27,D28,V28,D29,V29,D30,V30,D31,V31
72,28,79,102,89,28079102_89_98,2019,1,,V,,...,,V,,V,,V,3.7,V,2.1,V
73,28,79,102,89,28079102_89_98,2019,2,9.3,V,,...,,V,,V,,N,,N,,N
74,28,79,102,89,28079102_89_98,2019,3,,V,,...,,V,,V,,V,,V,1.0,V


In [13]:
# remove validation columns (with V as first letter)
magnitude_89_noVColumns = magnitude_89.loc[:, ~magnitude_89.columns.str.startswith('V')]
# drop columns not needed 
magnitude_89_reduced = magnitude_89_noVColumns.drop(columns=['PROVINCIA', 'MUNICIPIO','PUNTO_MUESTREO','MES'])
magnitude_89_reduced.head(3)

Unnamed: 0,ESTACION,MAGNITUD,ANO,D01,D02,D03,D04,D05,D06,D07,...,D22,D23,D24,D25,D26,D27,D28,D29,D30,D31
72,102,89,2019,,,,,,,,...,0.1,0.1,,,,,,,3.7,2.1
73,102,89,2019,9.3,,,,,,,...,,,,,,,,,,
74,102,89,2019,,,,,,9.7,,...,,,,,,,,,,1.0


In [14]:
# extract only days from the dataframe
days = magnitude_89_reduced.loc[: , "D01":"D31"]
# calculate new column with mean aggregating all columns extracted above
magnitude_89_reduced['precipitation'] = days.mean(axis=1)
# drop all columns with day values (days)
magnitude_89_dropColumns = magnitude_89_reduced.drop(columns = days)
magnitude_89_dropColumns.head(3)

Unnamed: 0,ESTACION,MAGNITUD,ANO,precipitation
72,102,89,2019,2.1
73,102,89,2019,9.3
74,102,89,2019,5.35


In [15]:
precipitation_2019 = magnitude_89_dropColumns.groupby('ESTACION') \
                .agg({'precipitation':'mean'}) \
                .rename(columns={'precipitation':'precipitation_mean2019'}) \
                .reset_index()
              
precipitation_2019 = precipitation_2019.rename(columns={'ESTACION':'monitoring_station'})
precipitation_2019

Unnamed: 0,monitoring_station,precipitation_mean2019
0,24,4.136833
1,39,9.560527
2,54,4.212363
3,56,4.46816
4,59,3.000949
5,102,4.787722
6,103,3.888507
7,104,6.14
8,106,3.695991
9,107,2.981637


In [16]:
prec_mean_2019 = precipitation_2019.merge(stations, left_on='monitoring_station', right_on='station')
prec_mean_2019 = prec_mean_2019.drop(columns = ['station'])
prec_mean_2019 = prec_mean_2019.rename(columns={'monitoring_station': 'station','precipitation_mean2019': 'rainfall'})
prec_mean_2019.head(30)

Unnamed: 0,station,rainfall,longitude,latitude
0,24,4.136833,-3.747345,40.419358
1,39,9.560527,-3.711536,40.478232
2,54,4.212363,-3.612139,40.373012
3,56,4.46816,-3.718768,40.385034
4,59,3.000949,-3.616341,40.460726
5,102,4.787722,-3.635637,40.399793
6,103,3.888507,-3.709525,40.350628
7,104,6.14,-3.679722,40.365833
8,106,3.695991,-3.74,40.442222
9,107,2.981637,-3.656667,40.462778


In [17]:
prec_mean_2019.to_csv('../climate2019/rainfalldata.csv', index=False)

### 5. Extract Solar Radiation

In [18]:
# extract magnitude 88 which corresponds to solar radiation
magnitude_88 = df_weather.loc[df_weather['MAGNITUD'] == 88]
# replace all zeros with nan (this is useful to calculate the mean later)
magnitude_88 = magnitude_88.replace(0, np.NaN)
# remove validation columns (with V as first letter)
magnitude_88_noVColumns = magnitude_88.loc[:, ~magnitude_88.columns.str.startswith('V')]
# drop columns not needed 
magnitude_88_reduced = magnitude_88_noVColumns.drop(columns=['PROVINCIA', 'MUNICIPIO','PUNTO_MUESTREO','MES'])
magnitude_88_reduced.head(3)

Unnamed: 0,ESTACION,MAGNITUD,ANO,D01,D02,D03,D04,D05,D06,D07,...,D22,D23,D24,D25,D26,D27,D28,D29,D30,D31
60,102,88,2019,107.0,108.0,109.0,107.0,108.0,111.0,110.0,...,66.0,114.0,65.0,134.0,128.0,129.0,131.0,72.0,117.0,73.0
61,102,88,2019,86.0,126.0,154.0,138.0,145.0,148.0,147.0,...,181.0,173.0,188.0,188.0,193.0,195.0,164.0,,,
62,102,88,2019,196.0,201.0,198.0,173.0,162.0,85.0,210.0,...,251.0,207.0,249.0,261.0,266.0,264.0,268.0,274.0,268.0,89.0


In [19]:
# extract only days from the dataframe
days = magnitude_88_reduced.loc[: , "D01":"D31"]
# calculate new column with mean aggregating all columns extracted above
magnitude_88_reduced['solar_radiation'] = days.mean(axis=1)
# drop all columns with day values (days)
magnitude_88_dropColumns = magnitude_88_reduced.drop(columns = days)
magnitude_88_dropColumns.head(3)

Unnamed: 0,ESTACION,MAGNITUD,ANO,solar_radiation
60,102,88,2019,103.741935
61,102,88,2019,158.75
62,102,88,2019,220.83871


In [20]:
solar_radiation_2019 = magnitude_88_dropColumns.groupby('ESTACION') \
                .agg({'solar_radiation':'mean'}) \
                .rename(columns={'solar_radiation':'solar_radiation_mean2019'}) \
                .reset_index()
              
solar_radiation_2019 = solar_radiation_2019.rename(columns={'ESTACION':'monitoring_station'})
solar_radiation_2019

Unnamed: 0,monitoring_station,solar_radiation_mean2019
0,24,201.690188
1,54,212.532405
2,59,194.933351
3,102,208.047625
4,103,211.051523
5,104,181.432565
6,106,197.378591
7,107,203.26791
8,108,197.732085


In [21]:
solar_mean_2019 = solar_radiation_2019.merge(stations, left_on='monitoring_station', right_on='station')
solar_mean_2019 = solar_mean_2019.drop(columns = ['station'])
solar_mean_2019 = solar_mean_2019.rename(columns={'monitoring_station': 'station','solar_radiation_mean2019': 'rainfall'})
solar_mean_2019.head(30)

Unnamed: 0,station,rainfall,longitude,latitude
0,24,201.690188,-3.747345,40.419358
1,54,212.532405,-3.612139,40.373012
2,59,194.933351,-3.616341,40.460726
3,102,208.047625,-3.635637,40.399793
4,103,211.051523,-3.709525,40.350628
5,104,181.432565,-3.679722,40.365833
6,106,197.378591,-3.74,40.442222
7,107,203.26791,-3.656667,40.462778
8,108,197.732085,-3.717881,40.476633


In [22]:
solar_mean_2019.to_csv('../climate2019/solardata.csv', index=False)