<img src="logo.png" align="right" style="float" width="400">
<font color="#EF4123"><h1 align="right">Dude, where's my shared car?</h1></font>

**Jupyter 02 - Clima de Madrid**
--------------------------------
-----------------------------------------------------------------------
Cargamos datos del tiempo a nivel local en periodos de una semana o en las próximas 24 horas desde la web de AEMET OpenData. 

Previamente a cualquier consulta, se requiere solicitar una api-key personal a través del siguiente enlace: https://opendata.aemet.es/centrodedescargas/altaUsuario?. Por otra parte, los métodos de consulta están documentados en https://opendata.aemet.es/dist/index.html?.

Datos del Tiempo
---------------
Los métodos de consulta del API REST de AEMET se han implementado en el fichero 'functions_aemet.py'

In [1]:
import pandas as pd
import urllib.request, json 
import os

from functions_aemet import consulta_historico_todas_aemet
from functions_aemet import consulta_historico_estacion_aemet
from functions_aemet import consulta_diaria_aemet
from functions_aemet import consulta_horaria_aemet
from functions_aemet import consulta_historico_todas_aemet
from functions_aemet import consulta_historico_estacion_aemet
from functions_aemet import consulta_historico_anno_estacion_aemet

Configuraciones previas

In [2]:
plotear_en_notebook = True # en caso de False, en un html extern

Carga del api-key, que está almacenada en un CSV externo

In [3]:
# read api-key
apy_key = pd.read_csv('api-key/api-key.csv')
apikey = pd.DataFrame(apy_key, columns=['site','key']).ix[0,'key']
#print(apykey)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until


Carga de predicciones climáticas para las próximas horas

In [4]:
# configuraciones previas
localidad = "26036" #"26036", Calahorra; "28079", Madrid 
periodo = "horaria"  #"horaria"; "diaria", 1 semana
query_semana = ['uvMax','fecha','humedadRelativa.maxima','humedadRelativa.minima','sensTermica.minima','sensTermica.maxima','temperatura.maxima','temperatura.minima']
API = "prediccion/especifica/municipio"

In [5]:
datos_aemet_semana, fecha_consulta = consulta_diaria_aemet(apikey, localidad, query_semana, API)
#print(datos_aemet_semana)
#print(fecha_consulta)

prediccion_hoy_1h, prediccion_hoy_6h = consulta_horaria_aemet(apikey, localidad, API)
#print(prediccion_hoy_1h)
#print(prediccion_hoy_6h)

Carga del histórico (máximo de un mes)

In [6]:
API = "valores/climatologicos/diarios/datos"

#fechaini = fecha_consulta + "T00%3A00%3A00UTC"
fechaini = "2018-10-01T01%3A01%3A01UTC"
fechafin = "2018-11-01T01%3A01%3A01UTC"

#data = consulta_historico_todas_aemet(apikey, API, fechaini, fechafin)
#print(data)

In [7]:
API = "valores/climatologicos/diarios/datos"

# fechas
fechaini = "2018-10-01T01%3A01%3A01UTC"
#fechaini = fecha_consulta + "T00%3A00%3A00UTC"
fechafin = "2018-11-01T01%3A01%3A01UTC"

# estaciones
ciudad_universitaria = "3194U" 
aeropuerto = "3129" 
retiro = "3195" 
cuatro_vientos = "3196" 

historico_cu = consulta_historico_estacion_aemet(apikey, ciudad_universitaria, API, fechaini, fechafin)
#print(historico_cu)
historico_aero = consulta_historico_estacion_aemet(apikey, aeropuerto, API, fechaini, fechafin)
#print(historico_aero)
historico_retiro = consulta_historico_estacion_aemet(apikey, retiro, API, fechaini, fechafin)
#print(historico_retiro)
historico_4vientos = consulta_historico_estacion_aemet(apikey, cuatro_vientos, API, fechaini, fechafin)
#print(historico_4vientos)

Almacenamos el histórico de una estación en un año a especificar

*Nota:* en muchas ocasiones el API REST falla y se interrumpe la descarga

In [8]:
'''anno = "2018"
df = consulta_historico_anno_estacion_aemet(apikey, API,  retiro, anno)
columns = ['dir','fecha','horaPresMax','horaPresMin','horaracha','horatmax','horatmin','prec','presMax','presMin','racha','tmax','tmed','tmin','velmedia']
df = df[columns]
#print(df)

# almacenar fichero
filename = "Datos/climatologia/climatologia_madrid_" + anno + ".csv"
df.to_csv(filename, sep=';', encoding='utf-8', index=False)'''

'anno = "2018"\ndf = consulta_historico_anno_estacion_aemet(apikey, API,  retiro, anno)\ncolumns = [\'dir\',\'fecha\',\'horaPresMax\',\'horaPresMin\',\'horaracha\',\'horatmax\',\'horatmin\',\'prec\',\'presMax\',\'presMin\',\'racha\',\'tmax\',\'tmed\',\'tmin\',\'velmedia\']\ndf = df[columns]\n#print(df)\n\n# almacenar fichero\nfilename = "Datos/climatologia/climatologia_madrid_" + anno + ".csv"\ndf.to_csv(filename, sep=\';\', encoding=\'utf-8\', index=False)'

Almacenamos el histórico de una estación durante varios años. 

*Nota:* en ocasiones el API REST falla y se interrumpe la descarga; se aconseja utilizar el siguiente chunk para tener un fichero unificado con datos de la década.                                                                

In [9]:
'''annos = ["2010","2011","2012","2013","2014","2015","2016","2017","2018"]

df = pd.DataFrame()
for anno in annos:
    print(anno)
    df_aux = consulta_historico_anno_estacion_aemet(apikey, API,  retiro, anno)
    #columns = ['dir','fecha','horaPresMax','horaPresMin','horaracha','horatmax','horatmin','nombre','prec','presMax','presMin','racha','tmax','tmed','tmin','velmedia']
    #df_aux = df_aux[columns]
    df = df.append(df_aux, ignore_index=True)

# filtramos columnas
columns = ['dir','fecha','horaPresMax','horaPresMin','horaracha','horatmax','horatmin','nombre','prec','presMax','presMin','racha','tmax','tmed','tmin','velmedia']
df = df[columns]
    
# almacenar fichero
filename = "Datos/climatologia/climatologia_madrid.csv"
df.to_csv(filename, sep=';', encoding='utf-8', index=False)'''

'annos = ["2010","2011","2012","2013","2014","2015","2016","2017","2018"]\n\ndf = pd.DataFrame()\nfor anno in annos:\n    print(anno)\n    df_aux = consulta_historico_anno_estacion_aemet(apikey, API,  retiro, anno)\n    #columns = [\'dir\',\'fecha\',\'horaPresMax\',\'horaPresMin\',\'horaracha\',\'horatmax\',\'horatmin\',\'nombre\',\'prec\',\'presMax\',\'presMin\',\'racha\',\'tmax\',\'tmed\',\'tmin\',\'velmedia\']\n    #df_aux = df_aux[columns]\n    df = df.append(df_aux, ignore_index=True)\n\n# filtramos columnas\ncolumns = [\'dir\',\'fecha\',\'horaPresMax\',\'horaPresMin\',\'horaracha\',\'horatmax\',\'horatmin\',\'nombre\',\'prec\',\'presMax\',\'presMin\',\'racha\',\'tmax\',\'tmed\',\'tmin\',\'velmedia\']\ndf = df[columns]\n    \n# almacenar fichero\nfilename = "Datos/climatologia/climatologia_madrid.csv"\ndf.to_csv(filename, sep=\';\', encoding=\'utf-8\', index=False)'

Unimos los ficheros anuales en un único CSV

In [10]:
file_pattern = "climatologia_madrid_"
file_extension = "csv"
path = "./Datos/climatologia/"
filename = path + "climatologia_madrid.csv"

df_clima_madrid = pd.DataFrame()
print('Ficheros detectados:')
for file in os.listdir(path):
    if (file.endswith(file_extension) & file.startswith(file_pattern)):
        file_path = os.path.join(path, file)
        print(file_path)
        new_df = pd.read_csv(file_path, decimal = ',', sep=';')
        #df_clima_madrid = df_clima_madrid[columns]
        df_clima_madrid = df_clima_madrid.append(new_df)

#print('\nDataframe final:')
print(df_clima_madrid)
df_clima_madrid.to_csv(filename,  sep=';', encoding='utf-8', index=False)      

Ficheros detectados:
./Datos/climatologia/climatologia_madrid_1980.csv
./Datos/climatologia/climatologia_madrid_1981.csv
./Datos/climatologia/climatologia_madrid_1982.csv
./Datos/climatologia/climatologia_madrid_1983.csv
./Datos/climatologia/climatologia_madrid_1984.csv
./Datos/climatologia/climatologia_madrid_1985.csv
./Datos/climatologia/climatologia_madrid_1986.csv
./Datos/climatologia/climatologia_madrid_1987.csv
./Datos/climatologia/climatologia_madrid_1988.csv
./Datos/climatologia/climatologia_madrid_1989.csv
./Datos/climatologia/climatologia_madrid_1990.csv
./Datos/climatologia/climatologia_madrid_1991.csv
./Datos/climatologia/climatologia_madrid_1992.csv
./Datos/climatologia/climatologia_madrid_1993.csv
./Datos/climatologia/climatologia_madrid_1994.csv
./Datos/climatologia/climatologia_madrid_1995.csv
./Datos/climatologia/climatologia_madrid_1996.csv
./Datos/climatologia/climatologia_madrid_1997.csv
./Datos/climatologia/climatologia_madrid_1998.csv
./Datos/climatologia/climatol

Histórico: limpieza de la tabla. Genera, normaliza y elimina valores NaN

In [11]:
df_clima_madrid.loc[(df_clima_madrid['prec'] == "Ip") ] = "NaN"
df_clima_madrid.loc[(df_clima_madrid['horaPresMax'] == "Varias") ] = "NaN"
df_clima_madrid.loc[(df_clima_madrid['horaPresMin'] == "Varias") ] = "NaN"
df_clima_madrid.loc[(df_clima_madrid['horatmax'] == "Varias") ] = "NaN"
df_clima_madrid.loc[(df_clima_madrid['horatmin'] == "Varias") ] = "NaN"
df_clima_madrid.loc[(df_clima_madrid['horaracha'] == "Varias") ] = "NaN"

cols = ['presMax','presMin','racha','tmax','tmin','velmedia','dir','prec']
cols_hora = ['horaPresMax','horaPresMin']
df_clima_madrid[cols] = df_clima_madrid[cols].apply(lambda x: pd.to_numeric(x.astype(str).str.replace(',',''), errors='coerce'))
#df_clima_madrid[cols_hora] = df_clima_madrid[cols_hora].apply(lambda x: pd.to_numeric(x))

df_clima_madrid['fecha'] = pd.to_datetime(df_clima_madrid.fecha)
#df_clima_madrid = df_clima_madrid.set_index('fecha')

df_clima_madrid = df_clima_madrid.query('fecha != "NaT"')
#print(df_clima_madrid)

df_clima_madrid['prec'] = df_clima_madrid['prec']/10


In [12]:
df_clima_madrid['prec'] = df_clima_madrid['prec']/10

df_clima_madrid.tail()

Unnamed: 0,dir,fecha,horaPresMax,horaPresMin,horaracha,horatmax,horatmin,prec,presMax,presMin,racha,tmax,tmed,tmin,velmedia
346,30.0,2018-12-14,24,0,11:10,13:30,03:30,0.0,947.4,937.8,9.7,10.5,6.8,3.0,2.2
347,26.0,2018-12-15,10,24,18:40,14:50,06:30,0.02,949.5,945.5,7.2,9.0,5.8,2.7,1.4
348,28.0,2018-12-16,23,12,13:20,14:30,23:00,0.03,948.4,941.6,12.5,12.5,9.0,5.6,3.3
350,5.0,2018-12-18,0,24,01:20,21:10,08:30,0.09,950.3,944.0,7.8,8.0,5.2,2.5,0.6
355,6.0,2018-12-23,10,14,01:50,14:40,01:50,0.0,954.6,951.7,4.7,9.5,7.0,4.6,0.6


In [13]:
df_clima_madrid.describe()

Unnamed: 0,dir,prec,presMax,presMin,racha,tmax,tmin,velmedia
count,10459.0,10811.0,10734.0,10735.0,10459.0,10808.0,10808.0,10468.0
mean,16.867769,0.111973,943.318139,939.007136,8.977464,20.31423,10.271956,2.185862
std,12.114912,0.354865,5.724526,6.393408,3.490492,8.658993,6.522012,1.347841
min,0.0,0.0,916.9,905.8,0.0,0.1,-7.4,0.0
25%,5.0,0.0,939.9,935.6,6.4,12.8,5.0,1.4
50%,18.0,0.0,943.2,939.1,8.9,19.4,9.6,1.9
75%,25.0,0.0,946.8,942.9,11.1,27.9,15.8,2.8
max,99.0,5.02,967.3,962.3,28.1,40.0,25.7,12.5


Trabajamos con series temporales

In [14]:
series_tmax = pd.Series()
series_tmax = df_clima_madrid[['tmax']]
type(series_tmax)


pandas.core.frame.DataFrame

In [16]:
'''import statsmodels.api as sm  
from statsmodels.tsa.stattools import acf  
from statsmodels.tsa.stattools import pacf
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt
import numpy as np

series_tmax = pd.Series()
series_tmax = df_clima_madrid[['tmax']]
#print(series_tmax)
#print(series_tmax.loc[:, series_tmax.isnull().any()])

#series_tmax.iloc[:,:].str.replace(',', '').astype(float)

#series_clima_madrid[['prec'].head()

print("aaa" )
#print(series_tmax.loc[:, series_tmax.isnull().any()])

#Realizamos la descomposicion en componentes
decomposition = seasonal_decompose(series_tmax, freq=365)  
fig = plt.figure()  
fig = decomposition.plot()  
fig.set_size_inches(15, 8)'''

'import statsmodels.api as sm  \nfrom statsmodels.tsa.stattools import acf  \nfrom statsmodels.tsa.stattools import pacf\nfrom statsmodels.tsa.seasonal import seasonal_decompose\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nseries_tmax = pd.Series()\nseries_tmax = df_clima_madrid[[\'tmax\']]\n#print(series_tmax)\n#print(series_tmax.loc[:, series_tmax.isnull().any()])\n\n#series_tmax.iloc[:,:].str.replace(\',\', \'\').astype(float)\n\n#series_clima_madrid[[\'prec\'].head()\n\nprint("aaa" )\n#print(series_tmax.loc[:, series_tmax.isnull().any()])\n\n#Realizamos la descomposicion en componentes\ndecomposition = seasonal_decompose(series_tmax, freq=365)  \nfig = plt.figure()  \nfig = decomposition.plot()  \nfig.set_size_inches(15, 8)'

In [25]:
#series_clima_madrid.head()

In [26]:
#series_clima_madrid = series_clima_madrid.query('fecha != "NaT"')

In [27]:
#series_clima_madrid.head()

Bokeh
------

Usaremos el paquete *Bokeh* para tener representaciones interactivas

In [20]:
from bokeh.palettes import viridis,inferno,grey, cividis, plasma, magma
from bokeh.io import output_notebook, output_file, show, reset_output
from bokeh.models import GeoJSONDataSource, HoverTool, CategoricalColorMapper, LinearColorMapper, ColorBar, LinearAxis, Range1d, ColumnDataSource, formatters
from bokeh.models.widgets import CheckboxGroup
from bokeh.models.ranges import Range1d
from bokeh.models.formatters import DatetimeTickFormatter
from bokeh.plotting import figure, show
from bokeh.layouts import row

In [21]:
ds = ColumnDataSource(prediccion_hoy_1h)
#print(ds.data)

# create a plot and style its properties
prediccion_hoy_1 = figure(plot_width=500, plot_height=500)
prediccion_hoy_1.title.text = "Madrid: predicción para el " + fecha_consulta
prediccion_hoy_1.x_range = Range1d(7, 23, bounds=(0, None))
prediccion_hoy_1.y_range = Range1d(0, 20, bounds=(0, None))
prediccion_hoy_1.xaxis.axis_label = 'Hora'
prediccion_hoy_1.yaxis.axis_label = 'Temperatura'
glyph_1 = prediccion_hoy_1.line('periodo','temperatura',source=ds, legend='Temperatura', color='#dc3700', line_width=3, alpha = 0.8)
glyph_2 = prediccion_hoy_1.line('periodo','viento',source=ds, legend='Viento', color='#a5dc00', line_width=3, alpha = 0.8, y_range_name="extra_y")
prediccion_hoy_1.add_tools(HoverTool(renderers=[glyph_1], tooltips=[("Hora","@periodo"),("Temperatura","@temperatura")],mode='vline'))
prediccion_hoy_1.add_tools(HoverTool(renderers=[glyph_2], tooltips=[("Hora","@periodo"),("Viento","@viento")],mode='vline'))
prediccion_hoy_1.extra_y_ranges = {"extra_y": Range1d(start=0, end=60)}
prediccion_hoy_1.add_layout(LinearAxis(y_range_name="extra_y", axis_label='Viento'), 'right')

prediccion_hoy_2 = figure(plot_width=500, plot_height=500)
prediccion_hoy_2.title.text = "Madrid: predicción para el " + fecha_consulta
prediccion_hoy_2.x_range = Range1d(7, 23, bounds=(0, None))
prediccion_hoy_2.y_range = Range1d(0, 20, bounds=(0, None))
prediccion_hoy_2.xaxis.axis_label = 'Hora'
prediccion_hoy_2.yaxis.axis_label = 'Precipitacion'
#prediccion_hoy_2.add_layout(LinearAxis(y_range_name="foo2", axis_label='Humedad'), 'right')
glyph_3 = prediccion_hoy_2.line('periodo','precipitacion',source=ds, legend='Precipitacion', color='#00a5dc', line_width=3, alpha = 0.8)
glyph_4 = prediccion_hoy_2.line('periodo','humedad_relativa',source=ds, legend='Humedad', color='#00dc37', line_width=3, alpha = 0.8, y_range_name="extra_y")
prediccion_hoy_2.add_tools(HoverTool(renderers=[glyph_3], tooltips=[("Hora","@periodo"),("Precipitación","@precipitacion")],mode='vline'))
prediccion_hoy_2.add_tools(HoverTool(renderers=[glyph_4], tooltips=[("Hora","@periodo"),("Humedad relativa","@humedad_relativa")],mode='vline'))
prediccion_hoy_2.extra_y_ranges = {"extra_y": Range1d(start=0, end=100)}
prediccion_hoy_2.add_layout(LinearAxis(y_range_name="extra_y", axis_label='Humedad Relativa'), 'right')

# mostrar en notebook o en html
if (plotear_en_notebook):
    output_notebook()
else:
    reset_output()
    output_file("output/prediccion_el_tiempo.html")
    
show(row(prediccion_hoy_1, prediccion_hoy_2))

**Representa histórico 1980-2018**

In [22]:
color_mapperLinear = LinearColorMapper(viridis(256))
color_bar = ColorBar(color_mapper=color_mapperLinear,
                     location=(0, 0),
                     label_standoff=12)

In [23]:
df_clima_madrid.tail()

Unnamed: 0,dir,fecha,horaPresMax,horaPresMin,horaracha,horatmax,horatmin,prec,presMax,presMin,racha,tmax,tmed,tmin,velmedia
346,30.0,2018-12-14,24,0,11:10,13:30,03:30,0.0,947.4,937.8,9.7,10.5,6.8,3.0,2.2
347,26.0,2018-12-15,10,24,18:40,14:50,06:30,0.02,949.5,945.5,7.2,9.0,5.8,2.7,1.4
348,28.0,2018-12-16,23,12,13:20,14:30,23:00,0.03,948.4,941.6,12.5,12.5,9.0,5.6,3.3
350,5.0,2018-12-18,0,24,01:20,21:10,08:30,0.09,950.3,944.0,7.8,8.0,5.2,2.5,0.6
355,6.0,2018-12-23,10,14,01:50,14:40,01:50,0.0,954.6,951.7,4.7,9.5,7.0,4.6,0.6


In [24]:
#df_clima_madrid = pd.read_csv("Datos/climatologia/climatologia_madrid.csv")
#print(df_clima_madrid)

df_clima_madrid['fecha'] = pd.to_datetime(df_clima_madrid['fecha'])
ds = ColumnDataSource(df_clima_madrid)
#print(ds.data)

# create a plot and style its properties
clima_decada = figure(plot_width=1000, plot_height=500, x_axis_type="datetime", tools=['xwheel_zoom','pan'], active_scroll='xwheel_zoom')
clima_decada.title.text = "Madrid: histórico de climatología"
clima_decada.xaxis.axis_label = 'Fecha'
clima_decada.yaxis.axis_label = 'Temperatura'

clima_decada.xaxis.formatter=DatetimeTickFormatter(days=['%a %b %Y'], months=["%b %Y"], years=["%Y"])
                  
#glyph_1 = clima_decada.line('fecha','tmax',source=ds, legend='Temperatura Máxima', color='#dc3700', line_width=1, alpha = 0.8)
#glyph_2 = clima_decada.line('fecha','tmin',source=ds, legend='Temperatura Mínima', color='#a5dc00', line_width=1, alpha = 0.8)
#glyph_3 = clima_decada.line('fecha','tmed',source=ds, legend='Temperatura Media', color='#00dc37', line_width=1, alpha = 0.8)
#glyph_4 = clima_decada.line('fecha','prec',source=ds, legend='Precipitación', color='#00a5dc', line_width=1, alpha = 0.8)
glyph_4 = clima_decada.vbar(x='fecha',top='prec',width=0.9,bottom = 0.0, source=ds, legend='Precipitación', color='#00a5dc')


#clima_decada.add_tools(HoverTool(renderers=[glyph_1], tooltips=[("Temperatura","@tmax")],mode='vline'))
#clima_decada.add_tools(HoverTool(renderers=[glyph_2], tooltips=[("Temperatura","@tmin")],mode='vline'))
#clima_decada.add_tools(HoverTool(renderers=[glyph_3], tooltips=[("Temperatura","@tmed")],mode='vline'))
clima_decada.add_tools(HoverTool(renderers=[glyph_4], tooltips=[("Precipitacion","@prec")],mode='mouse'))


# mostrar en notebook o en html
if (plotear_en_notebook):
    output_notebook()
else:
    reset_output()
    output_file("output/clima_decada.html")
    
show(clima_decada)