In [82]:
from scipy.stats import gumbel_r

from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation, performance_metrics
from fbprophet.plot import plot_cross_validation_metric

from sklearn.metrics import mean_absolute_error
import numpy as np


import os
import pandas as pd
from utils import (
    ESTACIONES_XLSX,
    NCEP_NCAR_XLSX,
    NDEFM_XLSX,
    PREDICTIONS_FOLDER,
    download_file,
    DATA_FOLDER
)

import keras

from geopy.distance import geodesic
def km_geodesic(x, y):
    return geodesic(x, y).km

In [83]:
df_maximos = pd.read_excel(ESTACIONES_XLSX, sheet_name='Maximos', header=1, parse_dates=['Año hid']).rename(columns=lambda x: x.replace(' ', ''))
df_totales = pd.read_excel(ESTACIONES_XLSX, sheet_name='Anuales', header=1, parse_dates=['Año hid']).rename(columns=lambda x: x.replace(' ', ''))
df_np95 = pd.read_excel(ESTACIONES_XLSX, sheet_name='Np95', header=1, parse_dates=['Año hid']).rename(columns=lambda x: x.replace(' ', ''))

In [84]:
df_info_estaciones = pd.read_excel(ESTACIONES_XLSX, sheet_name='INFO', header=0, usecols=['Estacion', 'LAT (S), LONG (W)(º)'])
df_info_estaciones[['lat', 'lon']] = df_info_estaciones["LAT (S), LONG (W)(º)"].str.split(',', expand=True).astype(float)
df_info_estaciones.drop(columns="LAT (S), LONG (W)(º)", inplace=True)

In [85]:
df_ndefm = pd.read_excel(NDEFM_XLSX, header=0, parse_dates=['año'])
df_ndefm = df_ndefm[df_ndefm.columns[:-1]]

In [86]:
#df = df_maximos.add_suffix('_max').merge(
#    df_totales.add_suffix('_tot'),
#    left_on='Añohid_max',
#    right_on='Añohid_tot',
#).merge(
#    df_np95.add_suffix('_np95'),
#    left_on='Añohid_max',
#    right_on='Añohid_np95',
#).merge(
#    df_ndefm,
#    left_on='Añohid_max',
#    right_on='año'
#).drop(columns=['año', 'Añohid_tot', 'Añohid_np95']).set_index('Añohid_max')

In [87]:
BACKDATA = 10
FWDDATA = 1

def split_series(x, back_data=10, forward_data=1):
    Xs = []
    ys = []
    i = 0
    while True:
        try:
            x[i+back_data+forward_data]
            X_i = x[i:i+back_data]
            y_i = x[i+back_data:i+back_data+forward_data]
            Xs.append(X_i)
            ys.append(y_i)
            i += 1
        except (IndexError, KeyError):
            break
    return Xs, ys

def split_frame(x, back_data=10, forward_data=1):
    Xs = []
    ys = []
    i = 0
    while True:
        try:
            x.loc[i+back_data+forward_data]
            X_i = x[i:i+back_data].drop(columns=['ds','y']).values
            y_i = x[i+back_data:i+back_data+forward_data].y.values
            Xs.append(X_i)
            ys.append(y_i)
            i += 1
        except (IndexError, KeyError):
            break
    return Xs, ys

In [88]:
df_prophet_max = df_maximos[['Añohid', 'SanLuisTucuman']].dropna().rename(columns={'Añohid':'ds', 'SanLuisTucuman':'y'})[1:]
m = Prophet(growth='linear',
            yearly_seasonality=True,
            weekly_seasonality=False,
            daily_seasonality=False,
            seasonality_mode='multiplicative',
)
m.fit(df_prophet_max)
future = m.make_future_dataframe(periods=10, freq='YS')
fcst = m.predict(future)

fcst = fcst.merge(df_prophet_max, left_on='ds', right_on='ds', how='left')

INFO:fbprophet:n_changepoints greater than number of observations. Using 24.


In [89]:
future, train = fcst[-10:], fcst[:-10]

In [111]:
Xs, Ys = split_frame(train)

In [122]:
in_l = keras.layers.Input(shape=Xs[0].shape)

#rs = keras.layers.Reshape((BACKDATA, 1))(in_l)

l_1 = keras.layers.Bidirectional(keras.layers.LSTM(200, activation='relu', dropout=0., return_sequences=True))(in_l)
l_1 = keras.layers.BatchNormalization()(l_1)

l_2 = keras.layers.Bidirectional(keras.layers.LSTM(200, activation='relu', dropout=0., return_sequences=True))(l_1)
l_2 = keras.layers.BatchNormalization()(l_2)

out = keras.layers.LSTM(1, activation='relu', dropout=0.)(l_2)

m = keras.models.Model(inputs=[in_l], outputs=[out])

m.compile(loss='mean_absolute_error', optimizer='adam')

In [123]:
h = m.fit(np.array(Xs), np.array(Ys), batch_size=4, epochs=250, validation_split=0.2, callbacks=[keras.callbacks.EarlyStopping(patience=10)])

Train on 16 samples, validate on 5 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250


In [124]:
forecasted_values = []

for i in range(10,0,-1):
    _z = fcst[-(i+BACKDATA):-(i)].drop(columns=['y', 'ds']).values[None,...]
    _zp = m.predict(_z)[0]
    forecasted_values.append(_zp)

fcst['y'] = np.append(fcst.y[:-10], forecasted_values)

In [125]:
fcst

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat,y
0,1974-01-01,694.723305,58.995912,138.333536,694.723305,694.723305,-0.859917,-0.859917,-0.859917,-0.859917,-0.859917,-0.859917,0.0,0.0,0.0,97.31888,160.0
1,1975-01-01,689.658703,57.555595,131.696214,689.658703,689.658703,-0.861351,-0.861351,-0.861351,-0.861351,-0.861351,-0.861351,0.0,0.0,0.0,95.62027,110.0
2,1976-01-01,684.594101,66.092158,143.307414,684.594101,684.594101,-0.846397,-0.846397,-0.846397,-0.846397,-0.846397,-0.846397,0.0,0.0,0.0,105.155818,145.0
3,1977-01-01,679.515624,67.990132,145.159891,679.515624,679.515624,-0.842096,-0.842096,-0.842096,-0.842096,-0.842096,-0.842096,0.0,0.0,0.0,107.297912,96.0
4,1978-01-01,674.451022,54.184545,131.076575,674.451022,674.451022,-0.859917,-0.859917,-0.859917,-0.859917,-0.859917,-0.859917,0.0,0.0,0.0,94.479079,81.0
5,1979-01-01,669.386421,55.808404,135.309837,669.386421,669.386421,-0.861351,-0.861351,-0.861351,-0.861351,-0.861351,-0.861351,0.0,0.0,0.0,92.809545,76.0
6,1980-01-01,664.321819,62.638421,141.863905,664.321819,664.321819,-0.846397,-0.846397,-0.846397,-0.846397,-0.846397,-0.846397,0.0,0.0,0.0,102.041931,76.0
7,1981-01-01,659.243342,62.458517,142.356719,659.243342,659.243342,-0.842096,-0.842096,-0.842096,-0.842096,-0.842096,-0.842096,0.0,0.0,0.0,104.096847,107.0
8,1982-01-01,654.17874,54.148278,130.026682,654.17874,654.17874,-0.859917,-0.859917,-0.859917,-0.859917,-0.859917,-0.859917,0.0,0.0,0.0,91.639278,62.0
9,1983-01-01,649.114139,51.363026,129.469509,649.114139,649.114139,-0.861351,-0.861351,-0.861351,-0.861351,-0.861351,-0.861351,0.0,0.0,0.0,89.99882,122.0


In [126]:
pd.DataFrame(forecasted_values).to_csv(os.path.join(PREDICTIONS_FOLDER, 'lstm.csv'), index=True, header=False)