In [43]:
# Import modules and packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime

from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

%matplotlib inline

In [44]:
# Importing Training Set
dataset_train = pd.read_csv('DatosDiarios.csv')
dataset_train.fillna(method='ffill', inplace=True)
print(dataset_train.head(3))
#print(dataset_train.head())

# Select features (columns) to be involved intro training and predictions
cols = list(dataset_train)[1:6] #toma desde la segunda columna (índice = 1)

# Extract dates (will be used in visualization)
datelist_train = list(dataset_train['date']) #lo pasa a lista
datelist_train = [dt.datetime.strptime(date, '%Y-%m-%d').date() for date in datelist_train] #lo pasa a formato datatime.date

print('Training set shape == {}'.format(dataset_train.shape))
print('All timestamps == {}'.format(len(datelist_train)))
print('Featured selected: {}'.format(cols))
print(dataset_train.head())

         date       PM10      PM2.5         O3         NO        NO2  \
0  2018-01-01  16.708333   7.833333  10.500000  20.170833   6.775000   
1  2018-01-02  31.333333  13.708333   7.375000  42.754167  17.204167   
2  2018-01-03  60.583333  16.666667  10.916667  62.720833  22.862500   

         NOx  SO2    CO         PRS         RH      TOUT        SR        WSR  \
0  26.604167  5.6  0.92  725.358333  90.708333  2.910833  0.022583  11.775000   
1  59.037500  5.6  0.92  723.241667  95.541667  0.858750  0.016417   6.308333   
2  84.795833  5.6  0.92  723.762500  73.375000  4.810000  0.109875   6.875000   

         WDR     RAINF  
0  51.291667  0.001250  
1  75.250000  0.001667  
2  71.125000  0.000000  
Training set shape == (1333, 16)
All timestamps == 1333
Featured selected: ['PM10', 'PM2.5', 'O3', 'NO', 'NO2']
         date       PM10      PM2.5         O3         NO        NO2  \
0  2018-01-01  16.708333   7.833333  10.500000  20.170833   6.775000   
1  2018-01-02  31.333333  13.7

In [45]:
dataset_train = dataset_train[cols].astype(str)

for i in cols:
    for j in range(0, len(dataset_train)):
        dataset_train[i][j] = dataset_train[i][j].replace(',', '')

dataset_train = dataset_train.astype(float) #hacerlo todo floats

# Using multiple features (predictors)
training_set = dataset_train.values #hace un array con los valores de las columnas

print('Shape of training set == {}.'.format(training_set.shape))
training_set

Shape of training set == (1333, 5).


array([[16.70833333,  7.83333333, 10.5       , 20.17083333,  6.775     ],
       [31.33333333, 13.70833333,  7.375     , 42.75416667, 17.20416667],
       [60.58333333, 16.66666667, 10.91666667, 62.72083333, 22.8625    ],
       ...,
       [67.20833333, 13.16666667, 21.95833333,  5.99166667,  8.275     ],
       [63.75      , 16.91666667, 21.        ,  7.25833333,  8.20833333],
       [45.625     ,  9.54166667, 19.91666667,  6.77083333,  7.8       ]])

In [46]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
training_set_scaled = sc.fit_transform(training_set)

sc_predict = StandardScaler()
sc_predict.fit_transform(training_set[:, 0:1])

array([[-1.7360261 ],
       [-1.19565518],
       [-0.11491335],
       ...,
       [ 0.12987006],
       [ 0.00209004],
       [-0.66760041]])

In [47]:
# Creating a data structure with 90 timestamps and 1 output
X_train = []
y_train = []

n_future = 7   # Number of days we want top predict into the future
n_past = 7     # Number of past days we want to use to predict the future

for i in range(n_past, len(training_set_scaled) - n_future +1):
    X_train.append(training_set_scaled[i - n_past:i, 0:dataset_train.shape[1] - 1])
    y_train.append(training_set_scaled[i + n_future - 1:i + n_future, 0])

X_train, y_train = np.array(X_train), np.array(y_train)

print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))

X_train shape == (1320, 7, 4).
y_train shape == (1320, 1).


In [48]:
# Import Libraries and packages from Keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from tensorflow.keras.optimizers import Adam

In [64]:
# Creating a data structure with 90 timestamps and 1 output
X_train = []
y_train = []

n_future = 7   # Number of days we want top predict into the future
n_past = 7     # Number of past days we want to use to predict the future

for i in range(n_past, len(training_set_scaled) - n_future +1):
    X_train.append(training_set_scaled[i - n_past:i, 0:dataset_train.shape[1] - 1])
    y_train.append(training_set_scaled[i + n_future - 1:i + n_future, 0])

X_train, y_train = np.array(X_train), np.array(y_train)

print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))




# Initializing the Neural Network based on LSTM
model = Sequential()

# Adding 1st LSTM layer
model.add(LSTM(units=64, return_sequences=True, input_shape=(n_past, dataset_train.shape[1]-1)))


# Adding 2nd LSTM layer
model.add(LSTM(units=20, return_sequences=False, activation='ReLu'))



# Adding Dropout
model.add(Dropout(0.55))

# Output layer
model.add(Dense(units=1, activation='linear'))


model.compile(optimizer = Adam(learning_rate=0.005), loss='mean_squared_error')
#es = EarlyStopping(monitor='accuracy', min_delta=1e-10, patience=10, verbose=1)
#rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
#mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)

#tb = TensorBoard('logs')

history = model.fit(X_train, y_train, shuffle=True, epochs=100, validation_split=0.2, verbose=1, batch_size=256)





# Generate list of sequence of days for predictions
datelist_future = pd.date_range(datelist_train[-1], periods=n_future, freq='1d').tolist() #crea lista de los siguientes días, indicado anteriormente
#print(datelist_future)

'''
Remeber, we have datelist_train from begining.
'''

# Convert Pandas Timestamp to Datetime object (for transformation) --> FUTURE
datelist_future_ = []
for this_timestamp in datelist_future:
    datelist_future_.append(this_timestamp.date())

#print(datelist_future_)



# Perform predictions
predictions_future = model.predict(X_train[-n_future:])

predictions_train = model.predict(X_train[n_past:])





# Inverse the predictions to original measurements

# ---> Special function: convert <datetime.date> to <Timestamp>
def datetime_to_timestamp(x):
    '''
        x : a given datetime value (datetime.date)
    '''
    return datetime.strptime(x.strftime('%Y%m%d'), '%Y%m%d')


y_pred_future = sc_predict.inverse_transform(predictions_future)
y_pred_train = sc_predict.inverse_transform(predictions_train)

PREDICTIONS_FUTURE = pd.DataFrame(y_pred_future, columns=['Precio/Litro']).set_index(pd.Series(datelist_future))
PREDICTION_TRAIN = pd.DataFrame(y_pred_train, columns=['Precio/Litro']).set_index(pd.Series(datelist_train[2 * n_past + n_future -1:]))

# Convert <datetime.date> to <Timestamp> for PREDCITION_TRAIN
PREDICTION_TRAIN.index = PREDICTION_TRAIN.index.to_series().apply(datetime_to_timestamp)

PREDICTION_TRAIN




# Set plot size 
from pylab import rcParams
rcParams['figure.figsize'] = 14, 5

# Plot parameters
START_DATE_FOR_PLOTTING = '2018'

plt.plot(dataset_train.loc[START_DATE_FOR_PLOTTING:].index+17400, dataset_train.loc[START_DATE_FOR_PLOTTING:]['PM10'], color='b', label='Actual Stock Price')
plt.plot(PREDICTIONS_FUTURE.index, PREDICTIONS_FUTURE['Precio/Litro'], color='r', label='Predicted Gas Price')
plt.plot(PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:].index, PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:]['Precio/Litro'], color='orange', label='Training predictions')

plt.axvline(x = min(PREDICTIONS_FUTURE.index), color='green', linewidth=2, linestyle='--')

plt.grid(which='major', color='#cccccc', alpha=0.5)

plt.legend(shadow=True)
plt.title('Predcitions Gas Prices', family='Arial', fontsize=12)
plt.xlabel('Timeline', family='Arial', fontsize=10)
plt.ylabel('Gas Price Value', family='Arial', fontsize=10)
plt.xticks(rotation=45, fontsize=8)
plt.show()


X_train shape == (1320, 7, 4).
y_train shape == (1320, 1).


ValueError: Unknown activation function: ReLu. Please ensure this object is passed to the `custom_objects` argument. See https://www.tensorflow.org/guide/keras/save_and_serialize#registering_the_custom_object for details.