## Predicción de Series Temporales LSTM - Embeddings

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('fast')

from keras.models import Sequential
from keras.layers import Dense,Activation,Flatten
from sklearn.preprocessing import MinMaxScaler

from keras.layers import Input, Embedding, Dense, Flatten, Dropout, concatenate, LSTM
from keras.layers import BatchNormalization, SpatialDropout1D
from keras.callbacks import Callback
from keras.models import Model
from keras.optimizers import Adam

In [4]:
#Cargamos nuestro Dataset
df = pd.read_excel('/content/drive/MyDrive/Proyecto de Modelamiento 3/Base de datos/TRM.xlsx',parse_dates=[0],index_col=0, names=['Fecha','Tasa de cambio representativa del mercado (TRM)','Dia','Mes'])
df.head()

Unnamed: 0_level_0,Tasa de cambio representativa del mercado (TRM),Dia,Mes
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-04-08,3910.15,0,4
2020-04-09,3886.79,1,4
2020-04-10,3886.79,2,4
2020-04-11,3886.79,3,4
2020-04-12,3886.79,4,4


In [5]:
df.describe()

Unnamed: 0,Tasa de cambio representativa del mercado (TRM),Dia,Mes
count,479.0,479.0,479.0
mean,3700.602046,14.580376,6.304802
std,138.541267,8.764308,3.08509
min,3410.82,0.0,1.0
25%,3611.44,7.0,4.0
50%,3709.0,15.0,6.0
75%,3791.335,22.0,9.0
max,4046.04,30.0,12.0


## Preprocesado de los datos

In [6]:
PASOS=7
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [7]:
# load dataset
values = df['Tasa de cambio representativa del mercado (TRM)'].values

# ensure all data is float
values = values.astype('float32')
# normalize features
scaler = MinMaxScaler(feature_range=(-1, 1))

values=values.reshape(-1, 1) # esto lo hacemos porque tenemos 1 sola dimension

scaled = scaler.fit_transform(values)

reframed = series_to_supervised(scaled, PASOS, 1)
reframed.reset_index(inplace=True, drop=True)

contador=0
reframed['Dia']=df['Dia']
reframed['Mes']=df['Mes']

for i in range(reframed.index[0],reframed.index[-1]):
    reframed['Dia'].loc[contador]=df['Dia'][i+8]
    reframed['Mes'].loc[contador]=df['Mes'][i+8]
    contador=contador+1
reframed.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Unnamed: 0,var1(t-7),var1(t-6),var1(t-5),var1(t-4),var1(t-3),var1(t-2),var1(t-1),var1(t),Dia,Mes
0,0.572148,0.498599,0.498599,0.498599,0.498599,0.498599,0.446712,0.408614,8.0,4.0
1,0.498599,0.498599,0.498599,0.498599,0.498599,0.446712,0.408614,0.605775,9.0,4.0
2,0.498599,0.498599,0.498599,0.498599,0.446712,0.408614,0.605775,0.675325,10.0,4.0
3,0.498599,0.498599,0.498599,0.446712,0.408614,0.605775,0.675325,0.770222,11.0,4.0
4,0.498599,0.498599,0.446712,0.408614,0.605775,0.675325,0.770222,0.770222,12.0,4.0


In [8]:
reordenado=reframed[ ['Dia','Mes','var1(t-7)','var1(t-6)','var1(t-5)','var1(t-4)','var1(t-3)','var1(t-2)','var1(t-1)','var1(t)'] ]
reordenado.dropna(inplace=True)
reordenado

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Dia,Mes,var1(t-7),var1(t-6),var1(t-5),var1(t-4),var1(t-3),var1(t-2),var1(t-1),var1(t)
0,8.0,4.0,0.572148,0.498599,0.498599,0.498599,0.498599,0.498599,0.446712,0.408614
1,9.0,4.0,0.498599,0.498599,0.498599,0.498599,0.498599,0.446712,0.408614,0.605775
2,10.0,4.0,0.498599,0.498599,0.498599,0.498599,0.446712,0.408614,0.605775,0.675325
3,11.0,4.0,0.498599,0.498599,0.498599,0.446712,0.408614,0.605775,0.675325,0.770222
4,12.0,4.0,0.498599,0.498599,0.446712,0.408614,0.605775,0.675325,0.770222,0.770222
...,...,...,...,...,...,...,...,...,...,...
466,26.0,7.0,0.251976,0.360631,0.360631,0.400648,0.435849,0.459715,0.459715,0.459715
467,27.0,7.0,0.360631,0.360631,0.400648,0.435849,0.459715,0.459715,0.459715,0.553320
468,28.0,7.0,0.360631,0.400648,0.435849,0.459715,0.459715,0.459715,0.553320,0.598407
469,29.0,7.0,0.400648,0.435849,0.459715,0.459715,0.459715,0.553320,0.598407,0.547054


## Dividimos en set de Entrenamiento y Validación

In [17]:
training_data = reordenado.drop('var1(t)',axis=1).values
target_data=reordenado['var1(t)']
#training_data.head()
valid_data = training_data[470-30:470]
valid_target=target_data[470-30:470]

training_data = training_data[0:470]
target_data=target_data[0:470]
print(training_data.shape,target_data.shape,valid_data.shape,valid_target.shape)
#training_data.head()

(470, 9) (470,) (30, 9) (30,)


# Creamos la Red Neuronal

## Utilizaremos una LSTM con Embeddings

### Tenemos como entradas 9 columnas (2 embeddings y 7 pasos)

In [18]:
def crear_modeloEmbeddings():
    emb_dias = 2 #tamanio profundidad de embeddings
    emb_meses = 4

    in_dias = Input(shape=[1], name = 'Dia')
    emb_dias = Embedding(7+1, emb_dias)(in_dias)
    in_meses = Input(shape=[1], name = 'Mes')
    emb_meses = Embedding(12+1, emb_meses)(in_meses)

    in_cli = Input(shape=[PASOS], name = 'cli')

    fe = concatenate([(emb_dias), (emb_meses)])

    x = Flatten()(fe)
    x = Dense(PASOS,activation='tanh')(x)
    outp = Dense(1,activation='tanh')(x)
    model = Model(inputs=[in_dias,in_meses,in_cli], outputs=outp)

    model.compile(loss='mean_absolute_error', 
                  optimizer='adam',
                  metrics=['MSE'])

    model.summary()
    return model

In [21]:
EPOCHS=40

model = crear_modeloEmbeddings()

continuas=training_data[['var1(t-7)','var1(t-6)','var1(t-5)','var1(t-4)','var1(t-3)','var1(t-2)','var1(t-1)']]
valid_continuas=valid_data[['var1(t-7)','var1(t-6)','var1(t-5)','var1(t-4)','var1(t-3)','var1(t-2)','var1(t-1)']]

history=model.fit([training_data['Dia'],training_data['Mes'],continuas], target_data, epochs=EPOCHS
                 ,validation_data=([valid_data['Dia'],valid_data['Mes'],valid_continuas],valid_target))

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Dia (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
Mes (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_10 (Embedding)        (None, 1, 2)         16          Dia[0][0]                        
__________________________________________________________________________________________________
embedding_11 (Embedding)        (None, 1, 4)         52          Mes[0][0]                        
____________________________________________________________________________________________

  """


IndexError: ignored