https://github.com/bnsreenu/python_for_microscopists/blob/master/166a-Intro_to_time_series_Forecasting_using_LSTM.py

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Flatten, GlobalAveragePooling1D, SimpleRNN, TimeDistributed
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [2]:
import tensorflow as tf
import keras as keras
print("TensorFlow version:", tf.__version__)
print("Keras version:", keras.__version__)

TensorFlow version: 2.13.0
Keras version: 2.13.1


In [3]:
dataframe = pd.read_csv("data/raw/DAX_Data.csv", usecols=[1])
dataframe = dataframe.dropna()
#plt.plot(dataframe)
dataframe.shape

(9213, 1)

In [4]:
#Convert pandas dataframe to numpy array
dataset = dataframe.values
dataset = dataset.astype('float32')
dataset.shape
# Datentypen ändern von int64 zu float32
# sobald die values sind die bereits float 64 

(9213, 1)

In [5]:
print(dataset.shape)
dataset

(9213, 1)


array([[ 1005.19],
       [  956.49],
       [  996.1 ],
       ...,
       [18406.06],
       [18576.58],
       [18649.08]], dtype=float32)

In [6]:
# LSTM uses sigmoid and tanh that are sensitive to magnitude so values need to be normalized
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1)) #Also try QuantileTransformer
dataset = scaler.fit_transform(dataset)
dataset.shape

# Daten umarrangerien damit sie eine Saklierung haben von 0 bis 1. Wobei 1 = max und 0 = min
#scaler.fit = transformiert erst die daten die zeile davor

(9213, 1)

In [7]:
train_size = int(len(dataset) * 0.66)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]


# daten aufteilen in train und test
# validation data kommt er später

In [8]:
def to_sequences(dataset, seq_size=1):
    x = []
    y = []

    for i in range(len(dataset)-seq_size-1):
        #print(i)
        window = dataset[i:(i+seq_size), 0]
        x.append(window)
        y.append(dataset[i+seq_size, 0])
        
    return np.array(x),np.array(y)


# Dieser Prozess hilft dem RNN-Modell, zeitliche Abhängigkeiten 
# und Muster im Datensatz zu lernen, indem es auf frühere Werte in 
# den Sequenzen zurückgreift, um zukünftige Werte vorherzusagen.

In [9]:
seq_size = 5  # Number of time steps to look back 
#Larger sequences (look further back) may improve forecasting.

trainX, trainY = to_sequences(train, seq_size)
testX, testY = to_sequences(test, seq_size)



print("Shape of training set: {}".format(trainX.shape))
print("Shape of test set: {}".format(testX.shape))

Shape of training set: (6074, 5)
Shape of test set: (3127, 5)


In [10]:
# Reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [11]:
model = Sequential()
model.add(Dense(128, input_shape=(None, seq_size)))  # Dense Layer
model.add(LSTM(64, return_sequences=True))          # Erste LSTM-Schicht
model.add(LSTM(64, return_sequences=True))          # Zweite LSTM-Schicht
model.add(TimeDistributed(Dense(1)))                # TimeDistributed Layer
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, None, 128)         768       
                                                                 
 lstm (LSTM)                 (None, None, 64)          49408     
                                                                 
 lstm_1 (LSTM)               (None, None, 64)          33024     
                                                                 
 time_distributed (TimeDist  (None, None, 1)           65        
 ributed)                                                        
                                                                 
Total params: 83265 (325.25 KB)
Trainable params: 83265 (325.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
model.fit(trainX, trainY, validation_data=(testX, testY), verbose=2, epochs=40)

Epoch 1/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.2080 - 386ms/epoch - 2ms/step
Epoch 2/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.1928 - 311ms/epoch - 2ms/step
Epoch 3/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.2051 - 298ms/epoch - 2ms/step
Epoch 4/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.2150 - 298ms/epoch - 2ms/step
Epoch 5/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.2277 - 299ms/epoch - 2ms/step
Epoch 6/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.1972 - 334ms/epoch - 2ms/step
Epoch 7/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.2014 - 369ms/epoch - 2ms/step
Epoch 8/40
190/190 - 0s - loss: 0.0128 - val_loss: 0.2131 - 361ms/epoch - 2ms/step
Epoch 9/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.2030 - 316ms/epoch - 2ms/step
Epoch 10/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.2033 - 314ms/epoch - 2ms/step
Epoch 11/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.2116 - 325ms/epoch - 2ms/step
Epoch 12/40
190/190 - 0s - loss: 0.0129 - val_loss: 0.2095 - 330ms/epoch - 2ms/step
E

<keras.src.callbacks.History at 0x2989dab00>

In [14]:
print(trainX.shape, trainY.shape, testX.shape, testY.shape)

(6074, 1, 5) (6074,) (3127, 1, 5) (3127,)


In [None]:
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
trainPredict.shape, testPredict.shape

In [None]:
trainX.shape, trainY.shape, testX.shape, testY.shape


In [None]:
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])

In [None]:
print(trainPredict.shape, trainY.shape)
testPredict.shape, testY.shape

In [None]:
trainPredict[:,0].shape, trainPredict.shape


In [None]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))

testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

In [None]:
# shift train predictions for plotting
#we must shift the predictions so that they align on the x-axis with the original dataset. 
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[seq_size:len(trainPredict)+seq_size, :] = trainPredict

In [None]:
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(seq_size*2)+1:len(dataset)-1, :] = testPredict

In [None]:
# plot baseline and predictions
#plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()