In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from datetime import datetime
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, GRU
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam, SGD

In [3]:
# Load the raw series: the file is semicolon-delimited and the 'Date' column
# is parsed into datetimes on read.
dataset = pd.read_csv(
    'timeseries.csv',
    sep=';',
    parse_dates=['Date'],
)

In [4]:
# Display the (rows, columns) shape of the loaded frame.
dataset.shape

In [5]:
# Convert 'series1' to float. The raw values use a decimal comma (e.g. "12,5"),
# so the comma is swapped for a dot before casting.
# Going through astype(str) keeps this cell idempotent: re-running it on values
# that are already floats (or on NaN) no longer raises AttributeError, which a
# per-element `x.replace(...)` call would do.
dataset['series1'] = (
    dataset['series1']
    .astype(str)
    .str.replace(',', '.', regex=False)
    .astype(float)
)

In [301]:
# Overview plot: the whole series against its dates.
fig, ax = plt.subplots(figsize=(20, 7))
ax.plot(dataset['Date'].values, dataset['series1'].values, color='b')
ax.set_xlabel('Date')
plt.show()

## Data Preprocessing

In [6]:
# The first `num_shape` observations form the training set; everything after
# them is held out for testing.
num_shape = 1600

# Select the column at position 1 (presumably 'series1' - verify against the
# CSV layout); the 1:2 slice keeps the values two-dimensional.
series_values = dataset.iloc[:, 1:2].values
train, test = series_values[:num_shape], series_values[num_shape:]

Нормализуем данные, чтобы алгоритм лучше находил глобальный минимум.

In [7]:
# Fit the min-max scaler on the training split only, then map that split
# into the [0, 1] range.
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(train)
train_scaled = sc.transform(train)

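Now we slice the scaled training series into sliding windows of 30 consecutive values; each window is one input sample, and the value that immediately follows it is the target.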

In [16]:
# Number of consecutive past values fed to the model for each sample.
window = 30

# Pair every run of `window` scaled values with the value that immediately
# follows it (the next-step target).
samples = [
    (train_scaled[end - window:end, 0].reshape(window, 1), train_scaled[end, 0])
    for end in range(window, num_shape)
]

X_train = np.stack([features for features, _ in samples])
y_train = np.stack([target for _, target in samples])

### Training of the basic LSTM model

In [17]:
# Stacked recurrent regressor: four 50-unit LSTM layers, each followed by 20%
# dropout, feeding a single dense output unit. No activation is overridden
# here, so every layer uses its Keras default.
# All LSTM layers except the last return the full sequence so that the next
# LSTM layer receives one vector per time step.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),  # one output value per sample
])
model.summary()

In [18]:
# Train with the Adam optimizer on mean squared error. Binding the result of
# fit() to a name keeps the returned object from being echoed as cell output.
model.compile(loss='mean_squared_error', optimizer='adam')
training_history = model.fit(X_train, y_train, batch_size=32, epochs=500)

## Prediction

In [19]:
# Rebuild the full series, then keep the test part plus the `window` values
# that precede it, so the first test point also has a complete input window.
df_volume = np.vstack((train, test))

first_input = df_volume.shape[0] - test.shape[0] - window
inputs = sc.transform(df_volume[first_input:].reshape(-1, 1))

# Exclusive upper bound for the window end index used below.
num_2 = df_volume.shape[0] - num_shape + window

X_test = np.stack([
    inputs[stop - window:stop, 0].reshape(window, 1)
    for stop in range(window, num_2)
])

In [20]:
# Predict in scaled space and map the outputs back to the original units.
predict = sc.inverse_transform(model.predict(X_test))

In [21]:
# Element-wise error between the predictions and the held-out values.
diff = predict - test

# The mean squared error is computed once and reused for the RMSE.
mse = np.mean(np.square(diff))
mae = np.mean(np.abs(diff))

print("MSE:", mse)
print("MAE:", mae)
print("RMSE:", np.sqrt(mse))

In [24]:
# Compare the true series (last 100 training points plus the test period)
# with the model's predictions for the test period.
# The start offset is derived from `num_shape` instead of a hard-coded 1500 so
# it follows the train/test split if that split changes.
plot_start = max(num_shape - 100, 0)
dates = dataset['Date'].values

fig, ax = plt.subplots(figsize=(20, 7))
ax.plot(dates[plot_start:], df_volume[plot_start:], color='red', label='True')
ax.plot(dates[-predict.shape[0]:], predict, color='blue', label='Predicted')

# The x axis holds dates, so tick placement is left to matplotlib's date
# locator. Forcing integer positions (100, 300, ...) here would be interpreted
# as date numbers, not as row offsets, and would not land on the plotted range.
ax.set_title('Timeseries Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Values')
ax.legend()
fig.autofmt_xdate()
plt.show()

### 3-month prediction

Предскажем результаты на следующие 3 месяца

In [27]:
def model_training(predict, days, dataset, window=30):
    """Forecast `days` future values by feeding the model its own predictions.

    Despite its name, this function trains nothing: it runs the already fitted
    module-level `model` recursively. The last `window` values of `dataset`
    are scaled with the module-level scaler `sc` and used as the first input;
    after every step the oldest value is dropped from the window and the new
    (scaled) prediction is appended, so each forecast builds on the previous
    ones.

    The earlier implementation appended the entire prediction array to the
    history after the first step and trimmed the history by a growing offset
    (`df_[j:]`), so consecutive input windows did not advance one step at a
    time. It also built its windows from the global `num_shape` / `num_2`,
    which meant that passing a longer `dataset` did not move the forecast
    origin. Both problems are removed here.

    Parameters
    ----------
    predict : array-like
        Unused. Kept only so existing calls `model_training(predict, days,
        dataset)` keep working.
    days : int
        Number of future steps to forecast. A value <= 0 yields an empty list.
    dataset : pandas.DataFrame
        Frame whose column at position 1 holds the series values in the
        original (unscaled) units.
    window : int, optional
        Number of past values fed to the model per step. It has to match the
        window length the model was trained with (30 in this notebook).

    Returns
    -------
    list of float
        The `days` forecast values in the original units, oldest first.

    Raises
    ------
    ValueError
        If `dataset` has fewer than `window` rows.
    """
    history = np.asarray(dataset.iloc[:, 1:2].values, dtype=float)
    if history.shape[0] < window:
        raise ValueError(
            "dataset has %d rows, but at least %d are required to build an "
            "input window" % (history.shape[0], window)
        )

    # Rolling input window, kept in scaled space so each value is scaled once.
    scaled_window = sc.transform(history[-window:])

    prediction_full = []
    for _ in range(days):
        # The model expects input shaped (samples, time steps, features).
        model_input = scaled_window.reshape(1, window, 1)
        next_scaled = np.asarray(model.predict(model_input)).reshape(1, 1)

        # Report the forecast in original units.
        prediction_full.append(float(sc.inverse_transform(next_scaled)[0, 0]))

        # Slide the window forward by one step: drop the oldest value and
        # append the value that was just predicted.
        scaled_window = np.vstack((scaled_window[1:], next_scaled))

    return prediction_full

In [28]:
# July 2019 forecast: one predicted value per calendar day, appended below the
# observed data.
july_days = pd.date_range(start='07/01/2019', end='07/31/2019')
july_values = model_training(predict, 31, dataset)
predict_df = pd.DataFrame({'Date': july_days, 'series1': july_values})
dataset_1 = pd.concat([dataset, predict_df])

In [29]:
# August 2019 forecast, computed from the series that already includes the
# July predictions, then appended below it.
august_days = pd.date_range(start='08/01/2019', end='08/31/2019')
august_values = model_training(predict, 31, dataset_1)
predict_df = pd.DataFrame({'Date': august_days, 'series1': august_values})
dataset_2 = pd.concat([dataset_1, predict_df])

In [30]:
# September 2019 forecast, computed from the series that already includes the
# July and August predictions, then appended below it.
september_days = pd.date_range(start='09/01/2019', end='09/30/2019')
september_values = model_training(predict, 30, dataset_2)
predict_df = pd.DataFrame({'Date': september_days, 'series1': september_values})
dataset_3 = pd.concat([dataset_2, predict_df])

In [33]:
# Draw the observed history and the forecast as two lines on the same axes,
# split at the first forecast date, and save the figure to disk.
forecast_start = '07/01/2019'
observed = dataset_3[dataset_3['Date'] < forecast_start].set_index('Date').series1
forecast = dataset_3[dataset_3['Date'] >= forecast_start].set_index('Date').series1

plot_options = dict(figsize=(15, 5), title='', fontsize=12)
observed.plot(label='Timeseries', **plot_options)
forecast.plot(label='Predicted', **plot_options)
plt.legend()
plt.savefig('foo.png');

In [40]:
def _round_to_tenth(value):
    """Round a single value to one decimal place."""
    return round(value, 1)


# The rounded values go into a separate 'series_1' column; the original
# 'series1' column is left untouched. Only the forecast rows are exported.
dataset_3['series_1'] = dataset_3['series1'].apply(_round_to_tenth)
forecast_rows = dataset_3[dataset_3['Date'] >= '07/01/2019']
forecast_rows.to_csv('predicted.csv')