In [2]:
# Импортируем библиотеки

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error

In [0]:
def split_sequence(sequence, n_steps):
    n = len(sequence)
    X, y = list(), list()
    for i in range(n):
        end_ix = i + n_steps
        if end_ix > n-1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [0]:
def plot_difference(true_sequence, predict_values, unit):
    print(f'Prediction of model with {unit}:')
    df = pd.DataFrame({'real value': true_sequence, 'predicted value': np.round(predict_values, 3), 'difference': np.round(abs(true_sequence-predict_values), 1)})
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        print(df)

# **Prediction of arithmetic progression**

$a_n = a_1 + (n-1)*d$  

Пусть $d=15$

## Data generation

In [0]:
def generate_arithmetic_progression(n_steps=4, d=15): # d - шаг арифметической прогрессии

    num = 700
    batch_size = 2000
    i = 0
    
    X = np.empty((batch_size, n_steps))
    y = np.empty((batch_size, 1))

    start_val = 10

    while i < batch_size:
    
        array =  np.arange(start_val, start_val +(num-1)*d + 1, d)
        j = 0

        while j <= num - (n_steps + 1):
            if i >= batch_size: break
            X[i, :] = array[j:n_steps+j]
            y[i, :] = array[j+n_steps]
            i += 1
            j += 1
        
        start_val -= 1

    _, index = np.unique(X, axis=0, return_index=True)
    X, y = X[index], y[index]
    X = X.reshape((X.shape[0], X.shape[1], 1))
    return X, y

In [0]:
n_steps = 4
X_train, y_train = generate_arithmetic_progression(n_steps)

In [0]:
print(f'X_train shape is {X_train.shape}')
print(f'y_train shape is {y_train.shape}')

X_train shape is (2000, 4, 1)
y_train shape is (2000, 1)


## **Training model with one LSTM layer**

In [0]:
def predict(true_sequence, model):
    predict_values = true_sequence[:n_steps]
    k = n_steps
    length = true_sequence.size
    while k != length:
        X = predict_values[-n_steps::]
        X = X.reshape((1, n_steps, 1))
        f_x = np.round(model.predict(X, verbose=0))
        predict_values = np.append(predict_values, f_x)
        k += 1
    return predict_values

In [0]:
sequence = np.arange(8111, 9000, 15)

In [0]:
units_number = list(range(1, 11)) + list(range(15, 65, 5))
losses = []
min_loss = None
min_unit = None

for unit in units_number: 
    model = Sequential()
    model.add(LSTM(unit, activation='softplus', input_shape=(n_steps, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')

    model.fit(X_train, y_train, epochs=300, validation_split=0.2, verbose=0)

    predict_y = predict(sequence, model)
    mse = mean_squared_error(sequence, predict_y)

    if min_loss is None or mse < min_loss:
        min_unit = unit
        min_loss = mse
        best_model = model
    losses.append(mse)
    
    print(f'Units in LSTM layer: {unit}, MSE is: {np.round(mse, 4)}')
    if unit in (1, 10, 20, 40):
        plot_difference(sequence, predict_y, unit)

Units in LSTM layer: 1, MSE is: 68403615.0
Prediction of model with 1:
    real value  predicted value  difference
0         8111           8111.0         0.0
1         8126           8126.0         0.0
2         8141           8141.0         0.0
3         8156           8156.0         0.0
4         8171             26.0      8145.0
5         8186             26.0      8160.0
6         8201             26.0      8175.0
7         8216             26.0      8190.0
8         8231             26.0      8205.0
9         8246             26.0      8220.0
10        8261             26.0      8235.0
11        8276             26.0      8250.0
12        8291             26.0      8265.0
13        8306             26.0      8280.0
14        8321             26.0      8295.0
15        8336             26.0      8310.0
16        8351             26.0      8325.0
17        8366             26.0      8340.0
18        8381             26.0      8355.0
19        8396             26.0      8370.0
20   

In [0]:
sequence = np.arange(4353, 6000, 15)
predict_y = predict(sequence, best_model)
mse = mean_squared_error(sequence, predict_y)
print(f'mse is {mse}')
plot_difference(sequence, predict_y, min_unit)

mse is 0.0
Prediction of model with 8:
     real value  predicted value  difference
0          4353           4353.0         0.0
1          4368           4368.0         0.0
2          4383           4383.0         0.0
3          4398           4398.0         0.0
4          4413           4413.0         0.0
5          4428           4428.0         0.0
6          4443           4443.0         0.0
7          4458           4458.0         0.0
8          4473           4473.0         0.0
9          4488           4488.0         0.0
10         4503           4503.0         0.0
11         4518           4518.0         0.0
12         4533           4533.0         0.0
13         4548           4548.0         0.0
14         4563           4563.0         0.0
15         4578           4578.0         0.0
16         4593           4593.0         0.0
17         4608           4608.0         0.0
18         4623           4623.0         0.0
19         4638           4638.0         0.0
20         4653 

# **Prediction of geometric progression**

$ b_n = b_1*q^{n-1}$  
Пусть $q=2, b_1 = \forall n \in ${2, 3...100}

## **Data generation**

In [0]:
def generate_geometric_progression(start=1, num=15, d=1.5):
    x = [start]
    i = 1
    while i < num:
        next_value = x[-1] * d
        x.append(next_value)
        i += 1
    return x

In [0]:
def generate_data(n_steps=3):
  
    batch_size = 3000
    i = 0
    num = 20
    
    X = np.empty((batch_size, n_steps))
    y = np.empty((batch_size, 1))

    start_val = 2

    while i < batch_size:

        array = generate_geometric_progression(start_val, num, 2)
        j = 0

        while j <= num - (n_steps + 1):
            if i >= batch_size: 
                break
            X[i, :] = array[j:n_steps+j]
            y[i, :] = array[j+n_steps:j+n_steps + 1]
            i += 1
            j += 1

        start_val += 2
        print(start_val)

    _, index = np.unique(X, axis=0, return_index=True)
    X, y = X[index], y[index]
    X = X.reshape((X.shape[0], X.shape[1], 1))
    return X, y

In [17]:
print(f'X_train shape is {X_train.shape}')
print(f'y_train shape is {y_train.shape}')

X_train shape is (1090, 4, 1)
y_train shape is (1090, 1)


# **Training model with one LSTM layer**

In [0]:
def predict(true_sequence, model):
    predict_values = true_sequence[:n_steps]
    k = n_steps
    length = true_sequence.size
    while k != length:
        X = predict_values[-n_steps::]
        X = X.reshape((1, n_steps, 1))
        f_x = model.predict(X, verbose=0)
        predict_values = np.append(predict_values, f_x)
        k += 1
    return predict_values

In [0]:
def generate_test_values(start=1, num=15, d=1.5):

    x = [start]
    i = 1
    
    while i < num:
        next_value = x[-1] * d
        x.append(next_value)
        i += 1

    return np.array(x)

In [0]:
sequence = generate_test_values(3, 23, 2)

In [0]:
units_number = list(range(1, 11)) + list(range(15, 65, 5))
losses = []
min_loss = None
min_unit = None

for unit in units_number: 
    model = Sequential()
    model.add(LSTM(unit, activation='softplus', input_shape=(n_steps, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')

    model.fit(X_train, y_train, epochs=800, validation_split=0.2, verbose=0)

    predict_y = predict(sequence, model)
    mse = mean_squared_error(sequence, predict_y)
    if min_loss is None or mse < min_loss:
        min_unit = unit
        min_loss = mse
        best_model = model
    losses.append(mse)
    
    print(f'Units in LSTM layer: {unit}, MSE is: {np.round(mse, 4)}')
    if unit in (3, 7, 15, 30, 40, 60):
        plot_difference(sequence, predict_y, unit)

Units in LSTM layer: 1, MSE is: 9178455169603.006
Units in LSTM layer: 2, MSE is: 6047793066.8327
Units in LSTM layer: 3, MSE is: 784242577.6989
Prediction of model with 3:
    real value  predicted value  difference
0            3     3.000000e+00         0.0
1            6     6.000000e+00         0.0
2           12     1.200000e+01         0.0
3           24     2.400000e+01         0.0
4           48     4.718600e+01         0.8
5           96     9.533000e+01         0.7
6          192     1.850060e+02         7.0
7          384     3.816900e+02         2.3
8          768     7.669600e+02         1.0
9         1536     1.526471e+03         9.5
10        3072     3.016699e+03        55.3
11        6144     6.098262e+03        45.7
12       12288     1.219630e+04        91.7
13       24576     2.435312e+04       222.9
14       49152     4.862555e+04       526.5
15       98304     9.744991e+04       854.1
16      196608     1.948628e+05      1745.2
17      393216     3.895741e+05    

In [0]:
sequence = generate_test_values(5, 20, 2)
predict_y = predict(sequence, best_model)
mse = mean_squared_error(sequence, predict_y)
print(f'mse is {mse}')
plot_difference(sequence, predict_y, min_unit)

mse is 4727.6534022197275
Prediction of model with 45:
    real value  predicted value  difference
0            5            5.000         0.0
1           10           10.000         0.0
2           20           20.000         0.0
3           40           40.000         0.0
4           80           79.986         0.0
5          160          159.883         0.1
6          320          320.002         0.0
7          640          639.836         0.2
8         1280         1279.843         0.2
9         2560         2559.979         0.0
10        5120         5119.506         0.5
11       10240        10243.703         3.7
12       20480        20483.930         3.9
13       40960        40966.805         6.8
14       81920        81932.781        12.8
15      163840       163871.109        31.1
16      327680       327726.219        46.2
17      655360       655441.188        81.2
18     1310720      1310864.000       144.0
19     2621440      2621692.750       252.8
