# Обучение предиктивной модели

In [1]:
## Загрузка необходимых библиотек
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import math

from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split, cross_validate, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler

import tensorflow as tf
from tensorflow import keras
import keras
import keras.layers as L
from keras.utils.np_utils import to_categorical

## 1. Подготовка учебного датасета

### 1.1 Датасет типа "total"

In [2]:
## Чтение данных из файла
weather = pd.read_csv('DATA/weather_data_total4.csv.gz', compression='gzip', header=0, sep=';'
                      , quotechar='"')
municip = pd.read_csv('INPUT/id_муниципалитета.csv.zip', compression='zip', header=0, sep=';'
                      , dtype={'digit_id': 'object'}, quotechar='"')
settl = pd.read_csv('DATA/Settlement_Id.zip', compression='zip', header=0, sep=';'
                    , dtype={'digit_id': 'object'}, quotechar='"')
target = pd.read_csv('INPUT/target.csv.zip', compression='zip', header=0, sep=';', decimal=','
                     , dtype={'digit_id': 'object'}, quotechar='"')
diff_trend = pd.read_csv('DATA/data_ssa.csv.gz', compression='gzip', header=0, sep=';'
                         , dtype={'yield_code': 'object'}, quotechar='"')

In [3]:
## Добавление id метеостанций к фрейму с целевой переменной
# добавление данных по урожайности и накопленного тренда урожайности
target = target.loc[~target['check'].isin([0])]
target['triplet'] = target['digit_id'].astype(str).str[:3]
target['solo'] = target['digit_id'].astype(str).str[0]+'00'
diff_trend = diff_trend.loc[pd.notnull(diff_trend['diff_cumulative']), ['year', 'yield_code', 'diff_cumulative']]
# создание массива по урожайности
target = pd.merge(target, diff_trend, how='left', left_on=['year', 'triplet'], right_on=['year', 'yield_code'])
target = pd.merge(target, diff_trend, how='left', left_on=['year', 'solo'], right_on=['year', 'yield_code'])

# очистка урожайности от тренда
target['trendless'] = np.nan
target.loc[pd.isnull(target['trendless']), 'trendless'] = target.loc[pd.isnull(target['trendless']), 'yield'] - target.loc[pd.isnull(target['trendless']), 'diff_cumulative_x']
target.loc[pd.isnull(target['trendless']), 'trendless'] = target.loc[pd.isnull(target['trendless']), 'yield'] - target.loc[pd.isnull(target['trendless']), 'diff_cumulative_y']
target.dropna(subset=['trendless'], inplace=True)
target = target[['digit_id', 'year', 'federal_distr', 'region', 'category', 'trendless']]

# добавление данных по метеостанциям
meteo = settl[['digit_id', 'meteoid']].drop_duplicates()
target = pd.merge(target, meteo, how='left', on=['digit_id'])

# добавление данных с идентификаторами муниципалитетов
target = pd.merge(target, municip[['digit_id', 'id_municip']], how='left', on=['digit_id'])
target.drop_duplicates(keep='first', inplace=True, ignore_index=True) # удаление дубликатов

In [4]:
target.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48068 entries, 0 to 48067
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   digit_id       48068 non-null  object 
 1   year           48068 non-null  int64  
 2   federal_distr  48068 non-null  object 
 3   region         48068 non-null  object 
 4   category       48068 non-null  object 
 5   trendless      48068 non-null  float64
 6   meteoid        48068 non-null  int64  
 7   id_municip     48068 non-null  int64  
dtypes: float64(1), int64(3), object(4)
memory usage: 2.9+ MB


In [5]:
## Средние значения показателей для заполнения значений Nan
geolocation = settl.groupby(['digit_id']).agg(
    north_border = pd.NamedAgg(column = 'latitude', aggfunc = 'max'),
    south_border = pd.NamedAgg(column = 'latitude', aggfunc = 'min'),
    east_border = pd.NamedAgg(column = 'longitude', aggfunc = 'max'),
    west_border = pd.NamedAgg(column = 'longitude', aggfunc = 'min'),
    central_lat = pd.NamedAgg(column = 'latitude', aggfunc = 'mean'),
    central_lon = pd.NamedAgg(column = 'longitude', aggfunc = 'mean'),
    settl_count = pd.NamedAgg(column = 'settlement', aggfunc = 'count')
)
geolocation.reset_index(inplace=True) # перевод индексов группировки в столбцы
target = pd.merge(target, geolocation, how='left', on=['digit_id'])

In [6]:
## Создание учебного датафрейма
target['rotation'] = target['year']%4 # добавление фичи по севообороту
target.drop(columns=['digit_id'], inplace=True)
data = pd.merge(weather, target, how='right', on=['year', 'meteoid'])
data.drop_duplicates(keep='first', inplace=True, ignore_index=True)
data.dropna(axis=0, how='any', inplace=True)
#data['region_id'] = data['region_id'].astype(int)
data.rename(columns={"trendless": "target"}, inplace=True)

In [7]:
## Список неинформативных столбцов
antitop = [
    'min_baric_tendency',
    'min_u_humidity',
    'max_baric_tendency',
    'max_wind_speed',
    'max_wind_gust_between',
    'diff_cloudiness',
    'max_height_clouds',
    'diff_height_clouds',
    'min_cloudiness',
    'min_height_clouds',
    'min_wind_speed'
]

In [8]:
## Удаление неинформативных столбцов из датафрейма total (для датафрейма "weather_data_total4.csv.gz")
# создание списка неинформативных столбцов
drop_col_tot = []

for items in antitop:
    c = 'total_{}'.format(items)
    drop_col_tot.append(c)

data.drop(columns=drop_col_tot, inplace=True)

In [9]:
data = data.loc[(data['category'] !='КФХ')]
## Преобразование категориальных переменных признака "category" в числовые значения
le = LabelEncoder() # метод преобразования категориальных признаков
# преобразование в числовые значения
data['category'] = le.fit_transform(data['category'])
data['federal_distr'] = le.fit_transform(data['federal_distr'])
data['region'] = le.fit_transform(data['region'])

In [10]:
## Удаление лишних данных
del settl
del meteo
del weather
del municip

In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 31248 entries, 13361 to 48067
Data columns (total 45 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   year                          31248 non-null  float64
 1   meteoid                       31248 non-null  int64  
 2   total_min_air_temp            31248 non-null  float64
 3   total_min_ground_temp         31248 non-null  float64
 4   total_min_po_press            31248 non-null  float64
 5   total_max_air_temp            31248 non-null  float64
 6   total_max_po_press            31248 non-null  float64
 7   total_max_u_humidity          31248 non-null  float64
 8   total_max_cloudiness          31248 non-null  float64
 9   total_avg_air_temp            31248 non-null  float64
 10  total_avg_po_press            31248 non-null  float64
 11  total_avg_baric_tendency      31248 non-null  float64
 12  total_wind_rumb_radians       31248 non-null  float64
 1

In [17]:
## Формирование датасета и вектора целевой переменной
train_data = data.copy()
train_label = data['target']

# удаление лишних столбцов
train_data = train_data.drop(columns=['year', 'meteoid', 'target'])

In [18]:
# разделение выборки на обучающую и тестовую (80/20) 
X_train, X_test, y_train, y_test = train_test_split(train_data, train_label, test_size=0.25, random_state=42)

## 2. Обучение моделей

### 2.1 Подготовка датасета для обучения и тестирования

In [19]:
scaler = StandardScaler()
train =  scaler.fit_transform(X_train)
test = scaler.fit_transform(X_test)
train, test =  np.array(train), np.array(test)
train_label, test_label = np.array(y_train), np.array(y_test)

In [20]:
# разделение выборки на обучающую и тестовую (80/20) 
X_train, X_test, y_train, y_test = train_test_split(train, train_label, test_size=0.2, random_state=42)

In [21]:
# reshape input to be [samples, time steps, features]
trainX = np.reshape(train, (train.shape[0], 1, train.shape[1]))
testX = np.reshape(test, (test.shape[0], 1, test.shape[1]))

### 2.2 Обучение рекурентной нейросети (LSTM)

In [22]:
#Define a model that utilizes bidirectional LSTM RNN
BATCH_SIZE=32
hid_size = int(trainX.shape[-1])

lstm_tot = keras.models.Sequential()
lstm_tot.add(L.LSTM(hid_size, return_sequences=True, input_shape=trainX.shape[1:], recurrent_activation='sigmoid'))
lstm_tot.add(L.LSTM(hid_size*2, return_sequences=True, recurrent_activation='sigmoid'))
forward_layer = L.LSTM(hid_size*2, return_sequences=True, dropout=0.3, recurrent_dropout=0.3)
backward_layer = L.LSTM(hid_size*2, recurrent_activation='sigmoid', activation='relu', return_sequences=True, 
                        go_backwards=True, dropout=0.3, recurrent_dropout=0.3)
lstm_tot.add(L.Bidirectional(forward_layer, backward_layer=backward_layer))
lstm_tot.add(L.LSTM(hid_size, return_sequences=True, recurrent_activation='sigmoid'))
lstm_tot.add(L.LSTM(10, activation='relu'))
lstm_tot.add(L.Dense(1 , activation='linear'))

In [23]:
# Description of the model
lstm_tot.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 1, 42)             14280     
_________________________________________________________________
lstm_1 (LSTM)                (None, 1, 84)             42672     
_________________________________________________________________
bidirectional (Bidirectional (None, 1, 168)            113568    
_________________________________________________________________
lstm_4 (LSTM)                (None, 1, 42)             35448     
_________________________________________________________________
lstm_5 (LSTM)                (None, 10)                2120      
_________________________________________________________________
dense (Dense)                (None, 1)                 11        
Total params: 208,099
Trainable params: 208,099
Non-trainable params: 0
__________________________________________________

In [24]:
lstm_tot.compile(optimizer='adam', loss='mean_squared_error',
                 metrics=[tf.keras.metrics.RootMeanSquaredError()])
callbacks = [keras.callbacks.ModelCheckpoint("RESULTS/callbacks/lstm_tot_at_epoch_{epoch}.h5")]

In [25]:
lstm_tot.fit(trainX, train_label, epochs=1000, batch_size=BATCH_SIZE, verbose=2, callbacks=callbacks)

Epoch 1/1000
733/733 - 18s - loss: 30.9613 - root_mean_squared_error: 5.5643
Epoch 2/1000
733/733 - 7s - loss: 19.6417 - root_mean_squared_error: 4.4319
Epoch 3/1000
733/733 - 6s - loss: 18.0875 - root_mean_squared_error: 4.2529
Epoch 4/1000
733/733 - 6s - loss: 17.0895 - root_mean_squared_error: 4.1339
Epoch 5/1000
733/733 - 7s - loss: 16.3688 - root_mean_squared_error: 4.0458
Epoch 6/1000
733/733 - 6s - loss: 15.9359 - root_mean_squared_error: 3.9920
Epoch 7/1000
733/733 - 6s - loss: 15.6324 - root_mean_squared_error: 3.9538
Epoch 8/1000
733/733 - 6s - loss: 15.2366 - root_mean_squared_error: 3.9034
Epoch 9/1000
733/733 - 7s - loss: 14.9933 - root_mean_squared_error: 3.8721
Epoch 10/1000
733/733 - 6s - loss: 14.7085 - root_mean_squared_error: 3.8352
Epoch 11/1000
733/733 - 6s - loss: 14.4428 - root_mean_squared_error: 3.8004
Epoch 12/1000
733/733 - 7s - loss: 14.2349 - root_mean_squared_error: 3.7729
Epoch 13/1000
733/733 - 6s - loss: 14.0684 - root_mean_squared_error: 3.7508
Epoch 1

733/733 - 16s - loss: 8.2509 - root_mean_squared_error: 2.8724
Epoch 108/1000
733/733 - 17s - loss: 8.1469 - root_mean_squared_error: 2.8543
Epoch 109/1000
733/733 - 17s - loss: 8.1338 - root_mean_squared_error: 2.8520
Epoch 110/1000
733/733 - 18s - loss: 8.1721 - root_mean_squared_error: 2.8587
Epoch 111/1000
733/733 - 20s - loss: 8.0798 - root_mean_squared_error: 2.8425
Epoch 112/1000
733/733 - 22s - loss: 8.0139 - root_mean_squared_error: 2.8309
Epoch 113/1000
733/733 - 29s - loss: 7.9833 - root_mean_squared_error: 2.8255
Epoch 114/1000
733/733 - 28s - loss: 7.9921 - root_mean_squared_error: 2.8270
Epoch 115/1000
733/733 - 25s - loss: 7.9224 - root_mean_squared_error: 2.8147
Epoch 116/1000
733/733 - 25s - loss: 7.9522 - root_mean_squared_error: 2.8200
Epoch 117/1000
733/733 - 25s - loss: 8.0663 - root_mean_squared_error: 2.8401
Epoch 118/1000
733/733 - 27s - loss: 7.8349 - root_mean_squared_error: 2.7991
Epoch 119/1000
733/733 - 22s - loss: 7.7831 - root_mean_squared_error: 2.7898
E

Epoch 213/1000
733/733 - 14s - loss: 5.6805 - root_mean_squared_error: 2.3834
Epoch 214/1000
733/733 - 14s - loss: 5.8435 - root_mean_squared_error: 2.4173
Epoch 215/1000
733/733 - 14s - loss: 5.7659 - root_mean_squared_error: 2.4012
Epoch 216/1000
733/733 - 14s - loss: 5.7197 - root_mean_squared_error: 2.3916
Epoch 217/1000
733/733 - 14s - loss: 5.7941 - root_mean_squared_error: 2.4071
Epoch 218/1000
733/733 - 18s - loss: 5.6654 - root_mean_squared_error: 2.3802
Epoch 219/1000
733/733 - 22s - loss: 5.7844 - root_mean_squared_error: 2.4051
Epoch 220/1000
733/733 - 15s - loss: 5.7583 - root_mean_squared_error: 2.3997
Epoch 221/1000
733/733 - 18s - loss: 5.7004 - root_mean_squared_error: 2.3876
Epoch 222/1000
733/733 - 16s - loss: 5.5457 - root_mean_squared_error: 2.3549
Epoch 223/1000
733/733 - 21s - loss: 5.6357 - root_mean_squared_error: 2.3740
Epoch 224/1000
733/733 - 20s - loss: 5.6510 - root_mean_squared_error: 2.3772
Epoch 225/1000
733/733 - 34s - loss: 5.7208 - root_mean_squared_

733/733 - 20s - loss: 4.4362 - root_mean_squared_error: 2.1062
Epoch 319/1000
733/733 - 21s - loss: 4.4866 - root_mean_squared_error: 2.1181
Epoch 320/1000
733/733 - 32s - loss: 4.4088 - root_mean_squared_error: 2.0997
Epoch 321/1000
733/733 - 24s - loss: 4.3666 - root_mean_squared_error: 2.0896
Epoch 322/1000
733/733 - 28s - loss: 4.4999 - root_mean_squared_error: 2.1213
Epoch 323/1000
733/733 - 23s - loss: 4.4173 - root_mean_squared_error: 2.1017
Epoch 324/1000
733/733 - 23s - loss: 4.4409 - root_mean_squared_error: 2.1074
Epoch 325/1000
733/733 - 21s - loss: 4.3585 - root_mean_squared_error: 2.0877
Epoch 326/1000
733/733 - 16s - loss: 4.4430 - root_mean_squared_error: 2.1078
Epoch 327/1000
733/733 - 20s - loss: 4.3607 - root_mean_squared_error: 2.0882
Epoch 328/1000
733/733 - 21s - loss: 4.3986 - root_mean_squared_error: 2.0973
Epoch 329/1000
733/733 - 15s - loss: 4.4504 - root_mean_squared_error: 2.1096
Epoch 330/1000
733/733 - 14s - loss: 4.4361 - root_mean_squared_error: 2.1062
E

Epoch 424/1000
733/733 - 14s - loss: 3.5077 - root_mean_squared_error: 1.8729
Epoch 425/1000
733/733 - 15s - loss: 3.7812 - root_mean_squared_error: 1.9445
Epoch 426/1000
733/733 - 14s - loss: 3.7728 - root_mean_squared_error: 1.9424
Epoch 427/1000
733/733 - 14s - loss: 3.6591 - root_mean_squared_error: 1.9129
Epoch 428/1000
733/733 - 13s - loss: 3.6315 - root_mean_squared_error: 1.9057
Epoch 429/1000
733/733 - 15s - loss: 3.6229 - root_mean_squared_error: 1.9034
Epoch 430/1000
733/733 - 26s - loss: 3.5500 - root_mean_squared_error: 1.8841
Epoch 431/1000
733/733 - 20s - loss: 3.6117 - root_mean_squared_error: 1.9004
Epoch 432/1000
733/733 - 15s - loss: 3.7399 - root_mean_squared_error: 1.9339
Epoch 433/1000
733/733 - 14s - loss: 3.6313 - root_mean_squared_error: 1.9056
Epoch 434/1000
733/733 - 14s - loss: 3.7664 - root_mean_squared_error: 1.9407
Epoch 435/1000
733/733 - 13s - loss: 3.5504 - root_mean_squared_error: 1.8842
Epoch 436/1000
733/733 - 14s - loss: 3.5977 - root_mean_squared_

733/733 - 16s - loss: 3.2111 - root_mean_squared_error: 1.7920
Epoch 530/1000
733/733 - 22s - loss: 3.2607 - root_mean_squared_error: 1.8057
Epoch 531/1000
733/733 - 17s - loss: 3.2892 - root_mean_squared_error: 1.8136
Epoch 532/1000
733/733 - 15s - loss: 3.2384 - root_mean_squared_error: 1.7996
Epoch 533/1000
733/733 - 17s - loss: 3.2027 - root_mean_squared_error: 1.7896
Epoch 534/1000
733/733 - 16s - loss: 3.1567 - root_mean_squared_error: 1.7767
Epoch 535/1000
733/733 - 14s - loss: 3.2455 - root_mean_squared_error: 1.8015
Epoch 536/1000
733/733 - 13s - loss: 3.1052 - root_mean_squared_error: 1.7622
Epoch 537/1000
733/733 - 15s - loss: 3.2880 - root_mean_squared_error: 1.8133
Epoch 538/1000
733/733 - 14s - loss: 3.3326 - root_mean_squared_error: 1.8255
Epoch 539/1000
733/733 - 14s - loss: 3.2810 - root_mean_squared_error: 1.8113
Epoch 540/1000
733/733 - 14s - loss: 3.2073 - root_mean_squared_error: 1.7909
Epoch 541/1000
733/733 - 14s - loss: 3.1560 - root_mean_squared_error: 1.7765
E

Epoch 635/1000
733/733 - 14s - loss: 3.0639 - root_mean_squared_error: 1.7504
Epoch 636/1000
733/733 - 15s - loss: 2.9275 - root_mean_squared_error: 1.7110
Epoch 637/1000
733/733 - 17s - loss: 2.8945 - root_mean_squared_error: 1.7013
Epoch 638/1000
733/733 - 15s - loss: 2.8849 - root_mean_squared_error: 1.6985
Epoch 639/1000
733/733 - 15s - loss: 2.9781 - root_mean_squared_error: 1.7257
Epoch 640/1000
733/733 - 17s - loss: 2.9091 - root_mean_squared_error: 1.7056
Epoch 641/1000
733/733 - 14s - loss: 2.9814 - root_mean_squared_error: 1.7267
Epoch 642/1000
733/733 - 13s - loss: 2.7906 - root_mean_squared_error: 1.6705
Epoch 643/1000
733/733 - 13s - loss: 2.9276 - root_mean_squared_error: 1.7110
Epoch 644/1000
733/733 - 15s - loss: 2.8728 - root_mean_squared_error: 1.6949
Epoch 645/1000
733/733 - 14s - loss: 2.9836 - root_mean_squared_error: 1.7273
Epoch 646/1000
733/733 - 14s - loss: 3.0149 - root_mean_squared_error: 1.7363
Epoch 647/1000
733/733 - 14s - loss: 2.8972 - root_mean_squared_

733/733 - 20s - loss: 2.8091 - root_mean_squared_error: 1.6760
Epoch 741/1000
733/733 - 18s - loss: 2.8116 - root_mean_squared_error: 1.6768
Epoch 742/1000
733/733 - 19s - loss: 2.7745 - root_mean_squared_error: 1.6657
Epoch 743/1000
733/733 - 14s - loss: 2.6695 - root_mean_squared_error: 1.6338
Epoch 744/1000
733/733 - 18s - loss: 2.8103 - root_mean_squared_error: 1.6764
Epoch 745/1000
733/733 - 28s - loss: 2.7257 - root_mean_squared_error: 1.6510
Epoch 746/1000
733/733 - 23s - loss: 2.7157 - root_mean_squared_error: 1.6479
Epoch 747/1000
733/733 - 20s - loss: 2.8360 - root_mean_squared_error: 1.6840
Epoch 748/1000
733/733 - 28s - loss: 2.7065 - root_mean_squared_error: 1.6451
Epoch 749/1000
733/733 - 21s - loss: 2.8418 - root_mean_squared_error: 1.6858
Epoch 750/1000
733/733 - 19s - loss: 2.6716 - root_mean_squared_error: 1.6345
Epoch 751/1000
733/733 - 17s - loss: 2.6891 - root_mean_squared_error: 1.6399
Epoch 752/1000
733/733 - 25s - loss: 2.7332 - root_mean_squared_error: 1.6533
E

Epoch 846/1000
733/733 - 19s - loss: 2.6250 - root_mean_squared_error: 1.6202
Epoch 847/1000
733/733 - 16s - loss: 2.5967 - root_mean_squared_error: 1.6114
Epoch 848/1000
733/733 - 21s - loss: 2.4631 - root_mean_squared_error: 1.5694
Epoch 849/1000
733/733 - 15s - loss: 2.6594 - root_mean_squared_error: 1.6308
Epoch 850/1000
733/733 - 15s - loss: 2.5779 - root_mean_squared_error: 1.6056
Epoch 851/1000
733/733 - 20s - loss: 2.4767 - root_mean_squared_error: 1.5737
Epoch 852/1000
733/733 - 16s - loss: 2.6085 - root_mean_squared_error: 1.6151
Epoch 853/1000
733/733 - 14s - loss: 2.5460 - root_mean_squared_error: 1.5956
Epoch 854/1000
733/733 - 23s - loss: 2.6346 - root_mean_squared_error: 1.6231
Epoch 855/1000
733/733 - 15s - loss: 2.4643 - root_mean_squared_error: 1.5698
Epoch 856/1000
733/733 - 18s - loss: 2.4460 - root_mean_squared_error: 1.5640
Epoch 857/1000
733/733 - 14s - loss: 2.5240 - root_mean_squared_error: 1.5887
Epoch 858/1000
733/733 - 17s - loss: 2.6105 - root_mean_squared_

Epoch 951/1000
733/733 - 19s - loss: 2.3513 - root_mean_squared_error: 1.5334
Epoch 952/1000
733/733 - 16s - loss: 2.4262 - root_mean_squared_error: 1.5576
Epoch 953/1000
733/733 - 25s - loss: 2.5628 - root_mean_squared_error: 1.6009
Epoch 954/1000
733/733 - 16s - loss: 2.4137 - root_mean_squared_error: 1.5536
Epoch 955/1000
733/733 - 17s - loss: 2.3580 - root_mean_squared_error: 1.5356
Epoch 956/1000
733/733 - 15s - loss: 2.3284 - root_mean_squared_error: 1.5259
Epoch 957/1000
733/733 - 17s - loss: 2.4637 - root_mean_squared_error: 1.5696
Epoch 958/1000
733/733 - 23s - loss: 2.4021 - root_mean_squared_error: 1.5499
Epoch 959/1000
733/733 - 14s - loss: 2.5127 - root_mean_squared_error: 1.5852
Epoch 960/1000
733/733 - 13s - loss: 2.3915 - root_mean_squared_error: 1.5465
Epoch 961/1000
733/733 - 11s - loss: 2.4511 - root_mean_squared_error: 1.5656
Epoch 962/1000
733/733 - 15s - loss: 2.4223 - root_mean_squared_error: 1.5564
Epoch 963/1000
733/733 - 14s - loss: 2.3827 - root_mean_squared_

<keras.callbacks.History at 0x14929a804c0>

In [26]:
# make predictions
trainPredict = lstm_tot.predict(trainX)
testPredict = lstm_tot.predict(testX)

In [27]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(train_label, trainPredict))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(test_label, testPredict))
print('Test Score: %.2f RMSE' % (testScore))

Train Score: 1.23 RMSE
Test Score: 3.49 RMSE


### 2.3 Обучение полносвязной нейросети

In [28]:
BATCH_SIZE=32
hid_size = int(trainX.shape[-1])
# определение полносвязной модели
nnmodel = keras.models.Sequential()
nnmodel.add(L.Dense(hid_size, input_shape=trainX.shape[1:], activation='relu'))
nnmodel.add(L.BatchNormalization())
nnmodel.add(L.Dense(hid_size, activation='relu'))
nnmodel.add(L.Dropout(0.3))
nnmodel.add(L.BatchNormalization())
nnmodel.add(L.Dense(20, activation='relu'))
nnmodel.add(L.Dropout(0.3))
nnmodel.add(L.BatchNormalization())
nnmodel.add(L.Dense(10, activation='relu'))
nnmodel.add(L.Dense(1, activation='linear'))
nnmodel.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1, 42)             1806      
_________________________________________________________________
batch_normalization (BatchNo (None, 1, 42)             168       
_________________________________________________________________
dense_2 (Dense)              (None, 1, 42)             1806      
_________________________________________________________________
dropout (Dropout)            (None, 1, 42)             0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 1, 42)             168       
_________________________________________________________________
dense_3 (Dense)              (None, 1, 20)             860       
_________________________________________________________________
dropout_1 (Dropout)          (None, 1, 20)            

In [29]:
nnmodel.compile(optimizer='adam', loss='mean_squared_error',
                metrics=[tf.keras.metrics.RootMeanSquaredError()])
nncallbacks = [keras.callbacks.ModelCheckpoint("RESULTS/callbacks/nnmodel_at_epoch_{epoch}.h5")]

In [31]:
nnmodel.fit(trainX, train_label, batch_size=BATCH_SIZE, epochs=1000, verbose=2, callbacks=nncallbacks)

Epoch 1/1000
733/733 - 4s - loss: 39.3558 - root_mean_squared_error: 6.2734
Epoch 2/1000
733/733 - 9s - loss: 25.6415 - root_mean_squared_error: 5.0637
Epoch 3/1000
733/733 - 7s - loss: 23.9181 - root_mean_squared_error: 4.8906
Epoch 4/1000
733/733 - 8s - loss: 22.9976 - root_mean_squared_error: 4.7956
Epoch 5/1000
733/733 - 7s - loss: 22.3446 - root_mean_squared_error: 4.7270
Epoch 6/1000
733/733 - 5s - loss: 22.0340 - root_mean_squared_error: 4.6940
Epoch 7/1000
733/733 - 8s - loss: 21.4526 - root_mean_squared_error: 4.6317
Epoch 8/1000
733/733 - 7s - loss: 21.1048 - root_mean_squared_error: 4.5940
Epoch 9/1000
733/733 - 14s - loss: 20.8783 - root_mean_squared_error: 4.5693
Epoch 10/1000
733/733 - 22s - loss: 20.7902 - root_mean_squared_error: 4.5596
Epoch 11/1000
733/733 - 11s - loss: 20.4285 - root_mean_squared_error: 4.5198
Epoch 12/1000
733/733 - 12s - loss: 20.3454 - root_mean_squared_error: 4.5106
Epoch 13/1000
733/733 - 10s - loss: 20.2116 - root_mean_squared_error: 4.4957
Epo

733/733 - 4s - loss: 16.3428 - root_mean_squared_error: 4.0426
Epoch 108/1000
733/733 - 3s - loss: 16.5232 - root_mean_squared_error: 4.0649
Epoch 109/1000
733/733 - 4s - loss: 16.4669 - root_mean_squared_error: 4.0579
Epoch 110/1000
733/733 - 3s - loss: 16.6196 - root_mean_squared_error: 4.0767
Epoch 111/1000
733/733 - 3s - loss: 16.4862 - root_mean_squared_error: 4.0603
Epoch 112/1000
733/733 - 3s - loss: 16.5589 - root_mean_squared_error: 4.0693
Epoch 113/1000
733/733 - 3s - loss: 16.5277 - root_mean_squared_error: 4.0654
Epoch 114/1000
733/733 - 3s - loss: 16.4541 - root_mean_squared_error: 4.0564
Epoch 115/1000
733/733 - 3s - loss: 16.6125 - root_mean_squared_error: 4.0758
Epoch 116/1000
733/733 - 4s - loss: 16.5919 - root_mean_squared_error: 4.0733
Epoch 117/1000
733/733 - 3s - loss: 16.3562 - root_mean_squared_error: 4.0443
Epoch 118/1000
733/733 - 3s - loss: 16.4448 - root_mean_squared_error: 4.0552
Epoch 119/1000
733/733 - 4s - loss: 16.3467 - root_mean_squared_error: 4.0431
E

Epoch 213/1000
733/733 - 3s - loss: 15.7245 - root_mean_squared_error: 3.9654
Epoch 214/1000
733/733 - 3s - loss: 15.8304 - root_mean_squared_error: 3.9787
Epoch 215/1000
733/733 - 3s - loss: 15.7889 - root_mean_squared_error: 3.9735
Epoch 216/1000
733/733 - 3s - loss: 15.5767 - root_mean_squared_error: 3.9467
Epoch 217/1000
733/733 - 3s - loss: 16.0644 - root_mean_squared_error: 4.0080
Epoch 218/1000
733/733 - 3s - loss: 15.7139 - root_mean_squared_error: 3.9641
Epoch 219/1000
733/733 - 3s - loss: 15.6967 - root_mean_squared_error: 3.9619
Epoch 220/1000
733/733 - 3s - loss: 15.5321 - root_mean_squared_error: 3.9411
Epoch 221/1000
733/733 - 3s - loss: 15.7048 - root_mean_squared_error: 3.9629
Epoch 222/1000
733/733 - 3s - loss: 15.5339 - root_mean_squared_error: 3.9413
Epoch 223/1000
733/733 - 3s - loss: 15.5972 - root_mean_squared_error: 3.9493
Epoch 224/1000
733/733 - 3s - loss: 15.4164 - root_mean_squared_error: 3.9264
Epoch 225/1000
733/733 - 4s - loss: 15.5200 - root_mean_squared_

733/733 - 3s - loss: 15.1918 - root_mean_squared_error: 3.8977
Epoch 319/1000
733/733 - 4s - loss: 14.9531 - root_mean_squared_error: 3.8669
Epoch 320/1000
733/733 - 3s - loss: 15.1304 - root_mean_squared_error: 3.8898
Epoch 321/1000
733/733 - 3s - loss: 15.2193 - root_mean_squared_error: 3.9012
Epoch 322/1000
733/733 - 3s - loss: 15.4179 - root_mean_squared_error: 3.9266
Epoch 323/1000
733/733 - 3s - loss: 15.5826 - root_mean_squared_error: 3.9475
Epoch 324/1000
733/733 - 3s - loss: 15.0157 - root_mean_squared_error: 3.8750
Epoch 325/1000
733/733 - 3s - loss: 15.1636 - root_mean_squared_error: 3.8940
Epoch 326/1000
733/733 - 5s - loss: 14.9190 - root_mean_squared_error: 3.8625
Epoch 327/1000
733/733 - 5s - loss: 15.0865 - root_mean_squared_error: 3.8841
Epoch 328/1000
733/733 - 6s - loss: 15.0088 - root_mean_squared_error: 3.8741
Epoch 329/1000
733/733 - 8s - loss: 15.0042 - root_mean_squared_error: 3.8735
Epoch 330/1000
733/733 - 7s - loss: 14.9831 - root_mean_squared_error: 3.8708
E

Epoch 424/1000
733/733 - 3s - loss: 14.8914 - root_mean_squared_error: 3.8589
Epoch 425/1000
733/733 - 3s - loss: 14.8587 - root_mean_squared_error: 3.8547
Epoch 426/1000
733/733 - 3s - loss: 14.9066 - root_mean_squared_error: 3.8609
Epoch 427/1000
733/733 - 3s - loss: 14.7223 - root_mean_squared_error: 3.8370
Epoch 428/1000
733/733 - 3s - loss: 14.7554 - root_mean_squared_error: 3.8413
Epoch 429/1000
733/733 - 3s - loss: 14.7066 - root_mean_squared_error: 3.8349
Epoch 430/1000
733/733 - 3s - loss: 14.8555 - root_mean_squared_error: 3.8543
Epoch 431/1000
733/733 - 3s - loss: 14.6553 - root_mean_squared_error: 3.8282
Epoch 432/1000
733/733 - 3s - loss: 14.7492 - root_mean_squared_error: 3.8405
Epoch 433/1000
733/733 - 3s - loss: 14.6955 - root_mean_squared_error: 3.8335
Epoch 434/1000
733/733 - 3s - loss: 14.8673 - root_mean_squared_error: 3.8558
Epoch 435/1000
733/733 - 3s - loss: 14.9220 - root_mean_squared_error: 3.8629
Epoch 436/1000
733/733 - 3s - loss: 15.1976 - root_mean_squared_

733/733 - 3s - loss: 14.4842 - root_mean_squared_error: 3.8058
Epoch 530/1000
733/733 - 3s - loss: 14.7115 - root_mean_squared_error: 3.8356
Epoch 531/1000
733/733 - 3s - loss: 14.5640 - root_mean_squared_error: 3.8163
Epoch 532/1000
733/733 - 3s - loss: 14.7271 - root_mean_squared_error: 3.8376
Epoch 533/1000
733/733 - 2s - loss: 14.6975 - root_mean_squared_error: 3.8337
Epoch 534/1000
733/733 - 3s - loss: 14.7781 - root_mean_squared_error: 3.8442
Epoch 535/1000
733/733 - 3s - loss: 14.6703 - root_mean_squared_error: 3.8302
Epoch 536/1000
733/733 - 3s - loss: 14.8265 - root_mean_squared_error: 3.8505
Epoch 537/1000
733/733 - 3s - loss: 14.8366 - root_mean_squared_error: 3.8518
Epoch 538/1000
733/733 - 3s - loss: 14.3319 - root_mean_squared_error: 3.7858
Epoch 539/1000
733/733 - 3s - loss: 14.5782 - root_mean_squared_error: 3.8181
Epoch 540/1000
733/733 - 3s - loss: 14.6428 - root_mean_squared_error: 3.8266
Epoch 541/1000
733/733 - 3s - loss: 14.7496 - root_mean_squared_error: 3.8405
E

Epoch 635/1000
733/733 - 3s - loss: 14.4990 - root_mean_squared_error: 3.8078
Epoch 636/1000
733/733 - 3s - loss: 14.3057 - root_mean_squared_error: 3.7823
Epoch 637/1000
733/733 - 3s - loss: 15.2875 - root_mean_squared_error: 3.9099
Epoch 638/1000
733/733 - 3s - loss: 14.6279 - root_mean_squared_error: 3.8246
Epoch 639/1000
733/733 - 3s - loss: 14.3718 - root_mean_squared_error: 3.7910
Epoch 640/1000
733/733 - 3s - loss: 14.4495 - root_mean_squared_error: 3.8013
Epoch 641/1000
733/733 - 3s - loss: 14.2982 - root_mean_squared_error: 3.7813
Epoch 642/1000
733/733 - 3s - loss: 14.3036 - root_mean_squared_error: 3.7820
Epoch 643/1000
733/733 - 3s - loss: 14.5987 - root_mean_squared_error: 3.8208
Epoch 644/1000
733/733 - 3s - loss: 14.4346 - root_mean_squared_error: 3.7993
Epoch 645/1000
733/733 - 3s - loss: 14.4584 - root_mean_squared_error: 3.8024
Epoch 646/1000
733/733 - 3s - loss: 14.4642 - root_mean_squared_error: 3.8032
Epoch 647/1000
733/733 - 3s - loss: 14.4125 - root_mean_squared_

733/733 - 3s - loss: 14.2936 - root_mean_squared_error: 3.7807
Epoch 741/1000
733/733 - 3s - loss: 14.4659 - root_mean_squared_error: 3.8034
Epoch 742/1000
733/733 - 3s - loss: 14.1609 - root_mean_squared_error: 3.7631
Epoch 743/1000
733/733 - 3s - loss: 14.2629 - root_mean_squared_error: 3.7766
Epoch 744/1000
733/733 - 3s - loss: 14.3713 - root_mean_squared_error: 3.7909
Epoch 745/1000
733/733 - 3s - loss: 14.3651 - root_mean_squared_error: 3.7901
Epoch 746/1000
733/733 - 3s - loss: 14.6629 - root_mean_squared_error: 3.8292
Epoch 747/1000
733/733 - 3s - loss: 14.4226 - root_mean_squared_error: 3.7977
Epoch 748/1000
733/733 - 3s - loss: 14.3207 - root_mean_squared_error: 3.7843
Epoch 749/1000
733/733 - 3s - loss: 14.3428 - root_mean_squared_error: 3.7872
Epoch 750/1000
733/733 - 3s - loss: 14.4622 - root_mean_squared_error: 3.8029
Epoch 751/1000
733/733 - 3s - loss: 14.2164 - root_mean_squared_error: 3.7705
Epoch 752/1000
733/733 - 3s - loss: 14.3559 - root_mean_squared_error: 3.7889
E

Epoch 846/1000
733/733 - 3s - loss: 14.1683 - root_mean_squared_error: 3.7641
Epoch 847/1000
733/733 - 3s - loss: 14.3881 - root_mean_squared_error: 3.7932
Epoch 848/1000
733/733 - 3s - loss: 14.2427 - root_mean_squared_error: 3.7740
Epoch 849/1000
733/733 - 6s - loss: 14.3618 - root_mean_squared_error: 3.7897
Epoch 850/1000
733/733 - 8s - loss: 14.1331 - root_mean_squared_error: 3.7594
Epoch 851/1000
733/733 - 6s - loss: 14.2055 - root_mean_squared_error: 3.7690
Epoch 852/1000
733/733 - 4s - loss: 14.0596 - root_mean_squared_error: 3.7496
Epoch 853/1000
733/733 - 5s - loss: 14.7354 - root_mean_squared_error: 3.8387
Epoch 854/1000
733/733 - 4s - loss: 14.3073 - root_mean_squared_error: 3.7825
Epoch 855/1000
733/733 - 5s - loss: 14.2322 - root_mean_squared_error: 3.7726
Epoch 856/1000
733/733 - 4s - loss: 14.1233 - root_mean_squared_error: 3.7581
Epoch 857/1000
733/733 - 3s - loss: 14.2545 - root_mean_squared_error: 3.7755
Epoch 858/1000
733/733 - 4s - loss: 14.2764 - root_mean_squared_

733/733 - 4s - loss: 14.4578 - root_mean_squared_error: 3.8023
Epoch 952/1000
733/733 - 3s - loss: 14.2379 - root_mean_squared_error: 3.7733
Epoch 953/1000
733/733 - 3s - loss: 14.1363 - root_mean_squared_error: 3.7598
Epoch 954/1000
733/733 - 4s - loss: 14.2710 - root_mean_squared_error: 3.7777
Epoch 955/1000
733/733 - 4s - loss: 14.2342 - root_mean_squared_error: 3.7728
Epoch 956/1000
733/733 - 4s - loss: 14.1876 - root_mean_squared_error: 3.7666
Epoch 957/1000
733/733 - 4s - loss: 14.2012 - root_mean_squared_error: 3.7684
Epoch 958/1000
733/733 - 3s - loss: 14.6682 - root_mean_squared_error: 3.8299
Epoch 959/1000
733/733 - 3s - loss: 14.2984 - root_mean_squared_error: 3.7813
Epoch 960/1000
733/733 - 3s - loss: 14.1383 - root_mean_squared_error: 3.7601
Epoch 961/1000
733/733 - 3s - loss: 13.9086 - root_mean_squared_error: 3.7294
Epoch 962/1000
733/733 - 4s - loss: 14.1374 - root_mean_squared_error: 3.7600
Epoch 963/1000
733/733 - 4s - loss: 13.8721 - root_mean_squared_error: 3.7245
E

<keras.callbacks.History at 0x14970d4b520>

In [32]:
# make predictions
trainPredict = nnmodel.predict(trainX)
testPredict = nnmodel.predict(testX)

In [33]:
# reshape data
train_pred = []
for x in trainPredict:
    for y in x:
        for z in y:
            train_pred.append(z)

test_pred = []
for u in testPredict:
    for v in u:
        for w in v:
            test_pred.append(w)

In [34]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(train_label, train_pred))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(test_label, test_pred))
print('Test Score: %.2f RMSE' % (testScore))

Train Score: 5.97 RMSE
Test Score: 6.48 RMSE
