# 1. Импортируем библиотеки и данные

In [None]:
# Импортируем библиотеки:

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import random as rnd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.ar_model import AR, ARResults
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
# Функция: загрузка данных из базы планирования

def data_load_():
    '''Load the planning database and its dictionary from tab-separated CSV files.

    Reads ``fot_database.csv`` (indexed by its parsed ``report_date`` column)
    and ``fot_database_dict.csv`` from the current working directory, collects
    the unique values of the ``tb_korr``, ``gosb_korr``, ``segm_korr`` and
    ``type`` columns and creates an empty table for storing forecasts.

    Returns:
        tuple: ``(fot_database, fot_database_dict, forecasts_table, all_tb,
        all_gosb, all_segm, all_type)``.

    Raises:
        Exception: the original error raised while reading the files or
            accessing the expected columns (for example ``FileNotFoundError``
            or ``KeyError``). The failure message is printed before the error
            is re-raised.
    '''
    try:
        fot_database = pd.read_csv('fot_database.csv', encoding='utf8', sep='\t', index_col='report_date', parse_dates=True)
        fot_database_dict = pd.read_csv('fot_database_dict.csv', encoding='utf8', sep='\t')

        all_tb = fot_database['tb_korr'].unique()
        all_gosb = fot_database['gosb_korr'].unique()
        all_segm = fot_database['segm_korr'].unique()
        all_type = fot_database['type'].unique()
        forecasts_table = pd.DataFrame(columns=[
            'SNaive', 'St_m', 'HW', 'SARIMA', 'LSTM', 'Complex', 'tb_gosb_segm_type',
            'SNaive_MAPE', 'St_m_MAPE', 'HW_MAPE', 'SARIMA_MAPE', 'LSTM_MAPE', 'Complex_MAPE',
            'St_m_param', 'HW_param', 'SARIMA_param',
            'Train_period_start', 'Train_period_end', 'Test_period_start', 'Test_period_end',
            'Train_period_len', 'Test_period_len',
            'Train_len / (Train+Test)_len', 'Test_len / Forecast_len',
            'Forecast_type', 'Date_time', 'Comment',
        ])
    except Exception:
        # Report the failure but keep the real cause: nothing useful can be
        # returned when loading fails, so the error must reach the caller.
        print('Ошибка загрузки')
        raise
    print('Данные успешно загружены')
    return fot_database, fot_database_dict, forecasts_table, all_tb, all_gosb, all_segm, all_type

In [None]:
# Загружаем данные из базы планирования:

# Load everything in one call and unpack the returned 7-tuple into the
# working variables used by the rest of the notebook.
data_loaded = data_load_()
(
    fot_database,
    fot_database_dict,
    forecasts_table,
    all_tb,
    all_gosb,
    all_segm,
    all_type,
) = data_loaded

# 2. Выберем сегменты и отделения для прогноза

In [None]:
# Функция: выбор сегментов и отделений для прогноза

def branch_choice(fot_database_dict, all_tb, all_gosb, all_segm, all_type):
    '''Interactively choose the branches and segments to forecast.

    Prompts (via ``input``) for four space-separated lists: TB, GOSB, segment
    and type. Answering exactly ``all`` or ``все`` for a prompt selects every
    value from the corresponding ``all_*`` argument.

    Args:
        fot_database_dict: DataFrame whose ``fot_database_dict`` column holds
            the known ``{tb}_{gosb}_{segm}_{type}`` keys.
        all_tb: values used when "all" is answered for TB.
        all_gosb: values used when "all" is answered for GOSB.
        all_segm: values used when "all" is answered for the segment.
        all_type: values used when "all" is answered for the type.

    Returns:
        list[str]: every ``{tb}_{gosb}_{segm}_{type}`` combination of the
        chosen values that is present in the dictionary, in nested-loop order
        (TB, then GOSB, then segment, then type).
    '''
    print('Укажите отделения и сегменты для построения прогноза. Можно указать несколько отделений или сегментов через пробел. Для выбора всех отделений или сегментов используйте слово "all" или "все" без кавычек. Необходимо соблюдение регистра: ')

    def ask(prompt, all_values):
        # One prompt: either the typed tokens or the full list of values.
        answer = input(prompt).split(' ')
        if answer == ['all'] or answer == ['все']:
            return all_values
        return answer

    tb_choice = ask('ТБ: ', all_tb)
    gosb_choice = ask('ГОСБ: ', all_gosb)
    segm_choice = ask('Сегмент (КФИ, СКМ, КСБ, ММБ, РГС_МВД, РГС_МО, РГС_МЧС, РГС_Росгвардия, РГС_ФСИН, РГС_ФССП, РГС_малые_холдинги, РГС_Минздрав, РГС_несиловой, БМО): ', all_segm)
    type_choice = ask('Тип (реестр, ЕПП): ', all_type)

    # Build the lookup once; a set gives constant-time membership tests
    # instead of scanning the dictionary column for every combination.
    known_keys = set(fot_database_dict['fot_database_dict'].unique())

    tb_gosb_segm_type_choice = []
    for tb in tb_choice:
        for gosb in gosb_choice:
            for segm in segm_choice:
                for tp in type_choice:
                    key = f'{tb}_{gosb}_{segm}_{tp}'
                    if key in known_keys:
                        tb_gosb_segm_type_choice.append(key)
    return tb_gosb_segm_type_choice

# 3. Выберем границы обучающего, тестового и прогнозного периода

In [None]:
# Функция: определение границ периодов

def date_choice(i, fot_database, tb_gosb_segm_type_choice, auto_choice_date, train_date_start, train_date_end, test_date_end, forecast_date_end):
    '''Determine the training, test and forecast periods for one series.

    Selects the ``av_salary`` rows of ``fot_database`` whose
    ``tb_gosb_segm_type`` equals ``tb_gosb_segm_type_choice[i]``, replaces NaN
    and ``+inf`` with 0 and splits the result.

    Args:
        i: position of the series key inside ``tb_gosb_segm_type_choice``.
        fot_database: DataFrame with ``av_salary`` and ``tb_gosb_segm_type``
            columns; the manual mode slices it by date labels.
        tb_gosb_segm_type_choice: sequence of series keys.
        auto_choice_date: ``'yes'`` / ``'да'`` selects the automatic split
            (first 80% of rows for training, the rest for testing, and a
            forecast horizon as long as the test part). Any other value
            selects the manual split driven by the date arguments.
        train_date_start: first label of the training slice (manual mode).
        train_date_end: last label of the training slice (manual mode).
        test_date_end: last label of the test slice (manual mode).
        forecast_date_end: last date of the forecast period (manual mode).

    Returns:
        tuple: ``(av_salary_fact, train_border, train_data, test_data,
        chosen_tb_gosb_segm_type, forecast_period)`` where ``train_border`` is
        the number of training rows and ``forecast_period`` is a month-end
        ``DatetimeIndex``.
    '''
    chosen_tb_gosb_segm_type = tb_gosb_segm_type_choice[i]
    av_salary_fact = fot_database[['av_salary']][fot_database['tb_gosb_segm_type'] == chosen_tb_gosb_segm_type]
    # Clean once here so that the returned fact series and both splits agree.
    av_salary_fact = av_salary_fact.fillna(0).replace(np.inf, 0)

    # Offset object instead of the 'M' string alias, which newer pandas
    # releases deprecate in favour of 'ME'; the object works in all versions.
    month_end = pd.offsets.MonthEnd()

    if auto_choice_date in ['yes', 'да']:
        train_border = round(len(av_salary_fact) * 0.8)
        train_data = av_salary_fact[:train_border]
        test_data = av_salary_fact[train_border:]
        # Second element of a 2-step month-end range = first forecast month-end.
        forecast_period_start = pd.date_range(start=test_data.index[-1], periods=2, freq=month_end)[1]
        forecast_period = pd.date_range(start=forecast_period_start, periods=len(test_data), freq=month_end)
    else:
        train_data = av_salary_fact[train_date_start:train_date_end]
        train_border = len(train_data)
        test_date_start = pd.date_range(start=train_date_end, periods=2, freq=month_end)[1]
        test_data = av_salary_fact[test_date_start:test_date_end]
        forecast_period_start = pd.date_range(start=test_date_end, end=forecast_date_end, freq=month_end)[1]
        forecast_period = pd.date_range(start=forecast_period_start, end=forecast_date_end, freq=month_end)
    return av_salary_fact, train_border, train_data, test_data, chosen_tb_gosb_segm_type, forecast_period

# 4. Определим основные функции для построения и записи прогнозов

In [None]:
# Функция: тестирование наивного сезонного алгоритма

def SNaive_test(train_data, test_data, av_salary_fact):
    '''Back-test the seasonal naive algorithm.

    The prediction repeats the last 12 ``av_salary`` values of ``train_data``
    cyclically over the length of ``test_data``.

    Args:
        train_data: DataFrame with an ``av_salary`` column and at least 12 rows.
        test_data: DataFrame with an ``av_salary`` column; its index is reused
            for the prediction.
        av_salary_fact: not used by this function.

    Returns:
        tuple: ``(SNaive_test_prediction, SNaive_MAPE)`` - a one-column
        (``av_salary``) DataFrame of predictions and the mean absolute error
        divided by the mean of ``test_data['av_salary']``.
    '''
    train_salary = train_data['av_salary']
    last_year = [train_salary.iloc[offset] for offset in range(-12, 0)]
    # Step k of the test period reuses the value observed k % 12 positions
    # into the last training year.
    repeated = [last_year[step % 12] for step in range(len(test_data))]
    SNaive_test_prediction = pd.DataFrame(index=test_data.index, data=repeated, columns=['av_salary'])
    # Column mean taken explicitly: the mean of a whole DataFrame is a scalar
    # in pandas >= 2.0, so indexing it with [0] fails there.
    SNaive_MAPE = mean_absolute_error(test_data, SNaive_test_prediction) / test_data['av_salary'].mean()
    return SNaive_test_prediction, SNaive_MAPE

In [None]:
# Функция: прогнозирование наивным сезонным алгоритмом

def SNaive_forecast(train_data, test_data, av_salary_fact, forecast_period):
    '''Forecast with the seasonal naive algorithm.

    The training and test data are concatenated and the last 12 ``av_salary``
    values of the result are repeated cyclically over ``forecast_period``.

    Args:
        train_data: DataFrame with an ``av_salary`` column.
        test_data: DataFrame with an ``av_salary`` column.
        av_salary_fact: not used by this function.
        forecast_period: index of the forecast; its length is the horizon.

    Returns:
        DataFrame: one ``av_salary`` column indexed by ``forecast_period``.
    '''
    history = pd.concat([train_data, test_data])['av_salary']
    # Values of the most recent 12 observations, oldest first.
    last_year = [history.iloc[offset] for offset in range(-12, 0)]
    horizon = len(forecast_period)
    cycled = [last_year[step % 12] for step in range(horizon)]
    SNaive_forecast = pd.DataFrame(index=forecast_period, data=cycled[:horizon], columns=['av_salary'])
    return SNaive_forecast

In [None]:
# Функция: тестирование стандартного алгоритма (сезонность к базовому периоду):

def St_m_test(train_data, test_data, av_salary_fact, base_period):
    '''Back-test the standard model with multiplicative seasonality to a base period.

    Algorithm:
      1. Compute medians of ``base_period``-row windows of ``train_data`` that
         end at the last row, 12 rows earlier, 24 rows earlier and so on,
         while the window still fits into the data.
      2. Going back from the end in 12-row blocks, divide every ``av_salary``
         value of a block by the median of the base window that ends right
         before that block.
      3. The seasonal coefficient of each of the 12 positions is the median
         of its ratios across blocks.
      4. Each prediction is ``base * coefficient``; ``base`` starts as the
         most recent window median and, after every 12 predictions, becomes
         the median of the last ``base_period`` predictions.

    Args:
        train_data: DataFrame with an ``av_salary`` column.
        test_data: DataFrame with an ``av_salary`` column; its index is reused
            for the prediction.
        av_salary_fact: not used by this function.
        base_period: number of rows in a base window.

    Returns:
        tuple: ``(test_prediction_St_m_, St_m_MAPE)`` - a one-column
        (``av_salary``) DataFrame of predictions with NaN replaced by 0 and
        the mean absolute error divided by the mean of
        ``test_data['av_salary']``.
    '''
    salary = train_data['av_salary']

    # Step 1: medians of the base windows, most recent first.
    base_medians = []
    window_start, window_end = -base_period, 0
    while -window_start <= len(train_data):
        # ``or None`` turns the end bound 0 into an open-ended slice.
        base_medians.append(np.median(train_data[window_start:(window_end or None)]))
        window_start -= 12
        window_end -= 12

    # Step 2: ratios laid out block by block, 12 values per block.
    ratios = [
        salary.iloc[month - (block - 1) * 12] / base_medians[block]
        for block in range(1, len(base_medians))
        for month in range(-12, 0)
    ]

    # Step 3: every 12th ratio belongs to the same position within a block.
    seasonal_kf_med = [np.median(ratios[month::12]) for month in range(12)]

    # Step 4: roll the forecast forward, refreshing the base every 12 steps.
    predictions = []
    base = base_medians[0]
    for step in range(len(test_data)):
        if step and step % 12 == 0:
            base = np.median(predictions[-base_period:])
        predictions.append(base * seasonal_kf_med[step % 12])

    test_prediction_St_m_ = pd.DataFrame(data=predictions, index=test_data.index, columns=['av_salary']).fillna(0)
    # Column mean taken explicitly: the mean of a whole DataFrame is a scalar
    # in pandas >= 2.0, so indexing it with [0] fails there.
    St_m_MAPE = mean_absolute_error(test_data, test_prediction_St_m_['av_salary']) / test_data['av_salary'].mean()
    return test_prediction_St_m_, St_m_MAPE

In [None]:
# Функция: тестирование стандартного алгоритма, аддитивный вариант (сезонность как разность с базовым периодом):

def St_m_test_add(train_data, test_data, av_salary_fact, base_period):
    '''Back-test the standard model with additive seasonality to a base period.

    Algorithm:
      1. Compute medians of ``base_period``-row windows of ``train_data`` that
         end at the last row, 12 rows earlier, 24 rows earlier and so on,
         while the window still fits into the data.
      2. Going back from the end in 12-row blocks, subtract from every
         ``av_salary`` value of a block the median of the base window that
         ends right before that block.
      3. The seasonal offset of each of the 12 positions is the median of its
         differences across blocks.
      4. Each prediction is ``base + offset``; ``base`` starts as the most
         recent window median and, after every 12 predictions, becomes the
         median of the last ``base_period`` predictions.

    Args:
        train_data: DataFrame with an ``av_salary`` column.
        test_data: DataFrame with an ``av_salary`` column; its index is reused
            for the prediction.
        av_salary_fact: not used by this function.
        base_period: number of rows in a base window.

    Returns:
        tuple: ``(test_prediction_St_m_, St_m_MAPE)`` - a one-column
        (``av_salary``) DataFrame of predictions with NaN replaced by 0 and
        the mean absolute error divided by the mean of
        ``test_data['av_salary']``.
    '''
    salary = train_data['av_salary']

    # Step 1: medians of the base windows, most recent first.
    base_medians = []
    window_start, window_end = -base_period, 0
    while -window_start <= len(train_data):
        # ``or None`` turns the end bound 0 into an open-ended slice.
        base_medians.append(np.median(train_data[window_start:(window_end or None)]))
        window_start -= 12
        window_end -= 12

    # Step 2: differences laid out block by block, 12 values per block.
    differences = [
        salary.iloc[month - (block - 1) * 12] - base_medians[block]
        for block in range(1, len(base_medians))
        for month in range(-12, 0)
    ]

    # Step 3: every 12th difference belongs to the same position within a block.
    seasonal_kf_med = [np.median(differences[month::12]) for month in range(12)]

    # Step 4: roll the forecast forward, refreshing the base every 12 steps.
    predictions = []
    base = base_medians[0]
    for step in range(len(test_data)):
        if step and step % 12 == 0:
            base = np.median(predictions[-base_period:])
        predictions.append(base + seasonal_kf_med[step % 12])

    test_prediction_St_m_ = pd.DataFrame(data=predictions, index=test_data.index, columns=['av_salary']).fillna(0)
    # Column mean taken explicitly: the mean of a whole DataFrame is a scalar
    # in pandas >= 2.0, so indexing it with [0] fails there.
    St_m_MAPE = mean_absolute_error(test_data, test_prediction_St_m_['av_salary']) / test_data['av_salary'].mean()
    return test_prediction_St_m_, St_m_MAPE

In [None]:
# Функция: прогнозирование стандартным алгоритмом (сезонность к базовому периоду):

def St_m_forecast(train_data, test_data, av_salary_fact, base_period, forecast_period):
    '''Forecast with the standard model using multiplicative seasonality to a base period.

    The training and test data are concatenated into one history. Medians of
    ``base_period``-row windows are taken at the end of the history and at
    12-row steps back from it. Each 12-row block of history is divided by the
    median of the window ending right before the block, and the per-position
    median of those ratios is the seasonal coefficient. The forecast is
    ``base * coefficient``, where ``base`` starts as the most recent window
    median and, after every 12 forecast steps, becomes the median of the last
    ``base_period`` forecast values.

    Args:
        train_data: DataFrame with an ``av_salary`` column.
        test_data: DataFrame with an ``av_salary`` column.
        av_salary_fact: not used by this function.
        base_period: number of rows in a base window.
        forecast_period: index of the forecast; its length is the horizon.

    Returns:
        DataFrame: one ``av_salary`` column indexed by ``forecast_period``.
    '''
    history = pd.concat([train_data, test_data])
    salary = history['av_salary']

    # Base-window medians, most recent window first.
    window_medians = []
    lo, hi = -base_period, 0
    while -lo <= len(history):
        # ``or None`` turns the end bound 0 into an open-ended slice.
        window_medians.append(np.median(history[lo:(hi or None)]))
        lo, hi = lo - 12, hi - 12

    # Ratios of each 12-row block to the base median preceding it.
    ratios = [
        salary.iloc[month - (block - 1) * 12] / window_medians[block]
        for block in range(1, len(window_medians))
        for month in range(-12, 0)
    ]

    # Every 12th ratio describes the same position within a block.
    coefficients = [np.median(ratios[position::12]) for position in range(12)]

    values = []
    base = window_medians[0]
    for step in range(len(forecast_period)):
        if step and step % 12 == 0:
            # A full forecast year is done: re-base on the latest forecasts.
            base = np.median(values[-base_period:])
        values.append(base * coefficients[step % 12])

    forecast_St_m_ = pd.DataFrame(data=values, index=forecast_period, columns=['av_salary'])
    return forecast_St_m_

In [None]:
# Функция: прогнозирование стандартным алгоритмом, аддитивный вариант (сезонность как разность с базовым периодом):

def St_m_forecast_add(train_data, test_data, av_salary_fact, base_period, forecast_period):
    '''Forecast with the standard model using additive seasonality to a base period.

    The training and test data are concatenated into one history. Medians of
    ``base_period``-row windows are taken at the end of the history and at
    12-row steps back from it. From each 12-row block of history the median
    of the window ending right before the block is subtracted, and the
    per-position median of those differences is the seasonal offset. The
    forecast is ``base + offset``, where ``base`` starts as the most recent
    window median and, after every 12 forecast steps, becomes the median of
    the last ``base_period`` forecast values.

    Args:
        train_data: DataFrame with an ``av_salary`` column.
        test_data: DataFrame with an ``av_salary`` column.
        av_salary_fact: not used by this function.
        base_period: number of rows in a base window.
        forecast_period: index of the forecast; its length is the horizon.

    Returns:
        DataFrame: one ``av_salary`` column indexed by ``forecast_period``.
    '''
    history = pd.concat([train_data, test_data])
    salary = history['av_salary']

    # Base-window medians, most recent window first.
    window_medians = []
    lo, hi = -base_period, 0
    while -lo <= len(history):
        # ``or None`` turns the end bound 0 into an open-ended slice.
        window_medians.append(np.median(history[lo:(hi or None)]))
        lo, hi = lo - 12, hi - 12

    # Differences between each 12-row block and the base median preceding it.
    differences = [
        salary.iloc[month - (block - 1) * 12] - window_medians[block]
        for block in range(1, len(window_medians))
        for month in range(-12, 0)
    ]

    # Every 12th difference describes the same position within a block.
    offsets = [np.median(differences[position::12]) for position in range(12)]

    values = []
    base = window_medians[0]
    for step in range(len(forecast_period)):
        if step and step % 12 == 0:
            # A full forecast year is done: re-base on the latest forecasts.
            base = np.median(values[-base_period:])
        values.append(base + offsets[step % 12])

    forecast_St_m_ = pd.DataFrame(data=values, index=forecast_period, columns=['av_salary'])
    return forecast_St_m_

In [None]:
# Функция: тестирование алгоритма тройного экспоненциального сглаживания Хольта-Винтерса

def HW_test(train_data, test_data, av_salary_fact, trend_dict, seasonal_dict):
    '''Back-test Holt-Winters triple exponential smoothing over a parameter grid.

    Fits ``ExponentialSmoothing`` (seasonal period 12) on
    ``train_data['av_salary']`` for every combination of trend and seasonal
    settings, forecasts ``len(test_data)`` steps and keeps the combination
    with the lowest mean absolute error against ``test_data``.

    Args:
        train_data: DataFrame with an ``av_salary`` column.
        test_data: DataFrame with an ``av_salary`` column.
        av_salary_fact: not used by this function.
        trend_dict: iterable of values for the ``trend`` argument.
        seasonal_dict: iterable of values for the ``seasonal`` argument.

    Returns:
        tuple: ``(test_prediction_HW, best_trend_HW, best_seasonal_HW,
        HW_MAPE)`` where ``HW_MAPE`` is the best mean absolute error divided
        by the mean of ``test_data['av_salary']``.

    Raises:
        ValueError: if no combination could be fitted and evaluated.
    '''
    best_err_HW = np.inf
    best_fit = None
    for trend in trend_dict:
        for seasonal in seasonal_dict:
            try:
                fitted_model_HW = ExponentialSmoothing(train_data['av_salary'], trend=trend, seasonal=seasonal, seasonal_periods=12).fit()
                prediction = fitted_model_HW.forecast(len(test_data))
                MAE_HW = mean_absolute_error(test_data, prediction)
            except Exception:
                # Best-effort grid search: a combination that cannot be fitted
                # or scored is skipped. ``Exception`` (not a bare ``except``)
                # keeps Ctrl+C able to stop the search.
                continue
            if MAE_HW < best_err_HW:
                best_err_HW = MAE_HW
                # Keep the winning forecast so the model need not be refitted.
                best_fit = (prediction, trend, seasonal)

    if best_fit is None:
        raise ValueError('HW_test: no trend/seasonal combination could be fitted and evaluated')

    test_prediction_HW, best_trend_HW, best_seasonal_HW = best_fit
    # Column mean taken explicitly: the mean of a whole DataFrame is a scalar
    # in pandas >= 2.0, so indexing it with [0] fails there.
    HW_MAPE = best_err_HW / test_data['av_salary'].mean()
    return test_prediction_HW, best_trend_HW, best_seasonal_HW, HW_MAPE

In [None]:
# Функция: прогноз алгоритмом тройного экспоненциального сглаживания Хольта-Винтерса

def HW_forecast(train_data, test_data, av_salary_fact, best_trend_HW, best_seasonal_HW, forecast_period):
    '''Forecast with Holt-Winters triple exponential smoothing.

    The model (seasonal period 12) is fitted on the ``av_salary`` column of
    the concatenated training and test data.

    Args:
        train_data: DataFrame with an ``av_salary`` column.
        test_data: DataFrame with an ``av_salary`` column.
        av_salary_fact: not used by this function.
        best_trend_HW: value passed as the ``trend`` argument.
        best_seasonal_HW: value passed as the ``seasonal`` argument.
        forecast_period: only its length is used, as the forecast horizon.

    Returns:
        The result of the fitted model's ``forecast`` call for the horizon.
    '''
    history = pd.concat([train_data, test_data])['av_salary']
    horizon = len(forecast_period)
    smoother = ExponentialSmoothing(
        history,
        trend=best_trend_HW,
        seasonal=best_seasonal_HW,
        seasonal_periods=12,
    )
    return smoother.fit().forecast(horizon)

In [None]:
# Функция: тестирование сезонной авторегрессионной интегрированной модели скользящего среднего Бокса-Дженкинса

def SARIMA_test(train_data, test_data, av_salary_fact, p_, d_, q_, P_, D_, Q_):
    '''Back-test a seasonal ARIMA (SARIMAX) model over a parameter grid.

    Fits ``SARIMAX`` with order ``(p, d, q)`` and seasonal order
    ``(P, D, Q, 12)`` on ``train_data['av_salary']`` for every combination of
    the given candidates, predicts the ``len(test_data)`` steps following the
    training data and keeps the combination with the lowest mean absolute
    error against ``test_data``.

    Args:
        train_data: DataFrame with an ``av_salary`` column.
        test_data: DataFrame with an ``av_salary`` column.
        av_salary_fact: not used by this function.
        p_, d_, q_: iterables of candidate non-seasonal orders.
        P_, D_, Q_: iterables of candidate seasonal orders.

    Returns:
        tuple: ``(test_prediction_SARIMA, SARIMA_MAPE, best_p, best_d, best_q,
        best_P, best_D, best_Q)`` where ``SARIMA_MAPE`` is the best mean
        absolute error divided by the mean of ``test_data['av_salary']``.

    Raises:
        ValueError: if no combination could be fitted and evaluated.
    '''
    # Local import: the file's top-level imports do not include itertools.
    from itertools import product

    first_step = len(train_data)
    last_step = len(train_data) + len(test_data) - 1

    best_err_SARIMA = np.inf
    best_fit = None
    # ``product`` iterates in the same order as six nested loops (p outermost).
    for p, d, q, P, D, Q in product(p_, d_, q_, P_, D_, Q_):
        try:
            results = SARIMAX(train_data['av_salary'], order=(p, d, q), seasonal_order=(P, D, Q, 12)).fit()
            prediction = results.predict(start=first_step, end=last_step, dynamic=False, typ='levels')
            MAE_SARIMA = mean_absolute_error(test_data, prediction)
        except Exception:
            # Best-effort grid search: a combination that cannot be fitted or
            # scored is skipped. ``Exception`` (not a bare ``except``) keeps
            # Ctrl+C able to stop the search.
            continue
        if MAE_SARIMA < best_err_SARIMA:
            best_err_SARIMA = MAE_SARIMA
            # Keep the winning prediction so the model need not be refitted.
            best_fit = (prediction, p, d, q, P, D, Q)

    if best_fit is None:
        raise ValueError('SARIMA_test: no (p, d, q) x (P, D, Q, 12) combination could be fitted and evaluated')

    test_prediction_SARIMA, best_p, best_d, best_q, best_P, best_D, best_Q = best_fit
    # Column mean taken explicitly: the mean of a whole DataFrame is a scalar
    # in pandas >= 2.0, so indexing it with [0] fails there.
    SARIMA_MAPE = best_err_SARIMA / test_data['av_salary'].mean()
    return test_prediction_SARIMA, SARIMA_MAPE, best_p, best_d, best_q, best_P, best_D, best_Q

In [None]:
# Функция: сезонная авторегрессионная интегрированная модель скользящего среднего Бокса-Дженкинса

def SARIMA_forecast(train_data, test_data, av_salary_fact, best_p, best_d, best_q, best_P, best_D, best_Q, forecast_period):
    '''Forecast with a seasonal ARIMA (SARIMAX) model.

    The model is fitted on the ``av_salary`` column of the concatenated
    training and test data with order ``(best_p, best_d, best_q)`` and
    seasonal order ``(best_P, best_D, best_Q, 12)``.

    Args:
        train_data: DataFrame with an ``av_salary`` column.
        test_data: DataFrame with an ``av_salary`` column.
        av_salary_fact: not used by this function.
        best_p, best_d, best_q: non-seasonal order.
        best_P, best_D, best_Q: seasonal order (period fixed at 12).
        forecast_period: only its length is used, as the forecast horizon.

    Returns:
        The result of the fitted model's ``predict`` call for the
        ``len(forecast_period)`` steps following the history.
    '''
    history = pd.concat([train_data, test_data])
    first_step = len(history)
    last_step = first_step + len(forecast_period) - 1
    fitted = SARIMAX(
        history['av_salary'],
        order=(best_p, best_d, best_q),
        seasonal_order=(best_P, best_D, best_Q, 12),
    ).fit()
    return fitted.predict(start=first_step, end=last_step, dynamic=False, typ='levels')

In [None]:
def LSTM_test(train_data, test_data, av_salary_fact):
    '''Back-test an LSTM network on the test period.

    The training data is min-max scaled, a network with one LSTM layer
    (100 units) and one dense output is trained for 100 epochs on sliding
    windows of 12 values, and the test period is predicted recursively: every
    prediction is appended to the input window used for the next step.

    Args:
        train_data: DataFrame used to fit the scaler and train the network.
        test_data: DataFrame with an ``av_salary`` column; its length is the
            number of predicted steps and its index is reused for the result.
        av_salary_fact: not used by this function.

    Returns:
        tuple: ``(test_prediction_LSTM_df, LSTM_MAPE)`` - a DataFrame with one
        ``LSTM`` column of predictions in the original scale and the mean
        absolute error divided by the mean of ``test_data['av_salary']``.
    '''
    scaler = MinMaxScaler()
    scaler.fit(train_data)
    scaled_train = scaler.transform(train_data)
    n_input = 12
    n_features = 1
    generator = TimeseriesGenerator(scaled_train, scaled_train, length=n_input, batch_size=1)
    model = Sequential()
    model.add(LSTM(100, activation='relu', input_shape=(n_input, n_features)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    # NOTE(review): recent Keras releases replace ``fit_generator`` with
    # ``fit`` - confirm against the installed version before upgrading.
    model.fit_generator(generator, epochs=100)

    test_predictions = []
    # Seed the rolling window with the last ``n_input`` training values.
    current_batch = scaled_train[-n_input:].reshape((1, n_input, n_features))
    for _ in range(len(test_data)):
        current_pred = model.predict(current_batch)[0]
        test_predictions.append(current_pred)
        # Drop the oldest value and append the new prediction.
        current_batch = np.append(current_batch[:, 1:, :], [[current_pred]], axis=1)

    true_predictions = scaler.inverse_transform(test_predictions)
    # Column mean taken explicitly: the mean of a whole DataFrame is a scalar
    # in pandas >= 2.0, so indexing it with [0] fails there.
    LSTM_MAPE = mean_absolute_error(test_data, true_predictions) / test_data['av_salary'].mean()
    test_prediction_LSTM_df = pd.DataFrame(index=test_data.index, data=true_predictions, columns=['LSTM'])
    return test_prediction_LSTM_df, LSTM_MAPE

In [None]:
def LSTM_forecast(train_data, test_data, av_salary_fact, forecast_period):
    '''Forecast with an LSTM network trained on the full history.

    The concatenated training and test data is min-max scaled, a network with
    one LSTM layer (100 units) and one dense output is trained for 100 epochs
    on sliding windows of 12 values, and the forecast is produced recursively:
    every prediction is appended to the input window used for the next step.

    Args:
        train_data: first part of the history (DataFrame).
        test_data: second part of the history (DataFrame).
        av_salary_fact: not used by this function.
        forecast_period: index of the forecast; its length is the horizon.

    Returns:
        DataFrame: one ``LSTM`` column in the original scale, indexed by
        ``forecast_period``.
    '''
    window = 12
    feature_count = 1

    history = pd.concat([train_data, test_data])
    scaler = MinMaxScaler()
    scaled_history = scaler.fit(history).transform(history)

    windows = TimeseriesGenerator(scaled_history, scaled_history, length=window, batch_size=1)
    network = Sequential()
    network.add(LSTM(100, activation='relu', input_shape=(window, feature_count)))
    network.add(Dense(1))
    network.compile(optimizer='adam', loss='mse')
    network.fit_generator(windows, epochs=100)

    # Start from the most recent ``window`` scaled observations.
    rolling = scaled_history[-window:].reshape((1, window, feature_count))
    scaled_forecast = []
    for _ in range(len(forecast_period)):
        next_value = network.predict(rolling)[0]
        scaled_forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new prediction.
        rolling = np.append(rolling[:, 1:, :], [[next_value]], axis=1)

    forecast_values = scaler.inverse_transform(scaled_forecast)
    return pd.DataFrame(index=forecast_period, data=forecast_values, columns=['LSTM'])

In [None]:
# Функция: тестирование комплексной модели прогнозирования

def Complex_test(auto_choice_models, SNaive_choice, St_m_choice, HW_choice, SARIMA_choice, SNaive_test_prediction, test_prediction_St_m_, test_prediction_HW, test_prediction_SARIMA, test_prediction_LSTM, LSTM_final):
    '''Back-test the combined model: the per-step median of the enabled models.

    A model takes part when ``auto_choice_models`` is ``'yes'`` / ``'да'`` or
    when its own ``*_choice`` flag is ``'yes'`` / ``'да'``. Predictions of
    models that are not enabled are never read.

    NOTE: this function reads two module-level names that are not parameters:
    ``test_data`` (actual values; gives the number of steps, the result index
    and the error baseline) and ``LSTM_choice`` (flag for the LSTM model, only
    consulted when ``auto_choice_models`` is not affirmative).

    Args:
        auto_choice_models: affirmative value enables every model.
        SNaive_choice, St_m_choice, HW_choice, SARIMA_choice: per-model flags.
        SNaive_test_prediction: object with an ``av_salary`` column.
        test_prediction_St_m_: object with an ``av_salary`` column.
        test_prediction_HW: one-dimensional sequence of predictions.
        test_prediction_SARIMA: one-dimensional sequence of predictions.
        test_prediction_LSTM: object with an ``LSTM`` column.
        LSTM_final: not used by this function.

    Returns:
        tuple: ``(Complex_test_prediction, Complex_MAPE)`` - a one-column
        (``av_salary``) DataFrame indexed like ``test_data`` and the mean
        absolute error divided by the mean of ``test_data['av_salary']``.
    '''
    affirmative = ['yes', 'да']
    use_all = auto_choice_models in affirmative

    # Plain float arrays are indexed by position; ``series[i]`` on a
    # date-indexed Series would depend on pandas' deprecated positional fallback.
    selected = []
    if use_all or SNaive_choice in affirmative:
        selected.append(np.asarray(SNaive_test_prediction['av_salary'], dtype=float))
    if use_all or St_m_choice in affirmative:
        selected.append(np.asarray(test_prediction_St_m_['av_salary'], dtype=float))
    if use_all or HW_choice in affirmative:
        selected.append(np.asarray(test_prediction_HW, dtype=float))
    if use_all or SARIMA_choice in affirmative:
        selected.append(np.asarray(test_prediction_SARIMA, dtype=float))
    if use_all or LSTM_choice in affirmative:
        selected.append(np.asarray(test_prediction_LSTM['LSTM'], dtype=float))

    # NaN-aware median across the enabled models; NaN when nothing is enabled.
    Complex_list = [
        np.nanmedian([values[step] for values in selected]) if selected else np.nan
        for step in range(len(test_data))
    ]
    Complex_test_prediction = pd.DataFrame(data=Complex_list, index=test_data.index, columns=['av_salary'])
    # Column mean taken explicitly: the mean of a whole DataFrame is a scalar
    # in pandas >= 2.0, so indexing it with [0] fails there.
    Complex_MAPE = mean_absolute_error(test_data, Complex_test_prediction['av_salary']) / test_data['av_salary'].mean()
    return Complex_test_prediction, Complex_MAPE

In [None]:
# Функция: комплексная модель прогнозирования

def Complex_forecast(auto_choice_models, SNaive_choice, St_m_choice, HW_choice, SARIMA_choice, SNaive_final, St_m_final, HW_final, SARIMA_final, test_prediction_LSTM, LSTM_final):
    '''Build the combined forecast: the per-step median of the enabled models.

    A model takes part when ``auto_choice_models`` is ``'yes'`` / ``'да'`` or
    when its own ``*_choice`` flag is ``'yes'`` / ``'да'``. Forecasts of
    models that are not enabled are never read.

    NOTE: this function reads two module-level names that are not parameters:
    ``forecast_period`` (gives the number of steps and the result index) and
    ``LSTM_choice`` (flag for the LSTM model, only consulted when
    ``auto_choice_models`` is not affirmative).

    Args:
        auto_choice_models: affirmative value enables every model.
        SNaive_choice, St_m_choice, HW_choice, SARIMA_choice: per-model flags.
        SNaive_final: object with an ``av_salary`` column.
        St_m_final: object with an ``av_salary`` column.
        HW_final: one-dimensional sequence of forecasts.
        SARIMA_final: one-dimensional sequence of forecasts.
        test_prediction_LSTM: not used by this function.
        LSTM_final: object with an ``LSTM`` column.

    Returns:
        DataFrame: one ``av_salary`` column indexed by ``forecast_period``.
    '''
    affirmative = ['yes', 'да']
    use_all = auto_choice_models in affirmative

    # Plain float arrays are indexed by position; ``series[i]`` on a
    # date-indexed Series would depend on pandas' deprecated positional fallback.
    selected = []
    if use_all or SNaive_choice in affirmative:
        selected.append(np.asarray(SNaive_final['av_salary'], dtype=float))
    if use_all or St_m_choice in affirmative:
        selected.append(np.asarray(St_m_final['av_salary'], dtype=float))
    if use_all or HW_choice in affirmative:
        selected.append(np.asarray(HW_final, dtype=float))
    if use_all or SARIMA_choice in affirmative:
        selected.append(np.asarray(SARIMA_final, dtype=float))
    if use_all or LSTM_choice in affirmative:
        selected.append(np.asarray(LSTM_final['LSTM'], dtype=float))

    # NaN-aware median across the enabled models; NaN when nothing is enabled.
    Complex_list = [
        np.nanmedian([values[step] for values in selected]) if selected else np.nan
        for step in range(len(forecast_period))
    ]
    Complex_final = pd.DataFrame(data=Complex_list, index=forecast_period, columns=['av_salary'])
    return Complex_final

In [None]:
# Функция: запись прогноза в файл

def forecasts_to_table(SNaive_final, St_m_final, HW_final, SARIMA_final, Complex_final, chosen_tb_gosb_segm_type, SNaive_MAPE, HW_MAPE, SARIMA_MAPE, St_m_MAPE, Complex_MAPE, best_trend_HW, best_seasonal_HW, best_p, best_d, best_q, best_P, best_D, best_Q, best_base_period, train_data, test_data, forecasts_table, auto_choice_date, auto_choice_models, test_prediction_LSTM, LSTM_final):
    '''Append the forecasts of one series to the accumulated results table.

    The six forecast series become the columns 'SNaive', 'St_m', 'HW',
    'SARIMA', 'LSTM' and 'Complex'; every row is then annotated with the
    series label, the error metrics, the chosen model parameters, the
    train/test period boundaries and lengths, the run mode and a timestamp.

    Note: `LSTM_MAPE` and `av_salary_fact` are not parameters; they are read
    from the enclosing module scope. `test_prediction_LSTM` is accepted but
    not used by this function.

    Returns `forecasts_table` concatenated with the new rows.
    '''
    agree = ('yes', 'да')

    # One row per model, transposed so that each model ends up as a column.
    model_rows = (
        SNaive_final['av_salary'],
        St_m_final['av_salary'],
        HW_final,
        SARIMA_final,
        LSTM_final['LSTM'],
        Complex_final['av_salary'],
    )
    forecasts = pd.DataFrame(data = model_rows).transpose()
    forecasts.columns = ['SNaive', 'St_m', 'HW', 'SARIMA', 'LSTM', 'Complex']

    # Insertion order of this mapping defines the order of the added columns.
    run_info = {
        'tb_gosb_segm_type': chosen_tb_gosb_segm_type,
        'SNaive_MAPE': SNaive_MAPE,
        'St_m_MAPE': St_m_MAPE,
        'HW_MAPE': HW_MAPE,
        'SARIMA_MAPE': SARIMA_MAPE,
        'LSTM_MAPE': LSTM_MAPE,
        'Complex_MAPE': Complex_MAPE,
        'St_m_param': f'{best_base_period}',
        'HW_param': f'tr = {best_trend_HW}, se = {best_seasonal_HW}',
        'SARIMA_param': f'({best_p}, {best_d}, {best_q}) x ({best_P}, {best_D}, {best_Q}, 12)',
        'Train_period_start': train_data.index[0],
        'Train_period_end': train_data.index[-1],
        'Test_period_start': test_data.index[0],
        'Test_period_end': test_data.index[-1],
        'Train_period_len': len(train_data),
        'Test_period_len': len(test_data),
        'Train_len / (Train+Test)_len': len(train_data) / len(av_salary_fact),
        'Test_len / Forecast_len': len(test_data) / len(Complex_final),
        'Period_type': 'Auto' if auto_choice_date in agree else 'Manual',
        'Forecast_type': 'Auto' if auto_choice_models in agree else 'Manual',
        'Date_time': datetime.datetime.now().strftime("%d-%m-%Y %H:%M"),
    }
    for column_name, column_value in run_info.items():
        forecasts[column_name] = column_value

    return pd.concat([forecasts_table, forecasts])

# 5. Проведём расчёт

In [None]:
# Проведём расчёт:

# Ask the user which branches / segments to forecast:
tb_gosb_segm_type_choice = branch_choice(fot_database_dict, all_tb, all_gosb, all_segm, all_type)

# Ask the user for the period boundaries:
auto_choice_date = input('Определить границы периродов автоматически? (yes, да) / (no, нет): ')
if auto_choice_date in ('yes', 'да'):
    # Automatic mode: the same placeholder date is stored in all four
    # boundary variables.
    train_date_start = '1900-01-01'
    train_date_end = train_date_start
    test_date_end = train_date_start
    forecast_date_end = train_date_start
else:
    print('Введите даты в формате YYYY-MM-DD:')
    train_date_start = input('Начало обучающего периода: ')
    train_date_end = input('Окончание обучающего периода (должен быть не менее 12 месяцев, рекомендуется около 80% от доступных данных): ')
    test_date_end = input('Окончание тестового периода (должен быть не более обучающего, рекомендуется около 20% от доступных данных): ')
    forecast_date_end = input('Окончание прогнозного периода (рекомендуется не более тестового периода): ')

# Ask the user which algorithms to run and how to configure them:
auto_choice_models = input('Использовать все алгоритмы прогнозирования с автоматическими настройками? (yes, да) / (no, нет): ')
if auto_choice_models in ('yes', 'да'):
    # Automatic mode: every model is enabled with automatic parameter search.
    SNaive_choice = St_m_choice = HW_choice = SARIMA_choice = LSTM_choice = 'yes'
    St_m_param_auto = HW_param_auto = SARIMA_param_auto = 'yes'
else:
    SNaive_choice = input('Использовать наивный сезонный алгоритм SNaive? (yes, да) / (no, нет): ')

    St_m_choice = input('Использовать стандартный алгоритм (сезонность к базе) St_m? (yes, да) / (no, нет): ')
    if St_m_choice in ('yes', 'да'):
        St_m_param_auto = input('Использовать автоматический поиск оптимального базового периода для St_m? (yes, да) / (no, нет): ')
        if St_m_param_auto not in ('yes', 'да'):
            base_period = int(input('Укажите количество месяцев базового периода для усреднения: '))

    HW_choice = input('Использовать алгоритм тройного экспоненциального сглаживания Хольта-Винтерса HW? (yes, да) / (no, нет): ')
    if HW_choice in ('yes', 'да'):
        HW_param_auto = input('Использовать автоматический выбор тренда и сезонности для HW? (yes, да) / (no, нет): ')
        if HW_param_auto not in ('yes', 'да'):
            # Stored as one-element lists, the same shape as the automatic
            # candidate lists.
            trend_dict = [input('Выберите тип тренда: мультипликативный (mul) или аддитивный (add): ')]
            seasonal_dict = [input('Выберите тип сезонности: мультпликативная (mul) или аддитивная (add): ')]

    SARIMA_choice = input('Использовать сезонную авторегрессионную интегрированную модель скользящего среднего Бокса-Дженкинса SARIMA? (yes, да) / (no, нет): ')
    if SARIMA_choice in ('yes', 'да'):
        SARIMA_param_auto = input('Использовать автоматический выбор параметров для SARIMA (0 / 1 для всех параметров)? (yes, да) / (no, нет): ')
        if SARIMA_param_auto not in ('yes', 'да'):
            pdq = input('Введите через пробел порядок регрессии (p), дифференцирования (d), скользящего среднего (q) для временного ряда: ').split(' ')
            PDQ = input('Введите через пробел порядок регрессии (P), дифференцирования (D), скользящего среднего (Q) для сезонности: ').split(' ')
            # Each order is wrapped in a one-element list.
            p_ = [int(pdq[0])]
            d_ = [int(pdq[1])]
            q_ = [int(pdq[2])]
            P_ = [int(PDQ[0])]
            D_ = [int(PDQ[1])]
            Q_ = [int(PDQ[2])]

    LSTM_choice = input('Использовать нейросеть LSTM? (yes, да) / (no, нет): ')
    
# Build the forecasts for every selected series.
#
# For each series: resolve the train / test / forecast periods, run every
# enabled model on the test period (to measure its error) and on the forecast
# period, combine the models into the median ("Complex") forecast, plot
# everything on one chart and append the result to forecasts_table.
#
# A disabled model gets NaN placeholders for its predictions, its error and
# its parameters, so the combining and saving steps always find every name
# they expect. A parameter set counts as manual only when the answer was not
# 'yes' / 'да' -- the same rule the input prompts use -- so manually entered
# parameters are never silently replaced by the automatic search.
for i in range(0, len(tb_gosb_segm_type_choice)):
    # Fallback label: keeps the error report below meaningful (instead of a
    # stale or undefined name) when date_choice itself fails.
    chosen_tb_gosb_segm_type = f'#{i}'
    try:
        dates = date_choice(i, fot_database, tb_gosb_segm_type_choice, auto_choice_date, train_date_start, train_date_end, test_date_end, forecast_date_end)
        av_salary_fact = dates[0]
        train_border = dates[1]
        train_data = dates[2]
        test_data = dates[3]
        chosen_tb_gosb_segm_type = dates[4]
        forecast_period = dates[5]
        train_data['av_salary'].plot.line(figsize = (15, 8), color = '#41719c', title = chosen_tb_gosb_segm_type, legend = True, label = 'Обучающие данные', grid = True)
        test_data['av_salary'].plot.line(figsize = (15, 8), color = '#4c84b6', title = chosen_tb_gosb_segm_type, legend = True, label = 'Тестовые данные', grid = True)

        # Seasonal naive model:
        if (auto_choice_models in ['yes', 'да']) or (SNaive_choice in ['yes', 'да']):
            SNaive_results = SNaive_test(train_data, test_data, av_salary_fact)
            SNaive_test_prediction = SNaive_results[0]
            SNaive_MAPE = SNaive_results[1]
            SNaive_test_prediction['av_salary'].plot.line(figsize = (15, 8), color = '#ae5a21', linestyle = '--', title = chosen_tb_gosb_segm_type, legend = True, label = f'SNaive test ({round(SNaive_MAPE * 100, 1)}%)', grid = True)
            SNaive_final = SNaive_forecast(train_data, test_data, av_salary_fact, forecast_period)
            SNaive_final['av_salary'].plot.line(figsize = (15, 8), color = '#ae5a21', linestyle = '--', title = chosen_tb_gosb_segm_type, legend = True, label = 'SNaive', grid = True)
        else:
            SNaive_test_prediction = pd.DataFrame(data = [], index = test_data.index, columns = ['av_salary'])
            SNaive_MAPE = np.nan
            SNaive_final = pd.DataFrame(data = [], index = forecast_period, columns = ['av_salary'])

        # Standard model (seasonality relative to a base period):
        if (auto_choice_models in ['yes', 'да']) or (St_m_choice in ['yes', 'да']):
            if (auto_choice_models in ['yes', 'да']) or (St_m_param_auto in ['yes', 'да']):
                # Automatic search: keep the base period (3..12 months) with
                # the lowest test error, then re-run the test with it.
                best_err_St_m = float('inf')
                for base_period in range(3, 13):
                    St_m_results = St_m_test(train_data, test_data, av_salary_fact, base_period)
                    St_m_MAPE = St_m_results[1]
                    if St_m_MAPE < best_err_St_m:
                        best_err_St_m = St_m_MAPE
                        best_base_period = base_period
                St_m_results = St_m_test(train_data, test_data, av_salary_fact, best_base_period)
            else:
                # Manual mode: use the base period entered by the user.
                St_m_results = St_m_test(train_data, test_data, av_salary_fact, base_period)
                best_base_period = base_period
            test_prediction_St_m_ = St_m_results[0]
            St_m_MAPE = St_m_results[1]
            test_prediction_St_m_['av_salary'].plot.line(figsize = (15, 8), color = '#787878', linestyle = '--', title = chosen_tb_gosb_segm_type, legend = True, label = f'St_m test ({best_base_period}) ({round(St_m_MAPE * 100, 1)}%)', grid = True)
            St_m_final = St_m_forecast(train_data, test_data, av_salary_fact, best_base_period, forecast_period)
            St_m_final['av_salary'].plot.line(figsize = (15, 8), color = '#787878', linestyle = '--', title = chosen_tb_gosb_segm_type, legend = True, label = f'St_m ({best_base_period})', grid = True)
        else:
            test_prediction_St_m_ = pd.DataFrame(data = [], index = test_data.index, columns = ['av_salary'])
            St_m_MAPE = np.nan
            St_m_final = pd.DataFrame(data = [], index = forecast_period, columns = ['av_salary'])
            # forecasts_to_table formats this value unconditionally.
            best_base_period = None

        # Holt-Winters triple exponential smoothing:
        if (auto_choice_models in ['yes', 'да']) or (HW_choice in ['yes', 'да']):
            if (auto_choice_models in ['yes', 'да']) or (HW_param_auto in ['yes', 'да']):
                # Automatic mode: let HW_test choose among both options.
                trend_dict = ['mul', 'add']
                seasonal_dict = ['mul', 'add']
            HW_results = HW_test(train_data, test_data, av_salary_fact, trend_dict, seasonal_dict)
            test_prediction_HW = HW_results[0]
            best_trend_HW = HW_results[1]
            best_seasonal_HW = HW_results[2]
            HW_MAPE = HW_results[3]
            test_prediction_HW.index = test_data.index
            test_prediction_HW.plot.line(figsize = (15, 8), color = '#bc8c00', linestyle ='--', title = chosen_tb_gosb_segm_type, legend = True, label = f'HW test (tr={best_trend_HW}, se={best_seasonal_HW}) ({round(HW_MAPE * 100, 1)}%)', grid = True)
            HW_final = HW_forecast(train_data, test_data, av_salary_fact, best_trend_HW, best_seasonal_HW, forecast_period)
            HW_final.plot.line(figsize = (15, 8), color = '#bc8c00', linestyle ='--', title = chosen_tb_gosb_segm_type, legend = True, label = f'HW (tr={best_trend_HW}, se={best_seasonal_HW})', grid = True)
        else:
            test_prediction_HW = pd.DataFrame(data = [], index = test_data.index, columns = ['av_salary'])
            HW_MAPE = np.nan
            # forecasts_to_table passes HW_final as a plain series (no column
            # lookup), so the placeholder is a NaN Series, not a DataFrame.
            HW_final = pd.Series(np.nan, index = forecast_period)
            best_trend_HW = best_seasonal_HW = None

        # Seasonal ARIMA:
        if (auto_choice_models in ['yes', 'да']) or (SARIMA_choice in ['yes', 'да']):
            if (auto_choice_models in ['yes', 'да']) or (SARIMA_param_auto in ['yes', 'да']):
                # Automatic mode: try 0 and 1 for every order.
                p_ = d_ = q_ = P_ = D_ = Q_ = [0, 1]
            SARIMA_results = SARIMA_test(train_data, test_data, av_salary_fact, p_, d_, q_, P_, D_, Q_)
            test_prediction_SARIMA = SARIMA_results[0]
            SARIMA_MAPE = SARIMA_results[1]
            best_p = SARIMA_results[2]
            best_d = SARIMA_results[3]
            best_q = SARIMA_results[4]
            best_P = SARIMA_results[5]
            best_D = SARIMA_results[6]
            best_Q = SARIMA_results[7]
            test_prediction_SARIMA.plot.line(figsize = (15, 8), color = '#507e32', linestyle = '--', title = chosen_tb_gosb_segm_type, legend = True, label = f'SARIMA test ({best_p}, {best_d}, {best_q}) x ({best_P}, {best_D}, {best_Q}, 12) ({round(SARIMA_MAPE * 100, 1)}%)', grid = True)
            SARIMA_final = SARIMA_forecast(train_data, test_data, av_salary_fact, best_p, best_d, best_q, best_P, best_D, best_Q, forecast_period)
            SARIMA_final.plot.line(figsize = (15, 8), color = '#507e32', linestyle = '--', title = chosen_tb_gosb_segm_type, legend = True, label = f'SARIMA ({best_p}, {best_d}, {best_q}) x ({best_P}, {best_D}, {best_Q}, 12)', grid = True)
        else:
            test_prediction_SARIMA = pd.DataFrame(data = [], index = test_data.index, columns = ['av_salary'])
            SARIMA_MAPE = np.nan
            # Same reasoning as for HW_final: a NaN Series placeholder.
            SARIMA_final = pd.Series(np.nan, index = forecast_period)
            best_p = best_d = best_q = best_P = best_D = best_Q = None

        # LSTM neural network:
        if (auto_choice_models in ['yes', 'да']) or (LSTM_choice in ['yes', 'да']):
            LSTM_test_res = LSTM_test(train_data, test_data, av_salary_fact)
            test_prediction_LSTM = LSTM_test_res[0]
            LSTM_MAPE = LSTM_test_res[1]
            test_prediction_LSTM['LSTM'].plot.line(figsize = (15, 8), color = 'black', linestyle = '--', title = chosen_tb_gosb_segm_type, legend = True, label = f'LSTM test ({round(LSTM_MAPE * 100, 1)}%)', grid = True)
            LSTM_res = LSTM_forecast(train_data, test_data, av_salary_fact, forecast_period)
            LSTM_final = LSTM_res
            LSTM_final['LSTM'].plot.line(figsize = (15, 8), color = 'black', linestyle = '--', title = chosen_tb_gosb_segm_type, legend = True, label = f'LSTM', grid = True)
        else:
            # Without these placeholders the names below would be undefined
            # (or left over from an earlier run) when LSTM is switched off.
            test_prediction_LSTM = pd.DataFrame(data = [], index = test_data.index, columns = ['LSTM'])
            LSTM_MAPE = np.nan
            LSTM_final = pd.DataFrame(data = [], index = forecast_period, columns = ['LSTM'])

        # Combined (median) forecast:
        Complex_test_results = Complex_test(auto_choice_models, SNaive_choice, St_m_choice, HW_choice, SARIMA_choice, SNaive_test_prediction, test_prediction_St_m_, test_prediction_HW, test_prediction_SARIMA, test_prediction_LSTM, LSTM_final)
        Complex_test_prediction = Complex_test_results[0]
        Complex_MAPE = Complex_test_results[1]
        Complex_test_prediction['av_salary'].plot.line(figsize = (15, 8), color = '#5694cb', linestyle = '--', title = chosen_tb_gosb_segm_type, legend = True, label = f'Прогноз test ({round(Complex_MAPE * 100, 1)}%)', grid = True)
        Complex_final = Complex_forecast(auto_choice_models, SNaive_choice, St_m_choice, HW_choice, SARIMA_choice, SNaive_final, St_m_final, HW_final, SARIMA_final, test_prediction_LSTM, LSTM_final)
        Complex_final['av_salary'].plot.line(figsize = (15, 8), color = '#5694cb', title = chosen_tb_gosb_segm_type, legend = True, label = f'Прогноз', grid = True)

        # Show the chart:
        plt.show()
        print(f'{chosen_tb_gosb_segm_type} ok')

        # Append the forecasts of this series to the results table:
        forecasts_table = forecasts_to_table(SNaive_final, St_m_final, HW_final, SARIMA_final, Complex_final, chosen_tb_gosb_segm_type, SNaive_MAPE, HW_MAPE, SARIMA_MAPE, St_m_MAPE, Complex_MAPE, best_trend_HW, best_seasonal_HW, best_p, best_d, best_q, best_P, best_D, best_Q, best_base_period, train_data, test_data, forecasts_table, auto_choice_date, auto_choice_models, test_prediction_LSTM, LSTM_final)
    except Exception as err:
        # Only ordinary errors are handled, so Ctrl+C / kernel interrupt still
        # stops the whole run. Series plots draw on the current axes, so the
        # partially drawn figure is discarded to keep it out of the next
        # series' chart.
        plt.close()
        print(f'{chosen_tb_gosb_segm_type} error: {type(err).__name__}: {err}')

# Offer to save the accumulated forecasts to a tab-separated text file, then
# reset the results table so that re-running the cell starts a new calculation.
save_to_file = input('Расчёт окончен. Сохранить результаты в файл? (yes, да) / (no, нет): ')
if save_to_file in ['yes', 'да']:
    file_name = input('Введите имя файла без расширения, без кавычек. Если название совпадает с ранее сохранённым, файл будет заменён: ')
    file_name += '.txt'
    need_comment = input('Дополнить расчёт комментарием или пояснением? (yes, да) / (no, нет): ')
    if need_comment in ['yes', 'да']:
        user_comment = input('Введите комментарий или пояснение к Вашему расчёту: ')
        # The same comment is written to every row of the table.
        forecasts_table['Comment'] = user_comment
    forecasts_table.to_csv(file_name, sep = '\t', decimal = ',')
    print('Файл сохранён. Перезапустите ячейку для нового расчёта.')
else:
    print('Сохранение отменено. Перезапустите ячейку для нового расчёта.')

# Reset to an empty table with the same schema that data_load_() creates,
# including the 'LSTM' and 'LSTM_MAPE' columns, so the column order of the
# saved file stays the same from one calculation to the next.
forecasts_table = pd.DataFrame(columns = ['SNaive','St_m', 'HW', 'SARIMA', 'LSTM', 'Complex', 'tb_gosb_segm_type', 'SNaive_MAPE', 'St_m_MAPE', 'HW_MAPE', 'SARIMA_MAPE', 'LSTM_MAPE', 'Complex_MAPE','St_m_param', 'HW_param', 'SARIMA_param', 'Train_period_start', 'Train_period_end', 'Test_period_start', 'Test_period_end', 'Train_period_len', 'Test_period_len', 'Train_len / (Train+Test)_len', 'Test_len / Forecast_len', 'Forecast_type', 'Date_time', 'Comment'])