In [2]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
import math
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [9]:
def get_predictions_by_column(data, month):
    
    scaler = MinMaxScaler(feature_range=(0,1))
    scaled_data = scaler.fit_transform(data.values.reshape(-1, 1)) 
    scaled_data = scaled_data.reshape(len(scaled_data))
    
    adf_test = adfuller(scaled_data)
    
    training_data_len = math.ceil(len(scaled_data) * .8)
    train = scaled_data[0:training_data_len]
    test = scaled_data[training_data_len:]

    model = SARIMAX(train, 
                order = (0, 1, 0), 
                seasonal_order = (0, 1, 0, 12))
    result = model.fit()
    
    start_test = len(train)
    end_test = len(train) + len(test) - 1
    predictions = result.predict(start_test, end_test)
        
    start = len(data)
    end = (len(data) - 1) + month
    forecast = result.predict(start, end)
    forecast = scaler.inverse_transform(forecast.reshape(-1, 1))
    forecast = forecast.reshape(len(forecast))
    
    metrics = []
    metrics.append(mean_squared_error(test, predictions))
    metrics.append(np.sqrt(mean_squared_error(test, predictions)))
        
    return forecast, adf_test[1], metrics

In [10]:
def get_table_of_predictions(data, month):
    
    d_of_values = {}
    count = len(data.columns.tolist())
    
    for i in range(1, count):
        res, adf, metrics = get_predictions_by_column(data.iloc[:,i], month)
        d_of_values[data.columns.tolist()[i]] = res
        print(f'For {i} dataset p-value = {adf}, MSE = {metrics[0]}, RMSE = {metrics[1]}')
    
    df = pd.DataFrame(d_of_values) 
    
    return df

In [11]:
data = pd.read_csv('Общий.csv')
month = 17
df = get_table_of_predictions(data, month)
df

For 1 dataset p-value = 0.5202987796002655, MSE = 1.465865133767084, RMSE = 1.2107291744098199
For 2 dataset p-value = 0.9990879026048354, MSE = 0.0961387480829995, RMSE = 0.3100624906095536
For 3 dataset p-value = 0.7971746391473259, MSE = 0.06631961597029688, RMSE = 0.25752595203259976
For 4 dataset p-value = 0.052300213164295034, MSE = 1.2384695409210986e-07, RMSE = 0.0003519189595519256
For 5 dataset p-value = 0.9951734124192901, MSE = 0.025275318261886826, RMSE = 0.15898213189502405
For 6 dataset p-value = 0.058665453546698804, MSE = 0.1588074524553989, RMSE = 0.39850652749409127


  warn("Maximum Likelihood optimization failed to converge. "


Unnamed: 0,AFLT,LKOH,MAGN,SBER,SIBN,SNGS
0,460.01,3283.999934,67.49,280.95,372.849999,10.825
1,462.81,3178.999934,67.145,284.75,347.55,10.88
2,470.56,3015.999934,64.415,290.15,344.2,9.34
3,474.78,2833.999912,66.4,299.16,348.9,9.045
4,478.58,2947.999912,64.68,299.6,353.15,8.445
5,478.78,3130.999912,70.175,305.75,357.6,6.445
6,491.73,3176.999912,71.195,310.1,363.9,6.43
7,510.01,3331.999912,69.24,311.94,368.7,5.95
8,523.03,3363.999912,72.54,314.0,376.5,2.255
9,527.91,3451.999912,75.125,325.3,391.7,5.21
