In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from concurrent.futures import ThreadPoolExecutor
import warnings
warnings.filterwarnings('ignore')

In [2]:
#load data
data = pd.read_csv("..\..\..\Data\Day\S&P 500 Historical Data00-20.csv")

In [3]:
#transform data
data = data.replace(",", "", regex=True)
data.Date = pd.to_datetime(data.Date)
data = data.sort_values(by=["Date"])
data.set_index('Date')
data.Price = data.Price.astype(float)
data.High = data.High.astype(float)
data.Low = data.Low.astype(float)
data.Open = data.Open.astype(float)
data["Vol."] = data["Vol."].astype(float)

In [4]:
data.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
4999,2000-01-03,1455.2,1469.2,1478.0,1438.4,,-0.95%
4998,2000-01-04,1399.4,1455.2,1455.2,1397.4,,-3.83%
4997,2000-01-05,1402.1,1399.4,1413.3,1377.7,,0.19%
4996,2000-01-06,1403.5,1402.1,1411.9,1392.0,,0.10%
4995,2000-01-07,1441.5,1403.5,1441.5,1400.5,,2.71%


In [5]:
data.index = data.index[::-1]

In [6]:
data.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2000-01-03,1455.2,1469.2,1478.0,1438.4,,-0.95%
1,2000-01-04,1399.4,1455.2,1455.2,1397.4,,-3.83%
2,2000-01-05,1402.1,1399.4,1413.3,1377.7,,0.19%
3,2000-01-06,1403.5,1402.1,1411.9,1392.0,,0.10%
4,2000-01-07,1441.5,1403.5,1441.5,1400.5,,2.71%


In [7]:
special_data = data[(data['Date'] > '2010-01-01') & (data['Date'] < '2014-01-01')]
special_data.index = data.index[:len(special_data)]

In [8]:
special_data.head(10)

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2010-01-04,1132.99,1116.56,1133.87,1116.56,,1.60%
1,2010-01-05,1136.52,1132.66,1136.63,1129.66,,0.31%
2,2010-01-06,1137.14,1135.71,1139.19,1133.95,,0.05%
3,2010-01-07,1141.69,1136.27,1142.46,1131.32,,0.40%
4,2010-01-08,1144.98,1140.52,1145.39,1136.22,,0.29%
5,2010-01-11,1146.98,1145.96,1149.74,1142.02,,0.17%
6,2010-01-12,1136.22,1143.81,1143.81,1131.77,,-0.94%
7,2010-01-13,1145.68,1137.31,1148.4,1133.18,,0.83%
8,2010-01-14,1148.46,1145.68,1150.41,1143.8,,0.24%
9,2010-01-15,1136.03,1147.72,1147.77,1131.39,,-1.08%


In [9]:
special_data = special_data.drop(columns=['Open', 'High', 'Low', 'Vol.', 'Change %'])

In [10]:
special_data.head()

Unnamed: 0,Date,Price
0,2010-01-04,1132.99
1,2010-01-05,1136.52
2,2010-01-06,1137.14
3,2010-01-07,1141.69
4,2010-01-08,1144.98


In [11]:
# Функция для построения модели ARIMA и получения прогноза
def build_arima_model_and_forecast(data, p, d, q, window_size=200, forecast_days=90):
    forecasts = []

    for i in range(len(data) - window_size - forecast_days + 1):
        window = data[i:i + window_size]
        actual_values = data['Price'][i + window_size:i + window_size + forecast_days]

        # Создаем и обучаем модель ARIMA
        model = ARIMA(window['Price'], order=(p, d, q))
        results = model.fit()

        # Делаем прогноз на 90 дней вперед
        forecast = results.get_forecast(steps=forecast_days)
        forecast_mean = forecast.predicted_mean.values

        # Рассчитываем относительное отклонение в процентах (Percentage Error)
        percentage_error = ((forecast_mean - actual_values) / actual_values) * 100

        # Добавляем результаты в список
        result = {
            'p': p,
            'd': d,
            'q': q,
            'begin_date': window['Date'].iloc[0],
            'end_date': window['Date'].iloc[-1] + pd.Timedelta(days=forecast_days - 1),
            'window_size': window_size,
            'forecast_days': forecast_days,
            'forecast_precision': np.mean(np.abs(percentage_error))
        }
        forecasts.append(result)

    return forecasts

In [14]:
import concurrent.futures
# Задаем значения p, d, q
p_values = range(1, 11)
d_values = range(1, 11)
q_values = range(1, 11)

# Создаем пустой DataFrame для сохранения результатов
columns = ['p', 'd', 'q', 'begin_date', 'end_date', 'window_size', 'forecast_days', 'forecast_precision']
results_df = pd.DataFrame(columns=columns)
 
# Проходим по всем комбинациям p, d, q и строим прогнозы в нескольких потоках
with ThreadPoolExecutor(max_workers = 20) as executor:  # Установите количество потоков по своему усмотрению
    # Создаем список задач для каждой комбинации p, d, q
    tasks = [(p, d, q) for p in p_values for d in d_values for q in q_values]
    
    # Используем цикл для распределения задач по потокам
    for args in tasks:
        # Строим прогноз для текущих значений p, d, q
        forecasts = build_arima_model_and_forecast(special_data, *args)
        results_df = pd.concat([results_df, pd.DataFrame(forecasts)], ignore_index = True)#results_df.append(forecasts, ignore_index=True)

# Выводим результаты
print(results_df)

KeyboardInterrupt: 

In [36]:
results_df

Unnamed: 0,p,d,q,begin_date,end_date,window_size,forecast_days,forecast_precision
0,1,1,1,2010-01-04,2011-01-15,200,90,6896.810112
1,1,1,1,2010-01-05,2011-01-16,200,90,9755.311733
2,1,1,1,2010-01-06,2011-01-17,200,90,8260.552916
3,1,1,1,2010-01-07,2011-01-18,200,90,8066.319072
4,1,1,1,2010-01-08,2011-01-19,200,90,7891.470740
...,...,...,...,...,...,...,...,...
6448,1,1,9,2012-10-31,2013-11-13,200,90,9585.365738
6449,1,1,9,2012-11-01,2013-11-16,200,90,12279.472611
6450,1,1,9,2012-11-02,2013-11-17,200,90,11244.660840
6451,1,1,9,2012-11-05,2013-11-18,200,90,13693.797612
