In [2]:
# Функция для создания датасета для LSTM
def create_lstm_dataset(data, time_steps=1, forecast_days=1):
    X, y = [], []
    for i in range(len(data) - time_steps - forecast_days + 1):
        end_ix = i + time_steps
        out_end_ix = end_ix + forecast_days
        if out_end_ix > len(data):
            break
        seq_x, seq_y = data[i:end_ix, :], data[end_ix:out_end_ix, -1]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)


In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import joblib

from free_utils import (
    file_names,
    n_intervals
)

In [4]:
file_id = 7
file_path = '../data_updated/' + file_names[file_id]

# Read the file into a DataFrame
dataset = pd.read_csv(file_path)
dataset

Unnamed: 0,Дата,Цена,Откр.,Макс.,Мин.,Объём,Изм. %,dayofweek,quarter,month,year,dayofyear,dayofmonth,weekofyear,SMA_20,EMA_20,RSI,MACD,Signal_Line
0,2013-01-14,65.16,65.40,65.93,65.08,10149000,-0.31,0,1,1,2013,14,14,3,66.1045,65.881669,33.211679,0.057862,0.198163
1,2013-01-15,66.14,65.16,66.57,64.20,25189000,1.50,1,1,1,2013,15,15,3,66.1355,65.957635,34.666667,0.132515,0.233238
2,2013-01-16,66.10,66.41,67.67,65.57,9318000,-0.06,2,1,1,2013,16,16,3,66.2320,65.938438,33.840304,0.126090,0.258419
3,2013-01-17,65.95,65.98,66.79,65.33,4524000,-0.23,3,1,1,2013,17,17,3,66.3530,65.921432,48.520710,0.120128,0.291501
4,2013-01-18,66.44,66.00,66.70,65.57,5370000,0.74,4,1,1,2013,18,18,3,66.4310,65.918425,48.449040,0.126045,0.334344
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2689,2023-10-09,727.00,727.00,739.20,725.00,41117000,-0.19,0,4,10,2023,282,9,41,,742.313816,,0.232624,0.449786
2690,2023-10-10,758.80,726.20,765.00,726.10,89358000,4.37,1,4,10,2023,283,10,41,,743.925796,,1.823673,0.504076
2691,2023-10-11,745.60,756.00,776.00,742.10,76936000,-1.74,2,4,10,2023,284,11,41,,742.360091,,0.590086,0.174177
2692,2023-10-12,746.00,747.10,753.60,737.50,29616000,0.05,3,4,10,2023,285,12,41,,742.019048,,0.350997,0.070199


In [5]:
    
df = dataset
df = df.dropna()
# Извлекаем целевую переменную и столбец с датами
target_column = df.pop('Цена')
dates = df.pop('Дата')

# Вставляем целевую переменную в конец DataFrame
df['Цена'] = target_column

# Преобразуем в numpy массив
dataset = df.values


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Цена'] = target_column


In [6]:
    
# Создаем датасет для LSTM
time_steps = n_intervals  # Количество временных шагов
forecast_days = 1  # Количество дней для предсказания
# Normalize the dataset using Min-Max scaling
scaler = MinMaxScaler(feature_range=(0, 1))
normalized_dataset = scaler.fit_transform(dataset)

X_ltsm, y_ltsm = create_lstm_dataset(normalized_dataset, time_steps, forecast_days)


In [7]:

# Разделяем на обучающий и тестовый наборы данных
train_size = int(len(X_ltsm) * 0.8)
X_train_ltsm, X_test_ltsm = X_ltsm[:train_size], X_ltsm[train_size:]
y_train_ltsm, y_testt_ltsm = y_ltsm[:train_size], y_ltsm[train_size:]
# Сохраняем scaler_X и scaler_y в файлы

scaler_filename = '../scalers/'+file_names[file_id]+'_scaler.pkl'
with open(scaler_filename, 'wb') as scaler_file:
    joblib.dump(scaler, scaler_file)


# Выводим размеры полученных датасетов
print("Размеры X_train_ltsm, y_train_ltsm, X_test_ltsm, y_test_ltsm:")
print(X_train_ltsm.shape, y_train_ltsm.shape, X_test_ltsm.shape, y_testt_ltsm.shape)


Размеры X_train_ltsm, y_train_ltsm, X_test_ltsm, y_test_ltsm:
(2044, 120, 18) (2044, 1) (511, 120, 18) (511, 1)


In [8]:
    import os
    
    # Путь к папке, где будем сохранять датасеты
    datasets_folder = '../datasets'
    
    # Создадим папку, если её нет
    os.makedirs(datasets_folder, exist_ok=True)
    
    # Сохранение данных X_train и y_train_ltsm
    X_train_ltsm_file_path = os.path.join(datasets_folder, f'X_train_ltsm_{file_names[file_id]}')
    X_test_ltsm_file_path = os.path.join(datasets_folder, f'X_test_ltsm_{file_names[file_id]}')
    y_train_ltsm_file_path = os.path.join(datasets_folder, f'y_train_ltsm_{file_names[file_id]}')
    y_test_ltsm_file_path = os.path.join(datasets_folder, f'y_test_ltsm_{file_names[file_id]}')
    
    np.save(X_train_ltsm_file_path, X_train_ltsm)
    np.save(X_test_ltsm_file_path, X_test_ltsm)
    
    np.save(y_train_ltsm_file_path, y_train_ltsm)
    np.save(y_test_ltsm_file_path, y_testt_ltsm)
    
    print(f'Data saved to {X_train_ltsm_file_path} and {X_test_ltsm_file_path}')
    print(f'Data saved to {y_train_ltsm_file_path} and {y_test_ltsm_file_path}')


Data saved to ../datasets\X_train_ltsm_Прошлые данные - PIKK.csv and ../datasets\X_test_ltsm_Прошлые данные - PIKK.csv
Data saved to ../datasets\y_train_ltsm_Прошлые данные - PIKK.csv and ../datasets\y_test_ltsm_Прошлые данные - PIKK.csv


In [8]:
# dataset_with_shift=dataset
# #Сдвигаем на одну позицию вперед целевую переменную
# #Target_1 это лаг наооборот для целевой переменной , типа предсказываем на день вперед
# # dataset_with_shift['future_1'] = dataset['Цена'].shift(1)
# # dataset_with_shift.dropna(inplace=True)
# # 
# # dataset_with_shift
# 
# X_xgboost = dataset.drop(['Цена','Дата'],axis=1)
# y_xgboost = dataset['Target_1']  # Целевая переменная
# X_xgboost = y_xgboost[-len(X_ltsm):]
# y_xgboost = X_xgboost[-len(X_ltsm):]