In [1]:
# Функция для создания датасета для LSTM
def create_lstm_dataset(data, time_steps=1, forecast_days=1):
    X, y = [], []
    for i in range(len(data) - time_steps - forecast_days + 1):
        end_ix = i + time_steps
        out_end_ix = end_ix + forecast_days
        if out_end_ix > len(data):
            break
        seq_x, seq_y = data[i:end_ix, :], data[end_ix:out_end_ix, -1]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)


In [10]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import joblib

from free_utils import (
    file_names,
    n_intervals
)

In [12]:
file_id = 11
file_path = '../data_updated/' + file_names[file_id]

# Read the file into a DataFrame
dataset = pd.read_csv(file_path)
dataset

Unnamed: 0,Дата,Цена,Откр.,Макс.,Мин.,Объём,Изм. %,dayofweek,quarter,month,year,dayofyear,dayofmonth,weekofyear,SMA_20,EMA_20,RSI,MACD,Signal_Line
0,2013-01-14,149.60,148.55,149.85,148.20,2871000000,1.10,0,1,1,2013,14,14,3,144.6675,144.848034,73.582474,2.822895,2.677856
1,2013-01-15,147.73,149.48,149.60,147.71,2601000000,-1.25,1,1,1,2013,15,15,3,143.9520,144.347827,71.645920,2.739619,2.641596
2,2013-01-16,146.42,147.70,148.40,146.03,3160000000,-0.89,2,1,1,2013,16,16,3,143.2305,143.991809,69.056604,2.783880,2.617090
3,2013-01-17,147.70,146.67,148.23,146.37,3180000000,0.87,3,1,1,2013,17,17,3,142.7670,143.736210,79.002234,2.933261,2.575393
4,2013-01-18,148.42,148.43,149.23,148.13,2764000000,0.49,4,1,1,2013,18,18,3,142.0920,143.318969,84.682713,2.950934,2.485926
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2689,2023-10-09,167.06,167.30,167.97,166.62,2175000000,0.28,0,4,10,2023,282,9,41,,169.484502,,-0.515963,-0.193701
2690,2023-10-10,168.03,167.37,168.80,166.86,1858000000,0.58,1,4,10,2023,283,10,41,,169.739713,,-0.348030,-0.113135
2691,2023-10-11,167.89,168.50,171.40,167.30,5998000000,-0.08,2,4,10,2023,284,11,41,,169.919683,,-0.227387,-0.054412
2692,2023-10-12,169.50,167.90,169.80,167.40,3185000000,0.96,3,4,10,2023,285,12,41,,170.133333,,-0.055840,-0.011168


In [15]:
    
df = dataset
df = df.dropna()
# Извлекаем целевую переменную и столбец с датами
target_column = df.pop('Цена')
dates = df.pop('Дата')

# Вставляем целевую переменную в конец DataFrame
df['Цена'] = target_column

# Преобразуем в numpy массив
dataset = df.values


AttributeError: 'numpy.ndarray' object has no attribute 'dropna'

In [16]:
    
# Создаем датасет для LSTM
time_steps = n_intervals  # Количество временных шагов
forecast_days = 1  # Количество дней для предсказания
# Normalize the dataset using Min-Max scaling
scaler = MinMaxScaler(feature_range=(0, 1))
normalized_dataset = scaler.fit_transform(dataset)

X_ltsm, y_ltsm = create_lstm_dataset(normalized_dataset, time_steps, forecast_days)


In [17]:

# Разделяем на обучающий и тестовый наборы данных
train_size = int(len(X_ltsm) * 0.8)
X_train_ltsm, X_test_ltsm = X_ltsm[:train_size], X_ltsm[train_size:]
y_train_ltsm, y_testt_ltsm = y_ltsm[:train_size], y_ltsm[train_size:]
# Сохраняем scaler_X и scaler_y в файлы

scaler_filename = '../scalers/'+file_names[file_id]+'_scaler.pkl'
with open(scaler_filename, 'wb') as scaler_file:
    joblib.dump(scaler, scaler_file)


# Выводим размеры полученных датасетов
print("Размеры X_train_ltsm, y_train_ltsm, X_test_ltsm, y_test_ltsm:")
print(X_train_ltsm.shape, y_train_ltsm.shape, X_test_ltsm.shape, y_testt_ltsm.shape)


Размеры X_train_ltsm, y_train_ltsm, X_test_ltsm, y_test_ltsm:
(2044, 120, 18) (2044, 1) (511, 120, 18) (511, 1)


In [8]:
    import os
    
    # Путь к папке, где будем сохранять датасеты
    datasets_folder = '../datasets'
    
    # Создадим папку, если её нет
    os.makedirs(datasets_folder, exist_ok=True)
    
    # Сохранение данных X_train и y_train_ltsm
    X_train_ltsm_file_path = os.path.join(datasets_folder, f'X_train_ltsm_{file_names[file_id]}')
    X_test_ltsm_file_path = os.path.join(datasets_folder, f'X_test_ltsm_{file_names[file_id]}')
    y_train_ltsm_file_path = os.path.join(datasets_folder, f'y_train_ltsm_{file_names[file_id]}')
    y_test_ltsm_file_path = os.path.join(datasets_folder, f'y_test_ltsm_{file_names[file_id]}')
    
    np.save(X_train_ltsm_file_path, X_train_ltsm)
    np.save(X_test_ltsm_file_path, X_test_ltsm)
    
    np.save(y_train_ltsm_file_path, y_train_ltsm)
    np.save(y_test_ltsm_file_path, y_testt_ltsm)
    
    print(f'Data saved to {X_train_ltsm_file_path} and {X_test_ltsm_file_path}')
    print(f'Data saved to {y_train_ltsm_file_path} and {y_test_ltsm_file_path}')


Data saved to ../datasets\X_train_ltsm_Прошлые данные - PIKK.csv and ../datasets\X_test_ltsm_Прошлые данные - PIKK.csv
Data saved to ../datasets\y_train_ltsm_Прошлые данные - PIKK.csv and ../datasets\y_test_ltsm_Прошлые данные - PIKK.csv


In [8]:
# dataset_with_shift=dataset
# #Сдвигаем на одну позицию вперед целевую переменную
# #Target_1 это лаг наооборот для целевой переменной , типа предсказываем на день вперед
# # dataset_with_shift['future_1'] = dataset['Цена'].shift(1)
# # dataset_with_shift.dropna(inplace=True)
# # 
# # dataset_with_shift
# 
# X_xgboost = dataset.drop(['Цена','Дата'],axis=1)
# y_xgboost = dataset['Target_1']  # Целевая переменная
# X_xgboost = y_xgboost[-len(X_ltsm):]
# y_xgboost = X_xgboost[-len(X_ltsm):]