In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


In [2]:
def create_dataset(data, n_steps):
    X, y = [], []
    for i in range(len(data) - n_steps):
        scaler = MinMaxScaler(feature_range=(0, 1))
        seq = data[i: i + n_steps + 1]
        seq = scaler.fit_transform(seq) # это чтобы сначала разделить потом нормализовать
        seq_x = seq[:n_steps]
        seq_y = seq[n_steps]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

tickets = ["AFKS_short"]

X, y = [], []
for ticket in tickets:
    print(ticket)
    data = pd.read_csv(ticket +".txt")
    prise = data["open"].tolist()
    data = np.column_stack((prise))[0]

    # Нормализация данных
    scaler = MinMaxScaler(feature_range=(0, 1))
    data = data.reshape(-1, 1)
    #data = scaler.fit_transform(data) я убрал это чтобы поствить нормализацию в create_dataset
    x1, y1 = create_dataset(data, n_steps = 100)
    if len(X) == 0:
        X = x1
        y = y1
    else:
        X = np.concatenate((X, x1))
        y = np.concatenate((y, y1))
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.001, random_state=42)
print(X_train)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

AFKS_short
[[[0.80188679]
  [0.66981132]
  [0.52830189]
  ...
  [0.33962264]
  [0.14150943]
  [0.32075472]]

 [[0.70833333]
  [0.58333333]
  [0.58333333]
  ...
  [0.97916667]
  [0.95833333]
  [0.9375    ]]

 [[0.75967413]
  [0.67617108]
  [0.71894094]
  ...
  [0.38900204]
  [0.40733198]
  [0.38696538]]

 ...

 [[0.58450704]
  [0.56338028]
  [0.5915493 ]
  ...
  [0.8556338 ]
  [0.97183099]
  [0.98943662]]

 [[0.56443299]
  [0.56443299]
  [0.58762887]
  ...
  [0.43556701]
  [0.32989691]
  [0.30154639]]

 [[0.09137056]
  [0.02538071]
  [0.2284264 ]
  ...
  [0.89847716]
  [0.89170897]
  [0.87478849]]]


In [3]:
print(len(X_train))
X_train

196124


tensor([[[0.8019],
         [0.6698],
         [0.5283],
         ...,
         [0.3396],
         [0.1415],
         [0.3208]],

        [[0.7083],
         [0.5833],
         [0.5833],
         ...,
         [0.9792],
         [0.9583],
         [0.9375]],

        [[0.7597],
         [0.6762],
         [0.7189],
         ...,
         [0.3890],
         [0.4073],
         [0.3870]],

        ...,

        [[0.5845],
         [0.5634],
         [0.5915],
         ...,
         [0.8556],
         [0.9718],
         [0.9894]],

        [[0.5644],
         [0.5644],
         [0.5876],
         ...,
         [0.4356],
         [0.3299],
         [0.3015]],

        [[0.0914],
         [0.0254],
         [0.2284],
         ...,
         [0.8985],
         [0.8917],
         [0.8748]]])

In [4]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # использование последнего выходного состояния
        return out

In [6]:
# Гиперпараметры модели
input_size = 1  # размерность входного вектора (пара чисел)
hidden_size = 150  # размер скрытого состояния
output_size = 1  # размерность выходного вектора (пара чисел)
num_layers = 3  # количество слоев LSTM

model = LSTMModel(input_size, hidden_size, output_size, num_layers)

# Оптимизатор и функция потерь
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


def train_model(model, criterion, optimizer, X_train, y_train, X_val, y_val, num_epochs, batch_size):
    train_losses = []
    val_losses = []
    
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    l = len(train_dataloader)

    for epoch in range(num_epochs):
        model.train()
        epoch_train_loss = 0
        s = 0
        for batch_X, batch_y in train_dataloader:
            s += 1
            if(s % 100 == 0):
                print(str(int(s / l * 100)) + "%")
            optimizer.zero_grad()
            output = model(batch_X)
            loss = criterion(output, batch_y)
            loss.backward()

            
            optimizer.step()
            epoch_train_loss += loss.item()
        
        train_loss = epoch_train_loss / len(train_dataloader)
        train_losses.append(train_loss)
        
        # Валидация
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)
            val_loss = criterion(val_output, y_val).item()
            val_losses.append(val_loss)
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

    return train_losses, val_losses

num_epochs = 3
# Обучение модели
train_losses, val_losses = train_model(model, criterion, optimizer, X_train, y_train, X_val, y_val, num_epochs=num_epochs, batch_size=256)

plt.plot(range(num_epochs), train_losses, label='Train Loss')
plt.plot(range(num_epochs), val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

13%
26%
39%
52%
65%
78%


KeyboardInterrupt: 

In [5]:
torch.save(model, "flot10m1.pth")