In [3]:
import numpy as np
import yfinance as yf
import pandas as pd
import gym
from gym import spaces
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

# Среда для торговли акциями
class StockTradingEnv(gym.Env):
    def __init__(self, stock_data, window_size=5):
        super(StockTradingEnv, self).__init__() # Инициализация родительского класса
        self.stock_data = stock_data.reset_index(drop=True) # Сброс индекса
        print("Доступные столбцы данных:", self.stock_data.columns.tolist())  # Отладочный вывод
        self.n_steps = len(stock_data) # Количество шагов
        self.window_size = window_size # Размер окна SMA
        self.current_step = 0 # Текущий шаг

        self.action_space = spaces.Discrete(3)  # действия: hold, buy, sell
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(4,), dtype=np.float32)
    # Сброс среды
    def reset(self):
        self.current_step = self.window_size
        self.current_balance = 2000
        self.stock_owned = 0 
        self.buy_price = 0
        return self._get_observation()
    
    # Расчет SMA
    def simple_moving_average(self):
        return self.stock_data['Adj Close']['AAPL'].iloc[self.current_step - self.window_size:self.current_step].mean()
    
    # Получение наблюдения
    def _get_observation(self):
        if self.current_step >= self.n_steps:
            raise IndexError("Текущий шаг выходит за пределы доступных данных.")

        current_price = self.stock_data['Adj Close']['AAPL'].iloc[self.current_step] 

        if self.current_step < self.window_size:
            raise IndexError("Недостаточно данных для расчета SMA.")

        sma = self.simple_moving_average()

        # Проверка значений
        print(f"Current Price: {current_price}, Current Balance: {self.current_balance}, Stocks Owned: {self.stock_owned}, SMA: {sma}")

        # Проверка, являются ли значения числами
        if not isinstance(current_price, (int, float)):
            raise ValueError("Current price or SMA is not a number.")

        # Создание наблюдения
        observation = np.array([current_price, self.current_balance, self.stock_owned, sma], dtype=np.float32)

        # Проверка формы возвращаемого наблюдения
        print(f"Observation Shape: {observation.shape}")
        if observation.shape != (4,):
            raise ValueError("Возвращаемое наблюдение не имеет ожидаемой формы.")

        return observation
    # Обновление среды
    def step(self, action):
        current_price = self.stock_data['Adj Close']['AAPL'].iloc[self.current_step]
        reward = 0
        done = False

        # Если купили
        if action == 1:  # Buy
            if self.current_balance >= current_price:
                self.stock_owned += 1
                self.current_balance -= current_price
                self.buy_price = current_price
                reward += current_price + self.buy_price
            else:
                reward -= current_price - self.buy_price

        # Если продали
        elif action == 2:  # Sell
            if self.stock_owned > 0:
                if current_price < self.buy_price:
                    reward -= 10  # Штраф за продажу ниже цены
                else:
                    reward += current_price - self.buy_price
                self.stock_owned -= 1
                self.current_balance += current_price

        # Удержание
        elif action == 0:  # Hold
            if current_price < self.buy_price:
                reward -= 2  # Штраф за удержание, если цена падает
            else:
                reward += 3  # Награда за удержание, если цена растет

        reward += self.current_balance + self.stock_owned * current_price - 3000 
        self.current_step += 1

        # Проверка завершения
        if self.current_step >= self.n_steps - 1:
            done = True

        return self._get_observation(), reward, done, {}

# Нейронная сеть для Q-learning
def build_model(state_size, action_size):
    model = keras.Sequential()
    model.add(keras.layers.Dense(24, input_dim=state_size, activation='relu'))
    model.add(keras.layers.Dense(24, activation='relu'))
    model.add(keras.layers.Dense(action_size, activation='linear'))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss='mse')
    return model

def moving_average_strategy(stock_data, short_window=20, long_window=50):
    signals = pd.DataFrame(index=stock_data.index)
    signals['price'] = stock_data['Adj Close']
    signals['short_mavg'] = stock_data['Adj Close'].rolling(window=short_window, min_periods=1).mean()
    signals['long_mavg'] = stock_data['Adj Close'].rolling(window=long_window, min_periods=1).mean()
    
    # Создадим сигнал покупки и продажи
    signals['signal'] = 0
    signals['signal'][short_window:] = np.where(signals['short_mavg'][short_window:] > signals['long_mavg'][short_window:], 1, 0)
    signals['positions'] = signals['signal'].diff()
    
    return signals

def evaluate_strategy(signals):
    starting_balance = 2000
    balance = starting_balance
    stocks_owned = 0

    for i in range(len(signals)):
        price = signals['price'].iloc[i]
        if signals['positions'].iloc[i] == 1:  # Buy Signal
            stocks_owned += 1
            balance -= price
        elif signals['positions'].iloc[i] == -1:  # Sell Signal
            if stocks_owned > 0:
                stocks_owned -= 1
                balance += price

    # Финальный баланс
    final_balance = balance + stocks_owned * signals['price'].iloc[-1]
    return final_balance

def plot_results(stock_data, signals, agent_rewards):
    fig, ax = plt.figure(figsize=(14, 7))
    
    # График цены акций
    plt.subplot(2, 1, 1)
    plt.plot(stock_data['Adj Close'], label='Цена акций', alpha=0.5)
    plt.plot(signals['short_mavg'], label='20-дневная SMA', alpha=0.5)
    plt.plot(signals['long_mavg'], label='50-дневная SMA', alpha=0.5)
    plt.title('Сравнение цен акций и скользящих средних')
    plt.legend()
    
    # График результатов
    plt.subplot(2, 1, 2)
    plt.plot(agent_rewards, label='Награды агента', alpha=0.5)
    plt.title('Награды RL агента')
    plt.legend()
    
    plt.tight_layout()
    plt.show()


# Основной блок кода
if __name__ == "__main__":
    SYMBOL = 'AAPL'  # Символ акций
    stock_data = yf.download(SYMBOL, period="1mo")  # Получаем данные за последний месяц

    if stock_data.empty:
        print("Нет доступных данных для выбранного символа.")
        exit()

    env = StockTradingEnv(stock_data)
    
    n_actions = env.action_space.n
    model = build_model(env.observation_space.shape[0], n_actions)
 
    n_episodes = 500 
    agent_rewards = []
    for episode in range(n_episodes):
        state = env.reset()
        done = False
        total_reward = 0

        while not done:
            state = np.reshape(state, [1, env.observation_space.shape[0]])
            q_values = model.predict(state)
            action = np.argmax(q_values[0])  # Оптимальное действие

            next_state, reward, done, _ = env.step(action)

            # Обучение модели с использованием Q-learning
            next_state = np.reshape(next_state, [1, env.observation_space.shape[0]])
            target = reward + 0.95 * np.amax(model.predict(next_state)[0])
            target_f = q_values
            target_f[0][action] = target

            model.fit(state, target_f, epochs=1, verbose=0)
            state = next_state
            total_reward += reward
            
        agent_rewards.append(total_reward)
        print(f"Эпизод: {episode + 1}/{n_episodes}, Награда: {total_reward}")
        
    # Реализация стратегии на основе скользящих средних
    signals = moving_average_strategy(stock_data)
    final_balance_strategy = evaluate_strategy(signals)

    # Сравнение с результатами агента
    final_balance_agent = env.current_balance + env.stock_owned * stock_data['Adj Close'].iloc[env.current_step]
    print(f"Финальный баланс агента: {final_balance_agent}, Финальный баланс стратегии: {final_balance_strategy}")

    plot_results(stock_data, signals, agent_rewards)

    print("Обучение завершено.")

[*********************100%***********************]  1 of 1 completed

Доступные столбцы данных: [('Adj Close', 'AAPL'), ('Close', 'AAPL'), ('High', 'AAPL'), ('Low', 'AAPL'), ('Open', 'AAPL'), ('Volume', 'AAPL')]
Current Price: 225.1199951171875, Current Balance: 2000, Stocks Owned: 0, SMA: 225.0250457763672
Observation Shape: (4,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Current Price: 228.22000122070312, Current Balance: 2000, Stocks Owned: 0, SMA: 225.5539978027344
Observation Shape: (4,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Current Price: 225.0, Current Balance: 2000, Stocks Owned: 0, SMA: 225.7519989013672
Observation Shape: (4,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Current Price: 228.02000427246094, Current Balance: 2000, Stocks Owned: 0, SMA: 225.35999755859376
Observation Shape: (4,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Current Price: 228.27999877929688, Current Balance: 2000, Stocks Owned: 0, SMA: 226.1179992675781
Observation Shape: (4,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Current Price: 229.0, Current Balance: 2000, Stocks Owned: 0, SMA: 226.9279998779297
Observation Shape: (4,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━

KeyboardInterrupt: 

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 8949893048095436215
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 9963569152
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7378899974456639246
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 4070 Ti, pci bus id: 0000:01:00.0, compute capability: 8.9"
xla_global_id: 416903419
]
True
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True


2024-12-05 10:31:46.446708: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2112] Could not identify NUMA node of platform GPU id 0, defaulting to 0.  Your kernel may not have been built with NUMA support.
2024-12-05 10:31:46.446779: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /device:GPU:0 with 9502 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Ti, pci bus id: 0000:01:00.0, compute capability: 8.9
2024-12-05 10:31:46.450481: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2112] Could not identify NUMA node of platform GPU id 0, defaulting to 0.  Your kernel may not have been built with NUMA support.
2024-12-05 10:31:46.450522: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /device:GPU:0 with 9502 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Ti, pci bus id: 0000:01:00.0, compute capability: 8.9
