In [3]:
# !pip install gym

In [2]:
import numpy as np
import gym
import yfinance as yf
import tensorflow as tf

# Define the custom Gym-like environment for stock price prediction
class StockPriceEnv(gym.Env):
    def __init__(self, historical_prices):
        super(StockPriceEnv, self).__init__()
        self.historical_prices = historical_prices
        self.current_step = 0
        self.total_steps = len(historical_prices)
        self.action_space = gym.spaces.Discrete(21)  # Number of discrete actions
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(2,), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        return self._get_observation()

    def _get_observation(self):
        return np.array([self.historical_prices[self.current_step], self.current_step], dtype=np.float32)

    def step(self, action):
        current_price = self.historical_prices[self.current_step]
        next_price = self.historical_prices[self.current_step + 1]
        reward = -abs(action - (next_price - current_price))

        self.current_step += 1
        done = self.current_step >= self.total_steps - 1

        return self._get_observation(), reward, done, {}

# Define the Q-learning agent using TensorFlow
class QLearningAgent:
    def __init__(self, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.2):
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.q_network = self._build_q_network()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
    
    def _build_q_network(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu', input_shape=(2,)),
            tf.keras.layers.Dense(self.num_actions)
        ])
        return model
    
    def choose_action(self, state):
        if np.random.rand() < self.exploration_prob:
            return np.random.choice(self.num_actions)
        else:
            q_values = self.q_network.predict(state.reshape(1, -1))
            return np.argmax(q_values)
    
    def train(self, state, action, reward, next_state):
        with tf.GradientTape() as tape:
            q_values = self.q_network(state.reshape(1, -1))
            target = reward + self.discount_factor * np.max(self.q_network(next_state.reshape(1, -1)))
            target = tf.convert_to_tensor(target, dtype=tf.float32)
            action = tf.convert_to_tensor(action, dtype=tf.int32)
            # Calculate the predicted Q-value for the chosen action
            predicted_q_value = tf.reduce_sum(q_values * tf.one_hot(action, self.num_actions), axis=1)
            loss = tf.keras.losses.mean_squared_error(target, predicted_q_value)
        grads = tape.gradient(loss, self.q_network.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.q_network.trainable_variables))


# Fetch historical stock prices using Yahoo Finance API
def fetch_historical_prices(ticker_symbol):
    ticker_data = yf.Ticker(ticker_symbol)
    historical_data = ticker_data.history(interval='1d', start='2022-03-01')
    return historical_data['Close'].values.tolist()

# User inputs
ticker_symbol = "AAPL"
# input("Enter the ticker symbol (e.g., AAPL, MSFT, ORCL): ")
num_days = 10
# int(input("Enter the number of days for predicting future stock prices: "))

# Fetch historical stock prices from 2013-01-01 to the current date
historical_prices = fetch_historical_prices(ticker_symbol)

# Initialize RL environment and agent
env = StockPriceEnv(historical_prices)
num_actions = 21  # Number of discrete actions

agent = QLearningAgent(num_actions)

num_episodes = 20

for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0

    while True:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.train(state, action, reward, next_state)
        total_reward += reward
        state = next_state

        if done:
            break

# Using the trained agent to predict future stock prices
future_prices = []
state = env.reset()
for _ in range(num_days):
    action = agent.choose_action(state)
    predicted_price = state[0] + action
    future_prices.append(predicted_price)
    state = np.array([predicted_price, state[1] + 1], dtype=np.float32)

print(f"Predicted Stock Prices for the next {num_days} days: {future_prices}")















































































Predicted Stock Prices for the next 10 days: [161.77481079101562, 161.77481079101562, 180.77481079101562, 180.77481079101562, 180.77481079101562, 195.77481079101562, 195.77481079101562, 200.77481079101562, 200.77481079101562, 200.77481079101562]
