In [3]:
# !pip install gym

In [None]:
import numpy as np
import gym
import yfinance as yf
import tensorflow as tf

# Define the custom Gym-like environment for stock price prediction
class StockPriceEnv(gym.Env):
    def __init__(self, historical_prices):
        super(StockPriceEnv, self).__init__()
        self.historical_prices = historical_prices
        self.current_step = 0
        self.total_steps = len(historical_prices)
        self.action_space = gym.spaces.Discrete(21)  # Number of discrete actions
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(2,), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        return self._get_observation()

    def _get_observation(self):
        return np.array([self.historical_prices[self.current_step], self.current_step], dtype=np.float32)

    def step(self, action):
        current_price = self.historical_prices[self.current_step]
        next_price = self.historical_prices[self.current_step + 1]
        reward = -abs(action - (next_price - current_price))

        self.current_step += 1
        done = self.current_step >= self.total_steps - 1

        return self._get_observation(), reward, done, {}

# Define the Q-learning agent using TensorFlow
class QLearningAgent:
    def __init__(self, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.2):
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.q_network = self._build_q_network()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
    
    def _build_q_network(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu', input_shape=(2,)),
            tf.keras.layers.Dense(self.num_actions)
        ])
        return model
    
    def choose_action(self, state):
        if np.random.rand() < self.exploration_prob:
            return np.random.choice(self.num_actions)
        else:
            q_values = self.q_network.predict(state.reshape(1, -1))
            return np.argmax(q_values)
    
    def train(self, state, action, reward, next_state):
        with tf.GradientTape() as tape:
            q_values = self.q_network(state.reshape(1, -1))
            target = reward + self.discount_factor * np.max(self.q_network(next_state.reshape(1, -1)))
            target = tf.convert_to_tensor(target, dtype=tf.float32)
            action = tf.convert_to_tensor(action, dtype=tf.int32)
            # Calculate the predicted Q-value for the chosen action
            predicted_q_value = tf.reduce_sum(q_values * tf.one_hot(action, self.num_actions), axis=1)
            loss = tf.keras.losses.mean_squared_error(target, predicted_q_value)
        grads = tape.gradient(loss, self.q_network.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.q_network.trainable_variables))


# Fetch historical stock prices using Yahoo Finance API
def fetch_historical_prices(ticker_symbol):
    ticker_data = yf.Ticker(ticker_symbol)
    historical_data = ticker_data.history(interval='1d', start='2022-03-01')
    return historical_data['Close'].values.tolist()

# User inputs
ticker_symbol = "AAPL"
# input("Enter the ticker symbol (e.g., AAPL, MSFT, ORCL): ")
num_days = 10
# int(input("Enter the number of days for predicting future stock prices: "))

# Fetch historical stock prices from 2013-01-01 to the current date
historical_prices = fetch_historical_prices(ticker_symbol)

# Initialize RL environment and agent
env = StockPriceEnv(historical_prices)
num_actions = 21  # Number of discrete actions

agent = QLearningAgent(num_actions)

num_episodes = 80

for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0

    while True:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.train(state, action, reward, next_state)
        total_reward += reward
        state = next_state

        if done:
            break

# Using the trained agent to predict future stock prices
future_prices = []
state = env.reset()
for _ in range(num_days):
    action = agent.choose_action(state)
    predicted_price = state[0] + action
    future_prices.append(predicted_price)
    state = np.array([predicted_price, state[1] + 1], dtype=np.float32)

print(f"Predicted Stock Prices for the next {num_days} days: {future_prices}")





























































































































# 50 episodes each

In [1]:
import numpy as np
import gym
import yfinance as yf
import tensorflow as tf
import time

# Define the custom Gym-like environment for stock price prediction
class StockPriceEnv(gym.Env):
    def __init__(self, historical_prices):
        super(StockPriceEnv, self).__init__()
        self.historical_prices = historical_prices
        self.current_step = 0
        self.total_steps = len(historical_prices)
        self.action_space = gym.spaces.Discrete(21)  # Number of discrete actions
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(2,), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        return self._get_observation()

    def _get_observation(self):
        return np.array([self.historical_prices[self.current_step], self.current_step], dtype=np.float32)

    def step(self, action):
        current_price = self.historical_prices[self.current_step]
        next_price = self.historical_prices[self.current_step + 1]
        reward = -abs(action - (next_price - current_price))

        self.current_step += 1
        done = self.current_step >= self.total_steps - 1

        return self._get_observation(), reward, done, {}

# Define the Q-learning agent using TensorFlow
class QLearningAgent:
    def __init__(self, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.2):
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.q_network = self._build_q_network()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
    
    def _build_q_network(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu', input_shape=(2,)),
            tf.keras.layers.Dense(self.num_actions)
        ])
        return model
    
    def choose_action(self, state):
        if np.random.rand() < self.exploration_prob:
            return np.random.choice(self.num_actions)
        else:
            q_values = self.q_network.predict(state.reshape(1, -1))
            return np.argmax(q_values)
    
    def train(self, state, action, reward, next_state):
        with tf.GradientTape() as tape:
            q_values = self.q_network(state.reshape(1, -1))
            target = reward + self.discount_factor * np.max(self.q_network(next_state.reshape(1, -1)))
            target = tf.convert_to_tensor(target, dtype=tf.float32)
            action = tf.convert_to_tensor(action, dtype=tf.int32)
            # Calculate the predicted Q-value for the chosen action
            predicted_q_value = tf.reduce_sum(q_values * tf.one_hot(action, self.num_actions), axis=1)
            loss = tf.keras.losses.mean_squared_error(target, predicted_q_value)
        grads = tape.gradient(loss, self.q_network.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.q_network.trainable_variables))


# Fetch historical stock prices using Yahoo Finance API
def fetch_historical_prices(ticker_symbol):
    ticker_data = yf.Ticker(ticker_symbol)
    historical_data = ticker_data.history(interval='1d', start='2022-03-01')
    return historical_data['Close'].values.tolist()

# User inputs
ticker_symbol = "AAPL"
# input("Enter the ticker symbol (e.g., AAPL, MSFT, ORCL): ")
num_days = 10
# int(input("Enter the number of days for predicting future stock prices: "))

# Fetch historical stock prices from 2013-01-01 to the current date
historical_prices = fetch_historical_prices(ticker_symbol)

# Initialize RL environment and agent
env = StockPriceEnv(historical_prices)
num_actions = 21  # Number of discrete actions

# Define a Q-learning agent
agent = QLearningAgent(num_actions)

num_total_episodes = 200  # Total number of episodes to train
num_episodes_per_batch = 50  # Number of episodes per batch

for batch in range(num_total_episodes // num_episodes_per_batch):
    for episode in range(num_episodes_per_batch):
        state = env.reset()
        total_reward = 0

        while True:
            action = agent.choose_action(state)
            next_state, reward, done, _ = env.step(action)
            agent.train(state, action, reward, next_state)
            total_reward += reward
            state = next_state

            if done:
                break

    # Save the model weights after each batch of episodes
    agent.q_network.save_weights(f'q_network_weights_batch{batch}.h5')

    # Pause for 1 minute and free up resources
    print(f'Pausing for 1 minute between batches ({batch+1}/{num_total_episodes // num_episodes_per_batch})...')
    time.sleep(60)

    # Optionally release resources
    tf.keras.backend.clear_session()

# Load the fully trained model
agent.q_network.load_weights(f'q_network_weights_batch{(num_total_episodes // num_episodes_per_batch) - 1}.h5')

# Using the trained agent to predict future stock prices
future_prices = []
state = env.reset()
for _ in range(num_days):
    action = agent.choose_action(state)
    predicted_price = state[0] + action
    future_prices.append(predicted_price)
    state = np.array([predicted_price, state[1] + 1], dtype=np.float32)

print(f"Predicted Stock Prices for the next {num_days} days: {future_prices}")





























































































































Pausing for 1 minute between batches (1/4)...






































































































Pausing for 1 minute between batches (2/4)...




































































































Pausing for 1 minute between batches (3/4)...




































































































Pausing for 1 minute between batches (4/4)...
Predicted Stock Prices for the next 10 days: [161.7748260498047, 181.7748260498047, 181.7748260498047, 181.7748260498047, 181.7748260498047, 181.7748260498047, 181.7748260498047, 181.7748260498047, 184.7748260498047, 184.7748260498047]


# Independent from the dataset

In [15]:
import numpy as np
import gym
import yfinance as yf
import tensorflow as tf
import time

# Define the generalized Gym-like environment for stock price prediction
class StockPriceEnv(gym.Env):
    def __init__(self, historical_prices):
        super(StockPriceEnv, self).__init__()
        self.historical_prices = historical_prices
        self.current_step = 0
        self.max_steps = len(historical_prices) - 1  # Total prediction steps
        self.action_space = gym.spaces.Discrete(21)  # Number of discrete actions
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(2,), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        return self._get_observation()

    def _get_observation(self):
        return np.array([self.historical_prices[self.current_step], self.current_step], dtype=np.float32)

    def step(self, action):
        current_price = self.historical_prices[self.current_step]
        next_price = self.historical_prices[self.current_step + 1]
        reward = -abs(action - (next_price - current_price))

        self.current_step += 1
        done = self.current_step >= self.max_steps

        return self._get_observation(), reward, done, {}

# Define the Q-learning agent using TensorFlow
class QLearningAgent:
    def __init__(self, num_actions, learning_rate=0.1, discount_factor=0.8, exploration_prob=0.1):
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.q_network = self._build_q_network()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
    
    def _build_q_network(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu', input_shape=(2,)),
            tf.keras.layers.Dense(self.num_actions)
        ])
        return model
    
    def choose_action(self, state):
        if np.random.rand() < self.exploration_prob:
            return np.random.choice(self.num_actions)
        else:
            q_values = self.q_network.predict(state.reshape(1, -1))
            return np.argmax(q_values)
    
    def train(self, state, action, reward, next_state):
        with tf.GradientTape() as tape:
            q_values = self.q_network(state.reshape(1, -1))
            target = reward + self.discount_factor * np.max(self.q_network(next_state.reshape(1, -1)))
            target = tf.convert_to_tensor(target, dtype=tf.float32)
            action = tf.convert_to_tensor(action, dtype=tf.int32)
            # Calculate the predicted Q-value for the chosen action
            predicted_q_value = tf.reduce_sum(q_values * tf.one_hot(action, self.num_actions), axis=1)
            loss = tf.keras.losses.mean_squared_error(target, predicted_q_value)
        grads = tape.gradient(loss, self.q_network.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.q_network.trainable_variables))

# Training the agent for multiple stock symbols in batches
def train_agents_in_batches(agent_dict, env_dict, total_episodes, episodes_per_batch, weights_filename_prefix):
    for ticker_symbol in agent_dict.keys():
        agent = agent_dict[ticker_symbol]
        env = env_dict[ticker_symbol]

        for batch in range(total_episodes // episodes_per_batch):
            for episode in range(episodes_per_batch):
                state = env.reset()
                total_reward = 0

                for _ in range(episodes_per_batch):  # Corrected this line
                    action = agent.choose_action(state)
                    next_state, reward, done, _ = env.step(action)
                    agent.train(state, action, reward, next_state)
                    total_reward += reward
                    state = next_state

                    if done:
                        break

            # Save the agent's model weights after each batch of episodes
            agent.q_network.save_weights(f'{weights_filename_prefix}_{ticker_symbol}.h5')

            # Pause for 1 minute between batches
            if batch < total_episodes // episodes_per_batch - 1:
                print(f'Pausing for 1 minute between batches ({batch+1}/{total_episodes // episodes_per_batch})...')
                time.sleep(60)

            # Optionally release resources
            tf.keras.backend.clear_session()

# Load the trained agent for prediction
def load_trained_agent(agent, weights_filename):
    agent.q_network.load_weights(weights_filename)

# Fetch historical stock prices using Yahoo Finance API
def fetch_historical_prices(ticker_symbol):
    ticker_data = yf.Ticker(ticker_symbol)
    historical_data = ticker_data.history(interval='1d', start='2022-03-01')
    return historical_data['Close'].values.tolist()

# Example usage
if __name__ == "__main__":
    # Define a list of stock symbols
    ticker_symbols = ["AAPL"]

    # Create a dictionary to store agents and environments for each stock symbol
    agent_dict = {}
    env_dict = {}

    for ticker_symbol in ticker_symbols:
        # Fetch historical stock prices from Yahoo Finance
        historical_prices = fetch_historical_prices(ticker_symbol)

        # Create a generalized environment for each stock symbol
        env = StockPriceEnv(historical_prices)
        env_dict[ticker_symbol] = env

        # Create a Q-learning agent for each stock symbol
        num_actions = 21  # Number of discrete actions
        agent = QLearningAgent(num_actions, exploration_prob=0.5)  # Increase exploration probability
        agent_dict[ticker_symbol] = agent

    # Train agents for each stock symbol in batches
    total_episodes = 1000  # Total number of episodes to train
    episodes_per_batch = 50  # Number of episodes per batch

    weights_filename_prefix = "trained_agent_weights"
    train_agents_in_batches(agent_dict, env_dict, total_episodes, episodes_per_batch, weights_filename_prefix)

    # Load a trained agent for prediction (e.g., AAPL)
    load_trained_agent(agent_dict["AAPL"], "trained_agent_weights_AAPL.h5")

    # Using the trained agent for prediction (e.g., AAPL)
    future_prices = []
    state = env_dict["AAPL"].reset()
    num_days = 10
    for _ in range(num_days):
        action = agent_dict["AAPL"].choose_action(state)
        predicted_price = state[0] + action
        future_prices.append(predicted_price)
        state = np.array([predicted_price, state[1] + 1], dtype=np.float32)

    print(f"Predicted Stock Prices for AAPL for the next {num_days} days: {future_prices}")








Pausing for 1 minute between batches (1/40)...








Pausing for 1 minute between batches (2/40)...








Pausing for 1 minute between batches (3/40)...








Pausing for 1 minute between batches (4/40)...








Pausing for 1 minute between batches (5/40)...








Pausing for 1 minute between batches (6/40)...








Pausing for 1 minute between batches (7/40)...










Pausing for 1 minute between batches (8/40)...






Pausing for 1 minute between batches (9/40)...








Pausing for 1 minute between batches (10/40)...










Pausing for 1 minute between batches (11/40)...








Pausing for 1 minute between batches (12/40)...








Pausing for 1 minute between batches (13/40)...








Pausing for 1 minute between batches (14/40)...






Pausing for 1 minute between batches (15/40)...








Pausing for 1 minute between batches (16/40)...










Pausing for 1 minute between batches (17/40)...








Pausing for 1 minute between batches (18/40)...








Pausing for 1 minute between batches (19/40)...






Pausing for 1 minute between batches (20/40)...








Pausing for 1 minute between batches (21/40)...










Pausing for 1 minute between batches (22/40)...








Pausing for 1 minute between batches (23/40)...








Pausing for 1 minute between batches (24/40)...






Pausing for 1 minute between batches (25/40)...










Pausing for 1 minute between batches (26/40)...








Pausing for 1 minute between batches (27/40)...








Pausing for 1 minute between batches (28/40)...








Pausing for 1 minute between batches (29/40)...








Pausing for 1 minute between batches (30/40)...








Pausing for 1 minute between batches (31/40)...








Pausing for 1 minute between batches (32/40)...








Pausing for 1 minute between batches (33/40)...






Pausing for 1 minute between batches (34/40)...








Pausing for 1 minute between batches (35/40)...








Pausing for 1 minute between batches (36/40)...








Pausing for 1 minute between batches (37/40)...








Pausing for 1 minute between batches (38/40)...








Pausing for 1 minute between batches (39/40)...








Predicted Stock Prices for AAPL for the next 10 days: [166.77481079101562, 175.77481079101562, 176.77481079101562, 196.77481079101562, 200.77481079101562, 212.77481079101562, 219.77481079101562, 220.77481079101562, 221.77481079101562, 222.77481079101562]


In [14]:
# # Using the trained agent for prediction (e.g., AAPL)
# future_prices = []
# state = env_dict["AAPL"].reset()
# num_days = 1
# for _ in range(num_days):
#     action = agent_dict["AAPL"].choose_action(state)
#     predicted_price = state[0] + action
#     future_prices.append(predicted_price)
#     state = np.array([predicted_price, state[1] + 1], dtype=np.float32)

# print(f"Predicted Stock Prices for AAPL for the next {num_days} days: {future_prices}")

Predicted Stock Prices for AAPL for the next 1 days: [162.7748260498047]


# Final RL Agent

In [6]:
import numpy as np
import gym
import yfinance as yf
import tensorflow as tf
import time
import os
import pandas as pd

# Define the generalized Gym-like environment for stock price prediction
class StockPriceEnv(gym.Env):
    def __init__(self, historical_prices):
        super(StockPriceEnv, self).__init__()
        self.historical_prices = historical_prices
        self.current_step = 0
        self.max_steps = len(historical_prices) - 1  # Total prediction steps
        self.action_space = gym.spaces.Discrete(21)  # Number of discrete actions
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(2,), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        return self._get_observation()

    def _get_observation(self):
        return np.array([self.historical_prices[self.current_step], self.current_step], dtype=np.float32)

    def step(self, action):
        current_price = self.historical_prices[self.current_step]
        next_price = self.historical_prices[self.current_step + 1]
        reward = -abs(action - (next_price - current_price))

        self.current_step += 1
        done = self.current_step >= self.max_steps

        return self._get_observation(), reward, done, {}

# Define the Q-learning agent using TensorFlow
class QLearningAgent:
    def __init__(self, num_actions, learning_rate=0.1, discount_factor=0.8, exploration_prob=0.1):
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.q_network = self._build_q_network()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
    
    def _build_q_network(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu', input_shape=(2,)),
            tf.keras.layers.Dense(self.num_actions)
        ])
        return model
    
    def choose_action(self, state):
        if np.random.rand() < self.exploration_prob:
            return np.random.choice(self.num_actions)
        else:
            q_values = self.q_network.predict(state.reshape(1, -1))
            return np.argmax(q_values)
    
    def train(self, state, action, reward, next_state):
        with tf.GradientTape() as tape:
            q_values = self.q_network(state.reshape(1, -1))
            target = reward + self.discount_factor * np.max(self.q_network(next_state.reshape(1, -1)))
            target = tf.convert_to_tensor(target, dtype=tf.float32)
            action = tf.convert_to_tensor(action, dtype=tf.int32)
            # Calculate the predicted Q-value for the chosen action
            predicted_q_value = tf.reduce_sum(q_values * tf.one_hot(action, self.num_actions), axis=1)
            loss = tf.keras.losses.mean_squared_error(target, predicted_q_value)
        grads = tape.gradient(loss, self.q_network.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.q_network.trainable_variables))

# Function to preprocess data from DataFrames
def ARIMA_and_RL_Agent_Preprocess(tickers, start_date):
    filtered_dfs = {}  # Store filtered DataFrames for each ticker
    
    for ticker in tickers:
        # Read the data CSV for the current ticker
        df = pd.read_csv(f"{ticker}_Daily_Data.csv")

        # Convert the 'Date' column to datetime if it's not already
        df['Date'] = pd.to_datetime(df['Date'])

        # Filter the DataFrame to keep rows from the start_date onwards
        filtered_df = df[df['Date'] >= start_date]

        # Reset the index of the filtered DataFrame
        filtered_df.reset_index(drop=True, inplace=True)

        # Store the filtered DataFrame in the dictionary
        filtered_dfs[ticker] = filtered_df

    return filtered_dfs

# Training the agent for a single stock symbol
def train_agent(agent, env, total_episodes, episodes_per_batch, weights_directory, ticker_symbol, data_df):
    for batch in range(total_episodes // episodes_per_batch):
        start_time = time.time()
        
        for episode in range(episodes_per_batch):
            state = env.reset()
            total_reward = 0

            for _ in range(episodes_per_batch):
                action = agent.choose_action(state)
                next_state, reward, done, _ = env.step(action)
                agent.train(state, action, reward, next_state)
                total_reward += reward
                state = next_state

                if done:
                    break

#             # Print a message at the end of each agent's training
#             episode_time = time.time() - start_time
#             print(f'Agent for {ticker_symbol} finished training episode {episode+1} (batch {batch+1}/{total_episodes // episodes_per_batch}) in {episode_time:.2f} seconds')

        # Save the agent's model weights after each batch of episodes
        weights_filename = os.path.join(weights_directory, f'{ticker_symbol}.h5')
        agent.q_network.save_weights(weights_filename)

        # Pause for 5 minutes between training each agent
        if batch < total_episodes // episodes_per_batch - 1:
            print(f'Pausing for 1 minutes between training agents for {ticker_symbol}...')
            time.sleep(60)  # 1 minutes

        # Optionally release resources
        tf.keras.backend.clear_session()

if __name__ == "__main__":
    # Define a list of stock symbols
    ticker_symbols = ["MSFT", "ORCL"]  # Replace with your list of stock symbols

    # Directory to save agent models
    weights_directory = "trained_agent_models"
    os.makedirs(weights_directory, exist_ok=True)  # Create the directory if it doesn't exist

    # Create dictionaries to store agents and environments for each stock symbol
    agent_dict = {}
    env_dict = {}

    # Load data from DataFrames for each stock symbol
    filtered_data = ARIMA_and_RL_Agent_Preprocess(ticker_symbols, pd.to_datetime('2022-03-01'))

    for ticker_symbol in ticker_symbols:
        # Fetch historical stock prices from DataFrames
        historical_prices = filtered_data[ticker_symbol]['Close'].values.tolist()

        # Create a generalized environment for each stock symbol
        env = StockPriceEnv(historical_prices)
        env_dict[ticker_symbol] = env

        # Create a Q-learning agent for each stock symbol
        num_actions = 21  # Number of discrete actions
        agent = QLearningAgent(num_actions, exploration_prob=0.5)  # Increase exploration probability
        agent_dict[ticker_symbol] = agent

        # Train agents for each stock symbol
        total_episodes = 1000  # Total number of episodes to train
        episodes_per_batch = 50  # Number of episodes per batch

        train_agent(agent_dict[ticker_symbol], env_dict[ticker_symbol], total_episodes, episodes_per_batch, weights_directory, ticker_symbol, filtered_data[ticker_symbol])

        # Pause for 5 minutes between training each agent
        if ticker_symbol != ticker_symbols[-1]:
            print(f'Pausing for 5 minutes before training the next agent...')
            time.sleep(300)  # 5 minutes

    print("Training process completed for all agents.")

In [8]:
import os  # Added for directory handling
import pandas as pd
import yfinance as yf
import numpy as np
import tensorflow as tf
from datetime import datetime, timedelta

# Load the trained agent for prediction
def load_trained_agent(agent, weights_filename):
    agent.q_network.load_weights(weights_filename)

# Function to load a saved RL agent model and make future predictions
def make_future_predictions(stock_ticker, num_days, weights_directory):
    # Load the saved RL agent model for the chosen stock
    weights_filename = os.path.join(weights_directory, f'{stock_ticker}.h5')
    
    # Check if the agent model file exists
    if not os.path.exists(weights_filename):
        print(f"No trained agent model found for {stock_ticker}. Please train the agent first.")
        return None

    # Create the Q-learning agent
    num_actions = 21  # Number of discrete actions
    agent = QLearningAgent(num_actions, exploration_prob=0.1)  # Set the exploration probability as needed
    load_trained_agent(agent, weights_filename)

    # Fetch historical data for the chosen stock
    ticker = yf.Ticker(stock_ticker)
    df = ticker.history(interval='1d', start='2022-03-01')

    # Determine the last date in the historical data
    last_date = df.index[-1]

    # Generate date range for the next num_days days
    future_dates = pd.date_range(start=last_date + pd.DateOffset(days=1), periods=num_days)

    # Create an environment for prediction
    env = StockPriceEnv(df['Close'].values)

    # Make predictions for the chosen stock for the next num_days days
    future_prices = []

    for _ in range(num_days):
        action = agent.choose_action(env._get_observation())  # Use the environment's observation
        predicted_price = env._get_observation()[0] + action
        future_prices.append(predicted_price)
        state = np.array([predicted_price, env._get_observation()[1] + 1], dtype=np.float32)
        env.current_step += 1

    # Create a dictionary to store the date and predicted price
    predicted_prices = {}

    for i, date in enumerate(future_dates):
        predicted_prices[date.strftime('%Y-%m-%d')] = future_prices[i]

    return predicted_prices

if __name__ == "__main__":
    # Define the directory where agent models are saved
    weights_directory = "trained_agent_models"

    # Example usage:
    user_ticker = input("Enter a stock ticker (e.g., AAPL, MSFT, ORCL): ")
    user_days = int(input("Enter the number of future days for prediction: "))

    predictions = make_future_predictions(user_ticker, user_days, weights_directory)

    if predictions:
        # Print the predicted prices
        for date, price in predictions.items():
            print(f"Date: {date}, Predicted Price: {price}")


Enter a stock ticker (e.g., AAPL, MSFT, ORCL): ORCL
Enter the number of future days for prediction: 20
Date: 2023-09-30, Predicted Price: 74.13932037353516
Date: 2023-10-01, Predicted Price: 76.35344696044922
Date: 2023-10-02, Predicted Price: 76.3241958618164
Date: 2023-10-03, Predicted Price: 74.60750579833984
Date: 2023-10-04, Predicted Price: 72.49090576171875
Date: 2023-10-05, Predicted Price: 71.52527618408203
Date: 2023-10-06, Predicted Price: 74.22711181640625
Date: 2023-10-07, Predicted Price: 74.7635726928711
Date: 2023-10-08, Predicted Price: 75.90477752685547
Date: 2023-10-09, Predicted Price: 75.17322540283203
Date: 2023-10-10, Predicted Price: 78.6163330078125
Date: 2023-10-11, Predicted Price: 77.99211120605469
Date: 2023-10-12, Predicted Price: 78.6163330078125
Date: 2023-10-13, Predicted Price: 79.6697769165039
Date: 2023-10-14, Predicted Price: 78.96748352050781
Date: 2023-10-15, Predicted Price: 90.78681945800781
Date: 2023-10-16, Predicted Price: 78.41152954101562
D