In [2]:
import gym
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA

# Define StockTradingEnv class
class StockTradingEnv(gym.Env):
    def __init__(self, data):
        super(StockTradingEnv, self).__init__()
        self.data = data
        self.max_steps = len(data)
        self.current_step = None
        self.action_space = gym.spaces.Discrete(3)  # Buy, Sell, Hold
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(5,))
        self.reset()

    def reset(self):
        self.current_step = 0
        self.balance = 10000
        self.shares_held = 0
        self.net_worth = self.balance
        self.stock_price = self.data['close'][self.current_step]
        self.history = np.array([self.balance, self.shares_held, self.stock_price, 0, 0])
        return self.history

    def step(self, action):
        self._take_action(action)
        self.current_step += 1
        self.stock_price = self.data['close'][self.current_step]
        self.net_worth = self.balance + self.shares_held * self.stock_price
        self.history = np.array([self.balance, self.shares_held, self.stock_price, self.net_worth, action])
        reward = self.net_worth - self.history[3]
        done = self.current_step == self.max_steps - 1
        return self.history, reward, done, {}

    def _take_action(self, action):
        if action == 0:  # Buy
            self.shares_held += 100
            self.balance -= self.stock_price * 100
        elif action == 1:  # Sell
            self.shares_held -= 100
            self.balance += self.stock_price * 100

# Define DQNAgent class
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.model = self._build_model()

    def _build_model(self):
        model = tf.keras.Sequential([
            layers.Dense(24, input_dim=self.state_size, activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=0.001))
        return model

    def act(self, state):
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])

    def train(self, state, action, reward, next_state, done):
        target = reward
        if not done:
            target = reward + 0.95 * np.amax(self.model.predict(next_state)[0])
        target_f = self.model.predict(state)
        target_f[0][action] = target
        self.model.fit(state, target_f, epochs=1, verbose=0)

# Retrieve and save stock data
symbol = '^NSEBANK'
start_date = '2023-07-01'
end_date = '2023-08-10'

def get_stock_data(symbol, start_date, end_date):
    stock = yf.download(symbol, start=start_date, end=end_date)
    stock.reset_index(inplace=True)
    stock.rename(columns={'Date': 'date', 'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'}, inplace=True)
    return stock[['date', 'close']]

stock_data = get_stock_data(symbol, start_date, end_date)
stock_data.to_csv('stock_data.csv', index=False)

# Create the environment
data = pd.read_csv('stock_data.csv')
env = StockTradingEnv(data)

# Initialize the DQNAgent
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)

# Training loop
num_episodes = 100
for episode in range(num_episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])

    for step in range(env.max_steps):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        agent.train(state, action, reward, next_state, done)
        state = next_state

        if done:
            print(f"Episode: {episode}/{num_episodes}, Net Worth: {env.net_worth}")
            break

            
            

def fit_arima(data):
    # Fit an ARIMA model to historical stock price data
    # 'data' should be a pandas Series or DataFrame with the historical stock prices
    # You can adjust the order (p, d, q) of the ARIMA model based on your data and needs
    p, d, q = 1, 1, 1  # Example values, you might need to adjust these
    arima_model = ARIMA(data, order=(p, d, q))
    arima_model_fit = arima_model.fit()
    return arima_model_fit

def forecast_arima(model, steps):
    # Generate ARIMA forecasts for the next 'steps' time steps
    # 'model' should be the ARIMA model fitted using fit_arima
    # 'steps' is the number of steps into the future you want to forecast
    arima_forecasts = model.forecast(steps=steps)
    return arima_forecasts
    
    

# Using ARIMA forecasts for trading decisions
forecast_steps = 10 
arima_model_fit = fit_arima(stock_data['close'])
arima_forecasts = forecast_arima(arima_model_fit, forecast_steps)

print(f"Episode: {episode}/{num_episodes}, Step: {i}/{len(arima_forecasts)}")
print("State:", state)
print("Action:", action)
print("Reward:", reward)



% Combine ARIMA forecasts with RL environment
for i in range(len(arima_forecasts)):
    state = env.history
    state[2] = arima_forecasts[i]
    state = np.reshape(state, [1, state_size])

    action = agent.act(state)
    next_state, reward, done, _ = env.step(action)
    next_state = np.reshape(next_state, [1, state_size])
    agent.train(state, action, reward, next_state, done)

[*********************100%%**********************]  1 of 1 completed




Episode: 0/100, Net Worth: 961971.875
Episode: 1/100, Net Worth: 1113107.8125
Episode: 2/100, Net Worth: 1113107.8125
Episode: 3/100, Net Worth: 254134.765625
Episode: 4/100, Net Worth: 1028957.8125
Episode: 5/100, Net Worth: 41725.0
Episode: 6/100, Net Worth: 1020097.65625
Episode: 7/100, Net Worth: 1192457.03125
Episode: 8/100, Net Worth: 1113107.8125
Episode: 9/100, Net Worth: 11795.3125
Episode: 10/100, Net Worth: 11795.3125
Episode: 11/100, Net Worth: 883307.03125
Episode: 12/100, Net Worth: 1113107.8125
Episode: 13/100, Net Worth: 41725.0
Episode: 14/100, Net Worth: 11795.3125
Episode: 15/100, Net Worth: 11795.3125
Episode: 16/100, Net Worth: 11795.3125
Episode: 17/100, Net Worth: 11795.3125
Episode: 18/100, Net Worth: 11795.3125
Episode: 19/100, Net Worth: 11795.3125
Episode: 20/100, Net Worth: 11795.3125
Episode: 21/100, Net Worth: 11795.3125
Episode: 22/100, Net Worth: 11795.3125
Episode: 23/100, Net Worth: 11795.3125
Episode: 24/100, Net Worth: 11795.3125
Episode: 25/100, Ne

NotImplementedError: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been removed in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and statsmodels.tsa.SARIMAX.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained. It also offers alternative specialized
parameter estimators.
