<a href="https://colab.research.google.com/github/RatishMoondra/MS_Thesis/blob/main/collab_rl_algo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
import random
from collections import deque
import sqlite3
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress TensorFlow logging (1: INFO, 2: WARNING, 3: ERROR)


class StockTradingEnvironment:
    """Single-stock trading simulation driven by a column of closing prices.

    On every step the agent picks one of three actions: ``0`` (hold),
    ``1`` (buy one share) or ``2`` (sell one share). Trades execute at the
    closing price of the row the environment has just advanced to.

    Rows are always addressed by position, so ``df`` may carry any index
    (dates, a filtered subset, ...), not only the default ``0..n-1``.
    """

    def __init__(self, df, window_size=14, initial_balance=1000):
        """Store the configuration and start the first episode.

        Args:
            df: DataFrame with a numeric ``'Close'`` column; rows are walked
                in the order given.
            window_size: Number of past closing prices in each observation.
            initial_balance: Cash available at the start of every episode.

        Raises:
            ValueError: If ``df`` has fewer than ``window_size`` rows, since
                no full observation window could be built.
        """
        if len(df) < window_size:
            raise ValueError(
                f"df has {len(df)} rows but window_size={window_size} "
                "rows are needed to build a state"
            )
        self.df = df
        self.window_size = window_size
        self.initial_balance = initial_balance
        self.reset()

    def reset(self):
        """Restore the starting portfolio and return the first observation."""
        self.balance = self.initial_balance
        self.shares_owned = 0
        self.net_worth = self.balance
        self.current_step = self.window_size  # Start from window size to have enough history
        self.max_steps = len(self.df) - 1
        self.state = self.get_state()
        return self.state

    def get_state(self):
        """Return the ``window_size`` closes preceding ``current_step``.

        The window is ``[current_step - window_size, current_step)``, i.e. it
        does not include the price at ``current_step`` itself.

        Returns:
            numpy array of shape ``(window_size, 1)``.
        """
        end_idx = self.current_step
        start_idx = max(0, end_idx - self.window_size)
        window = self.df['Close'].values[start_idx:end_idx]
        return np.reshape(window, (self.window_size, 1))

    def step(self, action):
        """Advance one row, apply ``action`` and score the result.

        Args:
            action: ``1`` buys one share, ``2`` sells one share, anything
                else leaves the portfolio untouched.

        Returns:
            Tuple ``(next_state, reward, done)``. ``reward`` is the net-worth
            gain relative to ``initial_balance``, minus 5 when the requested
            trade was impossible (buying without enough cash, or selling with
            no shares). ``done`` is true once the last row has been reached.
        """
        self.current_step += 1
        # Positional lookup: a label lookup (df['Close'][i]) breaks as soon as
        # the frame's index is not the default 0..n-1 range.
        price = self.df['Close'].iloc[self.current_step]
        reward = 0
        print(f"(PRE) Current Step : {self.current_step} Action: {action} Balance {self.balance} Share Ownned {self.shares_owned} Close price {price}")
        if action == 1:
            if self.balance >= price:
                self.shares_owned += 1
                self.balance -= price
            else:
                print("*****BALANCE<CLOSEPRICE")
                reward -= 5
        elif action == 2:
            if self.shares_owned > 0:
                self.shares_owned -= 1
                self.balance += price
            else:
                print("*****NO SHARWS OWNED BUT SELLING****")
                reward -= 5

        self.net_worth = self.balance + self.shares_owned * price
        reward += (self.net_worth - self.initial_balance) / self.initial_balance

        print(f"(POST) Balance {self.balance} Share Ownned {self.shares_owned} Reward {reward} Net Worth {self.net_worth}")

        next_state = self.get_state()

        # >= rather than == so the flag stays set if the step counter ever
        # ends up past the last row.
        done = self.current_step >= self.max_steps
        print(f"Initial Balance {self.initial_balance} / Balance {self.balance}/ Net_worth {self.net_worth} / Reward = {reward} ")
        return next_state, reward, done

class RLAgent:
    """Q-learning agent with an LSTM value network and a replay memory.

    Actions are chosen epsilon-greedily; ``epsilon`` starts at 1.0 and is
    multiplied by ``epsilon_decay`` after every replay pass until it reaches
    ``epsilon_min``.
    """

    # Labels used only for the per-step training log.
    ACTION_NAMES = {0: "hold", 1: "buy", 2: "sell"}

    def __init__(self, state_size, action_size):
        """Create the agent and build its network.

        Args:
            state_size: ``(timesteps, features)`` tuple describing one state.
            action_size: Number of discrete actions the network scores.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # oldest transitions are dropped first
        self.gamma = 0.95  # discount applied to the best next-state value
        self.epsilon = 1.0  # current exploration probability
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the LSTM -> Dense -> Dense value network."""
        model = Sequential([
            LSTM(50, input_shape=(self.state_size[0], self.state_size[1])),
            Dense(25, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action for ``state`` epsilon-greedily.

        Returns:
            The action index as a plain ``int``: a uniformly random action
            with probability ``epsilon``, otherwise the index of the highest
            predicted value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        q_values = self.model.predict(np.array([state]), verbose=0)[0]
        # argmax yields a numpy integer; convert so both branches return int.
        return int(np.argmax(q_values))

    def replay(self, batch_size):
        """Fit the network on ``batch_size`` randomly sampled transitions.

        Each sampled transition is fitted individually (one predict/fit
        round-trip per sample), which is simple but slow. ``epsilon`` is
        decayed once per call.

        Raises:
            ValueError: From ``random.sample`` when the memory holds fewer
                than ``batch_size`` transitions.
        """
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap from the best predicted value of the next state.
                next_q = self.model.predict(np.array([next_state]), verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)
            state_batch = np.array([state])
            target_f = self.model.predict(state_batch, verbose=0)
            # Only the taken action's value is moved towards the target.
            target_f[0][action] = target
            self.model.fit(state_batch, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def train(self, env, episodes, batch_size):
        """Run ``episodes`` episodes on ``env``, learning from replay.

        ``env`` must provide ``reset()``, ``max_steps`` and
        ``step(action) -> (next_state, reward, done)``. After every
        non-terminal step a replay pass runs once the memory holds more than
        ``batch_size`` transitions.
        """
        for episode in range(episodes):
            print(f"Processing Episode# {episode}")
            state = env.reset()
            for step in range(env.max_steps):
                action = self.act(state)
                # Table lookup with a fallback so an agent configured with
                # more than three actions can still log every step.
                agent_action = self.ACTION_NAMES.get(action, f"action_{action}")

                print(f"Processing Episode {episode} Step Number {step} Action {action} Agent Action {agent_action}")

                next_state, reward, done = env.step(action)
                self.remember(state, action, reward, next_state, done)
                state = next_state
                if done:
                    print(f"Episode: {episode+1}/{episodes}, Reward: {reward:.2f}, Epsilon: {self.epsilon:.2f}")
                    break
                if len(self.memory) > batch_size:
                    self.replay(batch_size)

# Load the closing prices for one ticker from the local SQLite database.
ticker = 'MSFT'
conn = sqlite3.connect('stock_data.db')
try:
    cursor = conn.cursor()
    # Bind the ticker as a query parameter instead of formatting it into the
    # SQL text, so quoting is handled by the driver.
    # NOTE(review): there is no ORDER BY, so SQLite does not guarantee row
    # order; if new_prices has a date column, order by it - confirm schema.
    cursor.execute("SELECT Close FROM new_prices WHERE StockName = ?", (ticker,))
    data = cursor.fetchall()
finally:
    # Release the connection even when the query fails (e.g. missing table).
    conn.close()

# Convert the data into a pandas DataFrame
df = pd.DataFrame(data, columns=['Close'])
if df.empty:
    # Fail here with a readable message rather than inside the environment.
    raise ValueError(f"no rows found in new_prices for StockName={ticker!r}")

# # Normalize the data
# scaler = MinMaxScaler(feature_range=(0, 1))
# scaled_data = scaler.fit_transform(df[['Close']])
# df['Close'] = scaled_data

# Initialize environment, agent, and train the agent
env = StockTradingEnvironment(df)
state_size = (env.window_size, 1)  # Make sure state_size is a tuple
action_size = 3  # 3 actions: hold, buy, sell
agent = RLAgent(state_size, action_size)
agent.train(env, episodes=1, batch_size=32)


OperationalError: no such table: new_prices