<a href="https://colab.research.google.com/github/Harshit-jain-1/Stock-market-portfolio/blob/main/AI_agent_with_agentic_AI_for_stock.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# import library
import yfinance as yf
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque

In [2]:
# define stock symbol and time period
symbol = "GOOG"
start_date = "2020-01-01"
end_date = "2025-03-01"

# download historical data
data = yf.download(symbol, start=start_date, end=end_date)

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


In [3]:
# feature engineering
data['SMA_5'] = data['Close'].rolling(window=5).mean()
data['SMA_20'] = data['Close'].rolling(window=20).mean()
data['Returns'] = data['Close'].pct_change()

In [4]:
# drop NaN values and reset index
data.dropna(inplace=True)
data.reset_index(drop=True, inplace=True)

In [5]:
# define action space
ACTIONS = {0: "HOLD", 1: "BUY", 2: "SELL"}

In [6]:
# get state function
def get_state(data, index):
    return np.array([
        float(data.loc[index, 'Close']),
        float(data.loc[index, 'SMA_5']),
        float(data.loc[index, 'SMA_20']),
        float(data.loc[index, 'Returns'])
    ])

In [7]:
# trading environment
class TradingEnvironment:
    def __init__(self, data):
        self.data = data # The historical stock data (e.g., closing prices, moving averages, etc.)
        self.initial_balance = 10000 # Starting balance in dollars
        self.balance = self.initial_balance # Current balance (starts with the initial balance)
        self.holdings = 0 # Amount of stock held (starts with 0)
        self.index = 0 # The current index (position in the data)

    def reset(self):
        self.balance = self.initial_balance # Reset balance to the initial value
        self.holdings = 0 # Reset holdings to 0 (agent is not holding any stock)
        self.index = 0 # Reset the index to the first data point
        return get_state(self.data, self.index) # Return the initial state

    def step(self, action):
        price = float(self.data.loc[self.index, 'Close']) # Get the current price (closing price)
        reward = 0 # Initialize reward to 0

        # Perform the action: BUY (1) or SELL (2)
        if action == 1 and self.balance >= price:  # BUY
            self.holdings = self.balance // price # Calculate how many shares can be bought
            self.balance -= self.holdings * price # Subtract the cost of the shares from the balance
        elif action == 2 and self.holdings > 0:  # SELL
            self.balance += self.holdings * price # Add the revenue from selling the shares to the balance
            self.holdings = 0 # Reset holdings to 0 after selling

        self.index += 1 # Move to the next day in the data
        done = self.index >= len(self.data) - 1 # Check if the simulation is complete (end of data)

        if done:
            reward = self.balance - self.initial_balance # Reward is the profit/loss

        next_state = get_state(self.data, self.index) if not done else None
        return next_state, reward, done, {}

In [8]:
# deep q-network
class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_size, 64)  # First fully connected layer
        self.fc2 = nn.Linear(64, 64)  # Second fully connected layer
        self.fc3 = nn.Linear(64, action_size)  # Output layer

    def forward(self, x):
        x = torch.relu(self.fc1(x))  # Apply ReLU activation to the first layer
        x = torch.relu(self.fc2(x))  # Apply ReLU activation to the second layer
        return self.fc3(x)  # Output layer returns the Q-values

In [9]:
# DQN agent
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000) # Memory buffer with max size of 2000
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01 # Minimum value for epsilon (exploration)
        self.epsilon_decay = 0.995 # Decay rate for epsilon
        self.learning_rate = 0.001 # Learning rate for model optimization
        self.model = DQN(state_size, action_size) # The DQN model (Q-network)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if random.uniform(0, 1) < self.epsilon:
          return random.choice(list(ACTIONS.keys()))  # Exploration: random action
        state = torch.FloatTensor(state).unsqueeze(0)  # Convert state to tensor and add batch dimension
        with torch.no_grad():
          q_values = self.model(state)  # Predict Q-values for each action
        return torch.argmax(q_values).item()  # Exploitation: return the action with the highest Q-value

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)

        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0)
                target += self.gamma * torch.max(self.model(next_state_tensor)).item()

            state_tensor = torch.FloatTensor(state).unsqueeze(0) # Convert state to tensor
            target_tensor = self.model(state_tensor).clone().detach()  # Clone model's Q-values
            target_tensor[0][action] = target  # Update the target for the specific action

            self.optimizer.zero_grad()  # Zero the gradients of the model
            output = self.model(state_tensor)  # Get the current Q-values for the state
            loss = self.criterion(output, target_tensor)  # Calculate the loss (MSE between Q-values)
            loss.backward()  # Backpropagate the loss
            self.optimizer.step()  # Update the model's parameters

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [12]:
# train the agent
env = TradingEnvironment(data)
agent = DQNAgent(state_size=4, action_size=3)
batch_size = 32
episodes = 1000
total_rewards = []

for episode in range(episodes):
    state = env.reset()
    done = False
    total_reward = 0

    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

    agent.replay(batch_size)
    total_rewards.append(total_reward)
    print(f"Episode {episode+1}/{episodes}, Total Reward: {total_reward}")

print("Training Complete!")

  float(data.loc[index, 'Close']),
  float(data.loc[index, 'SMA_5']),
  float(data.loc[index, 'SMA_20']),
  float(data.loc[index, 'Returns'])
  price = float(self.data.loc[self.index, 'Close']) # Get the current price (closing price)


Episode 1/1000, Total Reward: -9987.103031158447
Episode 2/1000, Total Reward: -9914.385829925537
Episode 3/1000, Total Reward: -9898.291049957275
Episode 4/1000, Total Reward: -9903.004447937012
Episode 5/1000, Total Reward: -9914.729091644287
Episode 6/1000, Total Reward: -9881.305938720703
Episode 7/1000, Total Reward: -9902.10478591919
Episode 8/1000, Total Reward: -9884.3890914917
Episode 9/1000, Total Reward: -9794.944774627686
Episode 10/1000, Total Reward: -9909.824466705322
Episode 11/1000, Total Reward: -9887.926235198975
Episode 12/1000, Total Reward: -9885.515453338623
Episode 13/1000, Total Reward: -9903.89836883545
Episode 14/1000, Total Reward: -9768.748672485352
Episode 15/1000, Total Reward: -9897.36173248291
Episode 16/1000, Total Reward: -9994.958473205566
Episode 17/1000, Total Reward: -9934.299583435059
Episode 18/1000, Total Reward: -9933.639183044434
Episode 19/1000, Total Reward: -9870.354633331299
Episode 20/1000, Total Reward: -9980.978504180908
Episode 21/100

In [13]:
# create a fresh environment instance for testing
test_env = TradingEnvironment(data)
state = test_env.reset()
done = False

# simulate a trading session using the trained agent
while not done:
    # always choose the best action (exploitation)
    action = agent.act(state)
    next_state, reward, done, _ = test_env.step(action)
    state = next_state if next_state is not None else state

final_balance = test_env.balance
profit = final_balance - test_env.initial_balance
print(f"Final Balance after testing: ${final_balance:.2f}")
print(f"Total Profit: ${profit:.2f}")

  float(data.loc[index, 'Close']),
  float(data.loc[index, 'SMA_5']),
  float(data.loc[index, 'SMA_20']),
  float(data.loc[index, 'Returns'])
  price = float(self.data.loc[self.index, 'Close']) # Get the current price (closing price)


Final Balance after testing: $15127.47
Total Profit: $5127.47
