# AI Agent using Agentic AI

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque

In [3]:
# define symbol and time

symbol = "AAPL"
start_date = "2020-01-01"
end_date = "2025-02-24"


In [4]:
# Data Download

df = yf.download(symbol, start=start_date, end=end_date)

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


# Feature Engineering

In [5]:
df['SMA_5'] = df['Close'].rolling(window=5).mean()
df['SMA_20'] = df['Close'].rolling(window=20).mean()
df['Returns'] = df['Close'].pct_change()

In [6]:
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

# Defining Acion space

In [7]:
Actions = {0: "HOLD", 1: "BUY", 2: "SELL"}


In [8]:
def get_state(df, index):
    return np.array([
        float(df.loc[index, 'Close']),
        float(df.loc[index, 'SMA_5']),
        float(df.loc[index, 'SMA_20']),
        float(df.loc[index, 'Returns']),
    ])

# Building Environment for Agent

In [9]:
class TradingEnvironment:
    def __init__(self, df):
        self.df = df
        self.initial_balance = 10000
        self.balance = self.initial_balance
        self.holdings = 0
        self.index = 0

    def reset(self):
        self.balance = self.initial_balance
        self.holdings = 0
        self.index = 0
        return get_state(self.df, self.index)

    def step(self, action):
        price = float(self.df.loc[self.index, 'Close'])
        reward = 0

        if action == 1 and self.balance >= price:
            self.holdings = self.balance // price
            self.balance -= self.holdings * price
        elif action == 2 and self.holdings > 0:
            self.balance += self.holdings * price
            self.holdings = 0
        self.index += 1
        done = self.index >= len(self.df) - 1
        if done :
            reward = self.balance - self.initial_balance

        next_state = get_state(self.df, self.index) if not done else None
        return next_state, reward, done, {}

# Deep QNetwork

In [10]:
class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, action_size)
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# DQN Agent

In [15]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen = 2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.995
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = DQN(state_size, action_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(list(Actions.keys()))
        state = torch.FloatTensor(state).unsqueeze(0)
        with torch.no_grad():
            q_values = self.model(state)
        return torch.argmax(q_values).item()
    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)

        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0)
                target += self.gamma * torch.max(self.model(next_state_tensor)).item()

            state_tensor = torch.FloatTensor(state).unsqueeze(0)
            target_tensor = self.model(state_tensor).clone().detach()
            target_tensor[0][action] = target
            self.optimizer.zero_grad()
            output = self.model(state_tensor)
            loss = self.criterion(output, target_tensor)
            loss.backward()
            self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


# Training

In [16]:
env = TradingEnvironment(df)
agent = DQNAgent(state_size=4, action_size=3)
batch_size = 32
episodes = 500
total_rewards = []

for episode in range(episodes):
    state = env.reset()
    done = False
    total_reward = 0

    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

    agent.replay(batch_size)
    total_rewards.append(total_reward)
    print(f"Episode {episode+1}/{episodes}, Total Reward: {total_reward}")

print("Training Complete!")

  float(df.loc[index, 'Close']),
  float(df.loc[index, 'SMA_5']),
  float(df.loc[index, 'SMA_20']),
  float(df.loc[index, 'Returns']),
  price = float(self.df.loc[self.index, 'Close'])


Episode 1/500, Total Reward: -9939.841484069824
Episode 2/500, Total Reward: -9992.113983154297
Episode 3/500, Total Reward: -9919.872188568115
Episode 4/500, Total Reward: 9375.639671325684
Episode 5/500, Total Reward: -9784.97509765625
Episode 6/500, Total Reward: -9805.311935424805
Episode 7/500, Total Reward: -9845.643013000488
Episode 8/500, Total Reward: -9870.549018859863
Episode 9/500, Total Reward: 6640.612163543701
Episode 10/500, Total Reward: -9861.947647094727
Episode 11/500, Total Reward: 2516.5684089660645
Episode 12/500, Total Reward: -9988.677139282227
Episode 13/500, Total Reward: -9850.955074310303
Episode 14/500, Total Reward: -9875.424404144287
Episode 15/500, Total Reward: -9843.125370025635
Episode 16/500, Total Reward: -9815.525001525879
Episode 17/500, Total Reward: -9884.95157623291
Episode 18/500, Total Reward: -9828.735778808594
Episode 19/500, Total Reward: -9940.198749542236
Episode 20/500, Total Reward: -9927.276191711426
Episode 21/500, Total Reward: -99

In [18]:
test_env = TradingEnvironment(df)
state = test_env.reset()
done = False

while not done:
    action = agent.act(state)
    next_state, reward, done, _ = test_env.step(action)
    state = next_state if next_state is not None else state

final_balance = test_env.balance
profit = final_balance - test_env.initial_balance
print(f"Final Balance after testing: ${final_balance:.2f}")
print(f"Total Profit: ${profit:.2f}")

  float(df.loc[index, 'Close']),
  float(df.loc[index, 'SMA_5']),
  float(df.loc[index, 'SMA_20']),
  float(df.loc[index, 'Returns']),
  price = float(self.df.loc[self.index, 'Close'])


Final Balance after testing: $149.46
Total Profit: $-9850.54
