In [None]:
!pip install yfinance torch numpy pandas




In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque


In [None]:
# define stock symbol and time period
symbol = "AAPL"
start_date = "2020-01-01"
end_date = "2025-02-14"

# download historical data
data = yf.download(symbol, start=start_date, end=end_date)




  data = yf.download(symbol, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


In [None]:
print(data)

Price            Close        High         Low        Open     Volume
Ticker            AAPL        AAPL        AAPL        AAPL       AAPL
Date                                                                 
2020-01-02   72.468277   72.528597   71.223274   71.476615  135480400
2020-01-03   71.763718   72.523746   71.539330   71.696160  146322800
2020-01-06   72.335548   72.374154   70.634532   70.885464  118387200
2020-01-07   71.995377   72.600983   71.775811   72.345227  108872000
2020-01-08   73.153503   73.455103   71.698589   71.698589  132079200
...                ...         ...         ...         ...        ...
2025-02-07  226.605133  232.946448  226.236789  231.552758   39707200
2025-02-10  226.874191  229.804175  226.425728  228.787662   33115600
2025-02-11  231.827255  234.428361  227.352566  227.422320   53718400
2025-02-12  236.062775  236.152480  229.893867  230.412099   45243300
2025-02-13  240.706894  241.514131  234.767213  236.102643   53614100

[1287 rows x 5 colu

In [None]:
data.head()

Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-01-02,72.468277,72.528597,71.223274,71.476615,135480400
2020-01-03,71.763718,72.523746,71.53933,71.69616,146322800
2020-01-06,72.335548,72.374154,70.634532,70.885464,118387200
2020-01-07,71.995377,72.600983,71.775811,72.345227,108872000
2020-01-08,73.153503,73.455103,71.698589,71.698589,132079200


In [None]:
# feature engineering
data['SMA_5'] = data['Close'].rolling(window=5).mean()
data['SMA_20'] = data['Close'].rolling(window=20).mean()
data['Returns'] = data['Close'].pct_change()

# clean data
data.dropna(inplace=True)
data.reset_index(drop=True, inplace=True)

data.head()


Price,Close,High,Low,Open,Volume,SMA_5,SMA_20,Returns
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,78.143158,78.19624,76.907809,77.339701,126743200,76.879333,75.287975,-0.001449
1,74.678383,77.856028,74.384022,77.433789,199588400,76.454686,75.398481,-0.044339
2,74.473289,75.638666,72.91945,73.421308,173788400,76.440688,75.533959,-0.002746
3,76.931923,77.122536,75.672444,76.077792,136616400,76.496664,75.763778,0.033014
4,77.55928,78.357915,76.956082,78.058723,118826800,76.357207,76.041973,0.008155


In [None]:
# action space
ACTIONS = {0: "HOLD", 1: "BUY", 2: "SELL"}

# get state function
def get_state(data, index):
    return np.array([
        float(data.loc[index, 'Close']),
        float(data.loc[index, 'SMA_5']),
        float(data.loc[index, 'SMA_20']),
        float(data.loc[index, 'Returns'])
    ])


In [None]:
class TradingEnvironment:
    def __init__(self, data):
        self.data = data
        self.initial_balance = 10000
        self.balance = self.initial_balance
        self.holdings = 0
        self.index = 0

    def reset(self):
        self.balance = self.initial_balance
        self.holdings = 0
        self.index = 0
        return get_state(self.data, self.index)

    def step(self, action):
        price = float(self.data.loc[self.index, 'Close'])
        reward = 0

        if action == 1 and self.balance >= price:  # BUY
            self.holdings = self.balance // price
            self.balance -= self.holdings * price

        elif action == 2 and self.holdings > 0:  # SELL
            self.balance += self.holdings * price
            self.holdings = 0

        self.index += 1
        done = self.index >= len(self.data) - 1

        if done:
            reward = self.balance - self.initial_balance

        next_state = get_state(self.data, self.index) if not done else None
        return next_state, reward, done, {}


In [None]:
#nn.Module is PyTorch’s base class for all neural networks.

class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        
        #Initializes PyTorch’s internal stuff, (Always needed when using nn.Module), Calls parent constructor
        super(DQN, self).__init__()
        
        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, action_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)


In [None]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        
        #discount factor : Future rewards matter 95% as much as current reward.
        self.gamma = 0.95
        
        #Exploration settings: randomness in action selection.
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        
        #Learning rate:
        self.learning_rate = 0.001


        self.model = DQN(state_size, action_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate) #Optimizer
        self.criterion = nn.MSELoss() #Loss function

    def remember(self, state, action, reward, next_state, done):
        #store experience - Called after every step.
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if random.random() < self.epsilon:
            return random.choice(list(ACTIONS.keys()))
        state = torch.FloatTensor(state).unsqueeze(0)
        with torch.no_grad():
            q_values = self.model(state)
        return torch.argmax(q_values).item()

    def replay(self, batch_size):
        
        #If memory too small - skip training.
        if len(self.memory) < batch_size:
            return

        #Pick random experiences to learn from, This avoids correlation problems.
        minibatch = random.sample(self.memory, batch_size)

        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_state = torch.FloatTensor(next_state).unsqueeze(0)
                target += self.gamma * torch.max(self.model(next_state)).item()
                
                '''
                Q-learning formula:
                target = reward + γ * best future Q-value
                '''

            state = torch.FloatTensor(state).unsqueeze(0)
            target_f = self.model(state).clone().detach()
            target_f[0][action] = target

            self.optimizer.zero_grad()
            output = self.model(state)
            loss = self.criterion(output, target_f)
            loss.backward()
            self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [None]:
env = TradingEnvironment(data)
agent = DQNAgent(state_size=4, action_size=3)

episodes = 500
batch_size = 32

for episode in range(episodes):
    state = env.reset()
    done = False
    total_reward = 0

    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

    agent.replay(batch_size)
    print(f"Episode {episode+1}/{episodes}, Reward: {total_reward}")

print("Training completed!")


  float(data.loc[index, 'Close']),
  float(data.loc[index, 'SMA_5']),
  float(data.loc[index, 'SMA_20']),
  float(data.loc[index, 'Returns'])
  price = float(self.data.loc[self.index, 'Close'])


Episode 1/500, Reward: -9920.567775726318
Episode 2/500, Reward: -9854.000030517578
Episode 3/500, Reward: -9742.861263275146
Episode 4/500, Reward: -9782.529434204102
Episode 5/500, Reward: -9883.57027053833
Episode 6/500, Reward: -9854.499488830566
Episode 7/500, Reward: -9791.499031066895
Episode 8/500, Reward: -9910.090469360352
Episode 9/500, Reward: -9855.78706741333
Episode 10/500, Reward: -9867.057849884033
Episode 11/500, Reward: -9851.162780761719
Episode 12/500, Reward: -9924.620738983154
Episode 13/500, Reward: -9922.126449584961
Episode 14/500, Reward: -9788.55874633789
Episode 15/500, Reward: -9939.798225402832
Episode 16/500, Reward: -9852.947940826416
Episode 17/500, Reward: 8756.373840332031
Episode 18/500, Reward: -9885.100708007812
Episode 19/500, Reward: -9861.95877456665
Episode 20/500, Reward: -9792.254001617432
Episode 21/500, Reward: -9814.118503570557
Episode 22/500, Reward: -9824.084381103516
Episode 23/500, Reward: -9922.078029632568
Episode 24/500, Reward: -

In [None]:
test_env = TradingEnvironment(data)
state = test_env.reset()
done = False

while not done:
    action = agent.act(state)
    next_state, reward, done, _ = test_env.step(action)
    state = next_state if next_state is not None else state

final_balance = test_env.balance
profit = final_balance - test_env.initial_balance

print(f"Final Balance: ${final_balance:.2f}")
print(f"Profit: ${profit:.2f}")


  float(data.loc[index, 'Close']),
  float(data.loc[index, 'SMA_5']),
  float(data.loc[index, 'SMA_20']),
  float(data.loc[index, 'Returns'])
  price = float(self.data.loc[self.index, 'Close'])


Final Balance: $13240.61
Profit: $3240.61
