# Optimizing Stock Portfolio Management Using a Dueling Deep Q-Network  

## The project utilizes reinforcement learning, specifically a Dueling Deep Q-Network, to optimize stock portfolio management, addressing market unpredictability and enhancing automated trading decisions through data acquisition, feature engineering, and performance evaluation.

## Importing the necessary libraries

In [1]:
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from alpha_vantage.timeseries import TimeSeries
from torch.utils.data import DataLoader

In [2]:
# Alpha Vantage credential. A key supplied through the ALPHA_VANTAGE_API_KEY
# environment variable takes precedence so the secret does not have to live in
# the notebook; the literal fallback only keeps existing runs working.
# NOTE(review): the fallback key is committed in plain text - rotate it and
# remove the fallback once the environment variable is configured.
API_KEY = os.environ.get('ALPHA_VANTAGE_API_KEY') or '769YEIDZFR0FE90Q'

## Data Acquisition
### This section uses the Alpha Vantage API to retrieve daily trading data for a variety of stocks over the past year, capturing critical metrics such as daily open/close prices, intraday high/low values, trading volume, and timestamps. Users can specify a stock symbol to dynamically fetch data relevant to their interests.

In [3]:
# Fetch stock data from Alpha Vantage API
# Define function for data acquisition
def download_stock_data(ticker, start_date, end_date):
    """Fetch daily bars for ``ticker`` from Alpha Vantage, limited to a date window.

    Args:
        ticker: Symbol passed to the Alpha Vantage daily time-series request.
        start_date: Inclusive lower bound; anything ``pd.to_datetime`` accepts.
        end_date: Inclusive upper bound; anything ``pd.to_datetime`` accepts.

    Returns:
        The rows whose index falls inside ``[start_date, end_date]``, sorted by
        ascending date, with the price/volume columns renamed to ``Open``,
        ``High``, ``Low``, ``Close`` and ``Volume``.
    """
    column_names = {
        '1. open': 'Open',
        '2. high': 'High',
        '3. low': 'Low',
        '4. close': 'Close',
        '5. volume': 'Volume',
    }

    client = TimeSeries(key=API_KEY, output_format='pandas')
    # The full history is requested and trimmed locally to the window.
    raw, _meta = client.get_daily(symbol=ticker, outputsize='full')

    frame = raw.rename(columns=column_names)
    frame.index = pd.to_datetime(frame.index)
    frame = frame.sort_index()

    window_start = pd.to_datetime(start_date)
    window_end = pd.to_datetime(end_date)
    in_window = (frame.index >= window_start) & (frame.index <= window_end)
    return frame[in_window]

In [4]:
def compute_loss(predictions, targets):
    """Return the mean smooth-L1 loss between predictions and per-row targets.

    ``targets`` is viewed as a single column and expanded to the shape of
    ``predictions``, so every column of a row is compared against that row's
    one target value.
    """
    column_targets = targets.view(-1, 1)
    broadcast_targets = column_targets.expand_as(predictions)
    elementwise = F.smooth_l1_loss(predictions, broadcast_targets, reduction='none')
    return torch.mean(elementwise)

In [5]:
def calculate_macd(prices, fast=12, slow=26, signal=9):
    """Compute the MACD line, its signal line and their difference.

    Args:
        prices: Series of prices.
        fast: Span of the fast exponential moving average.
        slow: Span of the slow exponential moving average.
        signal: Span of the exponential moving average applied to the MACD line.

    Returns:
        A ``(macd, signal_line, histogram)`` tuple, where ``macd`` is the fast
        EMA minus the slow EMA and ``histogram`` is ``macd - signal_line``.
    """
    def _ema(series, span):
        # Recursive (adjust=False) exponential moving average.
        return series.ewm(span=span, adjust=False).mean()

    macd_line = _ema(prices, fast) - _ema(prices, slow)
    trigger = _ema(macd_line, signal)
    return macd_line, trigger, macd_line - trigger

In [6]:
def calculate_rsi(prices, period=14):
    """Compute a Relative Strength Index from simple rolling averages.

    Gains and losses are the positive and negative parts of the one-step price
    change, each averaged over a rolling window of ``period`` observations.

    Args:
        prices: Series of prices.
        period: Rolling window length.

    Returns:
        Series of RSI values; positions where the value is undefined (NaN),
        such as the warm-up rows, are filled with the neutral value 50.
    """
    change = prices.diff()
    ups = change.where(change > 0, 0)
    downs = -change.where(change < 0, 0)

    avg_up = ups.rolling(window=period).mean()
    avg_down = downs.rolling(window=period).mean()

    strength = avg_up / avg_down
    index = 100 - (100 / (1 + strength))
    return index.fillna(50)

## Data Preprocessing
### To ensure data consistency and quality, preprocessing steps include handling missing values, detecting and treating outliers, and standardizing technical indicators. This prepares the data for model training and ensures stability across different market conditions.


### Feature Engineering
This segment implements various technical indicators essential for the RL model, including:
- **MACD (Moving Average Convergence Divergence)**: Captures momentum and trend shifts.
- **RSI (Relative Strength Index)**: Signals overbought/oversold conditions.
- **Moving Averages and Volatility**: Offer insights into trend direction and market risk.


In [7]:
def preprocess_data(data):
    """Derive technical-indicator features from price data and min-max scale them.

    Works on a copy of ``data`` (which must have a ``Close`` column) and adds:
    ``Original_Close`` (unscaled close), ``Return``, ``MA5``, ``MA20``, ``RSI``,
    ``Volatility``, ``Price_Momentum``, ``Above_MA5``, ``Above_MA20``, ``MACD``
    and ``Signal_Line``.

    Scaling uses the minimum and maximum of each column over the whole frame
    that is passed in. Rows inside the rolling warm-up period can still contain
    NaN afterwards (forward fill and interpolation do not fill leading gaps).

    Args:
        data: DataFrame of price data containing at least ``Close``.

    Returns:
        A new DataFrame with the original columns plus the engineered features.
    """
    df = data.copy()

    # Keep the unscaled close: 'Close' itself is rescaled to [0, 1] below.
    df['Original_Close'] = df['Close']

    df['Return'] = df['Close'].pct_change().fillna(0)
    df['MA5'] = df['Close'].rolling(window=5).mean().ffill()
    df['MA20'] = df['Close'].rolling(window=20).mean().ffill()
    df['RSI'] = calculate_rsi(df['Close'])
    df['Volatility'] = df['Return'].rolling(window=10).std().ffill()

    # A zero MA20 is turned into NaN so the ratio cannot become +/-inf.
    df['Price_Momentum'] = (df['Close'] - df['MA20']) / df['MA20'].replace(0, np.nan)

    # Binary trend flags, computed before 'Close' and the averages are rescaled.
    df['Above_MA5'] = (df['Close'] > df['MA5']).astype(float)
    df['Above_MA20'] = (df['Close'] > df['MA20']).astype(float)

    df['MACD'], df['Signal_Line'], _ = calculate_macd(df['Close'])

    features_to_scale = ['Close', 'MA5', 'MA20', 'RSI', 'Volatility', 'Price_Momentum', 'MACD', 'Signal_Line']
    for column in features_to_scale:
        min_val = df[column].min()
        value_range = df[column].max() - min_val
        if value_range == 0:
            # A constant column would give 0/0 = NaN on every row, which later
            # makes dropna() discard the whole data set. Map it to 0.0 instead
            # (existing NaN entries stay NaN).
            df[column] = df[column] - min_val
        else:
            df[column] = (df[column] - min_val) / value_range

    df.interpolate(method='linear', inplace=True)

    return df

In [8]:
class NoisyLinear(nn.Module):
    """Linear layer with learnable, factorised noise on its weights and bias.

    In training mode the effective parameters are ``mu + sigma * epsilon``,
    where ``mu`` and ``sigma`` are learned and ``epsilon`` is a noise buffer
    refreshed by :meth:`reset_noise`. In evaluation mode only ``mu`` is used.

    Args:
        in_features: Size of each input sample.
        out_features: Size of each output sample.
        std_init: Scale used to initialise the ``sigma`` parameters.
    """

    def __init__(self, in_features, out_features, std_init=0.1):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.std_init = std_init

        weight_shape = (out_features, in_features)
        self.weight_mu = nn.Parameter(torch.empty(weight_shape))
        self.weight_sigma = nn.Parameter(torch.empty(weight_shape))
        # Noise is a buffer: saved with the module but never optimised.
        self.register_buffer('weight_epsilon', torch.empty(weight_shape))

        self.bias_mu = nn.Parameter(torch.empty(out_features))
        self.bias_sigma = nn.Parameter(torch.empty(out_features))
        self.register_buffer('bias_epsilon', torch.empty(out_features))

        self.reset_parameters()
        self.reset_noise()

    @staticmethod
    def scale_noise(size):
        """Return ``size`` samples of ``sign(x) * sqrt(|x|)`` with ``x ~ N(0, 1)``."""
        noise = torch.randn(size)
        return noise.sign() * noise.abs().sqrt()

    def reset_parameters(self):
        """Initialise ``mu`` uniformly and ``sigma`` to constant values."""
        bound = 1 / np.sqrt(self.in_features)
        with torch.no_grad():
            self.weight_mu.uniform_(-bound, bound)
            self.weight_sigma.fill_(self.std_init / np.sqrt(self.in_features))
            self.bias_mu.uniform_(-bound, bound)
            self.bias_sigma.fill_(self.std_init / np.sqrt(self.out_features))

    def reset_noise(self):
        """Draw fresh noise for the weight and bias buffers."""
        noise_in = self.scale_noise(self.in_features)
        noise_out = self.scale_noise(self.out_features)
        # Weight noise is the outer product of the two vectors; the bias gets
        # its own independent draw.
        self.weight_epsilon.copy_(torch.outer(noise_out, noise_in))
        self.bias_epsilon.copy_(self.scale_noise(self.out_features))

    def forward(self, x):
        """Apply the layer, with noise in training mode and without it otherwise."""
        if not self.training:
            return F.linear(x, self.weight_mu, self.bias_mu)

        noisy_weight = self.weight_mu + self.weight_sigma * self.weight_epsilon
        noisy_bias = self.bias_mu + self.bias_sigma * self.bias_epsilon
        return F.linear(x, noisy_weight, noisy_bias)

## Dueling Deep Q-Network Design
### This model uses a Dueling Deep Q-Network (Dueling DQN) with separate streams for estimating the state value and the action advantages. The structure includes shared feature-extraction layers built from noise-injected linear layers for robust exploration and convergence. This architecture is designed to improve generalization and decision accuracy under fluctuating market conditions.


In [9]:
class DuelingDQN(nn.Module):
    """Dueling Q-network built from ``NoisyLinear`` layers.

    A shared feature extractor feeds two heads: a value head producing one
    scalar per sample and an advantage head producing one value per action.
    The heads are combined as ``value + advantage - mean(advantage)``.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of every hidden layer.
        output_size: Number of actions (outputs of the advantage head).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(DuelingDQN, self).__init__()
        self.feature = nn.Sequential(
            NoisyLinear(input_size, hidden_size),
            nn.ReLU(),
            nn.LayerNorm(hidden_size),
            NoisyLinear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.LayerNorm(hidden_size)
        )
        self.advantage = nn.Sequential(
            NoisyLinear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.LayerNorm(hidden_size),
            NoisyLinear(hidden_size, output_size)
        )
        self.value = nn.Sequential(
            NoisyLinear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.LayerNorm(hidden_size),
            NoisyLinear(hidden_size, 1)
        )

    def forward(self, x):
        """Return one Q-value per action for every sample in ``x``."""
        x = self.feature(x)
        advantage = self.advantage(x)
        value = self.value(x)
        # Centre the advantages per sample, over the action axis only. A plain
        # ``advantage.mean()`` would average over the whole batch as well and
        # make each sample's Q-values depend on the other samples in the batch.
        return value + advantage - advantage.mean(dim=-1, keepdim=True)

    def reset_noise(self):
        """Resample the noise buffers of every ``NoisyLinear`` sub-module."""
        for module in self.modules():
            if isinstance(module, NoisyLinear):
                module.reset_noise()

## Model Training
### The model is trained using mini-batch learning with gradient clipping and a smooth L1 loss function for robustness. A Boltzmann exploration strategy facilitates a balance between exploring new actions and exploiting known strategies.


In [10]:
def train_model(model, optimizer, X_train, y_train, epochs=100, batch_size=32):
    """Fit ``model`` to ``y_train`` with shuffled mini-batches.

    Rows of ``X_train`` containing NaN or +/-inf are dropped and ``y_train`` is
    selected by the surviving index labels. Each batch is skipped (with a
    message) if its inputs, predictions or loss contain NaN/inf; otherwise the
    gradient norm is clipped to 1.0 before the optimizer step. Training stops
    early at the first epoch in which no batch could be used.

    Args:
        model: Module mapping a float32 feature batch to predictions.
        optimizer: Optimizer already bound to ``model``'s parameters.
        X_train: DataFrame of features.
        y_train: Series of targets sharing ``X_train``'s index labels.
        epochs: Maximum number of passes over the data.
        batch_size: Mini-batch size.
    """
    def _has_non_finite(tensor):
        # True when any element is NaN or +/-inf.
        return not torch.isfinite(tensor).all()

    model.train()

    features_clean = X_train.replace([np.inf, -np.inf], np.nan).dropna()
    targets_clean = y_train[features_clean.index]

    feature_tensor = torch.tensor(features_clean.values, dtype=torch.float32)
    target_tensor = torch.tensor(targets_clean.values, dtype=torch.float32)
    loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(feature_tensor, target_tensor),
        batch_size=batch_size,
        shuffle=True,
    )

    for epoch in range(epochs):
        total_loss = 0
        num_batches = 0

        for inputs, target_actions in loader:
            if _has_non_finite(inputs) or _has_non_finite(target_actions):
                print("Skipping batch due to invalid input data.")
                continue

            optimizer.zero_grad()
            predictions = model(inputs)
            if _has_non_finite(predictions):
                print("Skipping batch due to invalid predictions.")
                continue

            loss = compute_loss(predictions, target_actions)
            if _has_non_finite(loss):
                print(f"Skipping batch due to invalid loss: {loss.item()}")
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            # Nothing was trainable this epoch, so further epochs cannot help.
            print(f"Epoch {epoch+1}/{epochs} - All batches skipped due to invalid loss")
            print(f"Early stopping at epoch {epoch+1}")
            break

        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/num_batches:.4f}")

In [11]:
def boltzmann_exploration(q_values, temperature=1.0):
    """Sample an action index from a softmax (Boltzmann) distribution.

    NaN and +/-inf Q-values are replaced by 0.0 before the softmax is taken.

    Args:
        q_values: Tensor of Q-values, one per action.
        temperature: Softmax temperature; the Q-values are divided by it, so
            smaller values concentrate probability on the largest Q-value.

    Returns:
        The sampled action index.
    """
    q_values = q_values.detach().cpu().numpy().astype(np.float64)
    q_values = np.nan_to_num(q_values, nan=0.0, posinf=0.0, neginf=0.0)

    logits = q_values / temperature
    # Softmax is unchanged by subtracting a constant. Shifting by the maximum
    # keeps every exponent <= 0, so exp() can neither overflow to inf nor
    # underflow to an all-zero vector (both produced NaN probabilities).
    exp_q = np.exp(logits - np.max(logits))
    probs = exp_q / np.sum(exp_q)
    action = np.random.choice(len(probs), p=probs)
    return action

In [12]:
def calculate_sharpe_ratio(returns, risk_free_rate=0.02):
    """Return the annualised Sharpe ratio of a series of daily returns.

    Args:
        returns: Series of per-day returns.
        risk_free_rate: Annual risk-free rate; converted to a daily rate by
            dividing by 252 trading days.

    Returns:
        ``sqrt(252) * mean(excess) / std(excess)``. When the standard deviation
        is exactly zero the result is ``+inf``, ``-inf`` or ``0.0`` according
        to the sign of the mean excess return.
    """
    daily_risk_free = risk_free_rate / 252  # 252 trading days per year
    excess = returns - daily_risk_free

    avg_excess = excess.mean()
    spread = excess.std()

    if spread == 0:
        # No variability: the ratio is unbounded in the direction of the mean.
        if avg_excess > 0:
            return np.inf
        if avg_excess < 0:
            return -np.inf
        return 0.0

    return np.sqrt(252) * avg_excess / spread


## Performance Evaluation
### The final model's effectiveness is evaluated using metrics such as the Sharpe Ratio for risk-adjusted returns and Maximum Drawdown for downside risk assessment. A portfolio simulation with transaction cost considerations is used to mimic real-world trading conditions.


In [13]:
def simulate_portfolio(model, test_data, initial_portfolio=5000, temperature=1.0):
    """Simulate all-in/all-out trading of a single stock driven by ``model``.

    For every row except the last, an action is sampled with
    ``boltzmann_exploration`` from the model's Q-values: ``1`` buys as many
    whole shares as the cash allows, ``2`` sells the whole position and any
    other value holds. Each trade pays a 0.1% proportional transaction cost.
    Progress is printed for every simulated day.

    NOTE(review): this action encoding differs from the mapping used in
    ``get_recommendation`` (0=SELL, 1=HOLD, 2=BUY) - confirm which is intended.

    Args:
        model: Network mapping a ``(1, n_features)`` float tensor to Q-values.
        test_data: DataFrame with the feature columns listed below plus
            ``Original_Close`` (unscaled price), indexed by date.
        initial_portfolio: Starting cash.
        temperature: Temperature passed to ``boltzmann_exploration``.

    Returns:
        ``(final_portfolio_value, portfolio_history, sharpe_ratio)`` where
        ``portfolio_history`` is a list of per-day dicts with the keys
        ``date``, ``portfolio_value``, ``action``, ``price`` and ``shares_held``.
    """
    features = ['Close', 'MA5', 'MA20', 'RSI', 'Volatility', 'Price_Momentum', 'Above_MA5', 'Above_MA20', 'MACD', 'Signal_Line']
    transaction_cost = 0.001  # 0.1% transaction cost

    model.eval()
    # Cash and shares are tracked separately; the portfolio value is always
    # derived from them so uninvested cash is never lost and a sale never
    # counts the position twice.
    cash = initial_portfolio
    shares_held = 0
    portfolio_value = initial_portfolio
    portfolio_history = []
    daily_returns = []

    print("\n=== Last 7 Days Portfolio Simulation ===")
    print(f"Initial Portfolio Value: ₹{portfolio_value:.2f}")

    for i in range(len(test_data)-1):
        current_price = test_data['Original_Close'].iloc[i]

        with torch.no_grad():
            current_data = torch.FloatTensor(test_data.iloc[i][features].values)
            q_values = model(current_data.unsqueeze(0)).squeeze()
            action = boltzmann_exploration(q_values, temperature)

        if action == 1:  # Buy signal
            if shares_held == 0:
                shares_to_buy = (cash * (1 - transaction_cost)) // current_price
                if shares_to_buy > 0:
                    shares_held = shares_to_buy
                    cash -= shares_to_buy * current_price * (1 + transaction_cost)
                    trading_action = f"BUY {shares_to_buy} shares at ₹{current_price:.2f}"
                else:
                    trading_action = "HOLD (Insufficient cash)"
            else:
                trading_action = "HOLD (Already invested)"
        elif action == 2:  # Sell signal
            if shares_held > 0:
                cash += shares_held * current_price * (1 - transaction_cost)
                trading_action = f"SELL {shares_held} shares at ₹{current_price:.2f}"
                shares_held = 0
            else:
                trading_action = "HOLD (No shares to sell)"
        else:
            trading_action = "HOLD"

        # Mark to market: cash plus the position valued at today's price.
        previous_value = portfolio_value
        portfolio_value = cash + shares_held * current_price

        # Day-over-day return (not cumulative), which is what the Sharpe
        # ratio's mean/std of daily returns expects.
        if previous_value:
            daily_return = (portfolio_value - previous_value) / previous_value
        else:
            daily_return = 0.0
        daily_returns.append(daily_return)

        print(f"\nDay {i+1}: {test_data.index[i].strftime('%Y-%m-%d')}")
        print(f"Stock Price: ₹{current_price:.2f}")
        print(f"Action Taken: {trading_action}")
        print(f"Portfolio Value: ₹{portfolio_value:.2f}")

        portfolio_history.append({
            'date': test_data.index[i],
            'portfolio_value': portfolio_value,
            'action': trading_action,
            'price': current_price,
            'shares_held': shares_held
        })

    total_return = ((portfolio_value - initial_portfolio) / initial_portfolio) * 100
    sharpe_ratio = calculate_sharpe_ratio(pd.Series(daily_returns, dtype=float))

    print(f"\n=== Simulation Results ===")
    print(f"Starting Portfolio: ₹{initial_portfolio:.2f}")
    print(f"Final Portfolio: ₹{portfolio_value:.2f}")
    print(f"Total Return: {total_return:+.2f}%")
    print(f"Sharpe Ratio: {sharpe_ratio:.4f}")

    return portfolio_value, portfolio_history, sharpe_ratio

In [14]:
def get_recommendation(model, current_data, temperature=1.0):
    """Sample a trading recommendation from the model's Q-values.

    Args:
        model: Network mapping a float feature tensor to one Q-value per action.
        current_data: Row (or frame) providing the feature columns listed below.
        temperature: Softmax temperature; the Q-values are divided by it.

    Returns:
        ``(label, action_probabilities)`` where ``label`` is ``"SELL"``,
        ``"HOLD"`` or ``"BUY"`` and ``action_probabilities`` is the softmax
        distribution the action was sampled from. A NaN Q-value receives
        probability 0.

    NOTE(review): the index-to-label mapping here (0=SELL, 1=HOLD, 2=BUY)
    differs from ``simulate_portfolio`` (1=buy, 2=sell, otherwise hold) -
    confirm which encoding is intended.
    """
    model.eval()  # Set the model to evaluation mode

    # Extract features for the current data
    features = ['Close', 'MA5', 'MA20', 'RSI', 'Volatility', 'Price_Momentum', 'Above_MA5', 'Above_MA20', 'MACD', 'Signal_Line']
    current_features = torch.FloatTensor(current_data[features].values).unsqueeze(0)

    with torch.no_grad():
        q_values = model(current_features).squeeze()

    logits = q_values.cpu().numpy().astype(np.float64) / temperature
    # NaN -> -inf so that action gets probability 0; +/-inf are clamped to the
    # largest finite floats so the shift below stays well defined.
    logits = np.nan_to_num(logits, nan=-np.inf)
    # Shifting by the maximum leaves the softmax unchanged but keeps exp() from
    # overflowing, which previously produced an invalid distribution whenever
    # more than one exponential overflowed.
    weights = np.exp(logits - logits.max())
    action_probabilities = weights / weights.sum()
    action = np.random.choice(len(action_probabilities), p=action_probabilities)

    action_mapping = {0: "SELL", 1: "HOLD", 2: "BUY"}
    return action_mapping[action], action_probabilities

In [15]:
# Supported companies mapped to the ticker symbols accepted below
stocks = {
    "Reliance Global Group Inc": "RELI",
    "Infosys": "INFY",
    "HDFC Bank": "HDB",
    "ICICI Bank": "IBN",
    "Apple Inc.": "AAPL",
    "Microsoft Corp.": "MSFT",
    "Amazon.com Inc.": "AMZN",
    "Alphabet Inc. (Google)": "GOOGL",
    "Tesla Inc.": "TSLA",
    "Nvidia Corp.": "NVDA",
    "Coca-Cola Company": "KO"
}

# Show the available choices
print("Please choose a stock from the following list:")
for stock, symbol in stocks.items():
    print(f"{stock} ({symbol})")

# Keep asking until a listed symbol is entered, so the cells that follow never
# run with an unsupported ticker. The prompt's example symbols are both taken
# from the list above.
valid_symbols = set(stocks.values())
while True:
    ticker = input("Enter the stock symbol (e.g., AAPL, RELI): ").strip().upper()
    if ticker in valid_symbols:
        print(f"You selected: {ticker}")
        break
    print("Invalid stock symbol. Please check the symbol and try again.")

Please choose a stock from the following list:
Reliance Global Group Inc (RELI)
Infosys (INFY)
HDFC Bank (HDB)
ICICI Bank (IBN)
Apple Inc. (AAPL)
Microsoft Corp. (MSFT)
Amazon.com Inc. (AMZN)
Alphabet Inc. (Google) (GOOGL)
Tesla Inc. (TSLA)
Nvidia Corp. (NVDA)
Coca-Cola Company (KO)
You selected: AAPL


In [17]:
# Take the timestamp once so both window bounds derive from the same instant.
now = datetime.now()
end_date = (now - timedelta(days=1)).strftime('%Y-%m-%d')
start_date = (now - timedelta(days=365)).strftime('%Y-%m-%d')


print("\n=== Stock Trading Analysis ===")
print(f"Analyzing data from {start_date} to {end_date}")
stock_data = download_stock_data(ticker, start_date, end_date)
# NOTE(review): preprocess_data min-max scales over the whole frame, so the
# held-out rows below influence the scaled training features.
processed_data = preprocess_data(stock_data)


# Hold out the most recent rows for the simulation; train on everything before.
test_days = 7
test_data = processed_data.tail(test_days)  # the last `test_days` rows
train_data = processed_data.iloc[:-test_days]  # all rows before the hold-out

# Feature columns fed to the network and the regression target
features = ['Close', 'MA5', 'MA20', 'RSI', 'Volatility', 'Price_Momentum', 'Above_MA5', 'Above_MA20', 'MACD', 'Signal_Line']
X_train = train_data[features]
y_train = train_data['Return']

input_size = len(features)
hidden_size = 64
output_size = 3  # one output per action; simulate_portfolio reads 1 as buy, 2 as sell, otherwise hold
model = DuelingDQN(input_size, hidden_size, output_size)
optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)

print("\nTraining model on historical data...")
train_model(model, optimizer, X_train, y_train, epochs=100, batch_size=32)

print("\nSimulating portfolio performance...")
final_portfolio_value, portfolio_history, sharpe_ratio = simulate_portfolio(model, test_data, initial_portfolio=5000, temperature=0.5)


=== Stock Trading Analysis ===
Analyzing data from 2023-11-03 to 2024-11-01

Training model on historical data...
Epoch 1/100, Loss: 0.0965
Epoch 2/100, Loss: 0.0341
Epoch 3/100, Loss: 0.0101
Epoch 4/100, Loss: 0.0045
Epoch 5/100, Loss: 0.0030
Epoch 6/100, Loss: 0.0012
Epoch 7/100, Loss: 0.0015
Epoch 8/100, Loss: 0.0009
Epoch 9/100, Loss: 0.0007
Epoch 10/100, Loss: 0.0005
Epoch 11/100, Loss: 0.0005
Epoch 12/100, Loss: 0.0007
Epoch 13/100, Loss: 0.0004
Epoch 14/100, Loss: 0.0003
Epoch 15/100, Loss: 0.0003
Epoch 16/100, Loss: 0.0005
Epoch 17/100, Loss: 0.0004
Epoch 18/100, Loss: 0.0003
Epoch 19/100, Loss: 0.0002
Epoch 20/100, Loss: 0.0003
Epoch 21/100, Loss: 0.0004
Epoch 22/100, Loss: 0.0006
Epoch 23/100, Loss: 0.0007
Epoch 24/100, Loss: 0.0009
Epoch 25/100, Loss: 0.0012
Epoch 26/100, Loss: 0.0008
Epoch 27/100, Loss: 0.0005
Epoch 28/100, Loss: 0.0004
Epoch 29/100, Loss: 0.0003
Epoch 30/100, Loss: 0.0003
Epoch 31/100, Loss: 0.0003
Epoch 32/100, Loss: 0.0002
Epoch 33/100, Loss: 0.0002
Epo