# Imports (install the required packages with pip first)

In [1]:
import numpy as np
import pandas as pd
import random
import gym
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import plotly.express as px
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import yfinance as yf
import time


#### This Python function, secret_message(), is designed to print an introductory message for a trading bot called Delta φ. The function emphasizes continuous learning and suggests that higher-tier subscriptions offer deeper market analysis and better profitability.

In [2]:
def secret_message():
    """Print the three-line Delta φ banner to standard output."""
    banner = (
        "Welcome to the delta φ trading bot! Keep learning, stay profitable!",
        "If you're subscribed to a higher tier, the analysis is deeper, and the profits muchhhhh greater.",
        "Unlock advanced strategies and become a master trader!",
    )
    # One print call per line, in order, exactly as the banner is laid out.
    for line in banner:
        print(line)

## Overview

The TradingEnvironment class defines a reinforcement learning environment for simulating stock trading. It uses OpenAI Gym to structure the environment and allows agents to interact by taking buy, sell, or hold actions based on stock price data.

## Key Features:

* State Representation: Uses stock price data as input features (excluding epoch time).

* Actions: The agent can:

  0: Buy a share if balance allows.

  1: Sell a share if holdings exist.

  2: Hold position.

* Rewards: The agent is rewarded based on net worth change.

* Episode Termination: The environment ends when the dataset is exhausted.

## Possible Enhancements:

* Transaction Costs: Introduce commission fees for realistic simulations.

* Multiple Stock Support: Extend to handle a portfolio of assets.

* Feature Engineering: Include technical indicators like Moving Averages or RSI.

* Visualization: Plot performance metrics over time.

In [8]:
class TradingEnvironment(gym.Env):
    """Single-asset trading simulation stepped row by row over a DataFrame.

    The DataFrame must contain a 'Close' column; its first column is dropped
    from every observation (the loader in this file puts ``epoch_time``
    there).

    Actions:
        0 -- buy one share at the current close, if the balance covers it
        1 -- sell one share at the current close, if any are held
        2 -- hold (any other value is also treated as hold)

    The reward of a step is the change in net worth (cash plus shares valued
    at the current close) relative to the previous step.

    Args:
        df: price data, one row per time step.
        initial_balance: cash the account starts (and restarts) with.
        verbose: when True (the default) print progress messages, including
            one line per step.
    """

    def __init__(self, df, initial_balance=10000, verbose=True):
        if verbose:
            print("Initializing Trade Environment...")
        super().__init__()
        self.df = df
        self.initial_balance = initial_balance  # Starting balance in USD
        self.verbose = verbose
        self.current_step = 0
        self.balance = initial_balance
        self.shares_held = 0
        self.net_worth = self.balance
        self.action_space = gym.spaces.Discrete(3)  # 3 actions: 0 = Buy, 1 = Sell, 2 = Hold
        # Observations are raw prices and volumes, not values scaled into
        # [0, 1], so the space must be unbounded.
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(len(df.columns) - 1,),
            dtype=np.float32,
        )

    def _observation(self):
        """Return the current row without its first column, as float32."""
        row = self.df.iloc[self.current_step]
        return np.asarray(row.values[1:], dtype=np.float32)

    def reset(self):
        """Restore the starting account state and return the first observation."""
        if self.verbose:
            print("Resetting environment...")
        self.current_step = 0
        self.balance = self.initial_balance
        self.shares_held = 0
        self.net_worth = self.balance
        return self._observation()

    def step(self, action):
        """Advance one row, apply ``action`` at that row's close price.

        Returns:
            A tuple ``(observation, reward, done, info)``. ``done`` becomes
            True once the step index reaches the last row; ``info`` is
            always an empty dict.

        Raises:
            IndexError: if called again after the data has been exhausted.
        """
        self.current_step += 1
        done = self.current_step >= len(self.df) - 1

        prev_net_worth = self.net_worth
        # np.asarray(...).item() yields a plain scalar whether the lookup
        # returns a scalar or a one-element Series.
        current_price = np.asarray(self.df.iloc[self.current_step]['Close']).item()

        if action == 0 and self.balance >= current_price:  # Buy
            self.shares_held += 1
            self.balance -= current_price
        elif action == 1 and self.shares_held > 0:  # Sell
            self.shares_held -= 1
            self.balance += current_price

        self.net_worth = self.balance + self.shares_held * current_price
        reward = self.net_worth - prev_net_worth

        if self.verbose:
            print(f"Balance: {self.balance}, Net Worth: {self.net_worth}, Reward: {reward}")

        return self._observation(), reward, done, {}

## Load Stock Data Function
The load_data function retrieves historical stock price data for a given stock symbol using Yahoo Finance (yfinance). It returns a processed DataFrame that includes time-based and market price features.

## Key Features:

* Retrieves stock market data using the Yahoo Finance API.

* Handles failed retrievals: if no data comes back, it prints a message and returns None.

* Formats the DataFrame by selecting essential columns (Open, High, Low, Close, Volume).

* Converts timestamps into epoch time for numerical processing. 

## Possible Enhancements:

* Parameterization: Allow users to specify start/end dates.

* Data Validation: Check for missing values and handle them appropriately.

* Feature Engineering: Compute moving averages or other technical indicators.

In [9]:
def load_data(stock_symbol='AAPL', interval='1m', period='5d'):
    """Download price history for one symbol and shape it for the environment.

    Args:
        stock_symbol: ticker passed to ``yf.download``.
        interval: bar size passed to ``yf.download``.
        period: look-back window passed to ``yf.download``.

    Returns:
        A DataFrame with the columns ``epoch_time, Open, High, Low, Close,
        Volume`` (in that order) and no missing values, or ``None`` when no
        usable rows were retrieved.
    """
    failure_message = f"Data retrieval failed for {stock_symbol}. Please try again."

    print(f"Loading data for {stock_symbol}...")
    df = yf.download(stock_symbol, interval=interval, period=period)

    if df is None or df.empty:
        print(failure_message)
        return None

    # If the download comes back with two-level (field, ticker) columns,
    # keep only the field names so that e.g. row['Close'] is a plain scalar.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    # Rows with missing prices would turn net worth and training targets
    # into NaN, so drop them up front.
    df = df[['Open', 'High', 'Low', 'Close', 'Volume']].dropna().copy()
    if df.empty:
        print(failure_message)
        return None

    # The index holds nanosecond timestamps; integer-divide down to seconds.
    df['epoch_time'] = df.index.astype('int64') // 10**9
    df = df[['epoch_time', 'Open', 'High', 'Low', 'Close', 'Volume']]

    print(f"Data loaded for {stock_symbol}.")
    return df

## Deep Q-Network (DQN) Agent Implementation
Implements a Deep Q-Network (DQN) agent using Keras and TensorFlow to solve reinforcement learning problems. The agent interacts with an environment, learns from experience, and improves its decision-making through Q-learning.
  
## Component Overview

* `__init__` → Initializes state/action sizes, memory, and hyperparameters. Builds the neural network.

* build_model → Defines a feedforward neural network with two hidden layers (ReLU) and an output layer (linear). Uses MSE loss & Adam optimizer.

* act → Chooses an action using ε-greedy (random action with probability ε, otherwise best action from the model).

* remember → Stores experiences (state, action, reward, next_state, done) for replay.

* replay → Samples a batch, updates Q-values using the Bellman equation, and trains the model. Decreases ε over time.

In [10]:
# Reinforcement Learning Agent (DQN)
class DQNAgent:
    """Epsilon-greedy Deep Q-Network agent backed by a small Keras MLP.

    Args:
        state_size: number of features in one observation.
        action_size: number of discrete actions.
        gamma: discount factor applied to the best next-state Q-value.
        epsilon: initial probability of choosing a random action.
        epsilon_min: once epsilon is at or below this value it stops decaying.
        epsilon_decay: factor epsilon is multiplied by after each replay.
        learning_rate: learning rate of the Adam optimizer.
    """

    def __init__(self, state_size, action_size, gamma=0.95, epsilon=1.0,
                 epsilon_min=0.01, epsilon_decay=0.995, learning_rate=0.001):
        print("Initializing DQN Agent...")
        self.state_size = state_size
        self.action_size = action_size
        self.memory = []  # (state, action, reward, next_state, done) tuples
        self.gamma = gamma  # Discount factor
        self.epsilon = epsilon  # Exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network: two 24-unit ReLU layers, linear output."""
        model = Sequential([
            Input(shape=(self.state_size,)),
            Dense(24, activation='relu'),
            Dense(24, activation='relu'),
            Dense(self.action_size, activation='linear'),
        ])
        # Fall back to the historical default so an instance created without
        # __init__ (or an older pickled one) still builds.
        learning_rate = getattr(self, 'learning_rate', 0.001)
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))
        print("Model built successfully.")
        return model

    def act(self, state):
        """Pick an action for ``state`` (shape ``(1, state_size)``).

        With probability epsilon a uniformly random action is returned;
        otherwise the action with the highest predicted Q-value. The result
        is always a plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        """Train on one random minibatch of remembered transitions.

        Does nothing when fewer than ``batch_size`` transitions are stored.
        Otherwise the Q-value of each sampled action is moved toward
        ``reward`` (terminal) or ``reward + gamma * max Q(next_state)``,
        the network is fitted once on the whole batch, and epsilon is
        decayed.
        """
        if batch_size < 1 or len(self.memory) < batch_size:
            return
        batch = random.sample(self.memory, batch_size)

        # Stack the sampled transitions so the network is queried and fitted
        # once per batch; predict/fit are built for batched input and are
        # very slow when called once per sample.
        states = np.vstack([t[0] for t in batch]).astype(np.float32)
        actions = np.array([t[1] for t in batch], dtype=np.intp)
        rewards = np.array([t[2] for t in batch], dtype=np.float32)
        next_states = np.vstack([t[3] for t in batch]).astype(np.float32)
        dones = np.array([t[4] for t in batch], dtype=bool)

        best_next = np.amax(self.model.predict(next_states, verbose=0), axis=1)
        targets = self.model.predict(states, verbose=0)
        # Only the taken action's Q-value gets a new target; the others keep
        # their current prediction and so contribute zero loss.
        targets[np.arange(batch_size), actions] = np.where(
            dones, rewards, rewards + self.gamma * best_next
        )
        self.model.fit(states, targets, batch_size=batch_size, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

## Training and Testing the Trading Bot
This section trains a Deep Q-Network (DQN)-based trading bot to make stock trading decisions based on historical price data. The agent learns through reinforcement learning (RL), optimizing its trading strategy over multiple episodes.

## Key Features
* Stock Market Simulation: Uses historical stock data (AAPL by default) as the environment.

* Deep Q-Network (DQN): The trading agent leverages a neural network for decision-making.

* Exploration vs. Exploitation: The bot gradually shifts from random exploration to learned strategies.

* Experience Replay: Stores past experiences to stabilize learning.

* Dynamic Reward System: Rewards are based on trading performance.

## Component Overview
* train_trading_bot()

* Loads stock data for training.

* Initializes the trading environment and DQN agent.

* Runs multiple episodes where the agent learns through trial and error.

* Uses experience replay to improve decision-making.

## Training Loop
* Within an episode the agent selects actions, receives rewards, and stores experiences; after each episode it replays a sampled batch of those experiences to update its model.

## Hyperparameters 
* episodes = 10, batch_size = 32 for training stability.



In [11]:
# Train and test the agent
def train_trading_bot(stock_symbol='AAPL', episodes=10, batch_size=32):
    """Train a DQN agent on downloaded price data for one symbol.

    Args:
        stock_symbol: ticker handed to ``load_data``.
        episodes: number of full passes over the data.
        batch_size: minibatch size handed to ``agent.replay``.

    Returns:
        None. The function returns early (after a message) when no data
        could be loaded or when there are too few rows to take a step.
    """
    df = load_data(stock_symbol)
    if df is None:
        return
    if len(df) < 2:
        # The environment advances to the next row before trading, so it
        # needs at least two rows.
        print(f"Not enough data for {stock_symbol} to train on.")
        return

    state_size = len(df.columns) - 1  # Every column except epoch_time
    env = TradingEnvironment(df)
    agent = DQNAgent(state_size=state_size, action_size=int(env.action_space.n))

    for e in range(episodes):
        # The agent expects a batch dimension: (1, state_size).
        state = np.reshape(env.reset(), [1, state_size])
        done = False
        while not done:
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
        # NOTE(review): replay runs once per episode, so only one minibatch
        # is trained and epsilon decays only once per episode. Confirm this
        # is intended before relying on the learned policy.
        agent.replay(batch_size)

        print(f"Episode {e+1}/{episodes} completed")

    secret_message()


if __name__ == "__main__":
    train_trading_bot()


Loading data for AAPL...
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Data loaded for AAPL.
Initializing Trade Environment...
Initializing DQN Agent...
Model built successfully.
Resetting environment...
Balance: 10000, Net Worth: 10000.0, Reward: 0.0
Balance: 10000, Net Worth: 10000.0, Reward: 0.0
Balance: 9787.824996948242, Net Worth: 10000.0, Reward: 0.0
Balance: 9575.151992797852, Net Worth: 10000.498001098633, Reward: 0.4980010986328125
Balance: 9362.681991577148, Net Worth: 10000.091995239258, Reward: -0.406005859375
Balance: 9150.371994018555, Net Worth: 9999.61198425293, Reward: -0.480010986328125
Balance: 9150.371994018555, Net Worth: 10000.231979370117, Reward: 0.6199951171875
Balance: 9150.371994018555, Net Worth: 10000.090377807617, Reward: -0.1416015625
Balance: 8937.901992797852, Net Worth: 10000.251998901367, Reward: 0.16162109375
Balance: 9150.72200012207, Net Worth: 10002.002029418945, Reward: 1.750030517578125
Balance: 9364.105407714844, Net Worth: 10004.255630493164, Reward: 2.25360107421875
Balance: 9150.50830078125, Net Worth: 10004.8