In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the raw price history from the CSV that sits next to the notebook.
DATA_PATH = 'data.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Print the dtype / non-null summary, then display the column index as the cell output.
df.info()
df.columns

In [None]:
# Display the first rows of the frame as the cell output.
df.head()

In [None]:
# Summarise the 'Trades' column (non-null count and dtype) before deciding whether to keep it.
df["Trades"].info()

In [None]:
# More than 50% of the 'Trades' values are null, so the column is dropped
# from the dataset together with 'Series' and 'Symbol'.
columns_to_drop = ['Trades', 'Series', 'Symbol']
df = df.drop(columns=columns_to_drop)

In [None]:
# Remove rows that have no 'Deliverable Volume' and renumber the index from 0.
df = (
    df
    .dropna(subset=['Deliverable Volume'])
    .reset_index(drop=True)
)

In [None]:
# Turnover = VWAP * Volume, therefore it carries no extra information and is dropped.
df = df.drop('Turnover', axis='columns')

In [None]:
# Parse 'Date' into datetimes, then expose its calendar components as
# separate integer columns (appended in the order Year, Month, Day).
df['Date'] = pd.to_datetime(df['Date'])

date_parts = df['Date'].dt
df = df.assign(
    Year=date_parts.year,
    Month=date_parts.month,
    Day=date_parts.day,
)

In [None]:
# Order rows chronologically via the derived calendar columns and renumber the index.
df = df.sort_values(['Year', 'Month', 'Day'], ascending=True, ignore_index=True)

In [None]:
# Number of rows per calendar year, ordered by year.
year_counts = df['Year'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(12, 6))
year_counts.plot(kind='bar', color='blue', ax=ax)
ax.set_title('Count of Entries per Year')
ax.set_xlabel('Year')
ax.set_ylabel('Count')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.grid(axis='y', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()

In [None]:
# Calculate the correlation matrix on numeric columns only. `df` still holds
# the datetime 'Date' column at this point, and correlating a non-numeric
# column is either an error or a silent drop depending on the pandas version.
correlation_matrix = df.select_dtypes(include='number').corr()

plt.figure(figsize=(30, 10))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='coolwarm', square=True, cbar_kws={"shrink": .8})
plt.title('Correlation Matrix Heatmap')

plt.tight_layout()
plt.show()

In [None]:
# These columns are highly correlated with other columns, so it is preferable to drop them.
redundant_columns = ['Last', 'Date', 'Deliverable Volume']
df = df.drop(columns=redundant_columns)

In [None]:
# Re-check the remaining columns and dtypes after the drops above.
df.info()

In [None]:
!pip install pandas-ta

In [None]:
!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzvf ta-lib-0.4.0-src.tar.gz
%cd ta-lib
!./configure --prefix=/usr
!make
!make install
!pip install TA-Lib

In [None]:
import talib

# Price / volume series shared by the indicators below.
close_px = df['Close']
high_px = df['High']
low_px = df['Low']

# Trend: 20-period simple moving average and MACD(12, 26, 9).
df['MA'] = talib.SMA(close_px, timeperiod=20)
macd_line, macd_signal, macd_hist = talib.MACD(close_px, fastperiod=12, slowperiod=26, signalperiod=9)
df['MACD'] = macd_line
df['MACD_signal'] = macd_signal
df['MACD_hist'] = macd_hist

# Stochastic oscillator (K, D) and the derived J line, J = 3K - 2D.
stoch_k, stoch_d = talib.STOCH(
    high_px, low_px, close_px,
    fastk_period=14,
    slowk_period=3, slowk_matype=0,
    slowd_period=3, slowd_matype=0,
)
df['K'] = stoch_k
df['D'] = stoch_d
df['J'] = 3 * df['K'] - 2 * df['D']

# Momentum and volume flow.
df['RSI'] = talib.RSI(close_px, timeperiod=14)
df['OBV'] = talib.OBV(close_px, df['Volume'])

# Bollinger bands: 20 periods, 2 standard deviations either side.
boll_upper, boll_middle, boll_lower = talib.BBANDS(close_px, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
df['BOLL_upperband'] = boll_upper
df['BOLL_middleband'] = boll_middle
df['BOLL_lowerband'] = boll_lower

In [None]:
# Inspect the frame again now that the indicator columns have been added.
df.info()

In [None]:
# Candlestick-pattern detectors: output column name -> TA-Lib function name.
# Every detector takes the same (Open, High, Low, Close) inputs.
candlestick_patterns = {
    'HAMMER': 'CDLHAMMER',
    'HANGINGMAN': 'CDLHANGINGMAN',
    'DARKCLOUDCOVER': 'CDLDARKCLOUDCOVER',
    'PIERCING': 'CDLPIERCING',
    'MORNINGSTAR': 'CDLMORNINGSTAR',
    'EVENINGSTAR': 'CDLEVENINGSTAR',
    'DOJI': 'CDLDOJI',
}

for column_name, talib_name in candlestick_patterns.items():
    detector = getattr(talib, talib_name)
    df[column_name] = detector(df['Open'], df['High'], df['Low'], df['Close'])

In [None]:
# Discard every row that still contains a missing value in any column.
df = df.dropna(axis=0, how='any')

In [None]:
# Chronological split: years up to and including 2018 train the models,
# 2019 onwards is held out for evaluation.
is_train_row = df['Year'] <= 2018
is_test_row = df['Year'] >= 2019

train_df = df.loc[is_train_row].copy()
test_df = df.loc[is_test_row].copy()

In [None]:
!pip install keras

In [None]:
from sklearn.preprocessing import StandardScaler

# 'Close' is the regression target; every other remaining column is a feature.
X_train = train_df.drop(columns=['Close'])
y_train = train_df['Close']
X_test = test_df.drop(columns=['Close'])
y_test = test_df['Close']

scaler_x = StandardScaler()
scaler_y = StandardScaler()  # created but not fitted in this cell: the targets stay in price units

# Fit the scaler on the training features only, then apply the same transform
# to the test features so no test statistics leak into training.
X_train_scaled = scaler_x.fit_transform(X_train)
X_test_scaled = scaler_x.transform(X_test)

# The LSTM consumes (samples, timesteps, features) with a single timestep.
# Both splits must share this layout, otherwise predicting on X_test fails.
X_train = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])



In [None]:
!pip install tensorflow

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

class LSTMModel:
    """Stacked-LSTM regressor with a small dense head and a single output unit.

    Parameters
    ----------
    input_shape : tuple
        (timesteps, features) of one input sample.
    lstm_layers : int
        Number of LSTM layers to stack; must be at least 1.
    neurons_per_layer : int
        Units in every LSTM layer.
    dropout_rate : float
        Dropout rate applied after every LSTM layer.

    Raises
    ------
    ValueError
        If ``lstm_layers`` is smaller than 1.
    """

    def __init__(self, input_shape, lstm_layers=2, neurons_per_layer=60, dropout_rate=0.1):
        if lstm_layers < 1:
            raise ValueError(f"lstm_layers must be >= 1, got {lstm_layers}")
        self.input_shape = input_shape
        self.lstm_layers = lstm_layers
        self.neurons_per_layer = neurons_per_layer
        self.dropout_rate = dropout_rate
        self.model = self.build_lstm_model()

    def build_lstm_model(self):
        """Build and compile the network (Adam optimiser, mean-squared-error loss)."""
        model = Sequential()

        for layer_index in range(self.lstm_layers):
            layer_kwargs = {
                # Every layer except the last must emit the full sequence so
                # that the next LSTM layer receives 3-D input.
                'return_sequences': layer_index < self.lstm_layers - 1,
            }
            if layer_index == 0:
                layer_kwargs['input_shape'] = self.input_shape
            model.add(LSTM(self.neurons_per_layer, **layer_kwargs))
            model.add(Dropout(self.dropout_rate))

        model.add(Dense(32, activation='relu'))
        model.add(Dense(1))

        model.compile(optimizer='adam', loss='mean_squared_error')
        return model

    def fit(self, X, y, epochs, batch_size):
        """Train the network and return the Keras History object."""
        return self.model.fit(X, y, epochs=epochs, batch_size=batch_size)

    def predict(self, X):
        """Return the network's predictions for ``X``."""
        return self.model.predict(X)

# (timesteps, features) of a single training sample.
input_shape = (X_train.shape[1], X_train.shape[2])

lstm_model = LSTMModel(input_shape=input_shape, lstm_layers=2, neurons_per_layer=60, dropout_rate=0.1)
lstm_model.fit(X_train, y_train, epochs=100, batch_size=32)

# The network expects the same (samples, timesteps, features) layout it was
# trained on. The reshape is a no-op when X_test already has that layout and
# repairs a 2-D (samples, features) array otherwise.
X_test_sequences = np.asarray(X_test).reshape(len(X_test), X_train.shape[1], X_train.shape[2])

# Despite the name, these predictions are in the same units as y_train,
# because the targets were never scaled.
predicted_scaled = lstm_model.predict(X_test_sequences).flatten()

In [None]:
# Mean absolute percentage error, expressed as a fraction: mean(|prediction - actual| / actual).
# The name `accuracy` is kept for compatibility; lower values are better.
accuracy = np.mean(np.abs(predicted_scaled - np.asarray(y_test)) / np.asarray(y_test))
print(f'Mean Absolute Percentage Error: {accuracy}')

In [None]:
!pip install tensorflow

In [None]:
from collections import deque
import random
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Concatenate, BatchNormalization, LayerNormalization
from tensorflow.keras.optimizers import Adam

# DDPG hyperparameters shared by the definitions in this cell.
ACTOR_LR = 0.001  # intended learning rate for the actor optimiser
CRITIC_LR = 0.002  # intended learning rate for the critic optimiser
TAU = 0.005  # soft-update interpolation factor for the target networks
GAMMA = 0.99  # discount factor applied to bootstrapped target Q-values
BUFFER_SIZE = 100000  # intended replay-buffer capacity
BATCH_SIZE = 64  # number of experiences sampled per training step

class PrioritizedReplayBuffer:
    """Bounded replay buffer that samples experiences in proportion to priority**alpha.

    Parameters
    ----------
    capacity : int
        Maximum number of experiences kept; the oldest are evicted first.
    alpha : float
        Exponent applied to priorities when turning them into sampling probabilities.
    beta : float
        Exponent of the importance-sampling weights; grows by ``beta_increment``
        after every call to ``sample`` and is capped at 1.
    beta_increment : float
        Amount added to ``beta`` per ``sample`` call.
    """

    def __init__(self, capacity, alpha=0.6, beta=0.4, beta_increment=0.001):
        self.buffer = deque(maxlen=capacity)
        self.priorities = deque(maxlen=capacity)
        self.alpha = alpha
        self.beta = beta
        self.beta_increment = beta_increment

    def add(self, experience):
        """Store ``experience`` with the highest priority currently in the buffer (1 if empty)."""
        self.buffer.append(experience)
        self.priorities.append(max(self.priorities, default=1))

    def sample(self, batch_size):
        """Draw ``batch_size`` experiences with replacement.

        Returns
        -------
        experiences : list
            The sampled experiences.
        indices : list of int
            Buffer positions of the sampled experiences.
        weights : list of float
            Importance-sampling weight of each *sampled* experience, aligned
            with ``indices`` and normalised by the largest weight in the buffer.

        Raises
        ------
        ValueError
            If the buffer is empty.
        """
        buffer_len = len(self.buffer)
        if buffer_len == 0:
            raise ValueError("cannot sample from an empty replay buffer")

        scaled_priorities = [p ** self.alpha for p in self.priorities]
        total_priority_sum = sum(scaled_priorities)
        probabilities = [s / total_priority_sum for s in scaled_priorities]

        indices = random.choices(range(buffer_len), weights=probabilities, k=batch_size)
        experiences = [self.buffer[i] for i in indices]

        # Weights are computed over the whole buffer only to find the
        # normalisation constant; the caller receives one weight per sample.
        all_weights = [(buffer_len * p) ** (-self.beta) for p in probabilities]
        max_weight = max(all_weights)
        weights = [all_weights[i] / max_weight for i in indices]

        self.beta = min(1, self.beta + self.beta_increment)
        return experiences, indices, weights

    def update_priorities(self, indices, errors):
        """Set the priority at each index to ``|error| + 1e-5`` (the offset keeps it non-zero)."""
        for i, error in zip(indices, errors):
            self.priorities[i] = abs(error) + 1e-5

    def size(self):
        """Return the number of stored experiences."""
        return len(self.buffer)

def build_actor(state_size, action_size, learning_rate=None):
    """Build the actor (policy) network: state -> tanh-bounded action.

    Parameters
    ----------
    state_size : int
        Length of the flat state vector.
    action_size : int
        Number of action components; each output lies in [-1, 1] (tanh).
    learning_rate : float, optional
        Adam learning rate. Defaults to the module-level ``ACTOR_LR``.

    Returns
    -------
    A Keras ``Model`` compiled with an Adam optimiser and no loss; the actor is
    expected to be updated through explicit gradients.
    """
    if learning_rate is None:
        learning_rate = ACTOR_LR

    state_input = Input(shape=(state_size,))
    x = Dense(64, activation='relu')(state_input)
    x = Dropout(0.1)(x)
    x = Dense(64, activation='relu')(x)
    x = BatchNormalization()(x)
    action_output = Dense(action_size, activation='tanh')(x)

    model = Model(inputs=state_input, outputs=action_output)
    model.compile(optimizer=Adam(learning_rate=learning_rate))
    return model


def build_critic(state_size, action_size, learning_rate=None):
    """Build the critic network: (state, action) -> scalar Q-value.

    Parameters
    ----------
    state_size : int
        Length of the flat state vector.
    action_size : int
        Number of action components.
    learning_rate : float, optional
        Adam learning rate. Defaults to the module-level ``CRITIC_LR``.

    Returns
    -------
    A Keras ``Model`` taking ``[state, action]`` and compiled with Adam and MSE loss.
    """
    if learning_rate is None:
        learning_rate = CRITIC_LR

    state_input = Input(shape=(state_size,))
    action_input = Input(shape=(action_size,))

    # Separate encoders for state and action, merged before the Q-value head.
    state_out = Dense(64, activation='relu')(state_input)
    state_out = Dense(64, activation='relu')(state_out)

    action_out = Dense(64, activation='relu')(action_input)

    x = Concatenate()([state_out, action_out])
    x = Dense(64, activation='relu')(x)
    x = LayerNormalization()(x)
    Q_value_output = Dense(1)(x)

    model = Model(inputs=[state_input, action_input], outputs=Q_value_output)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model

class DDPGAgent:
    """Deep Deterministic Policy Gradient agent with target networks and a prioritized replay buffer.

    Parameters
    ----------
    state_size : int
        Length of the flat state vector fed to the actor and critic.
    action_size : int
        Number of action components produced by the actor.
    buffer_size : int
        Capacity of the replay buffer.
    batch_size : int
        Number of experiences sampled per call to ``train``.
    gamma : float
        Discount factor for bootstrapped target Q-values.
    tau : float
        Soft-update interpolation factor for the target networks.

    The constructor arguments (not module-level constants) drive training.
    """

    def __init__(self, state_size, action_size, buffer_size=100000, batch_size=64, gamma=0.99, tau=0.001):
        self.state_size = state_size
        self.action_size = action_size
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau

        # Initialize Actor and Critic networks
        self.memory = PrioritizedReplayBuffer(buffer_size)
        self.actor = build_actor(state_size, action_size)
        self.critic = build_critic(state_size, action_size)

        # Initialize Target networks
        self.target_actor = build_actor(state_size, action_size)
        self.target_critic = build_critic(state_size, action_size)

        # Synchronize Target networks
        self.target_actor.set_weights(self.actor.get_weights())
        self.target_critic.set_weights(self.critic.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay buffer."""
        experience = (state, action, reward, next_state, done)
        self.memory.add(experience)

    def act(self, state):
        """Return the actor's action for a single state (any shape holding ``state_size`` values)."""
        state = np.reshape(state, (1, self.state_size))
        # verbose=0 suppresses the per-call progress bar.
        return self.actor.predict(state, verbose=0)[0]

    def train(self):
        """Run one critic update, one actor update and a soft target update.

        Does nothing until the buffer holds at least ``batch_size`` experiences.
        """
        if self.memory.size() < self.batch_size:
            return

        # Sample a batch from experience replay
        experiences, indices, _ = self.memory.sample(self.batch_size)

        # Force every field into a fixed shape: stored states may be (1, n) or
        # (n,), and rewards may be scalars or length-1 arrays. Without this
        # the batch cannot be stacked and the targets broadcast to 2-D.
        states = np.array([np.reshape(e[0], self.state_size) for e in experiences], dtype=np.float32)
        actions = np.array([np.reshape(e[1], self.action_size) for e in experiences], dtype=np.float32)
        rewards = np.array([float(np.squeeze(e[2])) for e in experiences], dtype=np.float32)
        next_states = np.array([np.reshape(e[3], self.state_size) for e in experiences], dtype=np.float32)
        dones = np.array([float(e[4]) for e in experiences], dtype=np.float32)

        # Calculate target Q-values for Critic using Target networks
        target_actions = self.target_actor.predict(next_states, verbose=0)
        target_q_values = self.target_critic.predict([next_states, target_actions], verbose=0)
        targets = rewards + self.gamma * (1.0 - dones) * np.reshape(target_q_values, -1)

        # Feed the absolute TD errors back as priorities; otherwise every
        # experience keeps its initial priority and sampling stays uniform.
        current_q_values = np.reshape(self.critic.predict([states, actions], verbose=0), -1)
        td_errors = [float(err) for err in np.abs(targets - current_q_values)]
        self.memory.update_priorities(indices, td_errors)

        # Train the Critic network
        self.critic.train_on_batch([states, actions], targets.reshape(-1, 1))

        # Calculate the policy gradient: maximise the critic's value of the actor's actions.
        state_batch = tf.convert_to_tensor(states)
        with tf.GradientTape() as tape:
            actions_pred = self.actor(state_batch)
            critic_value = self.critic([state_batch, actions_pred])
            actor_loss = -tf.math.reduce_mean(critic_value)

        # Update the Actor network
        actor_grads = tape.gradient(actor_loss, self.actor.trainable_variables)
        self.actor.optimizer.apply_gradients(zip(actor_grads, self.actor.trainable_variables))

        # Update the Target networks with soft update
        self.update_target_network(self.target_actor, self.actor)
        self.update_target_network(self.target_critic, self.critic)

    def update_target_network(self, target_model, source_model):
        """Soft update: target <- tau * source + (1 - tau) * target, tensor by tensor."""
        # The weight tensors have different shapes, so they are blended one by
        # one instead of being packed into a single (ragged) NumPy array.
        new_weights = [
            self.tau * source_w + (1.0 - self.tau) * target_w
            for source_w, target_w in zip(source_model.get_weights(), target_model.get_weights())
        ]
        target_model.set_weights(new_weights)

# One state is the flat feature vector of a single time step.
state_size = X_train.shape[2]
action_size = 1

# Pass the hyperparameters defined at the top of this cell explicitly, so the
# agent is configured with them rather than with its constructor defaults.
ddpg_agent = DDPGAgent(
    state_size,
    action_size,
    buffer_size=BUFFER_SIZE,
    batch_size=BATCH_SIZE,
    gamma=GAMMA,
    tau=TAU,
)

In [None]:
!pip install stable-baselines3[extra]

In [None]:
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from sklearn.preprocessing import StandardScaler

class StockTradingEnv(gym.Env):
    """Single-asset trading environment that blends the agent's action with an LSTM forecast.

    Each step moves to the next row of ``X_train``, trades at that row's price
    from ``y_train``, and returns a reward made of a portfolio term plus a
    penalty for the LSTM's relative forecast error.

    Parameters
    ----------
    X_train : array of shape (samples, timesteps, features)
        Model inputs; one observation is a flattened sample.
    y_train : array-like of length ``samples``
        Price per sample; read positionally.
    lstm_model : object with ``predict(X)``
        Forecaster; its predictions for ``X_train`` are computed once here.
    ddpg_agent : object
        Kept on the instance for reference; ``step`` uses the action passed in.
    lambda_init, lambda_min, lambda_decay : float
        Blend-weight schedule: ``max(lambda_init * lambda_decay**iteration, lambda_min)``.

    Raises
    ------
    ValueError
        If ``X_train`` is empty or its length differs from ``y_train`` or from
        the number of LSTM predictions.
    """

    def __init__(self, X_train, y_train, lstm_model, ddpg_agent, lambda_init=1.0, lambda_min=0.1, lambda_decay=0.9999):
        super(StockTradingEnv, self).__init__()

        self.X_train = X_train
        # Stored as a plain array so y_train[step] is positional; a pandas
        # Series would look the step up by index label instead.
        self.y_train = np.asarray(y_train, dtype=np.float64).reshape(-1)
        # The episode length follows the data handed to the environment.
        self.max_steps = len(self.X_train)
        if self.max_steps == 0:
            raise ValueError("X_train must contain at least one sample")
        if len(self.y_train) != self.max_steps:
            raise ValueError("X_train and y_train must have the same number of samples")

        self.current_step = 0
        self.ddpg_agent = ddpg_agent
        self.lstm_model = lstm_model
        self.lambda_init = lambda_init
        self.lambda_min = lambda_min
        self.lambda_decay = lambda_decay
        self.current_lambda = lambda_init
        self.scaler = StandardScaler()
        self.iteration = 0

        # The LSTM is not trained inside this environment, so its forecasts
        # are computed once here instead of twice per step. Rebuild the
        # environment if the LSTM is retrained.
        self.lstm_predictions = np.asarray(lstm_model.predict(X_train), dtype=np.float64).reshape(-1)
        if len(self.lstm_predictions) != self.max_steps:
            raise ValueError("lstm_model must return exactly one prediction per sample")

        # Portfolio initialization
        self.initMoney = 10000
        self.balance = self.initMoney
        self.shares_held = 0
        self.total_value = self.balance

        # Define action and observation spaces
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)  # -1 for sell, 1 for buy
        # Observations are unbounded in both directions: the lower bound must
        # not be 0 if the features can be negative (e.g. after standardisation).
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(X_train.shape[1] * X_train.shape[2],),
            dtype=np.float32,
        )

        self.dataset_A = []
        self.dataset_D = []

    def _observation(self):
        """Return the current sample flattened to the observation-space shape."""
        return np.asarray(self.X_train[self.current_step], dtype=np.float32).reshape(-1)

    def reset(self):
        """Restart the episode at the first sample and return its observation.

        ``iteration`` is intentionally not reset, so the blend weight keeps
        decaying across episodes.
        """
        self.current_step = 0
        self.balance = self.initMoney
        self.shares_held = 0
        self.total_value = self.balance
        self.current_lambda = self.get_lambda(self.iteration)
        self.dataset_A = []
        self.dataset_D = []
        return self._observation()

    def get_lambda(self, iteration):
        """Return the blend weight after ``iteration`` steps, floored at ``lambda_min``."""
        lambda_value = self.lambda_init * (self.lambda_decay ** iteration)
        return max(lambda_value, self.lambda_min)

    def step(self, action):
        """Advance one sample, execute the blended action and return ``(obs, reward, done, info)``.

        ``action`` is the caller's action (its first component is used). A
        positive blended action spends the whole balance on shares; a negative
        one sells that fraction of the shares held.
        """
        self.current_step += 1
        # The episode ends on the last available sample.
        done = self.current_step >= self.max_steps - 1
        self.current_step = min(self.current_step, self.max_steps - 1)

        current_price = float(self.y_train[self.current_step])
        predicted_price = float(self.lstm_predictions[self.current_step])

        # Use the action supplied by the caller, so the executed action relates
        # to the one the caller records for this transition.
        agent_action = float(np.asarray(action, dtype=np.float64).reshape(-1)[0])

        self.current_lambda = self.get_lambda(self.iteration)
        self.iteration += 1  # without this the blend weight never decays

        blended_action = (1 - self.current_lambda) * agent_action + self.current_lambda * predicted_price
        # NOTE(review): the LSTM term is a price forecast, not a value in
        # [-1, 1], so it dominates the blend and the clipped action saturates.
        # Confirm whether the forecast should first become a directional signal.
        final_action = float(np.clip(blended_action, -1.0, 1.0))

        if final_action > 0:  # Buy
            shares_to_buy = int(self.balance // current_price)
            self.shares_held += shares_to_buy
            self.balance -= shares_to_buy * current_price
        elif final_action < 0:  # Sell
            shares_to_sell = int(self.shares_held * -final_action)
            self.shares_held -= shares_to_sell
            self.balance += shares_to_sell * current_price

        # Calculate total value
        self.total_value = self.balance + self.shares_held * current_price
        earnings = self.total_value - self.initMoney

        # Reward Calculation: relative profit when ahead, flat penalty otherwise.
        if earnings > 0:
            reward = earnings / self.initMoney
        else:
            reward = -0.1

        # Penalise the relative forecast error of the LSTM at this step.
        supervised_reward = -abs(current_price - predicted_price) / current_price
        reward += supervised_reward

        return self._observation(), float(reward), done, {}

    def render(self):
        """Print the current step and portfolio state."""
        print(f"Step: {self.current_step}, Balance: {self.balance}, Shares Held: {self.shares_held}, Total Value: {self.total_value}")


N_EPISODES = 100

env = StockTradingEnv(X_train, y_train, lstm_model, ddpg_agent)

for episode in range(N_EPISODES):
    done = False
    # Keep every stored state as a flat vector and every reward as a float,
    # so a replay batch always stacks into a rectangular array.
    state = np.asarray(env.reset(), dtype=np.float32).reshape(-1)
    while not done:
        action = ddpg_agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.asarray(next_state, dtype=np.float32).reshape(-1)
        reward = float(np.squeeze(reward))

        ddpg_agent.remember(state, action, reward, next_state, done)
        ddpg_agent.train()

        state = next_state

    # One summary line per episode.
    env.render()

In [None]:
# DDPGAgent defines no `predict` method; query its actor network directly,
# with one flat feature vector per test sample.
test_states = np.asarray(X_test).reshape(len(X_test), -1)
y_pred = ddpg_agent.actor.predict(test_states).flatten()

# NOTE(review): the actor's output layer is tanh, so y_pred holds actions in
# [-1, 1] rather than prices; confirm that comparing it to y_test is intended.
# The value is a mean absolute percentage error, expressed as a fraction.
accuracy = np.mean(np.abs(y_pred - np.asarray(y_test)) / np.asarray(y_test))
print(f'Mean Absolute Percentage Error: {accuracy}')

In [None]:
import matplotlib.pyplot as plt

# Rebuild a datetime column from the calendar parts so the x-axis shows real dates.
test_df['Date'] = pd.to_datetime(test_df[['Year', 'Month', 'Day']])
plot_dates = test_df['Date']

fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(plot_dates, y_test, color='blue', label='Actual Prices')
ax.plot(plot_dates, predicted_scaled, color='red', label='Predicted Prices')
ax.set_title('Stock Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.show()