In [None]:
import numpy as np
import pandas as pd
import random
from collections import deque
import os
from keras.layers import Dense
from keras.models import Sequential
import tensorflow as tf

In [None]:
class StockMarketEnv:
    """Minimal single-asset trading environment built on a table of closing prices.

    The observation is a fixed-length window of the most recent ``Close``
    prices ending at the current step. The reward for an action is the
    one-step price change: positive for a buy before a rise or a sell before
    a fall, and zero for holding.

    Parameters:
    - data: pandas DataFrame (or compatible) exposing a 'Close' column.
    - window_size: number of closing prices in each observation.
    """

    def __init__(self, data, window_size=5):
        self.data = data  # Stock price data
        self.window_size = window_size  # Number of past timesteps for the state
        self.current_step = 0
        self.max_steps = len(data) - 1
        self.action_space = [0, 1, 2]  # Actions: 0 = hold, 1 = buy, 2 = sell
        self.state_size = window_size  # Number of features in the state

    def reset(self):
        """Jump to a random starting step and return the state there.

        Raises:
        - ValueError: if the data is too short to leave room for a start
          position (needs more than ``window_size + 1`` rows).
        """
        upper = len(self.data) - self.window_size - 1
        if upper <= 0:
            raise ValueError(
                f"Need more than {self.window_size + 1} rows of data to reset "
                f"the environment, got {len(self.data)}"
            )
        self.current_step = np.random.randint(0, upper)
        return self.get_state()

    def get_state(self):
        """Return the last ``window_size`` closing prices up to the current step.

        Early in the series there are fewer than ``window_size`` prices
        available. The window is then left-padded with the earliest available
        price so the returned array always has length ``window_size``.
        """
        end = self.current_step + 1
        start = max(0, end - self.window_size)

        # Positional slicing so a non-default DataFrame index cannot break it.
        prices = self.data['Close'].iloc[start:end].to_numpy()

        missing = self.window_size - len(prices)
        if missing > 0 and len(prices) > 0:
            prices = np.pad(prices, (missing, 0), mode="edge")

        return prices

    def step(self, action):
        """Apply ``action`` at the current step and advance by one step.

        Returns:
        - (next_state, reward, done): the state after advancing, the one-step
          profit of the action, and whether the last row has been reached.
        """
        closes = self.data['Close']
        current_price = closes.iloc[self.current_step]
        # On the final row there is no next price; treat the price as unchanged.
        if self.current_step + 1 < len(closes):
            next_price = closes.iloc[self.current_step + 1]
        else:
            next_price = current_price

        if action == 1:  # Buy: profit when the price rises
            reward = next_price - current_price
        elif action == 2:  # Sell: profit when the price falls
            reward = current_price - next_price
        else:  # Hold
            reward = 0

        self.current_step += 1
        done = self.current_step >= self.max_steps

        next_state = self.get_state()
        return next_state, reward, done

In [None]:
def huber_loss(states, targets, delta=1.0):
    """Element-wise Huber loss between two tensors.

    The loss is quadratic where the absolute difference is at most ``delta``
    and linear beyond that. It depends only on the absolute and squared
    difference, so it is symmetric in ``states`` and ``targets``.

    Parameters:
    - states: first tensor.
    - targets: second tensor, broadcast-compatible with ``states``.
    - delta: threshold where the loss switches from quadratic to linear.

    Returns:
    - A tensor of per-element losses (no reduction is applied here).
    """
    residual = states - targets
    magnitude = tf.abs(residual)
    quadratic = 0.5 * tf.square(residual)
    linear = delta * (magnitude - 0.5 * delta)
    return tf.where(magnitude <= delta, quadratic, linear)

def _build_model(self):
    """Build and compile the fully-connected Q-value network for an agent.

    Parameters:
    - self: object providing ``state_size`` (input width), ``action_size``
      (output width) and ``learning_rate`` (Adam step size).

    Returns:
    - A compiled Sequential model: three ReLU hidden layers (128, 64, 32
      units) followed by a linear layer with one output per action.
    """
    network = Sequential()

    # Only the first layer declares the input width.
    network.add(Dense(units=128, activation="relu", input_dim=self.state_size))
    for width in (64, 32):
        network.add(Dense(units=width, activation="relu"))
    # No activation on the output layer.
    network.add(Dense(units=self.action_size))

    optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
    network.compile(loss=huber_loss, optimizer=optimizer)

    return network

def save_model_weights(model, filename="model_weights.h5"):
    """
    Write a model's weights to disk and report where they went.

    Parameters:
    - model: Object exposing ``save_weights(path)`` (e.g. a trained Keras model).
    - filename: Destination path for the weights file (default is 'model_weights.h5').

    Returns:
    - None

    NOTE(review): Keras 3 reportedly requires weight filenames to end in
    '.weights.h5'; the default here may be rejected there - confirm against
    the installed Keras version.
    """
    model.save_weights(filename)
    confirmation = f"Model weights saved to {filename}"
    print(confirmation)

In [None]:
class TradingAgent:
    """Deep Q-learning agent with experience replay and a target network.

    Parameters:
    - state_size: length of the state vector fed to the Q-network.
    - action_size: number of discrete actions (width of the Q-value output).
    - target_update_freq: number of training steps between copies of the
      online network's weights into the target network. Copying on every
      step would make the target network track the online one and defeat
      its purpose of providing stable bootstrap targets.

    Raises:
    - ValueError: if ``target_update_freq`` is smaller than 1.
    """

    def __init__(self, state_size, action_size, target_update_freq=100):
        if target_update_freq < 1:
            raise ValueError("target_update_freq must be >= 1")

        self.state_size = state_size
        self.action_size = action_size
        self.epsilon = 1.0  # Exploration-exploitation tradeoff
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.discount_factor = 0.99
        self.learning_rate = 0.001
        self.batch_size = 64
        self.target_update_freq = target_update_freq
        self._train_steps = 0  # Completed replay() training steps

        # Replay memory
        self.memory = deque(maxlen=2000)
        self.model_name = 'model_1'

        # Neural networks for Q-value approximation: the online network is
        # trained, the target network supplies the bootstrap values.
        self.q_network = _build_model(self)
        self.target_network = _build_model(self)
        self.target_network.set_weights(self.q_network.get_weights())

    def act(self, state):
        """Choose an action epsilon-greedily for ``state``.

        Returns:
        - A random action index with probability ``epsilon``, otherwise the
          index of the largest predicted Q-value.
        """
        if np.random.rand() < self.epsilon:
            return random.choice(range(self.action_size))  # Explore

        state_tensor = np.expand_dims(state, axis=0)  # Add the batch axis
        # verbose=0: a progress bar per single-state prediction only adds noise.
        q_values = self.q_network.predict(state_tensor, verbose=0)
        return int(np.argmax(q_values[0]))  # Exploit

    def store_transition(self, state, action, reward, next_state, done):
        """Append one (state, action, reward, next_state, done) tuple to memory."""
        self.memory.append((state, action, reward, next_state, done))

    def replay(self):
        """Train the online network on one random minibatch from memory.

        Returns:
        - None when memory holds fewer than ``batch_size`` transitions (no
          training happens); otherwise the online Q-network after the update.
        """
        if len(self.memory) < self.batch_size:
            return None

        # Sample a batch from memory
        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        states = np.asarray(states, dtype=np.float32)
        actions = np.asarray(actions, dtype=int)
        rewards = np.asarray(rewards, dtype=float)
        next_states = np.asarray(next_states, dtype=np.float32)
        dones = np.asarray(dones, dtype=bool)

        # Start from the current predictions so only the taken action's
        # Q-value contributes to the loss.
        targets = np.array(self.q_network.predict(states, verbose=0))
        next_q_values = self.target_network.predict(next_states, verbose=0)

        # Terminal transitions do not bootstrap from the next state.
        bootstrap = np.where(
            dones, 0.0, self.discount_factor * np.max(next_q_values, axis=1)
        )
        targets[np.arange(self.batch_size), actions] = rewards + bootstrap

        # Train the Q-network
        self.q_network.fit(states, targets, epochs=1, verbose=0)

        # Sync the target network periodically, not on every step.
        self._train_steps += 1
        if self._train_steps % self.target_update_freq == 0:
            self.update_target_network()

        # Decay epsilon
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        return self.q_network

    def update_target_network(self):
        """Copy the online network's weights into the target network."""
        self.target_network.set_weights(self.q_network.get_weights())


In [None]:
def train_agent(data, episodes=1):
    """Train a TradingAgent on one price table and return its Q-network.

    Parameters:
    - data: price table passed to StockMarketEnv.
    - episodes: number of episodes to run; each starts from env.reset() and
      runs until the environment reports done.

    Returns:
    - The agent's online Q-network. It is returned even if the replay memory
      never filled a batch (so no training step ran), which lets callers
      save the result without a None check.
    """
    env = StockMarketEnv(data)
    # Size the agent from the environment so the two cannot drift apart.
    agent = TradingAgent(env.state_size, len(env.action_space))

    for e in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False

        while not done:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.store_transition(state, action, reward, next_state, done)
            agent.replay()
            state = next_state
            total_reward += reward

        print(f"Episode {e+1}/{episodes} - Total Reward: {total_reward:.2f}")

    return agent.q_network

In [None]:
# Train one model per data file (data_8 .. data_10) and save its weights.
# Create the output directory first so saving does not fail on a fresh checkout.
os.makedirs("./Weights", exist_ok=True)
for i in range(8, 11):
    df = pd.read_csv(f"./Data/data_{i}.csv")
    model = train_agent(df)
    save_model_weights(model, f'./Weights/model_{i}.weights.h5')