In [38]:
import pandas as pd
import numpy as np

# Load the dataset and split it into the state features and the target column.
# NOTE(review): the path ends in ".xlsx" but is parsed with read_csv — confirm the
# file really contains delimited text; a true Excel workbook needs pd.read_excel.
DATA_PATH = '/content/drive/MyDrive/Mini Project/FinalData/Fault_Present_Data.xlsx'
STATE_COLUMNS = ['Tc', 'Tr', 'Tj']
TARGET_COLUMN = 'Fc'

data = pd.read_csv(DATA_PATH)

# X holds one row per sample with the three state columns; y holds the 'Fc' column.
X = data[STATE_COLUMNS].values
y = data[TARGET_COLUMN].values

# Environment Setup: Define the environment for Q-Learning.

In [39]:
class BatchReactorEnvironment:
    """Replay a recorded dataset as a step-by-step environment.

    Rows of ``X`` are served as states in order, and the entry of ``y`` at the
    same index is returned as the reward for arriving at that row.

    Args:
        X: 2-D array of states, one row per step. Its column count becomes
            ``state_size``.
        y: Sequence of reward values indexed by the same step counter as ``X``.

    Raises:
        ValueError: If ``X`` has no rows, or ``y`` has fewer entries than ``X``.
    """

    def __init__(self, X, y):
        if len(X) == 0:
            raise ValueError("X must contain at least one state row")
        if len(y) < len(X):
            raise ValueError("y must provide a reward for every row of X")
        self.X = X
        self.y = y
        self.state_size = X.shape[1]
        self.action_size = 1  # Only one action to control Fc
        self.total_steps = len(X)
        self.current_step = 0
        # Bounds applied to every action passed to step().
        self.action_low = 0
        self.action_high = 1

    def reset(self):
        """Rewind to the first row and return it as the initial state."""
        self.current_step = 0
        return self.X[self.current_step]

    def step(self, action):
        """Advance one row.

        Args:
            action: Scalar action; it is clipped to ``[action_low, action_high]``
                before being handed to ``get_reward``.

        Returns:
            Tuple ``(next_state, reward, done)``. ``done`` is True once the last
            row of ``X`` has been reached; further calls keep returning that
            last row with ``done=True``.
        """
        last_index = self.total_steps - 1
        # Clamp the counter: without this the final step indexed X and y one past
        # the end and raised IndexError before ``done`` could ever be reported.
        self.current_step = min(self.current_step + 1, last_index)
        done = self.current_step >= last_index
        next_state = self.X[self.current_step]
        action = np.clip(action, self.action_low, self.action_high)
        reward = self.get_reward(action)
        return next_state, reward, done

    def get_reward(self, action):
        """Return the recorded ``y`` value for the current step.

        NOTE(review): ``action`` is accepted but not used, so the reward does not
        depend on what the agent does — confirm this is the intended reward.
        """
        # Reward calculation
        predicted_reward = self.y[self.current_step]  # Reward logic
        return predicted_reward

# Actor and Critic Networks: Implement DNN architectures for Actor and Critic.

In [40]:
import tensorflow as tf

class ActorCriticDNN:
    """Hold a compiled actor network and a compiled critic network.

    Both networks take a state vector of length ``state_size`` and share the
    same shape: one 64-unit ReLU hidden layer followed by an output layer.
    The actor emits ``action_size`` sigmoid outputs; the critic emits a single
    linear output.

    Args:
        state_size: Number of features in a state vector.
        action_size: Number of outputs produced by the actor.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.actor = self.build_actor()
        self.critic = self.build_critic()

    def _build_network(self, output_units, output_activation):
        """Create and compile a one-hidden-layer network with the given head."""
        hidden_layer = tf.keras.layers.Dense(
            64, activation='relu', input_shape=(self.state_size,)
        )
        output_layer = tf.keras.layers.Dense(output_units, activation=output_activation)
        network = tf.keras.Sequential([hidden_layer, output_layer])
        network.compile(optimizer='adam', loss='mse')
        return network

    def build_actor(self):
        """Return the compiled actor: state -> ``action_size`` sigmoid outputs."""
        return self._build_network(self.action_size, 'sigmoid')

    def build_critic(self):
        """Return the compiled critic: state -> one linear output."""
        return self._build_network(1, 'linear')

# Q-Learning Algorithm Implementation: Implement the Q-Learning algorithm.

In [41]:
class QLearningAgent:
    """Epsilon-greedy agent that fits a critic to one-step TD targets.

    The agent walks the environment one step at a time. After every step it
    fits the critic towards ``reward + gamma * critic(next_state)`` (just
    ``reward`` on the terminal step) and fits the actor towards the action that
    was taken.

    Args:
        env: Object exposing ``reset()``, ``step(action)`` returning
            ``(next_state, reward, done)``, and ``action_low`` / ``action_high``.
        actor_critic: Object exposing ``actor`` and ``critic`` models, each with
            Keras-style ``predict`` and ``fit`` methods.
        max_episodes: Maximum number of episodes run by ``train``.
        target_reward: Optional episode reward at which training stops early.
    """

    def __init__(self, env, actor_critic, max_episodes=1, target_reward=None):
        self.env = env
        self.actor_critic = actor_critic
        self.epsilon = 1.0  # Probability of taking a random action
        self.epsilon_decay = 0.995  # Multiplier applied to epsilon after each episode
        self.epsilon_min = 0.01  # Decay stops once epsilon is at or below this value
        self.gamma = 0.95  # Discount factor
        self.batch_size = 32  # Not read anywhere in this class; kept as an attribute
        self.max_episodes = max_episodes
        self.target_reward = target_reward

    def choose_action(self, state):
        """Pick an action for ``state`` using an epsilon-greedy rule.

        With probability ``epsilon`` a uniform random action within the
        environment's bounds is returned; otherwise the actor's first output,
        clipped to the same bounds.
        """
        low, high = self.env.action_low, self.env.action_high
        if np.random.rand() <= self.epsilon:
            return np.random.uniform(low, high)
        # verbose=0 keeps Keras from printing a progress bar on every single step.
        prediction = self.actor_critic.actor.predict(state.reshape(1, -1), verbose=0)
        return np.clip(prediction[0][0], low, high)

    def train(self):
        """Run up to ``max_episodes`` episodes, updating both networks each step.

        Prints the total reward per episode, and stops early when
        ``target_reward`` is set and an episode's total reward reaches it.
        """
        actor = self.actor_critic.actor
        critic = self.actor_critic.critic
        for episode in range(self.max_episodes):
            state = self.env.reset()
            total_reward = 0
            done = False
            while not done:
                action = self.choose_action(state)
                next_state, reward, done = self.env.step(action)
                total_reward += reward

                if done:
                    # Terminal transition: there is no future value to bootstrap from.
                    target = reward
                else:
                    next_value = critic.predict(next_state.reshape(1, -1), verbose=0)
                    target = reward + self.gamma * np.max(next_value)

                # The critic has a single output, so the training target is just
                # the TD target; no extra forward pass on ``state`` is needed.
                critic_target = np.array([[target]], dtype=np.float32)
                critic.fit(state.reshape(1, -1), critic_target, epochs=1, verbose=0)

                # Prepare training data for the actor model
                # NOTE(review): the actor is regressed onto the action it just took
                # (including random exploratory ones) — confirm this is intended.
                actor_train_input = np.expand_dims(state, axis=0)
                actor_train_target = np.expand_dims(np.array([action]), axis=0)
                actor.fit(actor_train_input, actor_train_target, epochs=1, verbose=0)
                state = next_state

            # Epsilon decays once per episode, so it stays constant within an episode.
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay
            print(f"Episode: {episode + 1}, Total Reward: {total_reward}")
            if self.target_reward is not None and total_reward >= self.target_reward:
                print(f"Reached target reward of {self.target_reward} in episode {episode + 1}.")
                break
        print("Training completed.")

## Initialize the environment, actor-critic networks, and Q-learning agent

In [42]:
# Build the dataset-backed environment and size the networks from it.
env = BatchReactorEnvironment(X, y)
actor_critic = ActorCriticDNN(state_size=env.state_size, action_size=env.action_size)

# NOTE(review): desired_reward is assigned but never read; the agent below is
# given target_reward=1 instead — confirm which threshold is intended.
desired_reward = 100
agent = QLearningAgent(
    env=env,
    actor_critic=actor_critic,
    max_episodes=1,
    target_reward=1,
)

# Run the training loop (a single episode with the settings above).
agent.train()



KeyboardInterrupt: 