In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import matlab.engine
import socket, struct
import threading
import concurrent.futures
import random

In [2]:
## MATLAB Engine API connection
# Start a MATLAB session from Python; `eng` is the handle SimRun() uses to run the model.
eng = matlab.engine.start_matlab()
# Point MATLAB's working directory and search path at the same project folder.
# NOTE(review): absolute, user-specific path; presumably the folder that holds
# Buck_Converter.slx. Confirm, and consider making it configurable.
eng.cd(r'C:\Users\pvm8318\Documents\NeoVim\Reinforcement')
eng.addpath(r'C:\Users\pvm8318\Documents\NeoVim\Reinforcement')
def SimRun():
    """Run the ``Buck_Converter.slx`` Simulink model through the MATLAB engine.

    Uses the module-level ``eng`` session. The result of ``eng.sim`` is
    discarded and nothing is returned.
    """
    eng.sim('Buck_Converter.slx')

In [3]:
## TCP connection settings
TCP_IP = '156.62.139.28'
TCP_PORT = 50000
DELIMITER = b'\n'
MESSAGE_SIZE = 24  # bytes per fixed-size message (receive_data unpacks it as '>ddd')
# Read size for delimiter framing; falls back to 32 only when MESSAGE_SIZE is falsy.
BUFFER_SIZE = MESSAGE_SIZE or 32


def send_data(conn, val):
    """Send one value to the peer as a big-endian double (8 bytes).

    Args:
        conn: Connected socket-like object exposing ``sendall``.
        val: Number to transmit; anything ``float()`` accepts (Python or
            NumPy scalars, scalar tensors).

    Raises:
        OSError: If the underlying socket write fails.
    """
    msg = struct.pack('>d', float(val))
    # send() may write only part of the buffer; sendall() loops until every
    # byte is out, so the receiver never sees a truncated double.
    conn.sendall(msg)

def _recv_exact(conn, size):
    """Read exactly ``size`` bytes from ``conn``, looping over short reads."""
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = conn.recv(remaining)
        if not chunk:
            # recv() returning b'' means the peer closed the connection.
            raise ConnectionError(
                'connection closed with %d of %d bytes missing' % (remaining, size)
            )
        chunks.append(chunk)
        remaining -= len(chunk)
    return b''.join(chunks)


def receive_data(conn):
    """Receive three big-endian doubles from the peer.

    With a truthy ``MESSAGE_SIZE`` the message is a fixed-size block unpacked
    as ``'>ddd'``. Otherwise the three 8-byte values are expected to be
    separated by ``DELIMITER``.

    Args:
        conn: Connected socket-like object exposing ``recv``.

    Returns:
        Tuple ``(val1, val2, Time)`` of floats.

    Raises:
        ConnectionError: If the peer closes before a full message arrives.
        struct.error: If the received fields do not have the expected size.
    """
    if MESSAGE_SIZE:
        # TCP is a byte stream: one recv() may return fewer bytes than asked
        # for, so keep reading until the whole message is available.
        data = _recv_exact(conn, MESSAGE_SIZE)
        val1, val2, Time = struct.unpack('>ddd', data)
        return val1, val2, Time

    # Delimiter framing. NOTE(review): raw doubles can themselves contain the
    # delimiter byte, so this mode is only reliable if the sender guarantees
    # otherwise; fixed-size framing is the safer choice.
    buffer = b''
    while True:
        parts = buffer.split(DELIMITER, 2)
        if len(parts) == 3 and len(parts[2]) >= 8:
            break
        chunk = conn.recv(BUFFER_SIZE)
        if not chunk:
            raise ConnectionError('connection closed before a full message arrived')
        # Accumulate instead of discarding, so a message split across reads
        # is still decoded.
        buffer += chunk
    val1 = struct.unpack('>d', parts[0])[0]
    val2 = struct.unpack('>d', parts[1])[0]
    Time = struct.unpack('>d', parts[2])[0]
    return val1, val2, Time

# TODO: close any previously opened socket connection before reconnecting (no code does this yet).

In [4]:
## Buck converter parameters
u = 0  # control input (duty cycle) before the first action is computed
R = 1.0  # Resistance
L = 0.1  # Inductance
C = 1e-3  # Capacitance
Vin = 12.0  # Input voltage
Vref = 5.0  # Reference output voltage
# State-space representation of the buck converter
A = np.array([[0, 1 / C], [-1 / L, -R / L]])
B = np.array([[0], [1 / L]])
# Steady-state operating point
duty_cycle = Vref / Vin
Iout = Vref / R
# In steady state the capacitor carries no average current, so the average
# inductor current of a buck converter equals the load current. The previous
# Iout / duty_cycle (12 A here) is not reachable together with V = Vref and
# skewed the reward set-point.
# NOTE(review): assumes the second state reported by Simulink is the inductor
# current; confirm against the model.
ILref = Iout

In [5]:
def websocket ():
    """Wait for one incoming TCP connection and return the connected socket.

    Despite the name this is a plain TCP server socket, not a WebSocket. It
    binds to ``(TCP_IP, TCP_PORT)``, prints a waiting message and blocks in
    ``accept()`` until a client connects.

    Returns:
        The connected socket; the caller is responsible for closing it.

    Raises:
        OSError: If binding, listening or accepting fails.
    """
    # The listening socket is only needed until the client connects. The
    # context manager closes it deterministically, also when bind()/accept()
    # raises, so the port is released before the next call binds it again.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
        listener.bind((TCP_IP, TCP_PORT))
        print('Waiting for Simulink to start')
        listener.listen(1)
        conn, _addr = listener.accept()
    return conn

In [6]:
# Define the actor and critic networks
class Actor(tf.keras.Model):
    """Policy network mapping a state to an action in ``[0, max_action]``.

    Two 64-unit ReLU layers feed a sigmoid output that is scaled by
    ``max_action``.

    Args:
        state_dim: Size of the state vector. Accepted for interface
            compatibility; the layers infer their input size on first call.
        action_dim: Number of action components (width of the output layer).
        max_action: Scale applied to the sigmoid output.
    """

    def __init__(self, state_dim, action_dim, max_action):
        super().__init__()
        # Keep the scale on the instance: call() previously read a global
        # named max_action and ignored the value passed to the constructor.
        self.max_action = max_action
        self.fc1 = Dense(64, activation='relu')
        self.fc2 = Dense(64, activation='relu')
        self.output_layer = Dense(action_dim, activation='sigmoid')

    def call(self, state):
        """Return the action for ``state`` (batch on the first axis)."""
        x = self.fc1(state)
        x = self.fc2(x)
        action = self.output_layer(x) * self.max_action
        return action

class Critic(tf.keras.Model):
    """Q-network: scores a (state, action) pair with a single value.

    The state passes through a 64-unit ReLU layer, is concatenated with the
    action on the last axis, then goes through a second 64-unit ReLU layer
    and a linear one-unit output. ``state_dim`` and ``action_dim`` are
    accepted but not used by the constructor.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.fc1 = Dense(64, activation='relu')
        self.fc2 = Dense(64, activation='relu')
        self.output_layer = Dense(1)

    def call(self, state, action):
        """Return the Q-value tensor for the given states and actions."""
        state_features = self.fc1(state)
        joint = tf.concat([state_features, action], axis=-1)
        return self.output_layer(self.fc2(joint))

def rewardcal(x, u):
    """Return the negative squared distance between the state and its set-point.

    Args:
        x: State ``[V, IL]`` (array-like of two numbers).
        u: Control input. Not part of the reward at present; kept so callers
            and a future control-effort penalty need no signature change.

    Returns:
        ``-||x - [Vref, ILref]||**2`` as a NumPy float; 0 at the set-point
        and negative elsewhere.
    """
    error = x - np.array([Vref, ILref])
    # Variant with a control-effort penalty of weight R, currently disabled:
    # reward = -np.linalg.norm(error)**2 - u**2 * R
    return -np.linalg.norm(error)**2


def isdone(x, t, band=(4.8, 5.2), hold_time=0.5):
    """Report whether the voltage has stayed inside ``band`` for ``hold_time``.

    The time at which the voltage entered the band is remembered between
    calls on the function attribute ``isdone._band_entry_time``. It was
    previously a local reset on every call, so the function could never
    return True. The stored time is cleared when the voltage leaves the band
    and restarted when ``t`` jumps backwards (a new episode starting at an
    earlier time).

    Args:
        x: State ``[V, IL]``; only ``x[0]`` (the voltage) is inspected.
        t: Current time, in the same unit as ``hold_time``.
        band: ``(low, high)`` inclusive voltage band.
        hold_time: Time the voltage must remain in the band.

    Returns:
        True once the voltage has been continuously inside the band for at
        least ``hold_time``; False otherwise.
    """
    V = x[0]
    low, high = band

    if not (low <= V <= high):
        # Leaving the band restarts the dwell timer.
        isdone._band_entry_time = None
        return False

    entered = getattr(isdone, '_band_entry_time', None)
    if entered is None or t < entered:
        # First sample inside the band, or the clock restarted: start timing.
        isdone._band_entry_time = t
        return False

    return bool(t - entered >= hold_time)

# Initialize environment and hyperparameters
state_dim = 2  # I have Voltage and Current that describes the state of the system
action_dim = 1  # Duty cycle
max_action = 1.0  # Maximum duty cycle value
actor_lr = 0.001
critic_lr = 0.002
gamma = 0.99  # Discount factor
tau = 0.005  # Target network update rate

actor = Actor(state_dim, action_dim, max_action)
actor_target = Actor(state_dim, action_dim, max_action)
critic = Critic(state_dim, action_dim)
critic_target = Critic(state_dim, action_dim)

# Subclassed Keras models create their variables on the first call. Before
# that, get_weights() returns an empty list and set_weights() copies nothing,
# which left the target networks unsynchronised and their trainable_variables
# empty. Run one dummy forward pass through every network first.
_dummy_state = tf.zeros((1, state_dim))
_dummy_action = tf.zeros((1, action_dim))
for _net in (actor, actor_target):
    _net(_dummy_state)
for _net in (critic, critic_target):
    _net(_dummy_state, _dummy_action)

actor_target.set_weights(actor.get_weights())
critic_target.set_weights(critic.get_weights())

actor_optimizer = Adam(learning_rate=actor_lr)
critic_optimizer = Adam(learning_rate=critic_lr)

# Define the replay buffer (store experiences)
replay_buffer = []
num_episodes = 100
Vinit = 0
Iinit = 0
# Maximum number of steps per episode. Rounded to an int because 30/1e-5 is
# not exactly representable and a float cannot be used as a step count.
max_steps = int(round(30 / 1e-5))
batch_size = 32  # Replace with your desired batch size

# Training loop


# After training, use the trained actor network to control the buck converter
# You can query the actor network with the current state to get the optimal duty cycle
# Remember to adapt this code to your specific Simulink model and requirements


In [7]:
# DDPG training loop: one Simulink run per episode, one control step per TCP
# round trip (send duty cycle, receive [voltage, current, time]).
# NOTE(review): actions are the deterministic actor output; no exploration
# noise is added. Consider adding some if learning stalls.
for episode in range(num_episodes):
    # Simulink runs in a background thread while this thread serves as the
    # TCP peer it connects to.
    sim_thread = threading.Thread(target=SimRun)
    sim_thread.start()
    conn = websocket()

    sim_time = 0.0
    state = np.array([Vinit, Iinit], dtype=np.float32)  # Initial state from Simulink
    total_reward = 0.0
    u = 0.0

    try:
        while sim_time < 30:
            # Choose action using actor network
            action = actor(np.expand_dims(state, axis=0))
            u = float(action[0][0])
            send_data(conn, u)
            val1, val2, sim_time = receive_data(conn)
            next_state = np.array([val1, val2], dtype=np.float32)
            reward = rewardcal(next_state, u)
            done = isdone(next_state, sim_time)

            # Store experience in replay buffer. Bookkeeping happens before
            # the batch-size check so the state always advances, even while
            # the buffer is still filling.
            replay_buffer.append(
                (state, np.array([u], dtype=np.float32), reward, next_state, float(done))
            )
            state = next_state
            total_reward += reward

            if len(replay_buffer) >= batch_size:
                batch = random.sample(replay_buffer, batch_size)
                states, actions, rewards, next_states, dones = (
                    tf.convert_to_tensor(np.asarray(column, dtype=np.float32))
                    for column in zip(*batch)
                )
                rewards = tf.reshape(rewards, (-1, 1))
                dones = tf.reshape(dones, (-1, 1))

                # Update critic network towards the one-step TD target built
                # from the target networks (computed outside the tape so no
                # gradient flows into it).
                target_actions = actor_target(next_states)
                target_q = rewards + gamma * (1.0 - dones) * critic_target(
                    next_states, target_actions
                )
                with tf.GradientTape() as tape:
                    critic_loss = tf.reduce_mean(
                        tf.square(target_q - critic(states, actions))
                    )
                critic_gradients = tape.gradient(critic_loss, critic.trainable_variables)
                critic_optimizer.apply_gradients(
                    zip(critic_gradients, critic.trainable_variables)
                )

                # Update actor network
                with tf.GradientTape() as tape:
                    new_actions = actor(states)
                    actor_loss = -tf.reduce_mean(critic(states, new_actions))
                actor_gradients = tape.gradient(actor_loss, actor.trainable_variables)
                actor_optimizer.apply_gradients(
                    zip(actor_gradients, actor.trainable_variables)
                )

                # Update target networks
                for target, source in zip(actor_target.trainable_variables, actor.trainable_variables):
                    target.assign(target * (1 - tau) + source * tau)
                for target, source in zip(critic_target.trainable_variables, critic.trainable_variables):
                    target.assign(target * (1 - tau) + source * tau)

            if done:
                break
    finally:
        # Release the socket even if a step fails, then wait for the
        # simulation thread so two runs never overlap on the same engine.
        conn.close()
        sim_thread.join()

    print(f"Episode {episode + 1}: Total Reward = {total_reward:.2f}")
    print('Duty cycle is:', u)
    print('time is:', sim_time)

Waiting for Simulink to start


: 