In [None]:
import gym
from gym import Env
from gym.spaces import Discrete, Box
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
class DC_DC_Arvin(gym.Env):
    """Simplified DC-DC converter environment.

    The output voltage is modelled as ``Duty_cycle * In_voltage``. The agent
    nudges the duty cycle up or down in fixed steps and earns a positive
    reward while the output voltage stays inside a tolerance band around the
    requested voltage.

    Actions (``Discrete(3)``):
        0 -- decrease the duty cycle by ``DUTY_STEP``
        1 -- leave the duty cycle unchanged
        2 -- increase the duty cycle by ``DUTY_STEP``

    Observation space: a single value, the output voltage, in
    ``[0, MAX_VOLTAGE]`` volts.

    Note: ``step`` and ``reset`` keep this notebook's own return shapes
    (a 7-tuple and the input voltage respectively) rather than the standard
    Gym ``(obs, reward, done, info)`` / ``obs`` shapes.
    """

    # Upper bound (volts) for both the input voltage and the observation space.
    MAX_VOLTAGE = 50
    # Amount the duty cycle changes per non-neutral action.
    DUTY_STEP = 0.01
    # Number of actions allowed per episode.
    MAX_CHANCES = 100
    # Accepted output error, as a fraction of the requested voltage.
    TOLERANCE_RATIO = 0.03

    def __init__(self, intended_out_put):
        """Create the environment.

        Args:
            intended_out_put: Target output voltage; truncated to ``int``.

        Raises:
            ValueError: If the target exceeds ``MAX_VOLTAGE``. No input
                voltage in the allowed range could then reach it.
        """
        self.intended_out_put = int(intended_out_put)
        if self.intended_out_put > self.MAX_VOLTAGE:
            raise ValueError(
                f"intended_out_put must be <= {self.MAX_VOLTAGE}, "
                f"got {self.intended_out_put}"
            )

        # Three actions on the duty cycle: decrease, do nothing, increase.
        self.action_space = Discrete(3)

        # Single observation: the output voltage, 0..MAX_VOLTAGE volts.
        self.observation_space = Box(
            low=np.array([0.0], dtype=np.float32),
            high=np.array([float(self.MAX_VOLTAGE)], dtype=np.float32),
            shape=(1,),
            dtype=np.float32,
        )

        self._start_episode()

    def _start_episode(self):
        """Draw a fresh random starting state and refill the action budget."""
        # The duty cycle never exceeds 1, so the output can never exceed the
        # input: the input must be at least the target for it to be reachable,
        # and at most MAX_VOLTAGE so the output stays in the observation space.
        self.In_voltage = random.randint(self.intended_out_put, self.MAX_VOLTAGE)

        # Random starting duty cycle on the 0.01 grid used by step().
        self.Duty_cycle = round(random.random(), 2)

        self.Out_voltage = round(self.Duty_cycle * self.In_voltage, 2)

        # How many actions the agent may still take in this episode.
        self.chances = self.MAX_CHANCES

    def step(self, action):
        """Apply one action and advance the environment.

        Args:
            action: 0 (decrease), 1 (hold) or 2 (increase) the duty cycle.

        Returns:
            A 7-tuple ``(out_voltage, duty_cycle, reward, done,
            intended_out_put, in_voltage, tolerance)``.
        """
        # Map action 0/1/2 to -DUTY_STEP / 0 / +DUTY_STEP.
        delta = (action - 1) * self.DUTY_STEP

        # Clip to [0, 1] and snap back to two decimals so repeated steps do
        # not accumulate floating-point drift (e.g. 0.30000000000000004).
        self.Duty_cycle = round(
            float(np.clip(self.Duty_cycle + delta, 0.0, 1.0)), 2
        )

        self.Out_voltage = round(self.Duty_cycle * self.In_voltage, 2)

        # Every action consumes one of the episode's chances.
        self.chances -= 1

        reward, tolerance = self.calculate_reward()
        done = self.check_done()

        return (
            self.Out_voltage,
            self.Duty_cycle,
            reward,
            done,
            self.intended_out_put,
            self.In_voltage,
            tolerance,
        )

    def calculate_reward(self):
        """Score the current output voltage.

        Returns:
            ``(reward, tolerance)`` where ``reward`` is ``+1`` when the output
            is within ``tolerance`` volts of the target and ``-1`` otherwise,
            and ``tolerance`` is ``TOLERANCE_RATIO * intended_out_put``.
        """
        voltage_deviation = abs(self.Out_voltage - self.intended_out_put)
        tolerance = self.intended_out_put * self.TOLERANCE_RATIO

        reward = 1 if voltage_deviation <= tolerance else -1
        return reward, tolerance

    def check_done(self):
        """Return ``True`` once the episode's action budget is used up."""
        return self.chances <= 0

    def render(self):
        """No-op: this environment has no visual output."""
        pass

    def reset(self):
        """Start a new episode.

        Re-draws the input voltage and the duty cycle, recomputes the output
        voltage and restores the action budget, so no state from the previous
        episode carries over.

        Returns:
            The new input voltage.
        """
        self._start_episode()
        return self.In_voltage


In [None]:
# Build the environment with a 20 V target output.
env = DC_DC_Arvin(intended_out_put=20)

In [None]:
# Take a single "decrease duty cycle" action (action 0) and show what
# env.step() reports back.
action = 0
step = env.step(action)

# step is (out_voltage, duty_cycle, reward, done, target, in_voltage, tolerance).
print(
    f'Out-put Voltage: {step[0]} \n'
    f'Duty cycle: {step[1]} \n'
    f'Reward: {step[2]} \n'
    f'Is it done?: {step[3]} \n'
    f'Voltage we need: {step[4]} \n'
    f'Input Voltage: {step[5]} '
)
print(f'If we keep error voltage as {step[6]}')