In [None]:
import numpy as np
import gym
from gym import spaces
import itertools
from gym.spaces import Box

class Nuc_Maint_Env_Proposal_220211_NPIC_MATLAB2022A(gym.Env):
    """Single-component maintenance and spare-part ordering environment.

    Each call to :meth:`step` advances the simulation by one month.  The
    agent chooses a maintenance decision (0 = nothing, 1 = repair,
    2 = replace) together with an ordering decision (0 = no order,
    1 = order one spare part).

    Actions are accepted either as an index into ``ActionInfo.Elements``
    (the ``Discrete(6)`` ``action_space``) or directly as a
    ``[maintenance, order]`` pair.

    ``State`` is a ``(5, 1)`` float array holding, in order: month, true
    component health, inventory count, countdown of the next pending
    delivery (0 when nothing is on order) and the part-replaced flag.

    The observation is a ``(5, 1)`` float32 array holding, in order:
    normalized time to the next refuel month, sensed health, normalized
    inventory, inventory-available flag and normalized delivery countdown.
    """

    def __init__(self):
        super().__init__()

        self.NumComponents = 1
        self.MaxTime = 200
        self.RefuelPeriod = 18
        self.MaxInitialHealth = 0.9999
        self.MinInitialHealth = 0.8
        self.InitialHealthNoise = 0.01
        self.MinHealth = 0
        self.DegradationMean = 0.05
        self.DegradationVar = 0.1
        self.SensorNoiseVar = 0
        self.LeadTime = 5
        self.PenaltyForThirdCapacity = -40
        self.PenaltyForHalfCapacity = -60
        self.PenaltyForNotOperating = -100
        self.CostToRepair = -10
        self.CostToReplace = -5
        self.CostToOrder = -15
        self.CostInventory = -1

        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(5, 1), dtype=np.float32
        )

        # Every valid [maintenance, order] combination.  An action index i
        # maps to maintenance = i % 3 and order = i // 3.
        elements = [[0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1]]
        self.action_space = spaces.Discrete(len(elements))

        # Descriptive action metadata.  The upper bounds follow the element
        # table: maintenance goes up to 2, order up to 1.
        ActionInfo = Box(
            low=np.zeros(2, dtype=np.float32),
            high=np.array([2.0, 1.0], dtype=np.float32),
            dtype=np.float32,
        )
        ActionInfo.Elements = elements
        ActionInfo.Name = "Maintenance actions"
        ActionInfo.Description = ""
        ActionInfo.Dimension = [1, 2]
        ActionInfo.DataType = "double"

        self.ActionInfo = ActionInfo
        self.State = np.zeros((5, 1))
        self.IsDone = False
        # Countdown (in months) of every outstanding order, oldest first.
        # Kept outside State so that State stays a plain numeric array.
        self._arrival_dates = []

    def _decode_action(self, action):
        """Return ``(maintenance, order)`` for an index or a pair.

        Raises:
            ValueError: if ``action`` is neither a valid index into
                ``ActionInfo.Elements`` nor one of its pairs.
        """
        elements = self.ActionInfo.Elements
        try:
            values = np.asarray(action, dtype=float).reshape(-1)
        except (TypeError, ValueError) as err:
            raise ValueError(f"Unsupported action: {action!r}") from err

        if values.size not in (1, 2) or not np.all(np.isfinite(values)):
            raise ValueError(f"Unsupported action: {action!r}")

        whole = [int(v) for v in values]
        if any(w != v for w, v in zip(whole, values)):
            raise ValueError(f"Action must be integer-valued: {action!r}")

        if len(whole) == 1:
            index = whole[0]
            if not 0 <= index < len(elements):
                raise ValueError(f"Action index out of range: {action!r}")
            maintenance, order = elements[index]
        else:
            if whole not in elements:
                raise ValueError(f"Unknown action pair: {action!r}")
            maintenance, order = whole
        return maintenance, order

    def _days_to_shutdown(self, month):
        """Return the fraction of a refuel period left until the next refuel."""
        phase = month % self.RefuelPeriod
        if phase == 0:
            return 0
        return (self.RefuelPeriod - phase) / self.RefuelPeriod

    def step(self, action):
        """Advance the simulation by one month.

        Args:
            action: index into ``ActionInfo.Elements`` or a
                ``[maintenance, order]`` pair.

        Returns:
            ``(observation, reward, done, info)``; ``done`` becomes True once
            the month counter reaches ``MaxTime``.
        """
        maintenance, order = self._decode_action(action)

        month = self.State[0][0] + 1
        health = self.State[1][0]
        inventory = self.State[2][0]
        part_replaced = 0

        if maintenance == 2 and inventory > 0:  # Replace (needs a spare)
            inventory -= 1
            health = self.MaxInitialHealth - np.random.gamma(self.InitialHealthNoise)
            health = max(health, self.MinInitialHealth)
            part_replaced = 1
        elif maintenance == 1:  # Repair closes about 10% of the health gap
            health += (self.MaxInitialHealth - health) * 0.1

        in_refuel = month % self.RefuelPeriod == 0
        if not in_refuel:
            # No degradation is applied during a refuel month.
            wear = np.abs(self.DegradationMean + self.DegradationVar * np.random.randn())
            health -= wear * (1 - health)

        health = max(health, 0)  # Health can not be less than 0

        if health == 0:
            sensor = 0  # self-announcing failure
        else:
            sensor = health + self.SensorNoiseVar * np.random.randn()
            sensor = max(min(sensor, 1), 0.01)

        # Age the outstanding orders; those that reached zero are delivered.
        pending = [countdown - 1 for countdown in self._arrival_dates]
        inventory += sum(1 for countdown in pending if countdown <= 0)
        pending = [countdown for countdown in pending if countdown > 0]

        if order:
            pending.append(self.LeadTime)
        self._arrival_dates = pending

        next_arrival = pending[0] if pending else 0
        on_order = next_arrival / self.LeadTime if pending else 0

        self.State = np.array(
            [[month], [health], [inventory], [next_arrival], [part_replaced]],
            dtype=float,
        )
        observation = np.array(
            [
                [self._days_to_shutdown(month)],
                [sensor],
                [inventory / 200],
                [int(inventory > 0)],
                [on_order],
            ],
            dtype=np.float32,
        )

        # MaxTime bounds the episode length.
        self.IsDone = bool(month >= self.MaxTime)
        reward = self.get_reward(self.State, action)
        return observation, reward, self.IsDone, {}

    def reset(self):
        """Start a new episode and return the initial ``(5, 1)`` observation."""
        health = self.MaxInitialHealth - 0.05 * np.random.rand()
        sensor = health + self.SensorNoiseVar * np.random.randn()
        sensor = max(min(sensor, 1), 0)
        inventory = 0
        month = 1

        self._arrival_dates = []
        self.IsDone = False
        self.State = np.array(
            [[month], [health], [inventory], [0], [0]], dtype=float
        )

        return np.array(
            [
                [self._days_to_shutdown(month)],
                [sensor],
                [inventory / 200],
                [int(inventory > 0)],
                [0],
            ],
            dtype=np.float32,
        )

    def get_reward(self, State, Action):
        """Return the reward for having taken ``Action`` and reached ``State``.

        Args:
            State: ``(5, 1)`` state array as stored in ``self.State``.
            Action: index into ``ActionInfo.Elements`` or a
                ``[maintenance, order]`` pair.
        """
        month = State[0][0]
        health = State[1][0]  # true component health, not the noisy sensor
        inventory = State[2][0]
        part_replaced = State[4][0]

        maintenance, order = self._decode_action(Action)

        reward = 0

        if inventory > 0:
            reward += self.CostInventory * inventory  # holding cost per part

        reward += self.CostToOrder * order

        # Outside a refuel month, a failed component or a replacement
        # incurs the capacity penalty.
        if month % self.RefuelPeriod != 0 and (health == 0 or part_replaced):
            reward += 2 * self.PenaltyForHalfCapacity

        if maintenance == 1:
            reward += self.CostToRepair
        elif part_replaced:
            reward += self.CostToReplace

        return reward
