In [2]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import gym
import random
import sklearn.preprocessing
import random
import warnings
import logging
import copy
import seaborn as sns
import tensorflow as tf
import keras
from collections import deque
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Alternative scaler kept for reference; it is not instantiated.
#scaler = StandardScaler()
# Shared MinMaxScaler instance; a later cell uses it to rescale the feature matrix.
minmax = MinMaxScaler()

In [3]:
# The training CSV is expected in the notebook's current working directory.
dir_path = os.getcwd()
dataset = 'train_FD002.csv'

# Read the file, then drop every column that contains at least one NaN.
df = pd.read_csv(
    dir_path + r'/' + dataset,
    sep=",",
    skipinitialspace=True,
).dropna(axis=1)

In [4]:
# Reduce TensorFlow log noise: set the native log-level env var and raise the
# Python-side 'tensorflow' logger threshold to ERROR.
# NOTE(review): tensorflow is already imported by an earlier cell; confirm the
# environment variable still has an effect when it is set this late.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
tf.get_logger().setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel('ERROR')

In [5]:
# Split the raw frame by column position:
#   columns 0-1  -> df_A, identifier columns (read elsewhere as 'unit' and 'cycle')
#   columns 2-4  -> df_W, first feature group (presumably operating settings - confirm)
#   columns 5-25 -> df_S, second feature group (presumably sensor readings - confirm)
df_A = df[df.columns[[0, 1]]]
df_W = df[df.columns[[2, 3, 4]]]
df_S = df[df.columns[list(range(5, 26))]]
df_X = pd.concat([df_W, df_S], axis=1)

# Fit the scaler on the training engines only (the first 80% of unit ids, the
# same split CustomEnv uses) so that statistics of the held-out engines do not
# leak into training. The fitted scaler is then applied to every row, which
# means held-out rows may fall slightly outside [0, 1].
n_train_units = int(df_A['unit'].max() * 80 / 100)
minmax.fit(df_X[df_A['unit'] <= n_train_units])
df_X = minmax.transform(df_X)

# First engine the environment starts on.
engine_unit = 1

In [6]:
def create_sequences(data, seq_length):
    """Slice ``data`` into overlapping windows of ``seq_length`` consecutive rows.

    Window ``i`` is ``data[i:i + seq_length]`` for every ``i`` in
    ``range(len(data) - seq_length)``. That yields ``len(data) - seq_length``
    windows, so the window starting at index ``len(data) - seq_length`` is not
    emitted.

    Args:
        data: Sliceable sequence of rows (e.g. a 2-D numpy array).
        seq_length: Number of consecutive rows per window.

    Returns:
        ``np.ndarray`` with the windows stacked along a new leading axis; an
        empty 1-D array when no window fits.
    """
    n_windows = len(data) - seq_length
    return np.array([data[start:start + seq_length] for start in range(n_windows)])

seq_length = 3  # number of consecutive rows per window
X = create_sequences(df_X, seq_length)

# Keep X with an extra singleton axis at position 2 (later cells only read
# X.shape[1], the window length).
X = np.expand_dims(X, axis=2)

# Collapse the singleton axis again so df_X holds one window per row index.
# The feature count is taken from the array instead of being hard-coded, so
# the cell keeps working if the number of feature columns changes.
n_features = X.shape[-1]
df_X = np.reshape(X, (X.shape[0], X.shape[1], n_features))

In [7]:
# Reward constants read by CustomEnv.step
c_f = -100        # reward when the episode ends at the failure cycle
c_r = -50         # base reward for a replacement before the failure cycle
do_nothing = 0    # reward for a hold step that does not end the episode

# Per-unit decision records written by CustomEnv.step, keyed by engine unit
# (training runs write to `policy`, evaluation runs to `policy_test`).
policy, policy_test = {}, {}

In [8]:
class CustomEnv(gym.Env):
    """Hold-or-replace environment that steps through one engine unit per episode.

    The environment reads the module-level ``df_A`` (unit/cycle identifiers),
    ``df_X`` (one observation per row index), the reward constants ``c_f``,
    ``c_r`` and ``do_nothing``, and records the terminal decision of each
    episode in the module-level ``policy`` (training) or ``policy_test``
    (evaluation) dictionaries.

    Actions:
        0 -- hold: advance one cycle with reward ``do_nothing``; if the current
             cycle is the failure cycle the episode ends with reward ``c_f``.
        1 -- replace: the episode ends. The reward is ``c_f`` at the failure
             cycle, ``2 * c_r`` at cycle 0, and
             ``c_r * (1 - cycle / failure_state)`` otherwise.

    Args:
        is_training: When True, engines cycle through the first 80% of unit
            ids; otherwise through the remaining units.
        verbose: When True, print progress messages.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, is_training=True, verbose=True):
        # One observation is a whole row of df_X, so expose every trailing
        # dimension rather than only the first one.
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=df_X.shape[1:])
        self.action_space = gym.spaces.Discrete(2)
        self.reward = 0
        self.cycle = 0
        self.done = False
        self.train = is_training
        self.verbose = verbose
        # Starting unit comes from the module-level `engine_unit`.
        self.engine_unit = engine_unit
        self._load_engine()
        self.state = self.X[self.cycle]

    def _load_engine(self):
        """Load the identifier rows, observations and failure cycle of ``self.engine_unit``."""
        self.engine_df_A = df_A[df_A['unit'] == self.engine_unit]
        first_row = self.engine_df_A.index[0]
        last_row = self.engine_df_A.index[-1]
        self.X = df_X[first_row:last_row + 1]
        # df_X can hold fewer rows than df_A, in which case the slice of the
        # last engine is silently truncated. Clamp the failure index so that
        # self.X[self.cycle] can never run past the available observations.
        last_cycle_index = self.engine_df_A['cycle'].max() - 1
        self.failure_state = min(last_cycle_index, len(self.X) - 1)

    def _record_decision(self):
        """Store the cycle at which the current episode ended for this unit.

        In evaluation mode the record is always overwritten. In training mode
        it is written for a unit seen for the first time, or when the new
        cycle is later than the stored one while still before the failure
        cycle.
        """
        record = {'unit': self.engine_unit,
                  'failure_state': self.failure_state,
                  'replace_state': self.cycle}
        if not self.train:
            policy_test[self.engine_unit] = record
            return
        if (self.engine_unit not in policy
                or (self.cycle > policy[self.engine_unit]['replace_state']
                    and self.cycle < self.failure_state)):
            policy[self.engine_unit] = record

    def get_next_engine_data(self):
        """Advance to the next engine unit (wrapping inside the active split) and return its observations."""
        self.engine_unit += 1

        if self.train:
            # Training: wrap back to unit 1 after the first 80% of units.
            if self.engine_unit > int((df_A['unit'].max() * 80 / 100)):
                self.engine_unit = 1
        else:
            # Evaluation: wrap back to the first unit after the training split.
            if self.engine_unit > df_A['unit'].max():
                self.engine_unit = int((df_A['unit'].max() * 80 / 100) + 1)
        if self.verbose:
            print("********|engine unit|********:", self.engine_unit)

        self._load_engine()
        return self.X

    def step(self, action):
        """Apply ``action`` and return ``(state, reward, done, info)``.

        ``info`` is a dict with the current cycle index under ``'cycle'``.
        Any action other than 0 or 1 leaves the environment untouched and
        returns the previous state, reward and done flag.
        """
        # Action 0 represents do nothing
        if action == 0:
            if self.cycle == self.failure_state:
                # Held until failure: terminal step with the failure reward.
                self.reward = (c_f)
                self.state = self.X[self.cycle]
                self.done = True
                self._record_decision()
                if self.verbose:
                    print("|cycle reached failure state|:", self.cycle, "reward:", self.reward, '\n')
            else:
                self.reward = do_nothing
                self.cycle += 1
                self.state = self.X[self.cycle]
                self.done = False

        # Action 1 represents replace
        elif action == 1:
            if self.verbose:
                print("|replace|:", self.cycle)

            if self.cycle == self.failure_state:
                # Replacing at the failure cycle is rewarded like a failure.
                self.reward = (c_f)
            elif self.cycle == 0:
                # Replacing immediately gets twice the base replacement reward.
                self.reward = (c_r)*2
            else:
                # The magnitude shrinks linearly as the cycle approaches failure.
                self.reward = c_r * (1-(self.cycle / self.failure_state))

            self.state = self.X[self.cycle]
            self._record_decision()
            self.done = True

        # Gym's step contract uses a dict for the auxiliary info.
        info = {'cycle': self.cycle}
        return self.state, self.reward, self.done, info

    def reset(self):
        """Move on to the next engine unit and return its first observation."""
        self.X = self.get_next_engine_data()
        self.cycle = 0
        self.state = self.X[self.cycle]
        self.done = False
        return self.state

In [9]:
# Training environment plus the sizes derived from its spaces.
env = CustomEnv()
state_dim = env.observation_space.shape   # shape reported by the observation space
n_actions = env.action_space.n            # number of discrete actions

In [10]:
# Q-learning settings
gamma = 0.5                 # discount applied to the bootstrapped next-state value
batch = 3                   # transitions sampled per gradient update
num_episodes = 10 #int((df_A['unit'].max() * 80 / 100))
replay = deque(maxlen=100)  # bounded replay buffer of transitions

# Exploration schedule
initial_epsilon = 0.029
epsilon_decay = 0.975
epsilon = initial_epsilon

# Loop state carried into the training cell
state = env.state
epoch = 0

In [11]:
from keras.layers import SimpleRNN
from keras.layers import Dropout

# Recurrent Q-network: two stacked SimpleRNN layers with dropout in between,
# followed by a dense head with one output per action.
network = keras.Sequential()
network.add(keras.Input(shape=(X.shape[1], 24)))
network.add(keras.layers.SimpleRNN(50, activation='relu', return_sequences=True))
network.add(keras.layers.Dropout(0.2))
network.add(keras.layers.SimpleRNN(50))
network.add(keras.layers.Dense(n_actions))

network.summary()

# Optimizer used by the manual gradient step in the training cell.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.0001)

In [12]:
def get_action(state,epsilon):
    """Pick an action epsilon-greedily using the module-level ``network``.

    With probability ``epsilon`` a uniformly random action index in
    ``range(n_actions)`` is returned; otherwise the index of the largest
    network output for ``state``.

    Args:
        state: A single observation; it is wrapped into a batch of one before
            being passed to the network.
        epsilon: Exploration probability.

    Returns:
        The selected action index.
    """
    if np.random.rand() < epsilon:
        # Exploration: no forward pass is needed for a random action.
        return np.random.choice(n_actions, 1)[0]

    value_fn = network.predict(np.array([state]),verbose=0)[0]
    return np.argmax(value_fn)

In [13]:
total_reward=0
session_rewards=[]   # total reward of each finished episode
epochs=[]            # index of each gradient update
losses=[]            # loss of each gradient update, stored as Python floats
eps =[]              # epsilon value after each decay step

# One episode per training engine (the first 80% of unit ids).
for episode in range(int((df_A['unit'].max() * 80 / 100))):#num_episodes):
    # ---- Roll out one episode, storing every transition in the replay buffer.
    while True:
        action = get_action(state,epsilon=epsilon)

        next_state, reward, done, _ = env.step(action)

        done = 1 if done else 0

        replay.append((state,action,reward,next_state,done))

        total_reward += reward
        state = next_state

        if(action==1):
            print("Total Reward: ", total_reward,"\n")

        if done:
            session_rewards.append(total_reward)
            total_reward = 0
            # reset() switches the environment to the next engine unit.
            state = env.reset()
            break

    # ---- One gradient update per episode once the buffer holds enough samples.
    if len(replay)>batch:
        batch_ = random.sample(replay,batch)
        states = tf.convert_to_tensor([x[0] for x in batch_])
        actions = tf.convert_to_tensor([x[1] for x in batch_])
        rewards = tf.cast(tf.convert_to_tensor([x[2] for x in batch_]), tf.float64)
        next_states = tf.convert_to_tensor([x[3] for x in batch_])
        # Separate name so the episode-level `done` flag is not rebound to a tensor.
        dones = tf.cast(tf.convert_to_tensor([x[4] for x in batch_]), tf.bool)

        # The bootstrap target is treated as a constant, so it is computed
        # outside the tape: reward for terminal transitions, otherwise
        # reward + gamma * max_a Q(next_state, a).
        predicted_next_qvalues = network(next_states)
        next_state_values = tf.cast(tf.reduce_max(predicted_next_qvalues,axis=1), tf.float64)
        target_qvalues_for_actions = tf.where(dones, rewards, rewards + gamma* next_state_values)

        with tf.GradientTape() as tape:
            # Q-value of the action that was actually taken in each sampled state.
            predicted_qvalues = network(states)
            predicted_qvalues_for_actions = tf.reduce_sum(predicted_qvalues * tf.one_hot(actions, n_actions), axis=1)
            predicted_qvalues_for_actions = tf.cast(predicted_qvalues_for_actions, tf.float64)

            loss = (tf.stop_gradient(target_qvalues_for_actions) - predicted_qvalues_for_actions) ** 2
            loss = tf.reduce_mean(loss)

        # Gradient computation and bookkeeping happen after the tape context
        # has closed, so none of it is recorded on the tape.
        grads = tape.gradient(loss, network.trainable_variables)
        optimizer.apply_gradients(zip(grads, network.trainable_variables))

        epochs.append(epoch)
        losses.append(float(loss))

        print('Epoch {}, loss {}, epsilon {} '.format(epoch,loss,epsilon))

        # Checkpoint after every update so an interrupted run still leaves a model on disk.
        network.save('rnn.keras')

        # Decay epsilon every 20 updates; restart the schedule once it drops below the floor.
        if epoch > 0 and epoch%20==0:
            epsilon*=epsilon_decay
            eps.append(epsilon)
        epoch+=1
        if epsilon < 0.016: #0.024
            epsilon=initial_epsilon

|replace|: 0
Total Reward:  -100 

********|engine unit|********: 2
|replace|: 0
Total Reward:  -100 

********|engine unit|********: 3
|replace|: 3
Total Reward:  -49.26829268292683 

********|engine unit|********: 4
Epoch 0, loss 819.656663435263, epsilon 0.029 
|replace|: 0
Total Reward:  -100 

********|engine unit|********: 5
Epoch 1, loss 7521.530877829903, epsilon 0.029 
|replace|: 0
Total Reward:  -100 

********|engine unit|********: 6
Epoch 2, loss 4148.818363771338, epsilon 0.029 
|replace|: 0
Total Reward:  -100 

********|engine unit|********: 7
Epoch 3, loss 6696.459495004999, epsilon 0.029 
|replace|: 0
Total Reward:  -100 

********|engine unit|********: 8
Epoch 4, loss 6672.436215690156, epsilon 0.029 
|replace|: 0
Total Reward:  -100 

********|engine unit|********: 9
Epoch 5, loss 6701.977713618718, epsilon 0.029 
|replace|: 4
Total Reward:  -48.98989898989899 

********|engine unit|********: 10
Epoch 6, loss 7512.081558391382, epsilon 0.029 
|replace|: 0
Total Rewar

|replace|: 109
Total Reward:  -20.21857923497268 

********|engine unit|********: 63
Epoch 59, loss 0.005957250278226414, epsilon 0.027568125000000002 
|replace|: 112
Total Reward:  -16.467065868263475 

********|engine unit|********: 64
Epoch 60, loss 0.03957307527397284, epsilon 0.027568125000000002 
|replace|: 81
Total Reward:  -35.58718861209964 

********|engine unit|********: 65
Epoch 61, loss 0.048489504530787576, epsilon 0.026878921875000003 
|replace|: 123
Total Reward:  -20.0 

********|engine unit|********: 66
Epoch 62, loss 0.03261864582038666, epsilon 0.026878921875000003 
|replace|: 5
Total Reward:  -48.5207100591716 

********|engine unit|********: 67
Epoch 63, loss 0.0007624594015296146, epsilon 0.026878921875000003 
|cycle reached failure state|: 144 reward: -100 

********|engine unit|********: 68
Epoch 64, loss 3368.8552566626427, epsilon 0.026878921875000003 
|cycle reached failure state|: 184 reward: -100 

********|engine unit|********: 69
Epoch 65, loss 0.0459324

|cycle reached failure state|: 176 reward: -100 

********|engine unit|********: 118
Epoch 114, loss 0.017803457478658174, epsilon 0.02555177510742188 
|replace|: 178
Total Reward:  -24.05247813411079 

********|engine unit|********: 119
Epoch 115, loss 0.014648599054449193, epsilon 0.02555177510742188 
|replace|: 16
Total Reward:  -45.3757225433526 

********|engine unit|********: 120
Epoch 116, loss 0.006421858225046129, epsilon 0.02555177510742188 
|replace|: 37
Total Reward:  -35.546875 

********|engine unit|********: 121
Epoch 117, loss 0.010599396238887284, epsilon 0.02555177510742188 
|replace|: 30
Total Reward:  -42.822966507177036 

********|engine unit|********: 122
Epoch 118, loss 599.6795669250369, epsilon 0.02555177510742188 
|cycle reached failure state|: 184 reward: -100 

********|engine unit|********: 123
Epoch 119, loss 0.03963327684955125, epsilon 0.02555177510742188 
|replace|: 11
Total Reward:  -47.55555555555556 

********|engine unit|********: 124
Epoch 120, los

|replace|: 30
Total Reward:  -41.32947976878613 

********|engine unit|********: 173
Epoch 169, loss 0.06218030949272898, epsilon 0.0236829023062056 
|replace|: 51
Total Reward:  -32.99999999999999 

********|engine unit|********: 174
Epoch 170, loss 0.04544091285492263, epsilon 0.0236829023062056 
|replace|: 97
Total Reward:  -24.739583333333332 

********|engine unit|********: 175
Epoch 171, loss 0.018254098827099347, epsilon 0.0236829023062056 
|replace|: 82
Total Reward:  -29.601990049751247 

********|engine unit|********: 176
Epoch 172, loss 0.015788980084190062, epsilon 0.0236829023062056 
|replace|: 153
Total Reward:  -3.0674846625766863 

********|engine unit|********: 177
Epoch 173, loss 0.005149878861844409, epsilon 0.0236829023062056 
|replace|: 93
Total Reward:  -24.166666666666664 

********|engine unit|********: 178
Epoch 174, loss 0.03928774603714703, epsilon 0.0236829023062056 
|replace|: 95
Total Reward:  -17.68707482993197 

********|engine unit|********: 179
Epoch 1

In [1]:
# Loss recorded at each gradient update of the training cell.
fig, ax = plt.subplots()
ax.plot(range(len(losses)), losses, label='Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Loss vs. Epoch')
ax.legend()
plt.show()

NameError: name 'plt' is not defined

In [2]:
# Total reward collected in each training episode.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(session_rewards)
ax.set_xlabel('Episode')
ax.set_ylabel('Total Reward')
ax.set_title('Total Reward per Episode')
plt.show()

NameError: name 'plt' is not defined

# Build the tabular view under its own name. `policy` must stay a dict because
# CustomEnv.step keeps indexing it by engine unit, and a later cell converts
# the dict itself.
policy_df = pd.DataFrame.from_dict(policy).T

policy_df

# Episode totals that are greater than -2.
result = list(filter(lambda item: item > -2, session_rewards))
result

# Reload the checkpoint written by the training cell.
value_network1 = tf.keras.models.load_model('rnn.keras')

In [None]:
# CustomEnv reads the module-level `engine_unit` on construction, so point it
# at the first engine after the 80% training split before building the
# evaluation environment.
engine_unit = int((df_A['unit'].max() * 80 / 100) + 1)
env = CustomEnv(is_training=False)
total_returns_test=[]

ttotal_reward=0
tsession_rewards=[]

# Small exploration rate for evaluation, kept under its own name so the
# training `epsilon` is not overwritten.
test_epsilon = 0.001

# Start from the evaluation environment's own first observation; the `state`
# variable left over from training belongs to a different engine.
state = env.state

# One episode per held-out engine (20% of the unit ids).
for episode in range(int(df_A['unit'].max() * 20 / 100)):#num_episodes):
    while True:
        action = get_action(state, epsilon=test_epsilon)

        next_state, reward, done, _ = env.step(action)

        ttotal_reward += reward
        state = next_state

        if done:
            tsession_rewards.append(ttotal_reward)
            print("Total Reward: ", ttotal_reward,"\n")
            ttotal_reward = 0
            # reset() switches the environment to the next held-out engine.
            state = env.reset()
            break

In [None]:
# Total reward collected in each evaluation episode.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(tsession_rewards)
ax.set_xlabel('Episode')
ax.set_ylabel('Total Reward')
ax.set_title('Total Reward per Episode')
plt.show()

In [None]:
# Turn the per-unit records into tables (one row per unit) and add the number
# of cycles left between the recorded decision and the failure cycle.
# The conversion is applied only while the object is still a dict: converting
# and transposing an existing DataFrame would flip it and break the column
# lookups below, so the guard also makes this cell safe to re-run.
if isinstance(policy, dict):
    policy = pd.DataFrame.from_dict(policy).T
policy['remaining_cycles'] = policy['failure_state'] - policy['replace_state']
if isinstance(policy_test, dict):
    policy_test = pd.DataFrame.from_dict(policy_test).T
policy_test['remaining_cycles'] = policy_test['failure_state'] - policy_test['replace_state']

In [None]:
# Show up to the first 60 rows of the evaluation policy table.
policy_test.head(60)

In [None]:
import matplotlib.pyplot as plt

# Training policy: failure cycle and recorded replace cycle for each unit.
fig, ax = plt.subplots(figsize=(20, 4))
ax.plot(policy['failure_state'], marker='o', label='Failure State')
ax.plot(policy['replace_state'], marker='o', label='Replace State')
ax.set_xlabel('unit')
ax.set_xticks(range(0, 211, 5))
ax.set_ylabel('cycle')
ax.set_title('unit vs. cycle')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Evaluation policy: failure cycle and recorded replace cycle for each unit.
fig, ax = plt.subplots(figsize=(20, 4))
ax.plot(policy_test['failure_state'], marker='o', label='Failure State')
ax.plot(policy_test['replace_state'], marker='o', label='Replace State')
ax.set_xlabel('unit')
ax.set_xticks(range(210, 261, 2))
ax.set_ylabel('cycle')
ax.set_title('unit vs. cycle')
ax.legend()
ax.grid(True)
plt.show()
