##  Requirements 

In [2]:
import torch
import numpy as np
import gymnasium as gym
import argparse
import math
from random import seed
import random
from datetime import datetime
import pickle
import sklearn
import numpy
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, CategoricalNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import accuracy_score
from itertools import product
from sklearn.utils import resample
from sklearn.model_selection import KFold, RepeatedKFold
from sklearn.metrics import f1_score
from sklearn import impute
import statistics
from scipy import stats
from copy import deepcopy
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from math import ceil
import copy
import sys
import os
from sklearn.metrics import jaccard_score
import time
import multiprocessing
from pymoo.algorithms.moo.nsga2 import calc_crowding_distance
import subprocess
import logging
from csv import reader
import argparse
from stable_baselines3 import DQN


In [3]:
# Jupyter front-ends append kernel-connection arguments to sys.argv.
# Some pass them as a single `--f=<path>` token, which is stripped here.
if any(arg.startswith('--f=') for arg in sys.argv):
    sys.argv = [arg for arg in sys.argv if not arg.startswith('--f=')]

parser = argparse.ArgumentParser()
parser.add_argument("--epsilon", type=int, default=30)
parser.add_argument("--abstract-level", type=float, default=1)
# parse_known_args() instead of parse_args(): the default ipykernel launcher
# passes the connection file as two tokens (`-f <path>`), which the filter
# above does not remove and which would make parse_args() exit with
# "unrecognized arguments". Unknown arguments are simply ignored.
args, _unknown_args = parser.parse_known_args()

print(f'epsilon: {args.epsilon}')
DISPLAY_SCREEN = False
# Seed both RNGs used by the search operators below.
random.seed(42)
np.random.seed(42)
# Abstraction level `d` used when discretising Q-values into abstract states.
DD = args.abstract_level

epsilon: 30


## RL

In [4]:
class StoreAndTerminateWrapper(gym.Wrapper):
    '''
    Gym wrapper that logs every (observation, action) pair, stores a deep copy
    of the wrapped environment taken at the first step of each episode, and
    truncates an episode once ``max_steps`` steps have been taken.

    :param env: (gym.Env) Gym environment that will be wrapped
    :param max_steps: (int) Max number of steps per episode (default: 200)
    '''
    def __init__(self, env, max_steps=200):
        super(StoreAndTerminateWrapper,self).__init__(env)
        self.max_steps = max_steps
        self.current_step = 0
        self.env=env
        # Flat log shared by all episodes: (prev_obs, action) tuples, with a
        # ('done', total_reward) marker appended when an episode ends.
        self.mem = []
        self.TotalReward = 0.0
        self.first_state = None
        self.first_obs = 0
        self.prev_obs = None
        # One deep-copied environment per episode, captured at its first step.
        self.states_list = []
        # Exposes `mem` and `states_list`; note this is NOT the dict returned by step().
        self.info = {}

    def reset(self, *args, **kwargs):
        """Reset the wrapped environment and the per-episode counters.

        :return: the ``(obs, info)`` pair returned by the wrapped environment.
        """
        self.current_step = 0
        obs, info = self.env.reset(*args, **kwargs)
        self.TotalReward = 0.0
        self.first_obs = obs
        return obs,info

    def step(self, action):
        """Advance one step, log it, and apply the wrapper's truncation rules.

        :param action: action forwarded to the wrapped environment.
        :return: ``(obs, reward, terminated, truncated, info)`` where ``info`` is
            the wrapped environment's dict; the log is available via ``self.info``.
        """
        if self.current_step == 0:
            # First step of the episode: remember where it started so the
            # episode can later be replayed through set_state().
            self.prev_obs = self.first_obs
            self.first_state = deepcopy(self.env)
            self.states_list.append(self.first_state)
        self.current_step += 1
        obs, reward, terminated, truncated, info = self.env.step(action)
        self.TotalReward += reward
        self.mem.append(tuple((self.prev_obs,action)))
        self.prev_obs = obs
        if self.current_step >= self.max_steps:
            truncated = True
        if obs[0] <= -1.2:
            # First observation component at or below -1.2: end the episode and
            # force the recorded episode total to -200.
            truncated = True
            reward = -201 - self.TotalReward
            self.TotalReward = -200
        if terminated or truncated:
            self.mem.append(tuple(('done',self.TotalReward)))
        self.info['mem'] = self.mem
        self.info['state'] = self.states_list
        return obs, reward, terminated, truncated, info

    def set_state(self, state):
        """Replace the wrapped environment with a deep copy of ``state``.

        :param state: an environment object such as those stored in ``states_list``.
        :return: the restored observation, read from ``env.unwrapped.state``.
        """
        self.env = deepcopy(state)
        obs = np.array(list(self.env.unwrapped.state))
        self.current_step = 0
        self.TotalReward = 0.0
        self.first_obs = obs
        return obs

## RL function

In [5]:
def proportional_sampling_whitout_replacement(index, size):
    """Draw ``size`` distinct entries of ``index`` with probability proportional to their value.

    :param index: sequence of numeric values; each value is both the item that
        can be drawn and its (unnormalised) weight.
    :param size: number of items to draw without replacement.
    :return: NumPy array holding the drawn items.
    """
    total = sum(np.array(index))
    weights = []
    for value in index:
        weights.append(value / total)
    return np.random.choice(index, size=size, replace=False, p=weights)

def population_sample(episodes , ind,  pop_size , threshold, functional_fault_size, reward_fault_size):
  """
  Sample faulty episodes from the agent's training memory.

  ``episodes`` is the flat memory of the agent: every step is a separate entry
  and each episode is closed by a ``('done', reward)`` marker. The function
  returns the indices of the markers of the selected episodes; use
  ``episode_extract`` to turn those indices into the actual episodes, and add
  random test episodes for the remaining ``r_size`` slots.

  Up to ``functional_fault_size`` functional faults and ``reward_fault_size``
  reward faults are selected. A reward fault is an episode whose reward is
  below ``threshold``; a functional fault is an episode whose last position is
  within ``epsilon`` of the lower position bound ``mtc_wrapped.low[0]``.

  :param episodes: flat list of ``(state, action)`` steps and ``('done', reward)`` markers.
  :param ind: ignored (kept for backward compatibility; recomputed internally).
  :param pop_size: desired total population size.
  :param threshold: reward below which an episode is a reward fault.
  :param functional_fault_size: desired number of functional faults.
  :param reward_fault_size: desired number of reward faults.
  :return: ``(population, r_size)`` - selected marker indices and the number of
      episodes still missing to reach ``pop_size``.
  """
  epsilon = 0.1
  functional_fault = []
  reward_fault = []
  # Locate the episode markers explicitly. Building np.array(episodes) from
  # ragged (array, action) / ('done', reward) tuples is rejected by recent NumPy.
  index = [i for i, step in enumerate(episodes)
           if isinstance(step[0], str) and step[0] == 'done']
  print(len(index),'episodes from training')
  population=[]
  for i in index:
    _,r = episodes[i]
    # Compare the raw position with the lower bound (no abs()): with a negative
    # bound, abs(position) < bound + epsilon can never hold. This matches
    # is_functional_fault_last_state. i > 0 skips a marker with no preceding step.
    if i > 0 and episodes[i-1][0][0]<(mtc_wrapped.low[0]+epsilon):
      functional_fault.append(i)
      print('function fault')
    if r<threshold:
      reward_fault.append(i)
      print('reward fault')
  if len(functional_fault)<functional_fault_size:
    print('functional faults size is' ,len(functional_fault),' and its less than desired number' )
    population += functional_fault
    print('sampling more random episodes instead ...!')
  if len(functional_fault)==functional_fault_size:
    population += functional_fault
  if len(functional_fault)>functional_fault_size:
    sam1=proportional_sampling_whitout_replacement(functional_fault,functional_fault_size)
    print(population)
    print("ff",len(functional_fault))
    # list(...) keeps the population a plain list, as in the reward-fault branch.
    population += list(sam1)
  if len(reward_fault)<reward_fault_size:
    print('reward faults size is' ,len(reward_fault),' and its less than desired number' )
    population += reward_fault
    print('sampling more random episodes instead ...!')
  if len(reward_fault)==reward_fault_size:
    population += reward_fault
  if len(reward_fault)>reward_fault_size:
    sam2 = proportional_sampling_whitout_replacement(reward_fault,reward_fault_size)
    population += list(sam2)
  # Number of slots the caller still has to fill (e.g. with random test episodes).
  r_size= pop_size-len(population)
  print("RF",len(reward_fault))
  return population , r_size

def episode_extract(sampled_index, episodes):
  """
  Cut complete episodes out of the flat step memory.

  :param sampled_index: indices (into ``episodes``) of the ``('done', reward)``
      markers of the episodes to extract.
  :param episodes: flat list of ``(state, action)`` steps and ``('done', reward)`` markers.
  :return: list of episodes; each is the slice from the step after the previous
      marker (or from index 0 when there is no previous marker) up to and
      including the episode's own marker.
  """
  def _is_done(step):
    # The isinstance guard matters: comparing a NumPy observation with the
    # string 'done' is elementwise and cannot be used as a truth value.
    return isinstance(step[0], str) and step[0] == 'done'

  epis = []
  for i in sampled_index:
    start = i
    # Walk back to the step right after the previous marker. Stopping at 0
    # keeps the first step of the very first episode, which has no marker
    # in front of it.
    while start > 0 and not _is_done(episodes[start-1]):
      start -= 1
    slice1 = episodes[start:(i+1)]
    assert len(slice1)>0, 'Attempt to return Empty episode'
    epis.append(slice1)
  return epis

def fitness_reward(episode):
    """
    Length-based reward of an episode.

    The last entry of an episode is the ('done', reward) marker, so the number
    of actual steps is the episode length minus one.

    :param episode: sequence of steps closed by the ('done', reward) marker.
    :return: number of entries in ``episode`` minus one.
    """
    step_count = len(episode) - 1
    return step_count

def action_probability(model, state):
    """
    Turn the Q-values the model assigns to ``state`` into sum-normalised scores.

    :param model: object exposing ``device`` and a callable ``q_net``.
    :param state: a single observation; it is batched with ``unsqueeze(0)``.
    :return: the Q-values of the first batch entry divided by their sum
        (row-wise when that entry is itself two-dimensional).
    """
    batch = torch.as_tensor(state).unsqueeze(0).to(model.device)
    q_value = model.q_net(batch).cpu().detach().numpy()[0]
    if q_value.ndim != 1:
        row_totals = q_value.sum(axis=1)
        return (q_value.T / row_totals).T
    return q_value / q_value.sum()

def fitness_reward_general(episode):
  """
  Reward stored in the episode's closing marker.

  The last entry of an episode is the ('done', reward) marker; this function
  returns the second element of that last entry.

  :param episode: sequence of steps closed by the ('done', reward) marker.
  :return: ``episode[-1][1]``.
  """
  closing_marker = episode[-1]
  return closing_marker[1]

def fitness_confidence(episode, model, mode):
  """
  Average confidence of the agent over an episode.

  For every step except the final marker, the confidence is either the ratio
  or the difference between the highest and the second-highest value returned
  by ``action_probability``. The sum is divided by the number of steps.

  :param `episode`: steps of the episode, closed by a ('done', reward) marker
  :param `model`: the RL agent
  :param `mode`: 'r' for the ratio, 'm' for the difference
  :return: the average confidence; None (after printing a warning) when the
      loop body never runs
  """
  cl = 0.0
  final_index = len(episode) - 1
  for step_no, step in enumerate(episode):
    if step_no != final_index:
      prob = action_probability(model, step[0])
      print(prob)
      best = prob.argmax()
      first = prob[best]
      # Blank out the winner in place so the next argmax finds the runner-up.
      prob[best] = 0.0
      runner_up = prob.argmax()
      second = prob[runner_up]
      if mode == 'r':
        # TODO: normalise this ratio to avoid very large values.
        cl += (first/second)
      if mode == 'm':
        # TODO: (first - second) / (first + second) would be a better measure.
        cl += (first - second)
      continue
    if step[0] == 'done':
      return (cl/final_index)
    else:
      assert False, "last state is not done , reward"
  print("WARNING nothing returned", episode )

def fitness_reward_probability(ml, binary_episode):
    """
    Third fitness function: guides the search toward episodes with a high
    probability of a reward fault.

    Because the search minimises its objectives, the value returned is the
    first entry of the classifier's probability row for the episode rather
    than the fault probability itself.

    :param `ml`: classifier exposing ``predict_proba``
    :param `binary episode`: episode encoded over the abstract states
    :return: ``ml.predict_proba(binary_episode)[0][0]``
    """
    probability_rows = ml.predict_proba(binary_episode)
    first_row = probability_rows[0]
    return first_row[0]

def fitness_functional_probability(ml, binary_episode):
    """
    Fitness based on the functional-fault classifier.

    :param ml: classifier exposing ``predict_proba``
    :param binary_episode: episode encoded over the abstract states
    :return: ``ml.predict_proba(binary_episode)[0][0]``
    """
    probability_rows = ml.predict_proba(binary_episode)
    first_row = probability_rows[0]
    return first_row[0]

def abstract_state(model, state1, d):
    """
    Map a concrete state to its abstract state.

    :param model: object exposing ``device`` and a callable ``q_net``.
    :param state1: a concrete observation, or the string 'done'.
    :param d: abstraction level; each Q-value is divided by ``d`` and rounded up.
    :return: 'end' for the 'done' marker, otherwise a tuple with
        ``ceil(q / d)`` for every Q-value of the state.
    """
    if type(state1) is str and state1 == 'done':
        return 'end'
    batched = torch.as_tensor(state1).unsqueeze(0).to(model.device)
    # Gradients are not needed for a plain forward pass.
    with torch.no_grad():
        # Q-values of the first (and only) sample in the batch.
        first_row = model.q_net(batched).cpu().numpy()[0]
    return tuple(ceil(q_value / d) for q_value in first_row)

def report(model2, x_train, y_train, x_test, y_test):
    """
    Print an evaluation report for a fitted binary classifier and plot its
    confusion matrix and ROC curve.

    :param model2: fitted classifier exposing ``score``, ``predict`` and ``classes_``.
    :param x_train: training features.
    :param y_train: training labels (classes 0 and 1).
    :param x_test: test features.
    :param y_test: test labels (classes 0 and 1).
    :return: None; everything is printed or plotted.
    """
    # Interactive mode so the figures show without blocking; switched off at the end.
    plt.ion()
    print("********************** reporting the result of the model **************************")
    print('The score for train data is {0}'.format(model2.score(x_train, y_train)))
    print('The score for test data is {0}'.format(model2.score(x_test, y_test)))

    predictions_train = model2.predict(x_train)
    predictions_test = model2.predict(x_test)

    print("\n\n--------------------------------------recall---------------------------------")

    print(
        'the test recall for the class yes is {0}'.format(metrics.recall_score(y_test, predictions_test, pos_label=1)))
    print('the test recall for the class no is {0}'.format(metrics.recall_score(y_test, predictions_test, pos_label=0)))

    print('the training recall for the class yes is {0}'.format(
        metrics.recall_score(y_train, predictions_train, pos_label=1)))
    print('the training recall for the class no is {0}'.format(
        metrics.recall_score(y_train, predictions_train, pos_label=0)))

    print("\n\n--------------------------------------precision------------------------------")

    print('the test precision for the class yes is {0}'.format(
        metrics.precision_score(y_test, predictions_test, pos_label=1)))
    print('the test precision for the class no is {0}'.format(
        metrics.precision_score(y_test, predictions_test, pos_label=0)))

    print('the training precision for the class yes is {0}'.format(
        metrics.precision_score(y_train, predictions_train, pos_label=1)))
    print('the training precision for the class no is {0}'.format(
        metrics.precision_score(y_train, predictions_train, pos_label=0)))

    print("\n\n")
    print(classification_report(y_test, predictions_test, target_names=['NO ', 'yes']))

    # Unpacking four values requires a 2x2 confusion matrix (both classes present).
    tn, fp, fn, tp = confusion_matrix(y_test, predictions_test).ravel()
    specificity = tn / (tn + fp)
    print("\n\nspecifity :", specificity)
    print("\n\n--------------------------------------confusion----------------------------")
    CM = metrics.confusion_matrix(y_test, predictions_test)
    print("The confusion Matrix:")
    print(CM)
    print('the accuracy score in {0}\n\n'.format(accuracy_score(y_test, predictions_test)))
    print("********************** plotting the confusion matrix & ROC curve **************************")
    ConfusionMatrixDisplay(CM, display_labels=model2.classes_).plot()
    # NOTE(review): the ROC curve is built from hard predictions
    # (model2.predict), not from scores/probabilities - confirm this is intended.
    fpr, tpr, thresholds = metrics.roc_curve(y_test, predictions_test)
    roc_auc = metrics.auc(fpr, tpr)
    display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='example estimator')
    display.plot()
    # Keep the figures on screen for three seconds before leaving interactive mode.
    plt.pause(3)
    plt.ioff()

def fix_testing(testing_episodes, testing_states, Env2):
    """
    Split the flat test memory into episodes and align it with the stored start states.

    :param testing_episodes: flat list of (state, action) steps and
        ('done', reward) markers; a marker at index 0 is ignored.
    :param testing_states: list of start states; it is modified in place
        (a leading and/or trailing entry may be deleted).
    :param Env2: environment exposing ``set_state(state)`` which returns the
        observation of that state.
    :return: ``(episodes_set, testing_states)`` with one start state per episode.
    """
    episodes_set = []
    current = []
    for position, step in enumerate(testing_episodes):
        is_marker = type(step[0]) is str and step[0] == 'done'
        if is_marker and position == 0:
            continue
        current.append(step)
        if is_marker:
            episodes_set.append(current)
            current = []

    def _first_start_matches():
        # Does the first stored start state reproduce the first logged observation?
        first_obs = episodes_set[0][0][0]
        return (first_obs == np.array(Env2.set_state(testing_states[0]), dtype="float32")).all()

    if not _first_start_matches():
        del testing_states[0]
    if not _first_start_matches():
        assert False, 'problem in starting states'
    if len(episodes_set) != len(testing_states):
        del testing_states[-1]
    if len(episodes_set) != len(testing_states):
        assert False, 'problem in data prepration'
    return episodes_set, testing_states

def is_functional_fault(episode):
    """
    Tell whether an episode ended in a functional fault.

    A functional fault is an episode whose last position is within ``epsilon``
    of the lower position bound ``mtc_wrapped.low[0]`` and whose reward is -200.

    :param episode: sequence of (state, action) steps, normally closed by a
        ('done', reward) marker.
    :return: True for a functional fault, False otherwise (including when the
        last state cannot be interpreted).
    """
    epsilon = 0.1
    env = mtc_wrapped

    final_step = episode[-1]
    # isinstance guard: a NumPy observation cannot be compared with 'done'
    # and used as a truth value.
    if isinstance(final_step[0], str) and final_step[0] == 'done':
        # The real last state is the entry before the marker.
        if len(episode) < 2:
            return False
        last_state = episode[-2][0]
        reward = final_step[1]  # the reward is stored in the 'done' marker
    else:
        last_state = final_step[0]
        reward = final_step[1] if len(final_step) > 1 else 0

    # The state is normally an array-like [position, ...]; the previous check
    # only accepted scalars and therefore returned False for every real
    # episode. Use the first component, as is_functional_fault_last_state does.
    if isinstance(last_state, (int, float, np.number)):
        position = last_state
    elif isinstance(last_state, str):
        return False
    else:
        try:
            position = last_state[0]
        except (TypeError, IndexError, KeyError):
            return False

    # Functional fault: position too low AND the reward is -200.
    position_too_low = position < (env.low[0] + epsilon)
    has_low_reward = reward == -200
    return bool(position_too_low and has_low_reward)

def is_reward_fault(episode):
    """
    Tell whether an episode is a reward fault.

    :param episode: sequence of steps, normally closed by a ('done', reward) marker.
    :return: truthy when the reward is below -180 and the episode has more
        than 200 entries.
    """
    RF_threshold = -180

    # Read the reward from the last entry; a last entry with a single element
    # that is not the 'done' marker counts as reward 0.
    final_step = episode[-1]
    if final_step[0] == 'done' or len(final_step) > 1:
        reward = final_step[1]
    else:
        reward = 0

    # Reward fault: reward under the threshold and episode longer than 200 entries.
    return reward < RF_threshold and len(episode) > 200

def is_functional_fault_last_state(last_step,done_step):
  """
  Functional-fault check on the last two entries of an episode.

  :param last_step: the last (state, action) step; ``last_step[0][0]`` is the position.
  :param done_step: the ('done', reward) marker closing the episode.
  :return: True when the position is within 0.1 of ``mtc_wrapped.low[0]``
      and the reward equals -200, False otherwise.
  """
  epsilon = 0.1
  env = mtc_wrapped
  assert done_step[0]=='done', "Wrong input!"
  reward = done_step[1]
  position = last_step[0][0]
  return True if (position<(env.low[0]+epsilon) and reward == -200) else False
  
def is_reward_fault_last_state(last_step,done_step):
  """
  Reward-fault check on the last two entries of an episode.

  :param last_step: the last (state, action) step of the episode.
  :param done_step: the ('done', reward) marker closing the episode.
  :return: True when the reward is below -180 and the episode is not a
      functional fault, False otherwise.
  """
  RF_threshold = -180
  assert done_step[0]=='done', "Wrong input!"
  reward = done_step[1]
  # Not used afterwards; the lookup still fails fast on a malformed last_step.
  _position = last_step[0][0]
  if not reward<RF_threshold:
    return False
  return not is_functional_fault_last_state(last_step,done_step)

## ML

In [6]:
def Abstract_classes(ep,abstraction_d,model):
  """
  Collect the distinct abstract states that occur in a set of episodes.

  :param ep: iterable of episodes, each a sequence of (state, action) pairs.
  :param abstraction_d: abstraction level passed to ``abstract_state``.
  :param model: RL model passed to ``abstract_state``.
  :return: ``(unique1, uni1)`` - the distinct abstract states as a list and as
      a NumPy array. Also prints the abstract/concrete counts and their ratio.
  """
  collected = []
  for episode in ep:
    for state, action in episode:
      label = abstract_state(model, state, abstraction_d)
      if label != 'end':
        collected.append(label)
  distinct = set(collected)
  unique1 = list(distinct)
  uni1 = np.array(unique1)
  concrete_count = len(collected)
  abstract_count = len(distinct)
  print("abstract states:",abstract_count)
  print("Concrete states",concrete_count)
  print("ratio",abstract_count/concrete_count)
  return unique1,uni1
# TODO: the function below still needs to be modified.
def ML_first_representation_func_based(Abs_d,functional_func,reward_func,model,input_episodes,unique1):
  """
  Encode episodes as presence/absence vectors over the abstract states and label them.

  TODO: fix epsilon and threshold.

  :param Abs_d: abstraction level passed to ``abstract_state``.
  :param functional_func: callable(episode) -> truthy for a functional fault.
  :param reward_func: callable(episode) -> truthy for a reward fault.
  :param model: RL model passed to ``abstract_state``.
  :param input_episodes: list of episodes, each closed by a single 'done' marker.
  :param unique1: list of abstract states; its order defines the feature columns.
  :return: ``(features, reward_labels, functional_labels)``.
  """
  feature_rows = []
  reward_labels = []
  functional_labels = []
  episode_total = len(input_episodes)
  for episode_no, episode in enumerate(input_episodes):
    presence = np.zeros(len(unique1))
    seen_end = False
    for state, action in episode:
      abstract = abstract_state(model, state, Abs_d)
      if abstract == 'end':
        assert not seen_end, f'Episode data problem, two terminations in one episode. Episode number{episode_no}'
        seen_end = True
        # Labels are produced once per episode, when its marker is reached.
        functional_labels.append(1 if functional_func(episode) else 0)
        reward_labels.append(1 if reward_func(episode) else 0)
      else:
        # Presence encoding; use `+= 1` instead for a frequency encoding.
        presence[unique1.index(abstract)] = 1
        assert len(reward_labels)<episode_total, "assert"
    feature_rows.append(presence)

  return feature_rows, reward_labels, functional_labels



## Genetic

In [7]:
def translator(episode,model, d, unique5):
  """
  Encode a concrete episode as a presence/absence vector over the abstract classes.

  :param 'episode': input episode, a sequence of (state, action) pairs
  :param 'model': RL model
  :param 'd': abstraction level
  :param 'unique5': list of abstract classes; its order defines the vector positions
  :return: a one-element list holding the encoded episode (1 where the
           abstract class occurs in the episode, 0 elsewhere)
  """
  record = np.zeros(len(unique5))
  for state, action in episode:
    ab = abstract_state(model,state,d)
    if ab == 'end':
      continue
    # Only mark abstract states that are known. Previously the assignment ran
    # outside this check, so an unknown state re-marked the index of the
    # previous step (or raised NameError on the first step).
    if ab in unique5:
      record[unique5.index(ab)] = 1
  return [record]

def transform(state):
  """
  Return a copy of ``state`` whose first component is scaled by a random
  factor drawn uniformly from [0.95, 1.05).

  :param state: indexable state; only element 0 is perturbed.
  :return: a deep copy of ``state`` with the perturbed first component.
  """
  scale = np.random.uniform(low=0.95, high=1.05)
  mutated = deepcopy(state)
  mutated[0] = state[0] * scale
  return mutated

def mutation_improved(population, model, env, objective_uncovered):
    """
    Select a parent by tournament selection, perturb one of its states and
    re-execute the episode from that point.

    :param 'population': population to select the parent from
    :param 'model': RL model
    :param 'env': RL environment
    :param 'objective_uncovered': uncovered objectives for tournament selection
    :return: mutated candidate (the episode is re-executed from the mutation point)
    To-do:
    move deepcopy to the candidate class method set_info
    """
    parent = tournament_selection(population, 10, objective_uncovered)  # tournament selection
    parent1 = deepcopy(parent.get_candidate_values())
    # randint(3, len - 3) needs len >= 6; the previous `< 3` guard let short
    # parents through to a ValueError inside randint.
    if len(parent1) < 6:
        assert False, "parent in mutation is too short: at least 6 entries are required"
    Mutpoint = random.randint(3, (len(parent1) - 3))
    new_state = transform(parent1[Mutpoint][0])
    # model.predict returns an (action, state) pair; compare the action only.
    # Comparing the whole pair with an int was always unequal, so the message
    # below was printed on every mutation. deterministic=True matches
    # mutation_improved_p and re_execute.
    action, _ = model.predict(new_state, deterministic=True)
    if int(np.asarray(action).reshape(-1)[0]) != int(parent1[Mutpoint][1]):
        print('Mutation lured the agent ... ')
    # Keep the prefix before the mutation point and flag the mutated step with
    # 'Mut' so re_execute lets the agent choose the action there.
    new_parent = parent1[:Mutpoint]
    new_parent.append([new_state, 'Mut'])
    new_cand = Candidate(new_parent)
    new_cand.set_start_state(parent.get_start_state())

    re_executed_epis = re_execute(model, env, new_cand)

    re_executed_cand = Candidate(re_executed_epis)
    re_executed_cand.set_start_state(new_cand.get_start_state())
    re_executed_cand.set_info(deepcopy(parent.get_info()))
    re_executed_cand.set_info(["mutation is done! ", "mutpoint was:", Mutpoint])

    return re_executed_cand
# TODO: the function below still needs to be modified.
def mutation_improved_p(parent, model, env, m_rate):
    """
    Final mutation operator; it receives the parent directly and applies the
    mutation rate internally.

    With probability ``m_rate`` one state of the episode is perturbed and the
    episode is re-executed from that point; otherwise the parent is returned
    unchanged.

    :param 'parent': the individual to mutate
    :param 'model': RL model
    :param 'env': RL environment
    :param 'm_rate': mutation rate; the recommended value is 1/len(parent)
    :return: the mutated individual, or ``parent`` itself when no mutation happens
    To-do:
    move deepcopy into the candidate's set_info
    """
    global MUTATION_NUMBER
    if random.uniform(0, 1) > m_rate:
        return parent
    parent1 = deepcopy(parent.get_candidate_values())

    # Parents with fewer than 7 entries are too short to mutate and are
    # returned as they are.
    if len(parent1) < 7:
        return parent

    # Mutation point drawn from [3, len - 3]; the length check above
    # guarantees that this range is not empty.
    first_allowed = 3
    last_allowed = len(parent1) - 3
    Mutpoint = random.randint(first_allowed, last_allowed)

    # Perturb the state at the mutation point and see what the agent would do there.
    new_state = transform(parent1[Mutpoint][0])
    action = model.predict(new_state, deterministic=True)

    if action[0] != int(parent1[Mutpoint][1]):
        print('Mutation lured the agent ... ')

    # New candidate: the prefix before the mutation point plus the mutated step.
    new_parent = parent1[:Mutpoint]
    new_parent.append([new_state, 'Mut'])
    new_cand = Candidate(new_parent)
    new_cand.set_start_state(parent.get_start_state())

    # Re-execute and recompute the reward stored in the closing marker.
    re_executed_epis = re_execute(model, env, new_cand)
    re_executed_epis[-1] = ('done', find_reward(re_executed_epis))

    # Wrap the re-executed episode in a candidate that shares the start state.
    re_executed_cand = Candidate(re_executed_epis)
    re_executed_cand.set_start_state(new_cand.get_start_state())

    MUTATION_NUMBER += 1
    return re_executed_cand
    
# TODO: the function below still needs to be modified.
def Crossover_improved_v2(population,model,d,objective_uncovered):
  """
  Crossover operator: exchanges the tails of two episodes at states that share
  the same abstract class.

  A parent is chosen by tournament selection and a crossover point is drawn at
  random. Up to 50 randomly picked individuals are then scanned for states
  whose abstract class equals that of the parent's crossover state; if none is
  found, a new parent is selected and the search starts again.

  :param 'population': population the parent and the mate are taken from
  :param 'model': RL model, used to compute abstract states
  :param 'd': abstraction level
  :param 'objective_uncovered': uncovered objectives for tournament selection
  :return: two offspring candidates; the reward in the closing marker of each
   offspring is recomputed with find_reward
  To-do:
  finding the matching episode could be improved by storing a mapping between
  concrete states and abstract states
  """
  found_match = False 
  while not (found_match):
    parent = tournament_selection(population, 10, objective_uncovered)  # tournament selection
    parent1 = deepcopy(parent.get_candidate_values())
    parent1_start_point = deepcopy(parent.get_start_state())
    if len(parent1)<4:
      assert False, 'input of crossover is shorter than expected '
    matches_list = []
    crosspoint = random.randint(1,(len(parent1)-3))
    abs_class = list(abstract_state(model,parent1[crosspoint][0],d))
    for i in range(50):
      indx = random.randint(0, len(population) - 1)
      random_candidate = deepcopy(population[indx])
      random_cand_data = random_candidate.get_candidate_values()
      random_cand_start_point = random_candidate.get_start_state()
      # Collect every position in this individual that has the same abstract class.
      for st_index in range(1,len(random_cand_data)-3):
        random_ab = list(abstract_state(model,random_cand_data[st_index][0],d))
        if random_ab == abs_class:
          matches_list.append(st_index)
          found_match = True
      # Stop at the first individual that offers at least one match; the
      # random_cand_* variables then still refer to that individual.
      if found_match:
        break 
  # Pick one of the matching positions at random.
  index_match_in_matchlist = random.randint(0, len(matches_list) - 1)
  matchpoint = matches_list[index_match_in_matchlist]
  match_candidate =  deepcopy(random_candidate)
  match = deepcopy(random_cand_data)
  match_start = deepcopy(random_cand_start_point)
  # Offspring 1: head of the parent + tail of the mate; starts where the parent started.
  offspring1 = deepcopy(parent1[:crosspoint])
  offspring1 += deepcopy(match[matchpoint:])
  new_reward1  = find_reward(offspring1)
  offspring1[-1] = ('done',new_reward1)
  candid1 = Candidate(offspring1)
  candid1.set_start_state(parent1_start_point)
  # Offspring 2: head of the mate + tail of the parent; starts where the mate started.
  offspring2 = deepcopy(match[:matchpoint])
  offspring2 += deepcopy(parent1[crosspoint:])
  new_reward2  = find_reward(offspring2)
  offspring2[-1] = ('done',new_reward2)
  candid2 = Candidate(offspring2)
  candid2.set_start_state(match_start)
  if len(offspring1)<4:
    print(offspring1)
    assert False, 'created offspring 1 in crossover is shorter than expected '

  if len(offspring2)<4:
    print(offspring2)
    assert False, 'created offspring 2 in crossover is shorter than expected '

  return candid1, candid2

def find_reward(episode):
  """
  Recompute the reward of an episode from its length and its final state.

  :param episode: sequence of steps closed by a ('done', reward) marker.
  :return: -200 when the episode has more than 200 entries or ends in a
      functional fault; otherwise minus the number of steps (length - 1).
  """
  if len(episode)>200:
    return -200
  ended_in_fault = is_functional_fault_last_state(episode[-2],episode[-1])
  if ended_in_fault:
    return -200
  return -(len(episode)-1)
# TODO: the function below still needs to be modified.
def Crossover_improved_v2_random(population, model, d, objective_uncovered):
    """
    Random-parent variant of Crossover_improved_v2.

    The parent is drawn uniformly at random instead of by tournament selection.
    Up to 700 randomly picked individuals are scanned for states whose abstract
    class equals that of the parent's crossover state; if none is found, a new
    parent is drawn and the search starts again.

    :param population: population the parent and the mate are taken from
    :param model: RL model, used to compute abstract states
    :param d: abstraction level
    :param objective_uncovered: unused; kept so both crossover operators share a signature
    :return: two offspring candidates
    """
    found_match = False
    while not found_match:
        # randint is inclusive on both ends: the upper bound must be len - 1,
        # otherwise population[len(population)] raises IndexError.
        parent_idx = random.randint(0, len(population) - 1)
        parent1 = deepcopy(population[parent_idx].get_candidate_values())
        parent1_start_point = deepcopy(population[parent_idx].get_start_state())
        matches_list = []
        crosspoint = random.randint(1, (len(parent1) - 3))
        # abstract_state is the module-level helper taking the model as its
        # first argument (as used in Crossover_improved_v2), not a model method.
        abs_class = list(abstract_state(model, parent1[crosspoint][0], d))
        attemp = 0
        for _ in range(700):
            attemp += 1
            indx = random.randint(0, len(population) - 1)
            random_candidate = deepcopy(population[indx])
            random_cand_data = random_candidate.get_candidate_values()
            random_cand_start_point = random_candidate.get_start_state()
            for st_index in range(1, len(random_cand_data) - 3):
                random_ab = list(abstract_state(model, random_cand_data[st_index][0], d))
                if random_ab == abs_class:
                    matches_list.append(st_index)
                    found_match = True
            # Stop at the first individual that offers at least one match.
            if found_match:
                break
    print("match found in --- attemps", attemp)
    matchpoint = matches_list[random.randint(0, len(matches_list) - 1)]
    match = random_cand_data
    match_start = deepcopy(random_cand_start_point)

    # Offspring 1: head of the parent + tail of the mate.
    offspring1 = deepcopy(parent1[:crosspoint])
    offspring1 += deepcopy(match[matchpoint:])
    # Recompute the closing marker the same way Crossover_improved_v2 does
    # (tuple marker, reward from find_reward) instead of a positive
    # length-based reward stored in a list.
    offspring1[-1] = ('done', find_reward(offspring1))
    candid1 = Candidate(offspring1)
    candid1.set_start_state(parent1_start_point)

    # Offspring 2: head of the mate + tail of the parent.
    offspring2 = deepcopy(match[:matchpoint])
    offspring2 += deepcopy(parent1[crosspoint:])
    offspring2[-1] = ('done', find_reward(offspring2))
    candid2 = Candidate(offspring2)
    candid2.set_start_state(match_start)
    return candid1, candid2

def re_execute(model,env,candidate):
  """
  Replay a candidate's recorded actions from its start state, then let the
  agent continue until the episode ends.

  :param model: RL model exposing ``predict(obs, deterministic=True)``.
  :param env: environment exposing ``reset``, ``set_state``, ``step`` and an
      ``info`` dict whose 'mem' entry is the step log.
  :param candidate: Candidate providing the start state and the recorded steps.
  :return: the last ``counter + 1`` entries of ``env.info['mem']``, where
      ``counter`` is the number of steps executed here.
  """
  obs =env.reset()
  # The observation from reset() is replaced straight away by the restored start state.
  obs =env.set_state(deepcopy(candidate.get_start_state()))
  episode = candidate.get_candidate_values()
  steps_to_mut_point = len(episode)
  episode_reward = 0.0
  done= False 
  counter = 0 
  # Phase 1: force the recorded actions, one per recorded step.
  for i in range(steps_to_mut_point):
    # NOTE(review): this prediction is not used in phase 1 - confirm it can be removed.
    action, _ = model.predict(obs, deterministic=True)
    action_selected = episode[i][1]
    if action_selected == 'Mut':
      # Mutated step: no recorded action exists, so ask the agent what it
      # would do in the mutated state.
      action_selected, _ = model.predict(episode[i][0], deterministic=True)
    # The action chosen here decides whether the recorded path is forced up to
    # the mutation point or the trained agent's own route is followed.
    obs, reward, terminated , truncated, info = env.step(int(action_selected))
    counter+=1
    episode_reward += reward
    if terminated or truncated:
      break 
  # Phase 2: the agent acts on its own for at most 200 further steps.
  # NOTE(review): terminated/truncated are unbound here if the candidate has no steps.
  for j in range(200):
    if terminated or truncated:
      break
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated ,truncated, info = env.step(action) 
    counter+=1
    episode_reward += reward
  assert terminated or truncated
  if episode_reward>201:
    assert False 
  # counter steps plus one more entry (presumably the 'done' marker appended
  # by the wrapper - confirm).
  return env.info['mem'][-((counter)+1):]

## Candidate

In [8]:
import numpy as np
class Candidate:
    """
    One individual of the search: an episode plus its bookkeeping
    (objective values, covered objectives, crowding distance, uncertainty,
    start state, free-form information and a mutation flag).
    """

    def __init__(self, candidates_vals):
        """Store the episode; NumPy input is converted to a plain list."""
        is_numpy = isinstance(candidates_vals, (np.ndarray, np.generic))
        self.candidate_values = candidates_vals.tolist() if is_numpy else candidates_vals
        self.objective_values = []
        self.objectives_covered = []
        self.crowding_distance = 0
        self.uncertainity = []
        self.start_state = 0
        self.information = []
        self.mutation = False

    # --- episode values ---------------------------------------------------
    def get_candidate_values(self):
        """Return the stored episode."""
        return self.candidate_values

    # --- uncertainty ------------------------------------------------------
    def get_uncertainity_value(self, indx):
        """Return the uncertainty value at position ``indx``."""
        return self.uncertainity[indx]

    def get_uncertainity_values(self):
        """Return all uncertainty values."""
        return self.uncertainity

    def set_uncertainity_values(self, uncertain):
        """Replace the uncertainty values."""
        self.uncertainity = uncertain

    def set_candidate_values(self, cand):
        """Replace the stored episode."""
        self.candidate_values = cand

    def set_candidate_values_at_index(self, indx, val):
        """Overwrite the episode entry at position ``indx``."""
        self.candidate_values[indx] = val

    # --- objectives -------------------------------------------------------
    def get_objective_values(self):
        """Return all objective values."""
        return self.objective_values

    def get_objective_value(self, indx):
        """Return the objective value at position ``indx``."""
        return self.objective_values[indx]

    def set_objective_values(self, obj_vals):
        """Replace the objective values."""
        self.objective_values = obj_vals

    def add_objectives_covered(self, obj_covered):
        """Record ``obj_covered`` as covered, once."""
        if obj_covered in self.objectives_covered:
            return
        self.objectives_covered.append(obj_covered)

    def get_covered_objectives(self):
        """Return the list of covered objectives."""
        return self.objectives_covered

    # --- crowding distance ------------------------------------------------
    def set_crowding_distance(self, cd):
        """Store the crowding distance."""
        self.crowding_distance = cd

    def get_crowding_distance(self):
        """Return the crowding distance."""
        return self.crowding_distance

    # --- coverage queries -------------------------------------------------
    def exists_in_satisfied(self, indx):
        """Return True when ``indx`` equals one of the covered objectives."""
        return any(covered == indx for covered in self.objectives_covered)

    def is_objective_covered(self, obj_to_check):
        """Return True when ``obj_to_check`` equals one of the covered objectives."""
        return any(covered == obj_to_check for covered in self.objectives_covered)

    # --- start state ------------------------------------------------------
    def set_start_state(self, start_point):
        """Store a deep copy of the start state."""
        self.start_state = deepcopy(start_point)

    def get_start_state(self):
        """Return the stored start state."""
        return self.start_state

    # --- information ------------------------------------------------------
    def set_info(self, new_information):
        """Append one item to the information list."""
        self.information.append(new_information)

    def get_info(self):
        """Return the information list."""
        return self.information

    def mutated(self):
        """Flag this candidate as the result of a mutation."""
        self.mutation = True

def mutation_number_update(file_address, Mut_Num_to_add, iteration):
    """
    Persist the mutation counter of an iteration in a pickle file.

    On iteration 0 the file is (over)written with the value itself. On later
    iterations the stored content is loaded, the new value is appended (a
    stored non-list value is first wrapped in a list) and the result is
    written back.

    :param file_address: path of the pickle file.
    :param Mut_Num_to_add: value to store for this iteration.
    :param iteration: iteration number; 0 starts a fresh file.
    """
    if iteration == 0:
        with open(file_address, 'wb') as file:
            pickle.dump(Mut_Num_to_add, file)
        return

    # NOTE: pickle.load must only be used on files this pipeline wrote itself.
    with open(file_address, 'rb') as file2:
        Mut_num = pickle.load(file2)
    print(Mut_num)

    if type(Mut_num) is list:
        print('list')
        buffer = Mut_num
        buffer.append(Mut_Num_to_add)
    else:
        print('int')
        buffer = [Mut_num, Mut_Num_to_add]
    print(buffer)

    with open(file_address, 'wb') as file:
        pickle.dump(buffer, file)


## MOSA

In [9]:
# Module-level StandardScaler instance (sklearn.preprocessing).
# NOTE(review): not referenced in the visible part of this file - confirm it is still needed.
scaler = preprocessing.StandardScaler()


# domination relation method, same as MOSA
def dominates(value_from_pop, value_from_archive, objective_uncovered):
    """
    Dominance test restricted to the uncovered objectives (lower is better).

    :param value_from_pop: objective values of the first individual.
    :param value_from_archive: objective values of the second individual.
    :param objective_uncovered: indices of the objectives to compare.
    :return: True when the first individual is strictly better on at least one
        compared objective and the second is strictly better on none.
    """
    pop_better_somewhere = False
    archive_better_somewhere = False
    for objective in objective_uncovered:
        pop_score = value_from_pop[objective]
        archive_score = value_from_archive[objective]
        if pop_score < archive_score:
            pop_better_somewhere = True
        if archive_score < pop_score:
            archive_better_somewhere = True
        if pop_better_somewhere and archive_better_somewhere:
            # Each side wins somewhere: neither dominates the other.
            return False
    return pop_better_somewhere and not archive_better_somewhere

def evaulate_population(func, pop, parameters):
    """
    Evaluate every Candidate of ``pop`` and store (and print) its objective values.

    :param func: callable receiving the candidate's values and returning its objective values.
    :param pop: iterable of individuals; entries that are not Candidate instances are skipped.
    :param parameters: unused.
    """
    for individual in pop:
        if not isinstance(individual, Candidate):
            continue
        individual.set_objective_values(func(individual.get_candidate_values()))
        print(individual.get_objective_values())


def evaulate_population_with_archive(func, pop, already_executed):
    """
    Evaluate only the Candidates whose values have not been executed before.

    :param func: callable receiving the candidate's values and returning its objective values.
    :param pop: iterable of individuals; entries that are not Candidate instances are skipped.
    :param already_executed: list of previously executed values; newly
        evaluated values are appended to it in place.
    :return: list of the candidates that were evaluated in this call.
    """
    newly_evaluated = []
    for individual in pop:
        if not isinstance(individual, Candidate):
            continue
        if individual.get_candidate_values() in already_executed:
            continue
        individual.set_objective_values(func(individual.get_candidate_values()))
        already_executed.append(individual.get_candidate_values())
        newly_evaluated.append(individual)
    return newly_evaluated


def exists_in_archive(archive, index):
    '''
    Tell whether any archived candidate reports objective `index` as satisfied.

    :param archive: iterable of objects exposing `exists_in_satisfied(index)`
    :param index: objective index to look for
    :return: True if at least one candidate answers truthy, else False
    '''
    return any(member.exists_in_satisfied(index) for member in archive)

def get_from_archive(obj_index, archive):
    '''
    Find the first archived candidate that reports objective `obj_index` as satisfied.

    :param obj_index: objective index to look for
    :param archive: sequence of objects exposing `exists_in_satisfied(index)`
    :return: `(candidate, position_in_archive)` for the first match, or None
    '''
    matches = (
        (member, position)
        for position, member in enumerate(archive)
        if member.exists_in_satisfied(obj_index)
    )
    return next(matches, None)


def update_archive(pop, objective_uncovered, archive, no_of_Objectives, threshold_criteria):
    '''
    Update the archive with individuals that reach an objective's threshold.

    For each objective and each individual whose value is <= the threshold:
    if the archive already holds a candidate for that objective, it is replaced
    only when the new value is strictly smaller; otherwise the individual is
    appended. In both cases the objective is recorded on the individual and
    removed from `objective_uncovered`.

    :param pop: sequence of individuals exposing `get_objective_values()` and
                `add_objectives_covered(index)`
    :param objective_uncovered: list of uncovered objective indices; mutated in place
    :param archive: list of archived individuals; mutated in place
    :param no_of_Objectives: number of objectives to scan
    :param threshold_criteria: per-objective threshold values
    '''
    for objective in range(no_of_Objectives):
        for individual in pop:
            score = individual.get_objective_values()[objective]
            if not score <= threshold_criteria[objective]:
                continue
            if exists_in_archive(archive, objective):
                incumbent, slot = get_from_archive(objective, archive)
                if not incumbent.get_objective_values()[objective] > score:
                    # archived candidate is at least as good: keep it
                    continue
                individual.add_objectives_covered(objective)
                archive[slot] = individual
            else:
                individual.add_objectives_covered(objective)
                archive.append(individual)
            if objective in objective_uncovered:
                objective_uncovered.remove(objective)

def select_best(tournament_candidates, objective_uncovered):
    '''
    Pick a tournament entrant that no other entrant dominates.

    The previous all-pairs scan kept the *last* entrant that dominated anyone,
    so with A dominating B and B dominating C it returned B even though A
    dominates it. A single pass that replaces the current best only when a
    challenger dominates it cannot end on a dominated entrant, because
    dominance is transitive and asymmetric.

    :param tournament_candidates: non-empty sequence of objects exposing
                                  `get_objective_values()`
    :param objective_uncovered: objective indices used for the dominance test
    :return: the selected entrant; the first entrant when nobody dominates anybody
    :raises IndexError: if `tournament_candidates` is empty
    '''
    best = tournament_candidates[0]  # fallback in case none is dominating other
    for challenger in tournament_candidates[1:]:
        if dominates(challenger.get_objective_values(), best.get_objective_values(), objective_uncovered):
            best = challenger
    return best


def tournament_selection_improved(pop, size, objective_uncovered):
    '''
    Draw `size` entrants from `pop` (with replacement, via `random.randint`)
    and return the one chosen by `select_best`.

    :param pop: sequence of individuals
    :param size: number of entrants to draw
    :param objective_uncovered: objective indices forwarded to `select_best`
    :return: the entrant selected by `select_best`
    '''
    entrants = [pop[random.randint(0, len(pop) - 1)] for _ in range(size)]
    return select_best(entrants, objective_uncovered)


def tournament_selection(pop, size, objective_uncovered):
    '''
    Draw `size` entrants from `pop` (with replacement, via NumPy's global RNG)
    and return the one chosen by `select_best`.

    :param pop: non-empty sequence of individuals
    :param size: number of entrants to draw
    :param objective_uncovered: objective indices forwarded to `select_best`
    :return: the entrant selected by `select_best`
    '''
    # np.random.randint excludes `high`, so the bound must be len(pop):
    # with len(pop) - 1 the last individual could never be drawn and a
    # one-element population raised ValueError.
    picks = np.random.randint(0, len(pop), size=size)
    entrants = [pop[pick] for pick in picks]
    return select_best(entrants, objective_uncovered)

def generate_offspring_improved(population, model, env, d, objective_uncovered):
    '''
    Build an offspring list at least as long as `population`.

    Each round draws two independent random numbers: with probability 0.75 two
    children from `Crossover_improved_v2` are added, and with probability 0.3
    one child from `mutation_improved` is added.

    :param population: current generation
    :param model: forwarded to the crossover and mutation operators
    :param env: forwarded to `mutation_improved`
    :param d: accepted but not read; the crossover call receives the literal 1
    :param objective_uncovered: forwarded to the operators
    :return: list of offspring (may exceed `len(population)` by up to two)
    '''
    crossover_rate = 0.75
    mutation_rate = 0.3  # this is for test purposes
    target_size = len(population)
    offspring = []
    while len(offspring) < target_size:
        if random.uniform(0, 1) <= crossover_rate:
            first_child, second_child = Crossover_improved_v2(population, model, 1, objective_uncovered)
            offspring += [first_child, second_child]
        if random.uniform(0, 1) <= mutation_rate:
            offspring.append(mutation_improved(population, model, env, objective_uncovered))
    return offspring


def generate_offspring_improved_v2(population,model,env,d,objective_uncovered):
    '''
    Build an offspring list at least as long as `population`.

    Each round draws one random number: with probability 0.75 two children come
    from `Crossover_improved_v2` and are each passed through
    `mutation_improved_p`; otherwise one tournament winner (tournament size 10)
    is passed through `mutation_improved_p`. The rate handed to
    `mutation_improved_p` is always 1 / (length of the individual's values).

    :param population: current generation
    :param model: forwarded to the crossover and mutation operators
    :param env: forwarded to `mutation_improved_p`
    :param d: forwarded to `Crossover_improved_v2`
    :param objective_uncovered: forwarded to crossover and tournament selection
    :return: list of offspring (may exceed `len(population)` by one)
    '''
    crossover_rate = 0.75
    wanted = len(population)
    next_generation = []
    while len(next_generation) < wanted:
        if random.uniform(0, 1) <= crossover_rate:
            first, second = Crossover_improved_v2(population, model, d, objective_uncovered)
            first = mutation_improved_p(first, model, env, 1 / len(first.get_candidate_values()))
            second = mutation_improved_p(second, model, env, 1 / len(second.get_candidate_values()))
            next_generation.append(first)
            next_generation.append(second)
        else:
            # A few duplicated individuals may be added here; the original
            # author notes they are removed before the final executions.
            chosen = tournament_selection(population, 10, objective_uncovered)
            next_generation.append(
                mutation_improved_p(chosen, model, env, 1 / len(chosen.get_candidate_values())))
    return next_generation

def save_all_data(pop,no_of_Objectives,threshold_criteria, stored_data):
    '''
    Store a deep copy of every individual that has at least one objective below
    its cut-off and whose objective values are not already in `stored_data`.

    :param pop: iterable of individuals exposing `get_objective_values()`
    :param no_of_Objectives: number of leading objectives to check
    :param threshold_criteria: accepted but not read; the cut-offs are the
                               hard-coded list below
    :param stored_data: list extended in place with the deep copies
    '''
    # Be careful: these are the cut-offs that decide what gets stored, and they
    # are independent of `threshold_criteria`.
    archive_thresholds = [70, 0.06, 0.05, 0.05]
    for individual in pop:
        fitness = individual.get_objective_values()
        for objective in range(no_of_Objectives):
            if not fitness[objective] < archive_thresholds[objective]:
                continue
            already_kept = any(fitness == kept.get_objective_values() for kept in stored_data)
            if not already_kept:
                stored_data.append(deepcopy(individual))

def save_all_data2(pop, stored_data):
    '''
    Append a shallow list snapshot of the generation `pop` to `stored_data`.

    Nothing is filtered here; redundant entries (by fitness, etc.) have to be
    removed by the caller afterwards.

    :param pop: iterable of individuals of one generation
    :param stored_data: list of generations; extended in place
    '''
    snapshot = [*pop]
    stored_data.append(snapshot)


def Build_Archive(pop, no_of_Objectives, threshold_criteria, stored_data, initial_population):
    '''
    Build the archive of generated episodes without duplicates.

    An individual is deep-copied into `stored_data` when at least one of its
    objectives is strictly below the matching threshold and its objective values
    match neither an entry of `stored_data` nor an entry of `initial_population`.

    :param pop: current generation
    :param no_of_Objectives: number of objectives
    :param threshold_criteria: per-objective thresholds (only episodes with a
                               fitness below these values are of interest)
    :param stored_data: archive of final episodes; extended in place
    :param initial_population: initial population, whose episodes are excluded
                               from the archive
    '''
    # The archive cut-offs may differ from the GA stopping criteria; here they
    # are simply the thresholds passed in.
    archive_thresholds = threshold_criteria
    for individual in pop:
        fitness = individual.get_objective_values()
        for objective in range(no_of_Objectives):
            if not fitness[objective] < archive_thresholds[objective]:
                continue
            in_archive = any(fitness == kept.get_objective_values() for kept in stored_data)
            in_initial = any(fitness == first_gen.get_objective_values() for first_gen in initial_population)
            if not (in_archive or in_initial):
                stored_data.append(deepcopy(individual))



## Sorting and RUN search

In [10]:
def fast_dominating_sort(R_T, objective_uncovered):
    '''
    Split `R_T` into fronts by repeatedly peeling off the candidate that
    dominates every other remaining candidate.

    Each round looks for candidates that dominate all others on
    `objective_uncovered`. If one is found it becomes its own front and is
    removed from `R_T`; if none is found, everything still in `R_T` forms the
    last front.

    Fix over the previous version: the loop stopped as soon as a single
    candidate was left, so that candidate was silently dropped (a one-element
    input returned `[]`). The leftover is now emitted as a final front. The
    per-round front is also rebuilt at the start of each round instead of being
    reset in the middle of the removal loop.

    :param R_T: list of candidates exposing `get_objective_values()`; mutated in
                place, and the last front returned may be this very list
    :param objective_uncovered: objective indices used for the dominance test
    :return: list of fronts (each a list of candidates); `[]` for empty input
    '''
    to_return = []
    while len(R_T) > 1:
        front = []
        for best in R_T:
            dominates_all = True
            for against in R_T:
                if best == against:
                    continue
                if not dominates(best.get_objective_values(), against.get_objective_values(),
                                 objective_uncovered):
                    dominates_all = False
                    break
            if dominates_all and best not in front:
                front.append(best)

        if not front:
            # no one dominates everyone else: the rest is one single front
            to_return.append(R_T)
            return to_return

        to_return.append(front)
        for member in front:
            R_T.remove(member)

    if R_T:
        # exactly one candidate left after peeling: keep it as the last front
        to_return.append(R_T)
    return to_return

def sort_based_on_crowding_distance(e):
    '''Sort key: the crowding distance stored on candidate `e`.'''
    return e.get_crowding_distance()


def sort_based_on(e):
    '''Sort key: the first objective value of candidate `e`.'''
    return e.get_objective_values()[0]


def sort_worse(pop):
    '''
    Sort `pop` in place by first objective value, largest first.

    :param pop: list of candidates exposing `get_objective_values()`
    :return: the same list object, now sorted
    '''
    pop.sort(key=lambda candidate: candidate.get_objective_values()[0], reverse=True)
    return pop


def preference_sort(R_T, size, objective_uncovered):
    '''
    Preference sorting: for every uncovered objective, pull the candidate with
    the smallest value for it out of `R_T`; whatever remains is ranked by
    `fast_dominating_sort`.

    Fixes over the previous version: the running minimum started at the magic
    value 100, so when every candidate scored >= 100 on an objective the first
    candidate was picked regardless of its value; it now starts at +inf. The
    local no longer shadows the builtin `min`, and the loop stops instead of
    raising IndexError once `R_T` has been emptied.

    :param R_T: list of candidates exposing `get_objective_values()`; mutated in place
    :param size: accepted but not read by this function
    :param objective_uncovered: indices of the objectives still uncovered
    :return: list whose leading entries are single candidates (one per uncovered
             objective) followed by the fronts (lists) from `fast_dominating_sort`
    '''
    to_return = []
    for objective_index in objective_uncovered:
        if not R_T:
            # more uncovered objectives than candidates left
            break
        lowest = float('inf')
        best = R_T[0]
        for candidate in R_T:
            value = candidate.get_objective_values()[objective_index]
            if value < lowest:  # strict: the first of several ties wins
                lowest = value
                best = candidate
        to_return.append(best)
        R_T.remove(best)
    if len(R_T) > 0:
        to_return.extend(fast_dominating_sort(R_T, objective_uncovered))
    return to_return

def get_array_for_crowding_distance(sorted_front):
    '''
    Stack the objective values of a front into a NumPy array and return the
    result of `calc_crowding_distance` for it.

    :param sorted_front: iterable of candidates exposing `get_objective_values()`
    :return: whatever `calc_crowding_distance` returns for the stacked array
    '''
    objective_rows = [numpy.array(member.get_objective_values()) for member in sorted_front]
    return calc_crowding_distance(np.array(objective_rows))

def assign_crowding_distance_to_each_value(sorted_front, crowding_distance):
    '''
    Store `crowding_distance[k]` on the k-th candidate of `sorted_front`.

    :param sorted_front: sequence of candidates exposing `set_crowding_distance(value)`
    :param crowding_distance: indexable with one entry per candidate
    '''
    for position, candidate in enumerate(sorted_front):
        candidate.set_crowding_distance(crowding_distance[position])


def run_search(func, initial_population, no_of_Objectives, criteria, archive, logger, start, time_budget, size, d, env,
               parameters, second_archive, gens):
    '''
    Run the generational search loop (at most 10 generations) from `initial_population`.

    The initial population is evaluated with `func`, used to update `archive`
    and appended to `gens`. Each generation then: logs the archive, creates
    offspring with `generate_offspring_improved_v2`, evaluates them, updates
    `archive` and `second_archive`, appends the offspring to `gens`, ranks a
    deep copy of the parents plus the offspring with `preference_sort`, and
    fills the next generation from the resulting fronts. The loop ends early
    once `objective_uncovered` is empty.

    Side effects: resets the global `MUTATION_NUMBER` to 0, mutates `archive`,
    `second_archive` and `gens` in place, prints progress, and logs at INFO level.
    `model` is not a parameter; it is read from module scope.

    :param func: evaluation callable forwarded to `evaulate_population`
    :param initial_population: list of candidates forming the first generation
    :param no_of_Objectives: number of objectives
    :param criteria: per-objective thresholds used when updating the archive
    :param archive: list updated in place by `update_archive`
    :param logger: logger used for the INFO messages
    :param start: accepted but not read in this function
    :param time_budget: accepted but not read in this function
    :param size: target size of each new generation
    :param d: forwarded to `generate_offspring_improved_v2`
    :param env: forwarded to `generate_offspring_improved_v2`
    :param parameters: forwarded to `evaulate_population`
    :param second_archive: list updated in place by `save_all_data`
    :param gens: list receiving one snapshot per generation via `save_all_data2`
    :return: None
    '''
    global MUTATION_NUMBER
    MUTATION_NUMBER = 0
    threshold_criteria = criteria
    objective_uncovered = []
    print("initial population ", type(initial_population), len(initial_population))

    for obj in range(no_of_Objectives):
        objective_uncovered.append(obj)  # initially every objective index is uncovered

    random_population = initial_population

    # shallow copy: P_T is a new list holding the same candidate objects
    P_T = copy.copy(random_population)
    evaulate_population(func, random_population,
                        parameters)  # evaluate the whole generation; results are stored on the candidates

    #print(random_population[0].get_objective_values())
    update_archive(random_population, objective_uncovered, archive, no_of_Objectives,
                   threshold_criteria)  # updating archive
    # save initial population
    save_all_data2(random_population, gens)
    iteration = 0
    # limit of number of generations
    while iteration < 10:
        iteration = iteration + 1  # iteration count
        # TODO: limit by the time budget instead of the generation number
        for arc in archive:
            logger.info("***ARCHIVE***")
            logger.info("\nValues: " + str(
                arc.get_candidate_values()) + "\nwith objective values: " + str(
                arc.get_objective_values()) + "\nSatisfying Objective: " + str(
                arc.get_covered_objectives()))
        print("Iteration count: " + str(iteration))
        logger.info("Iteration is : " + str(iteration))
        logger.info("Number of mutations : " + str(MUTATION_NUMBER))

        R_T = []

        # `model` below is the module-level global, not an argument of run_search
        Q_T = generate_offspring_improved_v2(P_T, model, env, d,
                                             objective_uncovered)  # generate offspring using crossover and mutation

        evaulate_population(func, Q_T, parameters)  # evaluating offspring
        update_archive(Q_T, objective_uncovered, archive, no_of_Objectives, threshold_criteria)  # updating archive
        save_all_data(Q_T, no_of_Objectives, threshold_criteria, second_archive)
        # save generations
        save_all_data2(Q_T, gens)
        # R_T = P_T union Q_T; the parents enter as deep copies, the offspring as the same objects
        R_T = copy.deepcopy(P_T)
        R_T.extend(Q_T)

        F = preference_sort(R_T, size, objective_uncovered)  # preference sorting and getting fronts

        if len(objective_uncovered) == 0 :  # checking if all objectives are covered
            print("all_objectives_covered")
            logger.info("***Final-ARCHIVE***")
            print(("***Final-ARCHIVE***"))
            for arc in archive:
                print("\nValues: " + str(
                    arc.get_candidate_values()) + "\nwith objective values: " + str(
                    arc.get_objective_values()) + "\nSatisfying Objective: " + str(
                    arc.get_covered_objectives()))

                logger.info("\nValues: " + str(
                    arc.get_candidate_values()) + "\nwith objective values: " + str(
                    arc.get_objective_values()) + "\nSatisfying Objective: " + str(
                    arc.get_covered_objectives()))
            logger.info("Iteration is : " + str(iteration))
            logger.info("Number of mutations : " + str(MUTATION_NUMBER))
            break

        P_T_1 = []  # next generation P_T+1
        index = 0

        # Take whole entries from the head of F while they still fit into `size`.
        # `index` stays 0 here because the consumed entry is removed from F each time.
        # NOTE(review): F[index] is read without checking that F is non-empty — confirm F cannot run out here.
        while len(P_T_1) <= size:

            if not isinstance(F[index], Candidate):
                if len(P_T_1) + len(F[index]) > size:
                    break
            else:
                if len(P_T_1) + 1 > size:
                    break

            front = F[index]
            if isinstance(F[index], Candidate):  # entry is a single candidate
                P_T_1.append(F[index])
                F.remove(F[index])
            else:
                for ind in range(len(F[index])):  # entry is a front with several candidates
                    val = F[index][ind]
                    P_T_1.append(val)

                F.remove(F[index])
        # Fill the remaining slots from the next front, preferring large crowding distance.
        # NOTE(review): F[index] is passed to sort_worse, which calls list.sort — presumably always a front (list) here; confirm.
        while (len(P_T_1)) < size:
            copyFront = copy.deepcopy(F[index])
            sorted_front = sort_worse(copyFront)  # sort before crowding distance

            crowding_distance = get_array_for_crowding_distance(sorted_front)  # convert to a library-compatible array
            assign_crowding_distance_to_each_value(sorted_front,
                                                   crowding_distance)  # assign each solution its crowding distance
            sorted_front.sort(key=sort_based_on_crowding_distance, reverse=True)  # sorting based on crowding distance

            if (len(sorted_front) + len(
                    P_T_1)) > size:  # front does not fit entirely: add the most spread-out solutions until full
                for sorted_front_indx in range(len(sorted_front)):
                    candidate = sorted_front[sorted_front_indx]
                    P_T_1.append(candidate)
                    if len(P_T_1) >= size:
                        break

            index = index + 1

        P_T_1 = P_T_1[0:size]
        P_T = P_T_1  # assigning PT+1 to PT

def minimize(func, population, lb, ub, no_of_Objectives, criteria, time_budget, logger, archive, size, d, env,
             parameters, second_archive, gens):
    assert hasattr(func, '__call__')

    start = time.time()
    run_search(func, population, no_of_Objectives, criteria, archive, logger, start, time_budget, size, d, env,
               parameters, second_archive, gens)

In [11]:
class MountainCar_caseStudy():
    '''
    Fitness evaluator handed to the search for the MountainCar case study.

    Relies on module-level names that must exist before use: `i`, `model`, `d`
    and `unique5` (read in `__init__`) and `RF_FF_1rep` (read in `_evaluate`).
    '''
    def __init__(self):
        logger = logging.getLogger()
        now = datetime.now()
        # NOTE(review): `i` is not a parameter; it is resolved from module scope at call time.
        log_file = 'log/STARLA' + str(i) + '_V2' + str(now) + '.log'
        # basicConfig does nothing when the root logger already has handlers
        logging.basicConfig(filename=log_file,
                            format='%(asctime)s %(message)s')
        self.parameters = [model,d,unique5]
        logger.setLevel(logging.WARNING)

    def _evaluate(self,x):
        '''
        Compute the objective values of one individual.

        :param x: candidate values of the individual
        :return: list `[obj1, obj2, obj3]` holding the results of
                 `fitness_reward_general`, `fitness_confidence` and
                 `fitness_functional_probability`
        :raises AssertionError: if `fitness_reward_general` returns None
        '''
        fv = x
        model,d,unique5 = self.parameters
        obj1 = fitness_reward_general(fv)
        if obj1 is None:
            debug_data1=[fv,x]
            # The dump is best-effort: a missing directory must not hide the real failure.
            try:
                with open('/content/drive/MyDrive/debug/data.pickle', 'wb') as file:
                    pickle.dump(debug_data1, file)
            except OSError as dump_error:
                logging.getLogger().warning('could not write debug data: ' + str(dump_error))
            # Raised explicitly because a bare `assert False` is stripped under `python -O`.
            raise AssertionError('fitness_reward_general returned None')
        obj2 = fitness_confidence(fv,model,'m')
        binary_fv = translator(fv,model,d,unique5)
        obj3 = fitness_functional_probability(RF_FF_1rep,binary_fv)
        # obj4 = fitness_functional_probability(RF_RF_1rep,binary_fv)
        to_ret = [obj1,obj2,obj3]
        logger = logging.getLogger()
        # INFO record: not emitted while the root logger stays at the WARNING level set in __init__
        logger.info(str(fv)+","+str(to_ret))
        return to_ret


def run(i,population ,archive ,second_archive, gens):
    '''
    Configure logging and launch one search on the MountainCar case study.

    Reads `mtc_wrapped`, `model`, `unique1` and `MUTATION_NUMBER` from module
    scope and rebinds the module-level `logger` to the root logger.

    :param i: run identifier, used only in the log file name
    :param population: initial population; its length is the generation size
    :param archive: list updated in place by the search
    :param second_archive: list updated in place by the search
    :param gens: list receiving one snapshot per generation
    '''
    global logger

    env = mtc_wrapped
    d = 500
    size = len(population)
    lb = [0, 0, 0]
    ub = [100000,1000000,100000]
    parameters = [model, d, unique1]
    threshold_criteria = [-180, 0.04, 0.05]
    no_of_Objectives = 3

    # colons of the timestamp are replaced before it goes into the file name
    timestamp = str(datetime.now()).replace(":","_")
    log_file = 'D:\\code\\RLtest\\starla\\data\\testcase\\Results' + str(i) + '_V2' + timestamp + '.log'
    logger = logging.getLogger()
    logging.basicConfig(filename=log_file, format='%(asctime)s %(message)s')
    logger.setLevel(logging.WARNING)

    evaluator = MountainCar_caseStudy()._evaluate
    minimize(evaluator, population, lb, ub,
             no_of_Objectives, threshold_criteria, 7200,
             logger, archive, size, d, env, parameters, second_archive, gens)

    logger.info("Iteration completed")
    logger.info("mu"+str(MUTATION_NUMBER))


## analyzer

In [12]:
def analyze_result(result):
    '''
    Aggregate the differences found in a re-execution result.

    :param result: sequence whose entries hold a pair at position 1; the
                   absolute difference of that pair is added up per entry
                   (this is the output of the re-execution-improved function)
    :return: sum of the absolute differences, 0 for an empty `result`
    '''
    return sum(abs(entry[1][0] - entry[1][1]) for entry in result)


def get_objective_distribution_and_set_candidate_objectives(population, model, d,
                                                            unique1, RF_FF_1rep,
                                                            RF_RF_1rep):
    '''
    Compute four fitness values for every individual, store them on the
    individual and collect them per fitness.

    :param population: sequence of candidates exposing `get_candidate_values()`
                       and `set_objective_values(values)`
    :param model: forwarded to `fitness_confidence` and `translator`
    :param d: forwarded to `translator`
    :param unique1: forwarded to `translator`
    :param RF_FF_1rep: forwarded to `fitness_functional_probability`
    :param RF_RF_1rep: forwarded to `fitness_reward_probability`
    :return: four lists (one per fitness), each ordered like `population`
    '''
    rewards, confidences, functional_probs, reward_probs = [], [], [], []
    for candidate in population:
        episode = candidate.get_candidate_values()
        reward_fit = fitness_reward(episode)
        confidence_fit = fitness_confidence(episode, model, 'm')
        translated = translator(episode, model, d, unique1)
        functional_fit = fitness_functional_probability(RF_FF_1rep, translated)
        reward_prob_fit = fitness_reward_probability(RF_RF_1rep, translated)
        candidate.set_objective_values([reward_fit, confidence_fit, functional_fit, reward_prob_fit])
        rewards.append(reward_fit)
        confidences.append(confidence_fit)
        functional_probs.append(functional_fit)
        reward_probs.append(reward_prob_fit)
    return rewards, confidences, functional_probs, reward_probs

def was_in_initial_population(solution, population, no_of_Objectives):
    '''
    Filter out a solution whose objective values already occur in `population`.

    :param solution: object exposing `get_objective_values()`
    :param population: iterable of objects exposing `get_objective_values()`
    :param no_of_Objectives: accepted but not read by this function
    :return: 0 when some member has equal objective values, else `solution` itself
    '''
    seen_before = any(
        member.get_objective_values() == solution.get_objective_values()
        for member in population
    )
    return 0 if seen_before else solution


def analyze_set_differences(differences_set):
    '''
    Summarise each `[differences, reward]` pair of `differences_set`.

    :param differences_set: iterable of pairs whose first element is a list of
                            differences and whose second element is a number
    :return: one row per pair:
             `[len(differences), analyze_result(differences), reward, len(differences) / reward]`
    '''
    return [
        [len(item[0]), analyze_result(item[0]), item[1], len(item[0]) / item[1]]
        for item in differences_set
    ]

def extract_differences(solution_set):
    '''
    Re-execute every solution and pair the outcome with the solution's first
    objective value.

    Reads `model` and `env2` from module scope.

    :param solution_set: iterable of solutions (e.g. archive or second_archive)
                         exposing `get_objective_values()`
    :return: list of `[re_execution_improved_v2(model, env2, solution), reward]`
    '''
    collected = []
    for episode in solution_set:
        # the reward is read before the re-execution call
        episode_reward = episode.get_objective_values()[0]
        replay_outcome = re_execution_improved_v2(model, env2, episode)
        collected.append([replay_outcome, episode_reward])
    return collected


def get_results_distribution(results):
    '''
    Split analysed result rows into three parallel lists.

    :param results: iterable of rows; positions 0, 1 and 3 of each row are used
    :return: tuple `(row[0] values, row[1] values, row[3] values)`
    '''
    picked = [(row[0], row[1], row[3]) for row in results]
    num_of_diff = [entry[0] for entry in picked]
    diff_confi = [entry[1] for entry in picked]
    diff_ration = [entry[2] for entry in picked]
    return num_of_diff, diff_confi, diff_ration



## mountaincar

In [13]:
def random_test_2(model, env, Num):
    '''
    Run the agent for `Num` environment steps and return the recorded data of
    the episodes that finished within those steps.

    :param model: agent exposing `predict(obs, deterministic=True)`
    :param env: environment exposing `reset()`, `step(action)` and an `info`
                mapping with 'mem' and 'state' entries
                (presumably the StoreAndTerminateWrapper — confirm)
    :param Num: number of environment steps to execute
    :return: `(randomtest, ran_state)` — slices of `env.info['mem']` and
             `env.info['state']`

    NOTE(review): `end` and `lastpoint` are only assigned once an episode has
    finished; if none finishes within `Num` steps the code below fails on an
    unbound local.
    '''
    obs, info= env.reset()
    counter = 1  # becomes 1 + number of finished episodes
    episode_reward = 0.0  # accumulated per episode but never returned
    for i in range(Num):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        episode_reward += reward
        if done:
            counter += 1
            end = i  # step index at which the most recent episode finished
            episode_reward = 0.0
            obs,info= env.reset()
    iter = deepcopy(counter)
    u = 1
    # Walk env.info['mem'] backwards until (counter - 1) entries whose first element is
    # the string 'done' have been seen; lastpoint ends up as the negative index of the
    # earliest of them.
    while iter > 1:
        if type(env.info['mem'][-u][0]) is str and env.info['mem'][-u][0] == 'done':
            lastpoint = -u
            iter -= 1
        u += 1
    fin = Num - end
    start = -Num - counter  # computed but not used below
    randomtest = env.info['mem'][lastpoint:-fin]
    ran_state = env.info['state'][(-counter + 1):-1]
    return randomtest, ran_state


## run

In [14]:
# Experiment driver: load the trained agent, then for s = 1..9 sample episodes, train the
# two fault predictors, run the search and pickle its outputs.
start_time = time.time()
Drive_model = "D:\\code\\RLtest\\starla\\data\\model\\97.zip"
mtc = gym.make('MountainCar-v0')
mtc_wrapped = StoreAndTerminateWrapper(mtc)
# The freshly built DQN is only used to call load(); the loaded model replaces it.
model = DQN('MlpPolicy',env=mtc_wrapped, verbose=1)
model = model.load(Drive_model)
d = 500

MUTATION_NUMBER = 0  # set the mutation counter to 0
Run_number = 4
for s in range(1, 10):
    print(f'\033[33mepoch s{s} begin\033[0m')
# print('pong')
# for d in [1, 0.5, 0.1, 0.05]:
    # Collect episodes by running the agent for 1,500,000 environment steps.
    ee, qq = random_test_2(model,mtc_wrapped, 1500_000)
    test, teststate = fix_testing(ee, qq,mtc_wrapped)
    print(f'Abstract Level: {d}')
    unique1, uni1 = Abstract_classes(test, d, model)
    unique5 = unique1
    # Maps each entry of unique1 to its position; not read in the visible part of this cell.
    hash_table = {}
    for k, val in enumerate(unique1):
        hash_table[val] = k

    # NOTE(review): this local value is not read in the visible part of this cell and is
    # separate from the --epsilon command-line argument.
    epsilon = 0.1
    data1_x_b, data1_y_b, data1_y_f_b = ML_first_representation_func_based(d,
                                                                       is_functional_fault,
                                                                       is_reward_fault
                                                                       ,model
                                                                       ,test
                                                                       ,unique1)

#########################################################  Train ML -  Reward fault predictor  #############

    # 67/33 train/test split with a fixed seed; only the training part is used below.
    X_train_reward_fault, X_test_reward_fault, y_train_reward_fault, y_test_reward_fault = train_test_split(data1_x_b, data1_y_b, test_size=0.33, random_state=42)

    RF_RF_1rep = RandomForestClassifier(random_state=0, class_weight='balanced')
    RF_RF_1rep.fit(X_train_reward_fault,y_train_reward_fault)
#report(RF_RF_1rep,X_train_reward_fault,y_train_reward_fault,X_test_reward_fault,y_test_reward_fault)

#########################################################  Train ML - Functional fault predictor #############


    X_train_f, X_test_f, y_train_f, y_test_f = train_test_split(data1_x_b, data1_y_f_b, test_size=0.33, random_state=42)
    RF_FF_1rep = RandomForestClassifier(random_state=0, class_weight='balanced')
    RF_FF_1rep.fit(X_train_f,y_train_f)
    
    print('len population', len(test))
    start_state_ep1 = teststate
    ep1 = test
    population = []
    # Takes the first 1500 episodes; indexing fails if fewer were collected.
    # The loop variable `i` stays defined at module scope afterwards, and
    # MountainCar_caseStudy.__init__ reads a module-level `i`.
    for i in range(0, 1500):  # size of the initial population is 1500
        #if len(ep1[i]) < 400:
            #continue
        cd = Candidate(ep1[i])
        cd.set_start_state(start_state_ep1[i])
        population.append(cd)
    archive1 = []
    second_arch1 = []
    generations = []  # all of the episodes generated during the search
    run(0, population, archive1, second_arch1, generations)
    # Persist the three result containers of this repetition.
    with open(
            f'D:\\code\\Results\\May17_arch1_r110_rt400_population1500lastfull_run{Run_number}_{s}.pickle',
            'wb') as file:
        pickle.dump(archive1, file)
    with open(
           f'D:\\code\\Results\\May17_second_arch1_r110_rt400_population1500lastfull_run{Run_number}_{s}.pickle',
            'wb') as file:
        pickle.dump(second_arch1, file)
    with open(
             f'D:\\code\\Results\\May17_generations_r110_rt400_population1500lastfull_run{Run_number}_{s}.pickle',
            'wb') as file:
        pickle.dump(generations, file)
    # NOTE(review): s starts at 1; if the third argument is the `iteration` that
    # mutation_number_update compares with 0 to create the file, that branch never runs
    # from this loop — confirm the pickle already exists before the first call.
    mutation_number_update( f'D:\\code\\Results\\Mutation_number_run{Run_number}.pickle', MUTATION_NUMBER,
                           s)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
[33mepoch s1 begin[0m
Abstract Level: 500
abstract states: 1
Concrete states 1499815
ratio 6.667488990308804e-07
len population 10732
initial population  <class 'list'> 1500
[0.33198863 0.33268684 0.3353245 ]
[0.33186656 0.33271712 0.33541626]
[0.33170342 0.33273324 0.33556336]
[0.33118692 0.33285964 0.3359535 ]
[0.33027557 0.33299842 0.33672598]
[0.32944345 0.33307016 0.3374864 ]
[0.32873917 0.33312196 0.33813882]
[0.32820165 0.3331443  0.33865407]
[0.32785085 0.33313513 0.33901405]
[0.3272747  0.33316693 0.3395584 ]
[0.32616228 0.33329147 0.34054622]
[0.32529244 0.33337557 0.34133196]
[0.32471198 0.33341342 0.34187457]
[0.32445312 0.33340186 0.34214506]
[0.32455927 0.33331776 0.34212297]
[0.32444432 0.33323482 0.34232086]
[0.32389823 0.33322093 0.34288087]
[0.3234976  0.33318883 0.34331363]
[0.3232512  0.33313885 0.34360996]
[0.32316285 0.33307207 0.34376514]
[0.32321504 0.33300558 0.3437

KeyboardInterrupt: 