First, we import the libraries we use:

In [379]:
import gym, itertools, pandas as pd

We generate two lists: one containing every possible combination of 16 bits (the candidate rulesets), and another containing every combination of 4 bits, one bit for each of the four dimensions of the observation vector (the observation space). We store both in separate files (`ruleset_combinations.csv` and `observation_space.csv`) so we don't have to generate them each time.

In [380]:
def _bit_strings(width):
    """Return every string of `width` binary digits, in itertools.product order.

    The first entry is all zeros, the last is all ones, and the list has
    2**width entries.
    """
    # Join the digits directly instead of stringifying the tuple and then
    # stripping commas, spaces and parentheses from its repr.
    return ["".join(map(str, bits)) for bits in itertools.product([0, 1], repeat=width)]


# Every 16-bit string: one candidate ruleset per row.
lst = _bit_strings(16)

df = pd.DataFrame(lst)
df = df.astype(str)
df.to_csv('ruleset_combinations.csv', index=False)


# Every 4-bit string: one discretised observation per row.
lst2 = _bit_strings(4)

df2 = pd.DataFrame(lst2)
df2 = df2.astype(str)
df2.to_csv('observation_space.csv', index=False)

Then we build the list of rulesets from the two files mentioned above: each ruleset maps every 4-bit observation to the corresponding bit of its 16-bit string.

In [381]:
# NOTE: in this cell `df` holds the 4-bit observation keys and `df2` the
# 16-bit ruleset strings. dtype=str keeps the leading zeros of each string.
df = pd.read_csv(r'observation_space.csv', dtype=str)
df2 = pd.read_csv(r'ruleset_combinations.csv', dtype=str)

# Both are lists of single-element rows, e.g. [['0000'], ['0001'], ...].
observations = df.values.tolist()
rulesets = df2.values.tolist()

# The i-th observation key is paired with the i-th character of each rule.
observation_keys = [row[0] for row in observations]

list_rulesets = []

for ruleset in rulesets:
    rule = "".join(ruleset)
    if len(rule) < len(observation_keys):
        raise ValueError(
            f"ruleset {rule!r} has {len(rule)} bits, expected at least {len(observation_keys)}"
        )
    # A dict comprehension is used (and the mapping is not named `dict`) so the
    # builtin `dict` is neither shadowed nor required to be callable here.
    rule_map = {key: bit for key, bit in zip(observation_keys, rule)}
    list_rulesets.append(rule_map)

df3 = pd.DataFrame(list_rulesets)
df3 = df3.astype(str)
df3.to_csv('list_rulesets.csv', index=False)

We define the function `action` so that it behaves according to a given ruleset:

In [382]:
def action(observation, ruleset):
    """Look up the action that `ruleset` prescribes for `observation`.

    The first four entries of `observation` (cart position, cart velocity,
    pole angle, pole angular velocity) are each reduced to one bit: "1" when
    the value is >= 0, otherwise "0". The four bits, concatenated in that
    order, form the key used to index `ruleset`.

    Returns:
        The value stored under that key, converted to int.
    """
    # Read all four components up front, then discretise each by its sign.
    components = [observation[index] for index in range(4)]
    sign_bits = ["1" if value >= 0 else "0" for value in components]
    return int(ruleset["".join(sign_bits)])

Then we run the CartPole simulation for each possible ruleset. The results are saved to a file (`results.csv`) for easier reading.

In [383]:
# Number of environment steps simulated for each ruleset (steps, not episodes).
STEPS_PER_RULESET = 1000
# None runs headless. "human" renders each step to a window, which is
# impractically slow for 65,536 rulesets x 1,000 steps; set it only to watch
# a handful of rulesets.
RENDER_MODE = None

env = gym.make("CartPole-v1", render_mode=RENDER_MODE)

lst_return = []

try:
    # rulesets[i] is the row that list_rulesets[i] was built from, so the two
    # lists are walked in parallel instead of searching with list.index().
    for ruleset_row, ruleset in zip(rulesets, list_rulesets):
        # Start every ruleset from a fresh episode with a zero score, so no
        # reward or state earned under the previous ruleset leaks into this one.
        observation, info = env.reset()
        score = 0
        maxscore = 0

        for _ in range(STEPS_PER_RULESET):
            observation, reward, terminated, truncated, info = env.step(action(observation, ruleset))
            score += reward

            if terminated or truncated:
                maxscore = max(maxscore, score)
                observation, info = env.reset()
                score = 0

        # Count the episode that was still running when the step budget ran out.
        maxscore = max(maxscore, score)

        lst_return.append({"Ruleset": ruleset_row, "Maxscore": int(maxscore)})
finally:
    # Release the environment even if a step or lookup raises.
    env.close()

df_final = pd.DataFrame(lst_return)
df_final = df_final.astype(str)
df_final.to_csv('results.csv', index=False, header=False, sep=';')