In [1]:
# Set TensorFlow's C++ log level to '2' before tensorflow is imported
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

import random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from dqn_agent import DQNAgent
from env_config import env_configs
from gym_wrapper import GymWrapper
from utils import ImageFeaturization

# If at least one GPU is visible, replace the first one with a single virtual
# device created with memory_limit=1024 (megabytes, per the TensorFlow API docs).
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024)],
        )
    except RuntimeError as config_error:
        # Report the failure and carry on with TensorFlow's default GPU setup.
        print(config_error)

In [2]:
# Batch of runs; each entry is a pair of algorithm names.
lists = (('DQN', 'ER'),)
print('Batch list : ',lists)

# Per-episode series collected by the training loop and drawn by save_statistics().
scores_avg = []
scores_raw = []
episodes = []
losses = []
epsilons = []
def save_statistics():
    """Redraw the training curves on the current figure and save them.

    Reads the module-level lists ``scores_avg``, ``scores_raw``, ``epsilons``
    and ``losses`` plus the module-level ``FILENAME``, draws three stacked
    panels, and writes the figure to ``FILENAME + "_TF.jpg"`` at 100 dpi.
    """
    line_style = 'b'
    plt.clf()

    # Top panel: averaged score with the raw per-episode score as a thin overlay.
    plt.subplot(311)
    plt.plot(scores_avg, line_style)
    plt.plot(scores_raw, line_style, alpha=0.8, linewidth=0.5)
    plt.xlabel('Episodes')
    plt.ylabel('average score')
    plt.grid()
    plt.title(FILENAME)

    # Middle and bottom panels share the same layout: one series each.
    for position, series, y_label in ((312, epsilons, 'epsilon'),
                                      (313, losses, 'losses')):
        plt.subplot(position)
        plt.plot(series, line_style)
        plt.xlabel('Episodes')
        plt.ylabel(y_label)
        plt.grid()

    plt.savefig(FILENAME + "_TF.jpg", dpi=100)

if __name__ == "__main__":
    # Train one agent per (RL algorithm, replay algorithm) pair listed in `lists`.
    for item in lists:
        rl_algorithm, er_algorithm = item

        # The statistics lists are module-level and are read by save_statistics();
        # empty them in place so one run's curves do not leak into the next run.
        for series in (scores_avg, scores_raw, episodes, losses, epsilons):
            series.clear()

        STATE_TYPE = 'IMG'
        # STATE_TYPE = 'MLP'
        cfg = {
                "ENV":{
                    "NAME":"CartPole-v1",
                    # "IMG_SIZE":(240,160,4),
                    "IMG_SIZE":(120,90,4),
                    "STATE_TYPE":STATE_TYPE,
                    "IMG_TYPE":"GRAY",
                    # "IMG_TYPE":"RGB",
                    },
                "RL":{
                    "ALGORITHM":rl_algorithm,
                    "STATE_TYPE":STATE_TYPE,
                    "NETWORK":{
                        "LAYER":[128,128],
                    },
                    "ER":
                        {
                            "ALGORITHM":er_algorithm,
                        },
                    "BATCH_SIZE":64,
                    "TRAIN_START":500,
                    "MEMORY_SIZE":20000,
                    },
                "ADD_NAME":STATE_TYPE,
                }
        env_config = env_configs[cfg["ENV"]["NAME"]]

        # FILENAME is read by save_statistics() for the plot title and output path.
        FILENAME = cfg["ENV"]["NAME"] + '_' + cfg["RL"]["ALGORITHM"] + '_' + cfg["RL"]["ER"]["ALGORITHM"]
        if cfg['RL']["ER"]["ALGORITHM"] == "HER":
            # The replay settings live under cfg["RL"]["ER"]; cfg has no top-level "ER" key.
            FILENAME = FILENAME + '_' + cfg["RL"]["ER"]["STRATEGY"]
        FILENAME = FILENAME + '_' + cfg["ADD_NAME"]
        EPISODES = env_config["EPISODES"]
        END_SCORE = env_config["END_SCORE"]

        env = GymWrapper(cfg=cfg['ENV'])
        try:
            if cfg["RL"]["ALGORITHM"] == "DQN":
                agent = DQNAgent(env, cfg)
            # elif cfg["RL"]["ALGORITHM"] == "MDQN":
            #     agent = MDQNAgent(env, cfg)
            else:
                # Fail here instead of hitting a NameError on `agent` further down.
                raise ValueError("Unsupported RL algorithm: {}".format(cfg["RL"]["ALGORITHM"]))
            image_featurization = ImageFeaturization(data_format = 'last', img_size=cfg['ENV']['IMG_SIZE'][0:2])
            plt.clf()
            figure = plt.gcf()
            figure.set_size_inches(8,6)

            save_freq = 10; save_idx = 0   # save the plots every `save_freq` finished episodes
            score_avg = 0
            end = False
            show_media_info = True         # print shapes / show the frame once, on the first step
            goal = (0.5,0.0)

            for e in range(EPISODES):
                # Episode initialization
                done = False
                score = 0
                loss_list = []
                image = env.reset()
                # NOTE(review): the output recorded with this cell shows one
                # (90, 120, 4) shape among (120, 90, 4) ones shortly before a
                # NumPy-to-Tensor ValueError. Those prints come from code that is
                # not visible here - confirm that ImageFeaturization always
                # returns states of one consistent shape.
                state, is_enough = image_featurization(image)
                while not done:
                    # env.render()
                    # Interact with env.
                    # Explicit comparison kept: the flag's type is decided by
                    # ImageFeaturization, which is not visible from this cell.
                    if is_enough == True:
                        action = agent.get_action(state)
                    else:
                        action = random.randrange(env.env.action_space.n)
                    image, reward, done, info = env.step(action)
                    next_state, is_enough = image_featurization(image)
                    agent.remember(state, action, reward, next_state, done, goal)
                    loss = agent.train_model()
                    agent.update_network(done)
                    state = next_state

                    score += reward
                    loss_list.append(loss)
                    if show_media_info:
                        print("-------------- Variable shapes --------------")
                        print("State Shape : ", np.shape(state))
                        print("Action Shape : ", np.shape(action))
                        print("Reward Shape : ", np.shape(reward))
                        print("done Shape : ", np.shape(done))
                        print("---------------------------------------------")
                        if cfg['ENV']['STATE_TYPE'] == "IMG":
                            plt.imshow(np.squeeze(image,axis=2),cmap='gray')
                            # plt.imshow(state)
                        show_media_info = False
                    if done:
                        # Exponential moving average, seeded with the first non-zero score.
                        score_avg = 0.9 * score_avg + 0.1 * score if score_avg != 0 else score
                        print("episode: {0:3d} | score avg: {1:3.2f} | mem size {2:6d} |"
                            .format(e, score_avg, len(agent.memory)))

                        # episodes.append(e)
                        scores_avg.append(score_avg)
                        scores_raw.append(score)
                        losses.append(np.mean(loss_list))
                        epsilons.append(agent.epsilon)
                        save_idx+=1
                        if save_idx % save_freq == 0:
                            save_statistics()
                        # Stop once the moving-average score exceeds END_SCORE
                        if score_avg > END_SCORE:
                            agent.save_model("")
                            save_statistics()
                            end = True
                            break
                if end == True:
                    print("End")
                    break
        finally:
            # Release the environment on every exit path: early stop, running
            # out of episodes, or an exception raised during training.
            env.close()

Batch list :  (('DQN', 'ER'),)
CartPole-v1_DQN_ER_IMG
States (120, 90, 4), Actions 2
feature  (120, 90, 4)
(120, 90, 4)
(90, 120, 1)
-------------- Variable shapes --------------
State Shape :  (120, 90, 4)
Action Shape :  ()
Reward Shape :  ()
done Shape :  ()
---------------------------------------------
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(90, 120, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
episode:   0 | score avg: 13.00 | mem size     13 |
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
(120, 90, 4)
(90, 120, 1)
episode:   1 | s

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).