In [1]:
from cartpole1 import QLearnCartPoleSolver
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import math

import numpy as np
import gym
import random

class DQNQLearnCartPoleSolver(QLearnCartPoleSolver):
    """CartPole solver that approximates Q-values with a small Keras network.

    Transitions are stored in a bounded replay memory and the network is
    fitted on a random minibatch of them after every training episode.
    Attributes such as ``env``, ``episodes``, ``discount`` and
    ``min_epsilon`` are read here but not assigned; presumably they are set
    by ``QLearnCartPoleSolver.__init__`` -- confirm against ``cartpole1``.
    """

    def __init__(self, env,  episodes, epsilon_decay_rate=0.995):
        """Build the replay memory and the Q-network.

        Args:
            env: Environment forwarded to the base-class constructor.
            episodes: Number of training episodes, forwarded to the base class.
            epsilon_decay_rate: Factor ``self.epsilon`` is multiplied by after
                each replay step; also used inside ``get_epsilon``.
        """
        super().__init__(env, episodes=episodes, min_epsilon=0.001)
        self.memory = deque(maxlen=100000)
        self.epsilon_decay_rate = epsilon_decay_rate
        self.epsilon = 1
        self.batch_size = 64
        self.lr = 0.01
        self.model = Sequential()
        self.model.add(Dense(24, input_dim=4, activation='relu'))
        self.model.add(Dense(48, activation='relu'))
        # The output layer regresses Q-values, so it must be linear: a ReLU
        # here clamps predictions to >= 0 and gives zero gradient for any
        # unit that goes negative.
        self.model.add(Dense(2, activation='linear'))
        # `learning_rate` replaces the deprecated `lr` keyword.
        # NOTE(review): `decay` is kept to preserve the original schedule, but
        # newer Keras optimizers may reject it -- confirm for the installed
        # version.
        self.model.compile(loss='mse', optimizer=Adam(learning_rate=self.lr, decay=self.lr))

    def action(self, state, epsilon=0.0):
        """Pick an action epsilon-greedily.

        Args:
            state: Preprocessed state of shape ``(1, 4)``.
            epsilon: Probability of sampling a random action. Defaults to
                ``0.0`` (purely greedy) so evaluation code can call
                ``action(state)`` without an exploration rate.

        Returns:
            A random sample from the action space, or the index of the
            highest predicted Q-value.
        """
        if np.random.random() <= epsilon:
            return self.env.action_space.sample()
        # verbose=0 keeps per-step inference from printing progress output.
        return np.argmax(self.model.predict(state, verbose=0))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def updated_q_value(self, state, action, reward, new_state):
        """Return the bootstrapped target ``reward + discount * max_a Q(new_state, a)``.

        ``state`` and ``action`` are accepted for signature compatibility but
        are not used in the computation.
        """
        return (reward + self.discount * np.max(self.model.predict(new_state, verbose=0)[0]))

    def get_epsilon(self, t):
        """Return the exploration rate for episode index ``t``.

        The value is the smaller of the multiplicatively decayed
        ``self.epsilon`` and a logarithmic schedule in ``t``, floored at
        ``self.min_epsilon``.
        """
        return max(self.min_epsilon, min(self.epsilon, 1.0 - math.log10((t+1)*self.epsilon_decay_rate)))

    def preprocess_state(self, state):
        """Reshape a raw observation into a ``(1, 4)`` batch for the network."""
        return np.reshape(state, [1, 4])

    def replay(self):
        """Fit the network on a random minibatch and decay ``self.epsilon``.

        Does nothing when the replay memory is empty, since there is nothing
        to fit on.
        """
        if not self.memory:
            return

        x_batch, y_batch = [], []
        minibatch = random.sample(self.memory, min(len(self.memory), self.batch_size))
        for state, action, reward, next_state, done in minibatch:
            # Start from the current predictions so only the taken action's
            # Q-value contributes to the loss.
            y_target = self.model.predict(state, verbose=0)
            y_target[0][action] = reward if done else self.updated_q_value(state, action, reward, next_state)
            x_batch.append(state[0])
            y_batch.append(y_target[0])

        self.model.fit(np.array(x_batch), np.array(y_batch), batch_size=len(x_batch), verbose=0)
        if self.epsilon > self.min_epsilon:
            self.epsilon *= self.epsilon_decay_rate

    def train(self):
        """Run ``self.episodes`` rendered episodes, replaying after each one.

        Each episode's total reward is printed together with the current
        ``self.epsilon``; the environment is closed when training finishes.
        """
        scores = []
        for episode in range(self.episodes):
            state = self.preprocess_state(self.env.reset())
            done = False
            reward_current_ep = 0
            while not done:
                self.env.render()
                action = self.action(state, self.get_epsilon(episode))
                next_state, reward, done, _ = self.env.step(action)
                next_state = self.preprocess_state(next_state)
                self.remember(state, action, reward, next_state, done)
                state = next_state
                reward_current_ep += reward
            scores.append(reward_current_ep)
            print(f"{scores[episode]}  score for ep {episode+1} epsilon {self.epsilon}")
            self.replay()
        print('Finished training!')
        self.env.close()

    def run(self):
        """Play one greedy episode and print its score.

        Transitions seen during the episode are still added to the replay
        memory.
        """
        done = False
        state = self.preprocess_state(self.env.reset())
        score = 0
        while not done:
            # No epsilon is passed: act greedily on the learned Q-values.
            action = self.action(state)
            next_state, reward, done, _ = self.env.step(action)
            next_state = self.preprocess_state(next_state)
            self.remember(state, action, reward, next_state, done)
            state = next_state
            score += reward
        print(f"score {score}")
        # NOTE(review): the environment is left open when the score is 200;
        # preserved as-is -- confirm whether this is intentional.
        if score == 200:
            return
        self.env.close()


# Create the CartPole environment, wrap it in the DQN solver and train it.
env = gym.make('CartPole-v0')
model = DQNQLearnCartPoleSolver(env=env, episodes=100)
model.train()

2021-10-14 12:21:55.775259: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-10-14 12:21:55.775284: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-10-14 12:21:57.030176: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-10-14 12:21:57.030214: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-10-14 12:21:57.030231: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (PCMAN): /proc/driver/nvidia/version does not exist
2021-10-14 12:21:57.030402: I tensorflow/core/platform/cpu_feature_guar

13.0  score for ep 1 epsilon 1


2021-10-14 12:21:57.801501: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


16.0  score for ep 2 epsilon 0.995
13.0  score for ep 3 epsilon 0.990025
16.0  score for ep 4 epsilon 0.985074875
14.0  score for ep 5 epsilon 0.9801495006250001
8.0  score for ep 6 epsilon 0.9752487531218751
9.0  score for ep 7 epsilon 0.9703725093562657


KeyboardInterrupt: 

In [None]:
# Play one episode with the solver built above and print its score.
model.run()