In [None]:
!pip install ale
!pip install atari-py
!pip install gym[atari]==0.15.7
!pip install autorom
!pip install autorom[accept-rom-license]
!pip install keras
!pip install keras-rl2
!pip install tensorflow-gpu
!pip install tensorflow

In [None]:
#from rl.agents import DQNAgent
#from rl.memory import SequentialMemory
#from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

#dqn.load_weights('SavedWeights/5h-Fast/dqn_weights.h5f')
#scores = dqn.test(env, nb_episodes=1, visualize=True)
#print(np.mean(scores.history['episode_reward']))

In [None]:
#NON LOOPED CELL
import tensorflow as tf
import gym
import random
import sys
import os
import numpy as np 
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

# Report the GPUs TensorFlow can see. The device list is filtered by the
# 'GPU' device type: without a filter the call also returns CPU devices, so
# the "Num GPUs Available" figure would be non-zero on a CPU-only machine.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# Query the default GPU name once and reuse it for both the test and the message.
gpu_name = tf.test.gpu_device_name()
if gpu_name:
    print('Default GPU Device:{}'.format(gpu_name))
else:
    print("Please install GPU version of TF")

def build_model(height, width, channels, actions, window_length=3):
    """Build the convolutional Q-network used by the DQN agent.

    The network is three strided convolution layers, a flatten, two dense
    layers (512 and 256 units) and a linear output layer with one unit per
    action.

    Args:
        height: Height of a single observation frame, in pixels.
        width: Width of a single observation frame, in pixels.
        channels: Number of colour channels in a single frame.
        actions: Number of discrete actions; sets the size of the output layer.
        window_length: Number of consecutive frames that make up one network
            input. It is the leading dimension of ``input_shape`` and must be
            the same value as the ``window_length`` of the agent's replay
            memory. Defaults to 3, the value that was previously hard-coded.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential()
    # Input is a stack of `window_length` frames, each (height, width, channels).
    frame_stack_shape = (window_length, height, width, channels)
    model.add(Convolution2D(32, (8, 8), strides=(4, 4), activation='relu',
                            input_shape=frame_stack_shape))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    # Linear output: one unbounded Q-value estimate per action.
    model.add(Dense(actions, activation='linear'))
    return model



def build_agent(model, actions, window_length=3, memory_limit=1000,
                nb_steps_warmup=1000):
    """Assemble a dueling DQN agent around ``model``.

    Exploration uses an epsilon-greedy policy whose ``eps`` is annealed
    linearly from 1.0 to 0.1 over 1000 steps (0.2 is used during testing).

    Args:
        model: The Q-network to wrap.
        actions: Number of discrete actions available to the agent.
        window_length: Number of consecutive observations stacked into one
            state by the replay memory. Must match the leading dimension of
            the model's input shape. Defaults to 3.
        memory_limit: Maximum number of transitions kept in replay memory.
            Defaults to 1000.
        nb_steps_warmup: Number of steps collected before gradient updates
            start. It must be smaller than the ``nb_steps`` passed to
            ``fit`` or the network is never updated. The previous hard-coded
            value (10000) equalled the training length used in this notebook
            and exceeded the replay capacity, so no learning took place; the
            default is now 1000.

    Returns:
        An uncompiled ``DQNAgent``.
    """
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=1000,
    )
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=nb_steps_warmup,
    )
    return dqn



# --- Run configuration ------------------------------------------------------
ENV_NAME = 'Freeway-v0'
RUN_TAG = 'freeway'          # inserted into every output file name
NB_TRAIN_STEPS = 10000
LEARNING_RATE = 1e-4         # was 1, a step size at which Adam diverges
# Snapshot interval in steps. The interval of 100 set originally would write
# 100 complete copies of the network weights during a 10,000-step run.
CHECKPOINT_INTERVAL = 2500
LOG_INTERVAL = 10

# --- Environment and Q-network ----------------------------------------------
env = gym.make(ENV_NAME)
height, width, channels = env.observation_space.shape

actions = env.action_space.n
model = build_model(height, width, channels, actions)

# --- Output files and callbacks ---------------------------------------------
# The '{}' placeholders are now filled in; previously they were left in the
# file names verbatim. '{step}' is intentionally left for the checkpoint
# callback to substitute.
weights_filename = 'dqn_{}_weights.h5f'.format(RUN_TAG)
checkpoint_weights_filename = 'dqn_' + RUN_TAG + '_weights_{step}.h5f'
log_filename = 'dqn_{}_log.json'.format(RUN_TAG)
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=CHECKPOINT_INTERVAL),
    FileLogger(log_filename, interval=LOG_INTERVAL),
]

# --- Agent ------------------------------------------------------------------
dqn = build_agent(model, actions)
dqn.compile(optimizer=Adam(learning_rate=LEARNING_RATE), metrics=["mae"])

# Gradient updates only begin once the step counter passes the agent's
# warm-up, so a run that is not longer than the warm-up trains nothing.
warmup_steps = getattr(dqn, 'nb_steps_warmup', 0)
if NB_TRAIN_STEPS <= warmup_steps:
    print('Warning: nb_steps ({}) does not exceed nb_steps_warmup ({}); '
          'no gradient updates will be performed.'.format(NB_TRAIN_STEPS, warmup_steps))

# --- Train, then watch one rendered episode ---------------------------------
# The callbacks are now handed to fit(); before, they were built but unused.
his = dqn.fit(env, nb_steps=NB_TRAIN_STEPS, callbacks=callbacks,
              visualize=False, verbose=1)

scores = dqn.test(env, nb_episodes=1, visualize=True)

import pandas as pd
import matplotlib.pyplot as plt

#dqn.save_weights('SavedWeights/50-Fast/dqn_weights.h5f')

In [None]:
# Persist the trained weights. The nested target directory is created first
# so the save does not depend on it having been made by hand beforehand.
weights_path = 'SavedWeights/1000-10000-11000-lr1e-19Fast/dqn_weights.h5f'
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
dqn.save_weights(weights_path)

In [None]:
# Evaluate the agent over ten rendered episodes; the returned history object
# is kept in `scores`.
scores = dqn.test(
    env,
    visualize=True,
    nb_episodes=10,
)

In [None]:

# Overview: every series recorded in the training history on one set of axes.
fig, ax = plt.subplots(figsize=(8, 3))
pd.DataFrame(his.history).plot(ax=ax)
ax.grid(True)
ax.set_ylim(0, 3)  # clamp the vertical range to [0, 3]
plt.show()

# Episode reward against the cumulative training step at which it was logged.
fig, ax = plt.subplots()
ax.plot(his.history['nb_steps'], his.history['episode_reward'])
ax.set_ylabel('Reward')
ax.set_xlabel('Step')
ax.set_title("Reward/Step")  # was "Loss/Step", but the y-axis is reward
plt.show()
