In [1]:
# Import necessary libraries and packages
import numpy as np
import gymnasium as gym
import random

In [2]:
# Load the ROM for Atari Tennis
from ale_py import ALEInterface
from ale_py.roms import Tennis
ale = ALEInterface()
ale.loadROM(Tennis)

In [3]:
# Hyperparameters and training parameters
hyperparameters = {
    "alpha": 0.1,               # Learning Rate
    "epsilon": 1.0,             # Exploration Factor (initial)
    "gamma": 0.99,              # Discount Factor
    "training_episodes": 5,    # Number of training episodes
    "max_steps": 100,           # Maximum steps per episode
}

In [4]:
# Initialize the environment
env = gym.make('ALE/Tennis-v5',full_action_space=True)
env.reset() 

(array([[[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0]],
 
        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0]],
 
        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0]],
 
        ...,
 
        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [ 82, 126,  45],
         [ 82, 126,  45],
         [ 82, 126,  45]],
 
        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [ 82, 126,  45],
         [ 82, 126,  45],
         [ 82, 126,  45]],
 
        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [ 82, 126,  45],
  

In [5]:
actions = env.action_space.n 
height, width, channels = env.observation_space.shape

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D

def build_model(height, width, channels, actions):
    model = Sequential()
    model.add(Conv2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(height,width,channels)))
    model.add(Conv2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Conv2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512,activation='relu'))
    model.add(Dense(256,activation='relu'))
    model.add(Dense(actions,activation='linear'))
    return model

In [7]:
model = build_model(height, width, channels, actions)
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
from tensorflow.keras.optimizers import Adam

class DQNAgent:
    def __init__(self, model, actions, learning_rate=0.1, epsilon_start=1.0, epsilon_min=0.1, epsilon_decay=0.995):
        self.model = model
        self.actions = actions
        self.learning_rate = learning_rate
        self.epsilon = epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.optimizer = Adam()
        self.model.compile(optimizer=self.optimizer, loss='mse')

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.actions)
        else:
            q_values = self.model.predict(state)
            return np.argmax(q_values[0])

    def train(self, state, action, next_state, reward, done):
        target = reward
        if not done:
            target = reward + self.epsilon_min * np.max(self.model.predict(next_state)[0])
        target_f = self.model.predict(state)
        target_f[0][action] = target
        self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

def build_agent(model):
    agent = DQNAgent(model=model, actions=actions)
    return agent

In [9]:
agent = build_agent(model)

In [19]:
from tensorflow.keras.models import save_model

# Training loop
episodes = 1
save_interval = 1
for episode in range(episodes):
    state_tuple = env.reset()
    observation = state_tuple[0]  # Extract the observation from the tuple
    state = np.expand_dims(observation, axis=0)  # Add batch dimension
    total_reward = 0
    done = False
    while not done:
        action = agent.act(state)
        observations = env.step(action)
        next_state = observations[0]
        reward = observations[1]
        done = observations[2]
        next_state = np.expand_dims(next_state, axis=0)  # Add batch dimension
        agent.train(state, action, next_state, reward, done)
        state = next_state
        total_reward += reward
    print(f"Episode: {episode + 1}, Total Reward: {total_reward}")

    # Save the model weights every save_interval episodes
    if (episode + 1) % save_interval == 0:
        model.save("agent_model.keras")

env.close()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18

In [21]:
from tensorflow.keras.models import load_model

# Load the saved model
saved_model_path = "agent_model.keras"  
loaded_model = load_model(saved_model_path)

# Create a new agent with the loaded model
testing_agent = build_agent(loaded_model)

env = gym.make('ALE/Tennis-v5',full_action_space=True, render_mode="human")
env.reset() 

# Testing loop
num_episodes = 1  # Number of episodes for testing
for episode in range(episodes):
    state_tuple = env.reset()
    observation = state_tuple[0]  # Extract the observation from the tuple
    state = np.expand_dims(observation, axis=0)  # Add batch dimension
    total_reward = 0
    done = False
    while not done:
        env.render()
        action = testing_agent.act(state)
        observations = env.step(action)
        next_state = observations[0]
        reward = observations[1]
        done = observations[2]
        next_state = np.expand_dims(next_state, axis=0)  # Add batch dimension
        state = next_state
        total_reward += reward
    print(f"Episode: {episode + 1}, Total Reward: {total_reward}")
env.close()


  saveable.load_own_variables(weights_store.get(inner_path))
  logger.warn(


Episode: 1, Total Reward: -24.0


: 

In [12]:
# Randomized Algorithm (Low Performance) -- For Comparison Purposes

# env = gym.make('ALE/Tennis-v5',full_action_space=True,render_mode='human')
# env.reset() 
# def random_agent(hyperparameters):
#     episodes = hyperparameters["training_episodes"]

#     for episode in range(1, episodes+1):
#         state = env.reset()
#         done = False
#         score = 0 

#         while not done:
#             env.render()
#             action = env.action_space.sample()
#             observations = env.step(action)
#             next_state = observations[0]
#             reward = observations[1]
#             done = observations[2]
#             score+=reward
#         print('Episode:{} Score:{}'.format(episode, score))
#     env.close()

# random_agent(hyperparameters)