<a href="https://colab.research.google.com/github/Federico6419/ProjectNeuralNetworks/blob/main/MachineLearningProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install libraries


In [6]:
!pip install gymnasium
!pip install swig     #This solves the errori in the installation of gymnasium[box2d]
!pip install gymnasium[box2d]
!pip install gym-notebook-wrapper   #This installs Gym-Notebook-Wrapper, that provides small wrappers for running and rendering OpenAI Gym

#To solve the xvfb missing file problem
!sudo apt-get install xvfb
!pip install xvfbwrapper

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.


## Import libraries

In [7]:
!git clone https://github.com/Federico6419/MachineLearningProject          #It clones my github repository
%cd MachineLearningProject

import gymnasium as gym
import gnwrapper
import torch

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import config
from model import Model

from collections import deque

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Cloning into 'MachineLearningProject'...
remote: Enumerating objects: 31, done.[K
remote: Counting objects: 100% (31/31), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 31 (delta 11), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (31/31), 46.16 KiB | 3.30 MiB/s, done.
Resolving deltas: 100% (11/11), done.
/content/MachineLearningProject/MachineLearningProject/MachineLearningProject


# Instantiate environment

In [None]:
env = gym.make("CarRacing-v2", render_mode = "human")
env.action_space.seed(42)


# F

In [8]:
episode_reward = 0
buffer = deque([], config.BUFFER_SIZE)             #Initialize the Queue that contains the past experience
epsilon = config.MAX_EPSILON
alpha = config.ALPHA
decay = config.EPSILON_DECAY

#For the plotting
cum_reward_table = np.zeros(config.NUM_EPISODES)
cum_reward_nn = np.zeros(config.NUM_EPISODES)

#Initialize the Model
model = Model().to(config.DEVICE)

#Initialize the Target Model
target_model = Model().to(config.DEVICE)

optimizer = optim.Adam(model.parameters(), lr=config.LR)
optimizer_target = optim.Adam(target_model.parameters(), lr=config.LR)

huber_loss=nn.HuberLoss(delta=1.0)

# Define the policy to know how chose the action
#Q-Table
def select_action(state, epsilon):
    rv = random.uniform(0, 1)
    if rv < epsilon:
        return env.action_space.sample()
    else:
        return np.argmax(Q[state])

#Neural Network
def select_action_nn(state, epsilon):
    rv = random.uniform(0, 1)
    if rv < epsilon:
        return env.action_space.sample()
    else:
        prediction = model(torch.from_numpy(state)).detach().numpy
        action=np.argmax(prediction) # Select action with max predicted Q-value
        return action


## update the epsilon value along the iteration until converges to MIN_EPSILON
def update_epsilon(epsilon):
    epsilon -= epsilon/100 # reduce epsilon by 1/100
    if epsilon<=config.MIN_EPSILON:
        return config.MIN_EPSILON
    else:
        return epsilon

## update the epsilon every episode by epsilon decay variable
def update_epsilon_nn(epsilon):
    epsilon *= decay
    if epsilon<=config.MIN_EPSILON:
        return config.MIN_EPSILON
    else:
        return epsilon


env = gym.make("CarRacing-v2", render_mode="human")


if(config.use_qtable):
    # define the Q table
    #Q = np.zeros([27684, env.action_space.n]) # little discretization
    Q = np.zeros([19051200, env.action_space.n]) #big discretization

###see the limit of the values of the box observation space
#print(env.observation_space.high)
#print(env.observation_space.low)

###see in more detail the action space and the observation space
#print(env.action_space)
#print(env.observation_space)


if(config.use_qtable): # use a q table to reach the goal
    for i in range(config.NUM_EPISODES):
        observation, info = env.reset()# use seed to have same initial state
        #state = config.discretize(observation)
        state = config.big_discretize(observation)
        for j in range(500):
            action = select_action(state,epsilon)
            obv, reward, done, truncated, info = env.step(action)
            #next_state = config.discretize(obv)
            next_state = config.big_discretize(obv)

            next_max = np.max(Q[next_state])

            Q[state,action] += alpha*(reward+config.GAMMA*next_max-Q[state,action])
            state = next_state

            episode_reward += reward

            if done or truncated:
                break

        print("episode: ", i)
        print("episode cumulative reward : ", episode_reward)
        print("epsilon: ",epsilon)
        epsilon = update_epsilon(epsilon)
        cum_reward_table[i]=episode_reward
        episode_reward = 0 #reset the total reward each episode

    #save the q table for testing
    #np.savetxt('q_table.csv', Q, delimiter=','fmt='%f18')
    #np.savetxt('q_table_little_discretization2000.csv', Q, delimiter=',') # full precision
    np.savetxt('q_table_big_discretization1000.csv', Q, delimiter=',') # full precision

else: #use a nn to approximate the q function
#Use a Neural Network to approximate the q function
    for i in range(config.NUM_EPISODES):
        state, info = env.reset()
        for j in range(500):
            action = select_action_nn(state, epsilon)
            next_state, reward, done, truncated, info = env.step(action)

            episode_reward += reward

            #Remove the oldest item if the queue is full so can add new one
            if len(buffer)>=config.BUFFER_SIZE:
                buffer.popleft() # dequeue oldest item


            buffer.append([*state,action,reward,*next_state,done])

            state = next_state # update current state

            if done or truncated:

                # train NN every 4 episodes and if buffer has at least BATCH_SIZE tuple
                if len(buffer) >= config.BATCH_SIZE and ((i+1) % 4 == 0):
                    batch = random.sample(buffer, config.BATCH_SIZE)
                    dataset = np.array(batch)
                    states = torch.from_numpy((dataset[:,:8]).astype('float32'))
                    actions = torch.from_numpy(dataset[:,8:9].astype('int64'))
                    rewards = torch.from_numpy(dataset[:,9:10].astype('float32'))
                    next_states = torch.from_numpy((dataset[:,10:18]).astype('float32'))
                    dones = torch.from_numpy(dataset[:,18:19].astype('float32'))

                    #-------vanilla dqn------------#

                    """# Find next best action so can compute the next reward for the target
                    #predictions_next = target_model(next_states).detach().max(1)[0].unsqueeze(1)
                    #next_actions=np.argmax(predictions_next) # Select action with max Q-value

                    #Compute corresponding (predicted) reward of next state
                    #next_rewards = predictions_next[next_actions]
                    next_rewards = target_model(next_states).detach().max(1)[0].unsqueeze(1)
                    #-------------------------------#"""
                    #---------double dqn-------------#

                    # Find next best action using model network
                    predictions_next = model(next_states).detach().numpy()
                    next_actions = np.argmax(predictions_next,axis=1) # Select action with max Q-value
                    next_actions =  next_actions[..., np.newaxis]

                    #evaluate Q(s',a') founded by model using the target network
                    next_rewards = target_model(next_states).gather(1, torch.from_numpy(next_actions))
                    #next_rewards = torch.from_numpy(evaluations[next_actions])

                    #-------------------------------#

                    targets = rewards + config.GAMMA_NN*next_rewards*(1-dones)

                    #compute the predicted value of the model(output)
                    output=model(states).gather(1, actions)
                    #compute the huber loss
                    loss = huber_loss(output, targets)
                    #Train network
                    optimizer.zero_grad()#clear existing gradient
                    loss.backward() #backpropagate the error
                    optimizer.step() # update weights
                    #save the weight of the network
                    config.save_model(model,optimizer,i+1)
                    print("Save weigths in: "+ config.CHECKPOINT)
                    epsilon = update_epsilon_nn(epsilon)

                #update weights of target network every 10 episodes
                if  (i+1) % config.TARGET_FREQ_UPDATE == 0:
                    print("Target network updated")
                    config.load_model(config.CHECKPOINT,target_model,optimizer_target)

                print("episode ", i)
                print("episode cumulative reward: ", episode_reward)
                print("current epsilon: ", epsilon)
                print("#---------------------------------------------#")
                break

        cum_reward_nn[i]=episode_reward
        episode_reward = 0


env.close()

NameError: ignored

## Example

In [None]:
env = gnwrapper.Animation(CarRacing())
env = CarRacing(render_mode="rgb_array")
#env = CarRacing(render_mode="state_pixels")
#env = gym.make("CarRacing-v2", domain_randomize=True, render_mode="state_pixels")

env.reset()
#env.render()
#im = env.render()
#im = env.render("state_pixels")

#plt.imshow(im)

for i in range(40):
  env.step(action=[-0.3,1,0])
  im = env.render()
  plt.imshow(im)

"""
def state_image_preprocess(state_image):
    state_image = state_image.transpose((2,0,1))
    state_image = np.ascontiguousarray(state_image, dtype=np.float32) / 255
    state_image = torch.from_numpy(state_image)
    return state_image.unsqueeze(0).to(device)

state_image_preprocess(im).shape
plt.imshow(state_image_preprocess(im).cpu().squeeze(0).permute(1, 2, 0).numpy())
"""

  and should_run_async(code)


NameError: ignored