<a href="https://colab.research.google.com/github/Federico6419/MachineLearningProject/blob/main/Testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Environment setup: install the simulation packages and the virtual display tooling (versions are not pinned here)
!pip install gymnasium              #Install Gymnasium
!pip install swig                   #Installed before gymnasium[box2d] because it solves an error in that installation
!pip install gymnasium[box2d]       #Install the Box2D extra of Gymnasium
!pip install gym-notebook-wrapper   #Install Gym-Notebook-Wrapper, which provides small wrappers for running and rendering OpenAI Gym

#Work around the xvfb missing file problem
!sudo apt-get install xvfb
!pip install xvfbwrapper


Collecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1
Collecting swig
  Downloading swig-4.1.1.post1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: swig
Successfully installed swig-4.1.1.post1
Collecting box2d-py==2.3.5 (from gymnasium[box2d])
  Downloading box2d-py-2.3.5.tar.gz (374 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing

In [2]:
#Fetch the project sources and move into the checkout
#(presumably so that the project-local modules and the relative data paths used by later cells resolve - confirm)
!git clone https://github.com/Federico6419/MachineLearningProject               #Clone the project's GitHub repository
%cd MachineLearningProject

Cloning into 'MachineLearningProject'...
remote: Enumerating objects: 449, done.[K
remote: Counting objects: 100% (40/40), done.[K
remote: Compressing objects: 100% (33/33), done.[K
remote: Total 449 (delta 19), reused 6 (delta 6), pack-reused 409[K
Receiving objects: 100% (449/449), 4.67 MiB | 9.37 MiB/s, done.
Resolving deltas: 100% (268/268), done.
/content/MachineLearningProject


In [18]:
import gym
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch import Tensor
from collections import deque
import config_test
from model import Model
import matplotlib.pyplot as plt
import cv2
import csv

#Running average of the episode rewards, one array per policy and one slot per test episode
average_random, average_QTable, average_nn = (
    np.zeros(config_test.NUM_EPISODES) for _ in range(3)
)

#Cumulative reward collected in each episode, one array per policy
cum_reward_random, cum_reward_QTable, cum_reward_nn = (
    np.zeros(config_test.NUM_EPISODES) for _ in range(3)
)

#Action Space: the 12 combinations of three components, generated so that the
#second component changes slowest, then the third, and the first changes fastest
#(presumably the components are steering, gas and brake - confirm against the environment)
action_space = [
    (first, second, third)
    for second in (1, 0)
    for third in (0.2, 0)
    for first in (-1, 0, 1)
]

#This is the function that discretizes the image
def discretize_image(image):
    """Turn a grayscale image tensor into a compact string identifier.

    The image is downsampled with an 8x8 average pooling (stride 8) and each
    pooled value is quantized into one of eight levels, written as the digits
    "0" to "7". The digits follow a leading "S" in row-major order.

    Args:
        image: 2-D float torch tensor (rows, columns). It must be convertible
            with .numpy(), so it is expected to live on the CPU.

    Returns:
        The identifier string, e.g. "S0164" for a 16x16 image.
    """
    #Exclusive upper bounds of the eight levels: a pooled value gets the index
    #of the first bound it is below (< 30 -> "0", < 60 -> "1", ..., < 256 -> "7")
    level_bounds = (30, 60, 90, 120, 150, 180, 210, 256)

    #Execute the 8x8 Average Pooling; the temporary leading dimension is the
    #channel dimension that AvgPool2d expects
    pooled = nn.AvgPool2d(8, stride=8)(image.unsqueeze(0)).squeeze(0)

    #Create the image identifier, collecting the pieces and joining them once
    pieces = ["S"]
    for row in pooled.numpy():
        for value in row:
            for level, bound in enumerate(level_bounds):
                if value < bound:
                    pieces.append(str(level))
                    break
            #A value that is below no bound (256 or more, or NaN) contributes no digit

    return "".join(pieces)


#Build the Model on the configured device and restore the checkpoint
#(the optimizer exists because load_model takes it as an argument)
model = Model().to(config_test.DEVICE)
optimizer = optim.Adam(model.parameters(), lr=config_test.LR)
config_test.load_model(config_test.CHECKPOINT_FOLDER, model, optimizer)

#Load the Q-Table csv file column by column: every name found in the first
#row becomes a key of Q_Table, mapped to the list of cells found below it
Q_Table = {}
with open('QTable/QTable.csv', 'r') as csvfile:
    csvreader = csv.reader(csvfile)
    headers = next(csvreader)  #Read the first row as the header, i.e. the dictionary keys
    Q_Table.update((name, []) for name in headers)  #Initialize an empty list for every column

    for row in csvreader:
        for position, cell in enumerate(row):
            #Cells are appended exactly as read from the file (strings, no numeric conversion)
            Q_Table[headers[position]].append(cell)


#Policies
def policy(p, state):
    """Select an action from action_space according to the policy named p.

    Args:
        p: Name of the policy: "Random", "Q-Table" or "nn".
        state: Current state. It is not used by "Random"; for "Q-Table" it is
            the key looked up in Q_Table; for "nn" it is a numpy array that is
            cast to float32 and passed to the model.

    Returns:
        An entry of action_space. With "Q-Table", (0, 1, 0) is returned when
        the state is not in the Q-Table. None is returned if p is not one of
        the names above.
    """
    if(p == "Random"):
        return action_space[random.randrange(len(action_space))]          #Sample a random action from the Action Space

    elif(p == "Q-Table"):
        if state in Q_Table:
            #The Q-Table is read from a csv file, so its cells are strings: convert them
            #to floats, otherwise argmax ranks them lexicographically ("9.0" > "10.0")
            q_values = np.asarray(Q_Table[state], dtype=float)
            return action_space[np.argmax(q_values)]                      #Return the action with the highest value for the current state in the Q-Table
        else:
            return (0, 1, 0)                                              #Default action for states that are not in the Q-Table

    elif(p == "nn"):
        model_input = torch.from_numpy(state.astype('float32')).to(config_test.DEVICE)
        prediction = model(model_input).detach().cpu().numpy()
        action = action_space[np.argmax(prediction)]              #Select the action with the maximum predicted Q-Value
        return action


#Initialize the Car Racing Environment; render_mode="human" requests on-screen rendering
#NOTE(review): depending on the installed gym version, env.reset() may return an
#(observation, info) tuple instead of the observation alone - confirm
env = gym.make("CarRacing-v2", render_mode="human")
#Initial reset; test() resets the environment again at the start of every episode
observation = env.reset()

#Testing
def test(strategy):
    c_reward = 0                #Cumulative reward
    total_reward = 0
    episode = 0
    for i in range(config_test.NUM_EPISODES):
            observation = env.reset()

            observation = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)           #Convert the state into a Grayscale Image, that is a Matrix 96x96 composed by Integer values

            if(strategy == "Neural Network policy"):
                frames_queue = deque([observation]*3, maxlen = 3)
            elif(strategy == "Q-Table policy"):
                #Discretize the image
                #observation = torch.from_numpy(observation.astype('float32'))
                #observation = discretize_image(observation)
                print("Q-TABLE POLICY")
            else:
                print("RANDOM POLICY")

            for _ in range(250):
                env.render()

                if(strategy == "Random policy"):
                    action = policy("Random", observation)
                elif(strategy == "Q-Table policy"):
                    observation = torch.from_numpy(observation.astype('float32'))
                    observation = discretize_image(observation)
                    action = policy("Q-Table", observation)
                elif(strategy == "Neural Network policy"):
                    current_frame = np.array(frames_queue)
                    action = policy("nn", current_frame)

                observation, reward, done, truncated = env.step(action)
                c_reward += reward

                observation = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)

                if(strategy == "Neural Network policy"):
                    frames_queue.append(observation)

                if done or truncated:
                    total_reward += c_reward
                    break

            if(strategy == "Random policy"):
                cum_reward_random[episode] = c_reward # save the cum_reward for the relative episode
                average_random[episode]=total_reward/(episode+1)
            elif(strategy == "Q-Table policy"):
                cum_reward_QTable[episode] = c_reward
                average_QTable[episode]=total_reward/(episode+1)
            elif(strategy == "Neural Network policy"):
                cum_reward_nn[episode] = c_reward
                average_nn[episode]=total_reward/(episode+1)

            print("The cumulative reward is:",c_reward)
            c_reward = 0 # reset the current cumulative reward
            print("Episode: ",episode)
            episode += +1

    print(f"Tests for {strategy} finished after {config_test.NUM_EPISODES} episodes")



#Evaluate the three policies one after the other; the environment is closed
#even when a run raises or is interrupted (e.g. KeyboardInterrupt)
try:
    test("Random policy")
    test("Q-Table policy")
    test("Neural Network policy")
finally:
    env.close()

Q-TABLE POLICY
The cumulative reward is: 66.24087591240945
Episode:  0
Q-TABLE POLICY
The cumulative reward is: 9.24657534246563
Episode:  1
Q-TABLE POLICY
The cumulative reward is: 27.287581699346156
Episode:  2
Q-TABLE POLICY
The cumulative reward is: 5.821917808219094
Episode:  3
Q-TABLE POLICY
The cumulative reward is: 10.84229390680985
Episode:  4
Q-TABLE POLICY
The cumulative reward is: 13.461538461538225
Episode:  5
Q-TABLE POLICY
The cumulative reward is: 9.013605442176711
Episode:  6
Q-TABLE POLICY
The cumulative reward is: 6.802120141342645
Episode:  7
Q-TABLE POLICY
The cumulative reward is: -1.4705882352940691
Episode:  8
Q-TABLE POLICY
The cumulative reward is: 13.062283737023993
Episode:  9
Tests for Q-Table policy finished after 10 episodes
The cumulative reward is: 101.24584717608006
Episode:  0
The cumulative reward is: 163.15331010453028
Episode:  1
The cumulative reward is: 149.82517482517545
Episode:  2
The cumulative reward is: 156.81818181818247
Episode:  3
The cu

KeyboardInterrupt: ignored