In [39]:
import pygame
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import sys
import os
# sys.path.append('~/Desktop/Tufts-CS/CS138/hw02')
from track import build_track
from agent import Agent
from environment import Environment
# ---- Rendering configuration ----
GRID_SIZE = 32   # cells along each side of the grid iterated by draw_grid
CELL_SIZE = 20   # side length of one drawn cell
GRID_WIDTH = GRID_HEIGHT = GRID_SIZE * CELL_SIZE   # window size handed to pygame
FPS = 60         # frame-rate cap passed to clock.tick()

# ---- Colors (RGB tuples) ----
BLACK, WHITE = (0, 0, 0), (255, 255, 255)
TRACK_COLOR = (120, 120, 120)
GRAVEL_COLOR = (255, 255, 255)
START_COLOR, FIN_COLOR = (0, 255, 0), (255, 0, 0)
CAR_COLOR = (0, 0, 255)

# ---- Cell codes that draw_grid compares grid entries against ----
GRAVEL, TRACK, START, FINISH = -1, 0, 1, 2

# NOTE(review): not referenced by any visible cell; presumably the size of the
# agent's action set -- confirm against agent.py.
num_action = 9
# Build the track grid; it is handed to Environment(...) by the training cells
# and is the grid argument expected by display().
track = build_track()

In [47]:
##train on-policy
# Train one fresh Agent per exploration rate and collect whatever mc_control
# returns in `res`, in the same order as `epsilons`.
# presumably mc_control returns the per-episode lengths of the 1000 training
# episodes (the plotting cells draw each entry against 1000 x-values) -- confirm.
race_track = Environment(track)
epsilons = [0.1,0.15,0.2,0.25]
res = []
for e in epsilons:
    agent1 = Agent(epsilon = e)
    episode_len1 = agent1.mc_control(race_track, 1000, on_policy=True)
    res.append(episode_len1)
# After the loop `agent1` is the agent trained with the last epsilon (0.25).

In [51]:
##train off-policy
# The off-policy run gets its own fresh agent, bound to `agent`:
#   * the rollout cell reads `agent` for off-policy episodes and `agent1` for
#     on-policy ones, so `agent1` must not be retrained here;
#   * the comparison plot reads `res1[0]` and labels it epsilon=0.1, so the
#     result has to be stored in `res1` and trained with epsilons[0].
race_track = Environment(track)
epsilons = [0.1,0.15,0.2,0.25]
agent = Agent(epsilon=epsilons[0])
episode_len1 = agent.mc_control(race_track, 1000, on_policy=False)
res1 = [episode_len1]

In [53]:
# Learning curves of the on-policy runs, one line per epsilon.
plt.figure(figsize = (15,10))
# 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in Matplotlib 3.6 and
# the old name was later removed; use whichever name this installation provides.
darkgrid_style = 'seaborn-v0_8-darkgrid' if 'seaborn-v0_8-darkgrid' in plt.style.available else 'seaborn-darkgrid'
plt.style.use(darkgrid_style)
palette = plt.get_cmap('Set1')
# Iterate over the trained epsilons (the original looped over an undefined `gammas`).
for i, (eps, lengths) in enumerate(zip(epsilons, res)):
    # Episodes are numbered from 1: x = 0 cannot be placed on a log axis and
    # would silently drop the first episode from the curve.
    plt.semilogx(
        np.arange(1, len(lengths) + 1),
        lengths,
        color=palette(i + 1),
        linewidth=2.5,
        alpha=0.9,
        label=f'on-policy MC control (epsilon={eps})',
    )
plt.ylim(0, 3000)
plt.xlabel('Episode', fontsize=16)
plt.ylabel('Episode Length', fontsize=16)
plt.title('Episode Lengths over Episodes', fontsize=18)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(fontsize=14)
plt.grid(True)
plt.savefig("op_t2.png")

In [44]:
# Off-policy vs. on-policy learning curves for epsilon = 0.1.
plt.figure(figsize = (15,10))
# 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in Matplotlib 3.6 and
# the old name was later removed; use whichever name this installation provides.
darkgrid_style = 'seaborn-v0_8-darkgrid' if 'seaborn-v0_8-darkgrid' in plt.style.available else 'seaborn-darkgrid'
plt.style.use(darkgrid_style)
palette = plt.get_cmap('Set1')
# Episodes are numbered from 1: x = 0 cannot be placed on a log axis and would
# silently drop the first episode from each curve.
plt.semilogx(
    np.arange(1, len(res1[0]) + 1),
    res1[0],
    color=palette(2),
    linewidth=1.5,
    alpha=0.9,
    label='off-policy MC control(epsilon=0.1)',
)
plt.semilogx(
    np.arange(1, len(res[0]) + 1),
    res[0],
    color=palette(3),
    linewidth=1.5,
    alpha=0.9,
    label='on-policy MC control(epsilon=0.1)',
)

plt.ylim(0, 3000)
plt.xlabel('Episode', fontsize=16)
plt.ylabel('Episode Length', fontsize=16)
plt.title('Episode Lengths over Episodes', fontsize=18)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(fontsize=14)
plt.grid(True)
plt.savefig("op_t3.png")

In [None]:
## Helpers to visualize a trajectory
def draw_grid(screen,grid, path):
    """Paint the track grid and the visited path onto ``screen``.

    START and FINISH cells are filled solid with their colors. Every cell is
    then outlined (line width 1) with the color chosen for its cell code; a
    cell whose code matches none of the known constants is outlined with the
    color left over from the previously processed cell. Finally each state in
    ``path`` is filled with CAR_COLOR, reading ``state[0]`` as the row and
    ``state[1]`` as the column.

    Args:
        screen: pygame surface to draw on.
        grid: 2-D structure indexed as ``grid[row][col]`` holding cell codes.
        path: iterable of states whose first two entries are (row, col).
    """
    for col in range(GRID_SIZE):
        for row in range(GRID_SIZE):
            cell = grid[row][col]
            # pygame rects are (left, top, width, height): column maps to x, row to y.
            cell_rect = (col * CELL_SIZE, row * CELL_SIZE, CELL_SIZE, CELL_SIZE)

            # Special cells get a solid fill first (width 0 = filled).
            if cell == START:
                color = START_COLOR
                pygame.draw.rect(screen, color, cell_rect, 0)
            elif cell == FINISH:
                color = FIN_COLOR
                pygame.draw.rect(screen, color, cell_rect, 0)

            # Ordinary cells only choose an outline color.
            if cell == TRACK:
                color = TRACK_COLOR
            elif cell == GRAVEL:
                color = GRAVEL_COLOR
            pygame.draw.rect(screen, color, cell_rect, 1)

    # Overlay the car's visited cells.
    for state in path:
        car_rect = (state[1] * CELL_SIZE, state[0] * CELL_SIZE, CELL_SIZE, CELL_SIZE)
        pygame.draw.rect(screen, CAR_COLOR, car_rect, 0)

def display(track,path, index):
    """Render one frame of the track with ``path`` and save it as a PNG.

    The frame is written to ``screenshots/screenshot{index}.png``; the
    directory is created when missing. Exactly one frame is drawn, so no
    render loop or frame-rate limiting is needed.

    NOTE(review): inside a notebook this function shadows IPython's built-in
    ``display``; the name is kept so existing callers continue to work.

    Args:
        track: grid passed through to draw_grid.
        path: sequence of states passed through to draw_grid.
        index: value interpolated into the screenshot file name.
    """
    screenshot_dir = "screenshots"  # Directory to save screenshots
    screenshot_name = f"screenshot{index}.png"  # Name of the screenshot file

    pygame.init()
    try:
        screen = pygame.display.set_mode((GRID_WIDTH, GRID_HEIGHT))
        pygame.display.set_caption("Race Track")

        # Drain pending window events once so the window system stays responsive.
        pygame.event.get()

        screen.fill(WHITE)
        draw_grid(screen, track, path)
        pygame.display.flip()

        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(screenshot_dir, exist_ok=True)
        pygame.image.save(screen, os.path.join(screenshot_dir, screenshot_name))
    finally:
        # Always shut pygame down, even if drawing or saving failed.
        pygame.quit()

In [None]:
## Generate a trajectory by following the off-policy agent's target policy
def off_generate_tragectory(agent, env, verbose=False):
    """Roll out one episode by following ``agent.target_policy``.

    The agent's ``state`` and ``speed`` attributes are reset from
    ``env.reset()`` and updated in place after every step. Each step costs -1
    regardless of the reward value returned by the environment. Whenever a
    step lands on a state contained in ``env.start_set`` the recorded path is
    cleared, so the returned path only covers the final run from a start
    state; the accumulated reward is NOT cleared.

    NOTE(review): the loop only ends when the environment reports termination;
    a target policy that never reaches a terminal state loops forever.

    Args:
        agent: object exposing ``target_policy`` (state -> action index),
            ``actions`` (indexable by action index), ``state`` and ``speed``.
        env: object exposing ``reset()``, ``take_action(state, speed, action)``
            and ``start_set``.
        verbose: when True, print the chosen action and the resulting
            state/speed at every step. Off by default so that running many
            rollouts does not flood the notebook output.

    Returns:
        (episode_states, total_reward): the recorded states, including the
        terminal one, and minus the number of steps taken.
    """
    agent.state, agent.speed = env.reset()
    episode_states = []
    total_reward = 0

    # Generate an episode using target policy
    while True:
        total_reward += -1
        episode_states.append(agent.state)
        action = agent.target_policy[agent.state]
        if verbose:
            print(action, agent.actions[action])
        _, terminated, new_state, new_speed = env.take_action(agent.state, agent.speed, agent.actions[action])
        if verbose:
            print(new_state, new_speed)
        # Landing on a start state means the run restarted: drop the old path.
        if new_state in env.start_set:
            episode_states = []
        agent.state = new_state
        agent.speed = new_speed

        if terminated:
            episode_states.append(agent.state)
            break
    return episode_states, total_reward

In [None]:
## Generate a trajectory by sampling from the on-policy agent's soft policy
def on_generate_tragectory(agent, env):
    """Roll out one episode by sampling actions from the agent's soft policy.

    At every step the action probabilities come from
    ``agent.soft_policy(agent.Q[agent.state])`` and an action index is drawn
    with ``np.random.choice``. ``agent.state`` and ``agent.speed`` are reset
    from ``env.reset()`` and updated in place. Each step costs -1. Whenever a
    step lands on a state in ``env.start_set`` the recorded path is cleared
    (the accumulated cost is kept).

    Args:
        agent: object exposing ``Q``, ``soft_policy``, ``num_actions``,
            ``actions``, ``state`` and ``speed``.
        env: object exposing ``reset()``, ``take_action(state, speed, action)``
            and ``start_set``.

    Returns:
        (visited, total_r): the recorded states, including the terminal one,
        and minus the number of steps taken.
    """
    agent.state, agent.speed = env.reset()
    visited = []
    total_r = 0

    finished = False
    while not finished:
        total_r -= 1
        visited.append(agent.state)

        # Sample an action index from the soft policy for the current state.
        action_probs = agent.soft_policy(agent.Q[agent.state])
        chosen = np.random.choice(np.arange(agent.num_actions), p=action_probs)

        _, finished, next_state, next_speed = env.take_action(
            agent.state, agent.speed, agent.actions[chosen]
        )

        # Back on a start state: forget the path recorded so far.
        if next_state in env.start_set:
            visited = []

        agent.state, agent.speed = next_state, next_speed

    # Record the terminal state as the last entry of the path.
    visited.append(agent.state)
    return visited, total_r



In [None]:
# Evaluate both trained agents over 1000 rollouts each and record the total
# reward of every episode.
# `agent` is the off-policy-trained Agent and `agent1` the on-policy one; both
# must already be bound by the training cells before this cell runs.
r_list = []
for i in range(1000):
    path, total_r = off_generate_tragectory(agent, race_track)
    r_list.append(total_r)
    # display(track, path, i)

r_list1 = []
for i in range(1000):
    # The on-policy generator is named on_generate_tragectory (the original
    # call to `generate_tragectory` referred to a function that does not exist).
    path, total_r = on_generate_tragectory(agent1, race_track)
    r_list1.append(total_r)
    # display(track, path, i)

In [None]:
##plot average reward for off-policy
plt.figure(figsize = (15,10))
# 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in Matplotlib 3.6 and
# the old name was later removed; use whichever name this installation provides.
darkgrid_style = 'seaborn-v0_8-darkgrid' if 'seaborn-v0_8-darkgrid' in plt.style.available else 'seaborn-darkgrid'
plt.style.use(darkgrid_style)
episode_axis = np.arange(len(r_list))
plt.plot(episode_axis, r_list, label = "reward of each episode")
# Flat line at the mean reward across all recorded episodes.
plt.plot(episode_axis, np.full(len(r_list), np.mean(r_list)), linewidth=2.5, alpha=0.9, label = "mean rewards")
plt.xlabel('Episode', fontsize=16)
plt.ylabel('Reward of an episode', fontsize=16)
plt.title('Rewards of 1000 random episodes', fontsize=15)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(fontsize = 18)
plt.grid(True)
# savefig does not create missing directories, and the display() calls that
# would create "screenshots" are commented out in the rollout cell.
os.makedirs("screenshots", exist_ok=True)
plt.savefig("screenshots/op_r.png")


In [None]:
##plot average reward for on-policy
plt.figure(figsize = (15,10))
# 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in Matplotlib 3.6 and
# the old name was later removed; use whichever name this installation provides.
darkgrid_style = 'seaborn-v0_8-darkgrid' if 'seaborn-v0_8-darkgrid' in plt.style.available else 'seaborn-darkgrid'
plt.style.use(darkgrid_style)
episode_axis = np.arange(len(r_list1))
plt.plot(episode_axis, r_list1, label = "reward of each episode")
# Flat line at the mean reward across all recorded episodes.
plt.plot(episode_axis, np.full(len(r_list1), np.mean(r_list1)), linewidth=2.5, alpha=0.9, label = "mean rewards")
plt.xlabel('Episode', fontsize=16)
plt.ylabel('Reward of an episode', fontsize=16)
plt.title('Rewards of 1000 random episodes', fontsize=15)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.grid(True)
plt.legend(fontsize = 18)
# savefig does not create missing directories, and the display() calls that
# would create "screenshots" are commented out in the rollout cell.
os.makedirs("screenshots", exist_ok=True)
plt.savefig("screenshots/op_r1.png")

In [None]:
# Assemble the first num_rows * num_cols saved screenshots into one figure.
num_rows = 2  # Number of rows in the grid
num_cols = 5  # Number of columns in the grid
fig, axes = plt.subplots(num_rows, num_cols, figsize=(8, 8))

# Fill every subplot in row-major order. The original stepped over the rows
# with a stride of 2, so only the first row was populated and the second row
# was saved as empty framed axes.
# NOTE(review): titles now number the screenshots 1..N; the original
# `index//2 + 1` gave duplicate titles -- confirm the intended labelling.
for index, ax in enumerate(axes.flat):
    # Context manager closes the file handle; imshow has already copied the
    # pixel data by the time the block exits.
    with Image.open(f'screenshots/screenshot{index}.png') as img:
        ax.imshow(img)
    ax.set_title(f'Path {index + 1}')
    ax.axis('off')

plt.tight_layout()
plt.subplots_adjust(hspace= -0.5)

# plt.show()
plt.savefig('screenshots/fig1.png')
