In [None]:
# Reset the current game through the API and evaluate the HTTP status code of
# the reply. `reset` and `headers` are the globals assigned in the new-game
# cell further down, so that cell must have been run before this one.
reseult = requests.post(reset,headers=headers)
reseult.status_code

In [None]:
# Keep track of moves: get_info() appends every observation it parses here
obsers = []

# Set up the logger
log_filename = "maze_agent_run.log"

# Start each run with a fresh log file. Removal is best-effort: a file that is
# missing or cannot be deleted must not stop the rest of the setup, but only
# OS-level failures are ignored so genuine programming errors still surface.
try:
    os.remove(log_filename)
except OSError:
    pass

# Raise urllib3's threshold to INFO so its DEBUG records are dropped even
# though the root logger below is configured at DEBUG.
urllib3_logger = logging.getLogger("urllib3")
urllib3_logger.setLevel(logging.INFO)

logging.basicConfig(
    level=logging.DEBUG,  # Set the logging level
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Format for log messages
    handlers=[
        logging.FileHandler(log_filename),  # Log to a file
        # logging.StreamHandler()  # Uncomment to also log to the console
    ],
)


def get_info(response):
    """Unpack a maze-API response into (done, info, observation, reward, truncated).

    A response whose status code is not 200 is reported on stdout and yields
    five ``None`` values. Otherwise each field is read from the JSON body,
    with ``None`` standing in for any key that is missing. An observation
    that is present is converted to a ``float32`` NumPy array and is also
    appended to the module-level ``obsers`` history.
    """
    if response.status_code != 200:
        print("Error code at response")
        return (None,) * 5

    payload = response.json()

    position = payload.get('observation')
    if position is not None:
        position = np.array(position, dtype=np.float32)
        obsers.append(position)  # side effect: record every parsed observation

    return (
        payload.get('done'),
        payload.get('info'),
        position,
        payload.get('reward'),
        payload.get('truncated'),
    )
    
# implement retry policy
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10))
def make_request(url, headers, data=None):
    """POST to ``url`` and return the response, logging what the server sent back.

    ``data`` is sent as the JSON body only when it is truthy; otherwise the
    request carries no body. A non-success HTTP status raises through
    ``raise_for_status()``, and any exception leaving this function is handed
    to the ``retry`` decorator (configured above for 5 attempts with an
    exponential wait bounded by 4 and 10 -- presumably seconds, confirm
    against the retry library in use).

    The log line takes the endpoint name from index 3 of the "/"-split URL
    and the ``observation`` / ``reward`` fields from the JSON reply.
    """
    post_kwargs = {'headers': headers}
    if data:
        post_kwargs['json'] = data
    response = requests.post(url, **post_kwargs)

    # Raise an HTTP error for bad responses so the retry policy kicks in
    response.raise_for_status()

    # Short fixed pause after every successful call
    # NOTE(review): presumably throttles requests to the server -- confirm
    time.sleep(0.1)

    endpoint = url.split('/')[3]
    body = response.json()
    logging.info(f"Action:{endpoint} data {data}, obs: {body.get('observation')},reward: {body.get('reward')} ")
    return response



class TrainingCallback(BaseCallback):
    """Training callback that logs per-episode step counts and reward totals.

    Only the first environment is tracked: both the reward and the
    termination flag are read at index 0 of the ``rewards`` / ``dones``
    entries of ``self.locals``.
    NOTE(review): assumes ``rewards`` and ``dones`` are indexable
    per-environment sequences, as supplied by a vectorized env -- confirm.
    """

    def __init__(self, verbose=1):
        super().__init__(verbose)
        self.episode_steps = 0            # steps taken in the running episode
        self.episode_rewards = 0.0        # reward accumulated in the running episode
        self.episode_lengths = []         # step count of every finished episode
        self.episode_rewards_list = []    # total reward of every finished episode

    def _on_step(self) -> bool:
        """Update the running episode counters; always returns True to continue."""
        self.episode_steps += 1
        # Accumulate the reward of environment 0 as a plain float. Adding the
        # whole ``rewards`` container (as before) turned the running total into
        # an array and was inconsistent with the ``dones[0]`` check below.
        self.episode_rewards += float(self.locals['rewards'][0])

        # Check if the episode is done
        if self.locals['dones'][0]:
            # Record and log the number of steps and reward for the episode
            self.episode_lengths.append(self.episode_steps)
            self.episode_rewards_list.append(self.episode_rewards)

            logging.info(f"Episode finished - Steps: {self.episode_steps}, Reward: {self.episode_rewards}")

            # Reset for the next episode
            self.episode_steps = 0
            self.episode_rewards = 0.0

        return True

    def _on_rollout_end(self) -> None:
        """Log averages over every episode finished so far (not only this rollout)."""
        total_episodes = len(self.episode_lengths)
        if total_episodes > 0:
            avg_episode_length = sum(self.episode_lengths) / total_episodes
            avg_reward_per_episode = sum(self.episode_rewards_list) / total_episodes
            logging.info(f"End of rollout. Total episodes: {total_episodes}, "
                         f"Average episode length: {avg_episode_length}, "
                         f"Average reward per episode: {avg_reward_per_episode}")


class MazeAPIEnv(gym.Env):
    """Gym environment that forwards every reset/step to a remote maze API.

    Actions are the integers 0-3 (``Discrete(4)``). Observations are
    2-element ``float32`` vectors: ``step`` returns the API observation
    divided by 10, and ``reset`` always returns ``[0.0, 0.0]``.

    Parameters:
    - api_step_url: URL of the API step endpoint.
    - headers: extra HTTP headers (e.g. authorization) sent with every
      request; may be None. ``Content-Type: application/json`` is supplied
      as a default and can be overridden by this mapping.
    - api_reset: URL of the API reset endpoint.
    """

    def __init__(self, api_step_url, headers,api_reset):
        super().__init__()
        # Previously the ``headers`` argument was overwritten by a hard-coded
        # dict and silently ignored; now caller-supplied headers are honoured
        # on top of the JSON content-type default.
        self.headers = {'Content-Type': 'application/json', **(headers or {})}
        self.api_step_url = api_step_url  # URL for the API step endpoint
        self.api_reset = api_reset        # URL for the API reset endpoint

        # Define the action and observation space
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(low=0, high=4, shape=(2,), dtype=np.float32)

        self.current_state = np.array([0.0, 0.0], dtype=np.float32)  # Starting position
        self.done = False

        # Raw positions seen in the current episode (to help exploration)
        self.visited = set()
        # Bounds used by normalize_reward()
        self.min_reward = -1
        self.max_reward = -0.01

    def normalize(self, observation):
        """Scale the observation by 1/10 (maps the range [0, 10] onto [0, 1])."""
        return observation / 10

    def normalize_reward(self,reward):
        """Linearly map ``reward`` from [min_reward, max_reward] onto [-1, 1]."""
        return (2*(reward-self.min_reward)/(self.max_reward-self.min_reward))-1

    def reset(self,seed=None,**kwargs):
        """Reset the remote game and return ``(start_state, {})``.

        The reply of the reset endpoint is not inspected: the start state is
        always taken to be ``[0.0, 0.0]``. ``seed`` and any extra keyword
        arguments are accepted but unused.
        """
        make_request(url=self.api_reset, headers=self.headers)

        self.current_state = np.array([0.0, 0.0], dtype=np.float32)
        self.done = False
        self.visited = set()

        return self.current_state, {}

    def step(self, action):
        """Send ``action`` to the API and return ``(obs, reward, done, truncated, {})``.

        Raises:
        - RuntimeError: if the API reply carries no observation or no reward.
        """
        # Send the action to the API
        content = {'action': int(action)}
        response = make_request(url=self.api_step_url, headers=self.headers, data=content)

        if response.status_code != 200:
            print("error code in step")

        # Extract the response data
        done, info, raw_observation, reward, truncated = get_info(response)
        if raw_observation is None or reward is None:
            # Fail with an explicit message instead of an opaque TypeError on
            # ``None + 1`` / ``tuple(None)`` further down.
            raise RuntimeError(
                f"Maze API step returned an unusable reply (HTTP {response.status_code})"
            )

        # NOTE(review): the reward is shifted by +1 before normalize_reward();
        # min_reward/max_reward look like bounds of the unshifted API reward,
        # in which case the result falls outside [-1, 1] -- confirm intent.
        reward = self.normalize_reward(reward + 1)

        # set.add() already ignores duplicates, so no membership test is needed
        self.visited.add(tuple(raw_observation))

        # Update current state
        self.current_state = self.normalize(np.asarray(raw_observation, dtype=np.float32))
        # A flag missing from the reply arrives as None; report it as False
        self.done = bool(done)

        return self.current_state, reward, self.done, bool(truncated), {}
    


In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_maze_position(position, maze_size=(5, 5)):
    """
    Draw the agent's cell on a grid of the given size and show the figure.

    Parameters:
    - position (tuple): (x, y) cell of the agent; x selects the column and
      y the row of the grid array, so both must be valid integer indices.
    - maze_size (tuple): shape passed to np.zeros for the grid, also used for
      the tick ranges and the title (default is (5, 5)).
    """
    # Blank grid with a single 1 at the agent's cell (row = y, column = x)
    grid = np.zeros(maze_size)
    col, row = position[0], position[1]
    grid[row, col] = 1

    fig, ax = plt.subplots()
    size_x, size_y = maze_size[0], maze_size[1]

    # Ticks sit on the half-integers so the grid lines fall on cell borders
    ax.set_xticks(np.arange(-0.5, size_x, 1))
    ax.set_yticks(np.arange(-0.5, size_y, 1))
    ax.grid(True)

    # Render the grid, then overlay a red marker on the agent's cell
    ax.imshow(grid, cmap='Blues', origin='upper')
    ax.scatter(col, row, color='red', s=200, label='Agent Position')

    # Relabel the ticks with consecutive integers starting at 0
    ax.set_xticklabels(np.arange(0, size_x + 1))
    ax.set_yticklabels(np.arange(0, size_y + 1))

    ax.set_title(f'Agent Position in {size_x}x{size_y} Maze')
    ax.legend(loc='upper right')

    # Flip the y-axis so that the origin ends up at the bottom-left corner
    ax.invert_yaxis()

    plt.show()


In [None]:
# Base URL of the maze game server; every endpoint below is derived from it
base_url = "http://18.185.60.20:5005"
api_new_game = base_url + "/new_game"

headers = {'Content-Type': 'application/json'}

# Create a new game. Fail right away on an HTTP error instead of later, when
# a missing uuid would break the URL concatenation below.
response = requests.post(api_new_game, headers=headers)
response.raise_for_status()
uuid = response.json().get('uuid')

# Per-game endpoints: the game's uuid is the last path segment
step = base_url + "/step/" + uuid
reset = base_url + "/reset/" + uuid

# Fixed action sequence to replay against a freshly reset game
action_seq = [2,1,3,1,1,1,1,1,2,2,2]
response = requests.post(reset,headers=headers)
observation = response.json().get('observation')
#print(response.text)
print(f"Original observation {observation}")

for action in action_seq:
    content = {'action': int(action)}
    response = requests.post(step,headers=headers,json=content)
    # get_info() also appends each parsed observation to the global `obsers`
    done,info,observation,reward,trunc = get_info(response)
    print(f"Move: {action}, obs:{observation}, reward:{reward}, done: {done}")

# Observation reported by the last response received above
observation = response.json().get('observation')
print("-------------")
print(f"Observation before reset {observation}")

# Reset again and show the observation the server reports afterwards
response = requests.post(reset,headers=headers)

observation = response.json().get('observation')
print(f"Observation After reset: {observation}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os

# Create a directory to store the frames
os.makedirs('frames', exist_ok=True)

# Side length of the square plotting area (a 10x10 grid)
grid_size = 10

# Sequence of 2D positions to animate: the observations recorded in `obsers`
positions = obsers

# Render one PNG per position and remember the paths written in this run, so
# the video is assembled from exactly these files.
frame_paths = []
for i, (x, y) in enumerate(positions):
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.set_xlim(-0.5, grid_size - 0.5)
    ax.set_ylim(-0.5, grid_size - 0.5)
    ax.set_xticks(np.arange(-0.5, grid_size, 1))
    ax.set_yticks(np.arange(-0.5, grid_size, 1))
    ax.grid(True)

    # Plot the agent's position
    ax.plot(x, y, 'ro', markersize=8)  # 'ro' for red circle marker

    # Add the frame number as a text annotation in the plot
    ax.text(0.05, 0.95, f'Frame: {i}', transform=ax.transAxes, fontsize=12, verticalalignment='top', color='blue')

    # Save each frame as a PNG file, then close the figure to free its memory
    frame_path = f'frames/frame_{i:03d}.png'
    fig.savefig(frame_path)
    plt.close(fig)
    frame_paths.append(frame_path)

# Parameters for saving the video
frame_rate = 2  # frames per second (fps)
frame_size = (600, 600)  # Size of the output video

# Initialize the video writer with the appropriate codec
out = cv2.VideoWriter('output_video.mp4', cv2.VideoWriter_fourcc(*'mp4v'), frame_rate, frame_size)

# Write the frames; the writer is released even if a frame fails to load
try:
    if not out.isOpened():
        # Without this check nothing is written yet success is still reported
        raise RuntimeError("Could not open 'output_video.mp4' for writing")
    for frame_path in frame_paths:
        frame = cv2.imread(frame_path)
        if frame is None:
            # cv2.imread signals an unreadable file by returning None
            raise RuntimeError(f"Could not read frame image {frame_path}")
        out.write(cv2.resize(frame, frame_size))  # Resize to match the video size
finally:
    # Release the video writer
    out.release()

print("Video saved successfully!")


In [1]:
# Sanity check of the URL parsing that make_request() uses for its log line:
# index 3 of the "/"-split URL is the endpoint name.
"http://18.185.60.20:5005/step/" .split("/")[3]

'step'