In [None]:
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
import numpy as np
import matplotlib.pyplot as plt
import random

# Replace JoypadSpace.reset with a version that forwards every keyword
# argument straight to the wrapped environment's reset().
def _joypad_reset(self, **kwargs):
    return self.env.reset(**kwargs)

JoypadSpace.reset = _joypad_reset

# Build the Super Mario Bros environment and wrap it with the SIMPLE_MOVEMENT action set
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-v0', apply_api_compatibility=True, render_mode="human"),
    SIMPLE_MOVEMENT,
)

# Report the action space, the observation shape and the raw action list
print("Action space:", env.action_space)
print("Observation space shape:", env.observation_space.shape)
print("Available actions:", SIMPLE_MOVEMENT)

In [None]:
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

# Create the environment
env = gym_super_mario_bros.make('SuperMarioBros-v0', apply_api_compatibility=True, render_mode="human")
env = JoypadSpace(env, SIMPLE_MOVEMENT)

try:
    # Reset the environment to get the initial state
    state = env.reset()

    # Test action
    action = 0  # No action, just to test

    # Take a step in the environment
    step_result = env.step(action)

    # Print the result to see how many values are returned
    print(f"Step result: {step_result}")
    print(f"Number of returned values: {len(step_result)}")
finally:
    # This cell only probes the step() return shape; close the environment
    # so it is not left open when later cells build their own instance.
    env.close()


In [None]:
class State:
    """Base class for a finite-state-machine state.

    Subclasses override the hooks they need; every hook here is a no-op.
    """

    def __init__(self, name):
        """Store the human-readable ``name`` of this state."""
        self.name = name

    def on_enter(self, entity):
        """Hook invoked by the FSM right after this state becomes current."""

    def on_exit(self, entity):
        """Hook invoked by the FSM right before this state stops being current."""

    def update(self, entity):
        """Hook invoked by the FSM once per tick while this state is current."""

class Transition:
    """A guarded edge of the state machine.

    ``condition`` is a callable taking the entity; ``to_state`` is the state
    the FSM switches to when the condition holds.
    """

    def __init__(self, to_state, condition):
        self.to_state = to_state
        self.condition = condition

    def is_triggered(self, entity):
        """Return whatever ``condition(entity)`` returns."""
        predicate = self.condition
        return predicate(entity)

class FSM:
    """Minimal finite state machine.

    ``transitions`` maps a state object to the list of Transition objects
    leaving it, in the order they were registered.
    """

    def __init__(self, initial_state):
        self.current_state = initial_state
        self.transitions = {}

    def add_transition(self, from_state, to_state, condition):
        """Register a transition from ``from_state`` to ``to_state`` guarded by ``condition``."""
        self.transitions.setdefault(from_state, []).append(Transition(to_state, condition))

    def update(self, entity):
        """Fire the first triggered transition of the current state, then tick the current state."""
        outgoing = self.transitions.get(self.current_state, [])
        # Conditions are evaluated lazily, in registration order; only the first match fires.
        fired = next((edge for edge in outgoing if edge.is_triggered(entity)), None)
        if fired is not None:
            self.current_state.on_exit(entity)
            self.current_state = fired.to_state
            self.current_state.on_enter(entity)
        # The (possibly new) current state is always updated in the same tick.
        self.current_state.update(entity)


In [None]:
class Standing(State):
    """State used while Mario stands still.

    While current, ``update`` registers transitions out of this instance
    towards new ``Running`` / ``Jumping`` instances, depending on the
    entity's ``is_running`` / ``is_jumping`` flags.
    """

    def __init__(self):
        super().__init__("Standing")

    def on_enter(self, entity):
        print("Mario is now standing")

    def on_exit(self, entity):
        """Forget the transitions registered for this instance.

        The state classes in this notebook always transition to a freshly
        constructed state object, so an exited instance is not entered again.
        Without this cleanup its entry in ``entity.fsm.transitions`` (keyed by
        instance) would stay there forever and the dict would grow with every
        state change.
        """
        entity.fsm.transitions.pop(self, None)

    def update(self, entity):
        """Register outgoing transitions matching the entity's current flags."""
        if entity.is_running:
            entity.fsm.add_transition(entity.fsm.current_state, Running(), lambda entity: entity.is_running)
        if entity.is_jumping:
            entity.fsm.add_transition(entity.fsm.current_state, Jumping(), lambda entity: entity.is_jumping)

class Running(State):
    """State used while Mario is running.

    While current, ``update`` registers transitions out of this instance
    towards a new ``Standing`` (running flag cleared) or a new ``Jumping``
    (jumping flag set).
    """

    def __init__(self):
        super().__init__("Running")

    def on_enter(self, entity):
        print("Mario is now running")

    def on_exit(self, entity):
        """Forget the transitions registered for this instance.

        The state classes in this notebook always transition to a freshly
        constructed state object, so an exited instance is not entered again.
        Without this cleanup its entry in ``entity.fsm.transitions`` (keyed by
        instance) would stay there forever and the dict would grow with every
        state change.
        """
        entity.fsm.transitions.pop(self, None)

    def update(self, entity):
        """Register outgoing transitions matching the entity's current flags."""
        if not entity.is_running:
            entity.fsm.add_transition(entity.fsm.current_state, Standing(), lambda entity: not entity.is_running)
        if entity.is_jumping:
            entity.fsm.add_transition(entity.fsm.current_state, Jumping(), lambda entity: entity.is_jumping)

class Jumping(State):
    """State used while Mario is jumping.

    While current, ``update`` registers a transition out of this instance
    towards a new ``Standing`` once the entity's jumping flag is cleared.
    """

    def __init__(self):
        super().__init__("Jumping")

    def on_enter(self, entity):
        print("Mario is now jumping")

    def on_exit(self, entity):
        """Forget the transitions registered for this instance.

        The state classes in this notebook always transition to a freshly
        constructed state object, so an exited instance is not entered again.
        Without this cleanup its entry in ``entity.fsm.transitions`` (keyed by
        instance) would stay there forever and the dict would grow with every
        state change.
        """
        entity.fsm.transitions.pop(self, None)

    def update(self, entity):
        """Register the transition back to standing when the jump flag is off."""
        if not entity.is_jumping:
            entity.fsm.add_transition(entity.fsm.current_state, Standing(), lambda entity: not entity.is_jumping)


In [None]:
class Mario:
    """Entity driven by the FSM: two boolean movement flags plus the state machine."""

    def __init__(self):
        # Both movement flags start cleared; the machine starts in Standing.
        self.is_running = self.is_jumping = False
        self.fsm = FSM(Standing())
        self.action = 0  # Default to no action

    def set_running(self, running):
        """Set the running flag read by the states."""
        self.is_running = running

    def set_jumping(self, jumping):
        """Set the jumping flag read by the states."""
        self.is_jumping = jumping

    def update(self):
        """Advance the state machine by one tick, passing this entity to it."""
        machine = self.fsm
        machine.update(self)


In [None]:
# Instantiate the FSM-driven Mario entity
mario = Mario()

# Build the environment and wrap it with the SIMPLE_MOVEMENT action set
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-v0', apply_api_compatibility=True, render_mode="human"),
    SIMPLE_MOVEMENT,
)

# Put the environment into its start state
state = env.reset()

# Function to map FSM state to environment action
def get_action_from_state(mario):
    """Translate Mario's current FSM state into an environment action index.

    Standing -> 0 (no action), Running -> 1 (move right), Jumping -> 5 (jump);
    any other state falls back to 0.
    """
    # Checked in order; the first matching state class wins.
    action_by_state = (
        (Standing, 0),  # No action
        (Running, 1),   # Move right
        (Jumping, 5),   # Jump
    )
    for state_cls, action_index in action_by_state:
        if isinstance(mario.fsm.current_state, state_cls):
            return action_index
    return 0  # Default to no action

# Simulate a game loop
try:
    for _ in range(1000000):
        env.render()
        # Example logic to set running and jumping
        mario.set_running(random.choice([True, False]))
        mario.set_jumping(random.choice([True, False]))

        # Update Mario FSM
        mario.update()

        # Get action from FSM state
        action = get_action_from_state(mario)

        # Step the environment with the chosen action
        result = env.step(action)
        if len(result) == 5:
            state, reward, done, truncated, info = result
        else:
            # 4-value step result (state, reward, done, info): there is no
            # separate truncation flag, so treat it as False. The previous
            # `result[:4]` fallback tried to unpack four values into five
            # names and raised ValueError.
            state, reward, done, info = result
            truncated = False

        if done or truncated:
            break
finally:
    # Close the environment even if the loop is interrupted or raises
    env.close()


In [1]:
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
import numpy as np
import matplotlib.pyplot as plt
import random
import cv2

# Define FSM classes
class State:
    """Base class for a finite-state-machine state.

    Subclasses override the hooks they need; every hook here is a no-op.
    """

    def __init__(self, name):
        """Store the human-readable ``name`` of this state."""
        self.name = name

    def on_enter(self, entity):
        """Hook invoked by the FSM right after this state becomes current."""

    def on_exit(self, entity):
        """Hook invoked by the FSM right before this state stops being current."""

    def update(self, entity):
        """Hook invoked by the FSM once per tick while this state is current."""

class Transition:
    """A guarded edge of the state machine.

    ``condition`` is a callable taking the entity; ``to_state`` is the state
    the FSM switches to when the condition holds.
    """

    def __init__(self, to_state, condition):
        self.to_state = to_state
        self.condition = condition

    def is_triggered(self, entity):
        """Return whatever ``condition(entity)`` returns."""
        predicate = self.condition
        return predicate(entity)

class FSM:
    """Minimal finite state machine.

    ``transitions`` maps a state object to the list of Transition objects
    leaving it, in the order they were registered.
    """

    def __init__(self, initial_state):
        self.current_state = initial_state
        self.transitions = {}

    def add_transition(self, from_state, to_state, condition):
        """Register a transition from ``from_state`` to ``to_state`` guarded by ``condition``."""
        self.transitions.setdefault(from_state, []).append(Transition(to_state, condition))

    def update(self, entity):
        """Fire the first triggered transition of the current state, then tick the current state."""
        outgoing = self.transitions.get(self.current_state, [])
        # Conditions are evaluated lazily, in registration order; only the first match fires.
        fired = next((edge for edge in outgoing if edge.is_triggered(entity)), None)
        if fired is not None:
            self.current_state.on_exit(entity)
            self.current_state = fired.to_state
            self.current_state.on_enter(entity)
        # The (possibly new) current state is always updated in the same tick.
        self.current_state.update(entity)

# Implement specific states for Mario
class Standing(State):
    """State used while Mario stands still.

    While current, ``update`` registers transitions out of this instance
    towards new ``Running`` / ``Jumping`` instances, depending on the
    entity's ``is_running`` / ``is_jumping`` flags.
    """

    def __init__(self):
        super().__init__("Standing")

    def on_enter(self, entity):
        print("Mario is now standing")

    def on_exit(self, entity):
        """Forget the transitions registered for this instance.

        The state classes in this notebook always transition to a freshly
        constructed state object, so an exited instance is not entered again.
        Without this cleanup its entry in ``entity.fsm.transitions`` (keyed by
        instance) would stay there forever and the dict would grow with every
        state change.
        """
        entity.fsm.transitions.pop(self, None)

    def update(self, entity):
        """Register outgoing transitions matching the entity's current flags."""
        if entity.is_running:
            entity.fsm.add_transition(entity.fsm.current_state, Running(), lambda entity: entity.is_running)
        if entity.is_jumping:
            entity.fsm.add_transition(entity.fsm.current_state, Jumping(), lambda entity: entity.is_jumping)

class Running(State):
    """State used while Mario is running.

    While current, ``update`` registers transitions out of this instance
    towards a new ``Standing`` (running flag cleared) or a new ``Jumping``
    (jumping flag set).
    """

    def __init__(self):
        super().__init__("Running")

    def on_enter(self, entity):
        print("Mario is now running")

    def on_exit(self, entity):
        """Forget the transitions registered for this instance.

        The state classes in this notebook always transition to a freshly
        constructed state object, so an exited instance is not entered again.
        Without this cleanup its entry in ``entity.fsm.transitions`` (keyed by
        instance) would stay there forever and the dict would grow with every
        state change.
        """
        entity.fsm.transitions.pop(self, None)

    def update(self, entity):
        """Register outgoing transitions matching the entity's current flags."""
        if not entity.is_running:
            entity.fsm.add_transition(entity.fsm.current_state, Standing(), lambda entity: not entity.is_running)
        if entity.is_jumping:
            entity.fsm.add_transition(entity.fsm.current_state, Jumping(), lambda entity: entity.is_jumping)

class Jumping(State):
    """State used while Mario is jumping.

    While current, ``update`` registers a transition out of this instance
    towards a new ``Standing`` once the entity's jumping flag is cleared.
    """

    def __init__(self):
        super().__init__("Jumping")

    def on_enter(self, entity):
        print("Mario is now jumping")

    def on_exit(self, entity):
        """Forget the transitions registered for this instance.

        The state classes in this notebook always transition to a freshly
        constructed state object, so an exited instance is not entered again.
        Without this cleanup its entry in ``entity.fsm.transitions`` (keyed by
        instance) would stay there forever and the dict would grow with every
        state change.
        """
        entity.fsm.transitions.pop(self, None)

    def update(self, entity):
        """Register the transition back to standing when the jump flag is off."""
        if not entity.is_jumping:
            entity.fsm.add_transition(entity.fsm.current_state, Standing(), lambda entity: not entity.is_jumping)

# Create Mario entity and integrate FSM
class Mario:
    """Entity driven by the FSM: two boolean movement flags plus the state machine."""

    def __init__(self):
        # Both movement flags start cleared; the machine starts in Standing.
        self.is_running = self.is_jumping = False
        self.fsm = FSM(Standing())
        self.action = 0  # Default to no action

    def set_running(self, running):
        """Set the running flag read by the states."""
        self.is_running = running

    def set_jumping(self, jumping):
        """Set the jumping flag read by the states."""
        self.is_jumping = jumping

    def update(self):
        """Advance the state machine by one tick, passing this entity to it."""
        machine = self.fsm
        machine.update(self)

# Per-episode metric histories, appended to by log_metrics()
entropy_losses, train_losses = [], []

def log_metrics(entropy_loss, train_loss):
    """Append one entropy-loss value and one train-loss value to their histories."""
    for history, value in ((entropy_losses, entropy_loss), (train_losses, train_loss)):
        history.append(value)

# Function to plot the metrics
def plot_metrics():
    """Draw the entropy-loss and train-loss histories side by side and show the figure."""
    plt.figure(figsize=(12, 6))
    panels = (
        (entropy_losses, 'Entropy Loss'),
        (train_losses, 'Train Loss'),
    )
    # One subplot per metric, left to right; the metric name doubles as
    # the legend label and the y-axis label.
    for position, (series, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(series, label=title)
        plt.xlabel('Episodes')
        plt.ylabel(title)
        plt.legend()
    plt.show()

# Instantiate the FSM-driven Mario entity
mario = Mario()

# Build the environment in rgb_array mode and wrap it with the SIMPLE_MOVEMENT action set
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-v0', apply_api_compatibility=True, render_mode="rgb_array"),
    SIMPLE_MOVEMENT,
)

# Put the environment into its start state
state = env.reset()

# Render one frame to learn the frame size; the writer is given (shape[1], shape[0])
frame = env.render()
frame_height, frame_width = frame.shape[0], frame.shape[1]
frame_shape = (frame_width, frame_height)

# Open the XVID-encoded AVI file that will receive the gameplay at 30 fps
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video_writer = cv2.VideoWriter('mario_gameplay.avi', fourcc, 30, frame_shape)

# Function to map FSM state to environment action
def get_action_from_state(mario):
    """Translate Mario's current FSM state into an environment action index.

    Standing -> 0 (no action), Running -> 1 (move right), Jumping -> 5 (jump);
    any other state falls back to 0.
    """
    # Checked in order; the first matching state class wins.
    action_by_state = (
        (Standing, 0),  # No action
        (Running, 1),   # Move right
        (Jumping, 5),   # Jump
    )
    for state_cls, action_index in action_by_state:
        if isinstance(mario.fsm.current_state, state_cls):
            return action_index
    return 0  # Default to no action

# Simulate a game loop
try:
    for episode in range(1000):
        total_entropy_loss = 0
        total_train_loss = 0
        # Number of environment steps actually executed in this episode.
        # `step + 1` over-counts by one whenever the inner loop leaves through
        # the `done or truncated` break, so the steps are counted explicitly.
        steps_taken = 0
        done = False
        truncated = False
        state = env.reset()

        for step in range(500):
            if done or truncated:
                break

            # Grab the current frame from the environment
            frame = env.render()

            # Capture frame for video saving (the writer is fed BGR frames)
            video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))

            # Example logic to set running and jumping
            mario.set_running(random.choice([True, False]))
            mario.set_jumping(random.choice([True, False]))

            # Update Mario FSM
            mario.update()

            # Get action from FSM state
            action = get_action_from_state(mario)

            # Step the environment with the chosen action
            result = env.step(action)
            if len(result) == 5:
                state, reward, done, truncated, info = result
            else:
                # 4-value step result (state, reward, done, info): there is no
                # separate truncation flag, so treat it as False. The previous
                # `result[:4]` fallback tried to unpack four values into five
                # names and raised ValueError.
                state, reward, done, info = result
                truncated = False

            # Dummy loss values for demonstration (replace with actual computation)
            entropy_loss = random.uniform(0, 1)
            train_loss = random.uniform(0, 1)

            # Log metrics
            total_entropy_loss += entropy_loss
            total_train_loss += train_loss
            steps_taken += 1

        # Average the losses over the episode. steps_taken is at least 1 because
        # done/truncated are reset to False before the inner loop starts.
        avg_entropy_loss = total_entropy_loss / steps_taken
        avg_train_loss = total_train_loss / steps_taken
        log_metrics(avg_entropy_loss, avg_train_loss)

        if (episode + 1) % 100 == 0:
            print(f"Episode {episode + 1}: Average Entropy Loss = {avg_entropy_loss:.4f}, Average Train Loss = {avg_train_loss:.4f}")
finally:
    # Close the environment and video writer even if the loop is interrupted
    # or raises, so the video file is finalized and the emulator released.
    try:
        env.close()
    finally:
        video_writer.release()

# Plot the metrics
plot_metrics()


  logger.warn(
  logger.warn(
  logger.warn(
  if not isinstance(terminated, (bool, np.bool8)):


Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now standing
Mario is now running
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now standing
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now standing
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now standing
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now sta

  return (self.ram[0x86] - self.ram[0x071c]) % 256


Mario is now standing
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now jumping
Mario is now standing
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now jumping
Mario is now standing
Mario is now running
Mario is now standing
Mario is now running
Mario is now standing
Mario is now running
Mario is now standing
Mario is now running
Mario is now standing
Mario is now running
Mario is now standing
Mario is now running
Mario is now standing
Mario is now jumping
Mario is now st