# MAC Installation Guide

In [10]:

#1. Install Java JDK 8 (Required for MineRL)
!brew tap AdoptOpenJDK/openjdk
!brew install --cask adoptopenjdk8 
export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)

#2. Create environment
conda create --platform osx-64 -n minerl-env python=3.9 -y
conda activate minerl-env

#3. Install dependencies
git clone [https://github.com/minerllabs/minerl.git](https://github.com/minerllabs/minerl.git)
sed -i .bak 's/3\.2\.1/3.3.1/' ./minerl/scripts/mcp_patch.diff
cd minerl
pip install .

#4. Patch Launch Scripts for Mac
sed -i .bak s/'java -Xmx\$maxMem'/'java -Xmx\$maxMem -XstartOnFirstThread'/ ./minerl/MCP-Reborn/launchClient.sh
sed -i .bak /'GLFW.glfwSetWindowIcon(this.handle, buffer);'/d ./minerl/MCP-Reborn/src/main/java/net/minecraft/client/MainWindow.java
sed -i .bak '125,136s/^/\/\//' ./minerl/MCP-Reborn/src/main/java/net/minecraft/client/MainWindow.java

#5. Build and Move JARs
cd minerl/MCP-Reborn && ./gradlew clean build shadowJar 
cd ../../../
cp -rf ./minerl/minerl/MCP-Reborn/* TARGET_DIR=$(python -c "import site; print(site.getsitepackages()[0])")/minerl/MCP-Reborn/
cp -rf ./minerl/minerl/MCP-Reborn/* "$TARGET_DIR"

#6. Install Python reqs 
pip install -r requirements.txt

SyntaxError: invalid syntax (3561790024.py, line 4)

In [8]:
%%writefile config.yaml
bc:
  data_path: "bc_expert_data.npz"
  learning_rate: 0.0003
  batch_size: 64
  gradient_clip: 1.0
  q_regularization: 0.01
  prefill_replay_buffer: false
  lambda_supervised: 1.0
  lambda_td: 1.0
  lambda_margin: 1.0
  lambda_l2: 0.00001
  margin: 0.8
environment:
  name: "MineRLcustom_treechop-v0"
  episode_seconds: 60
  frame_shape: [84, 84]
  frame_stack: 4
  curriculum:
    spawn_type: "random"
    with_logs: 5
    with_axe: false
network:
  input_channels: 4
  architecture: "medium"
  attention: "cbam"
  use_scalar_network: true
  scalar_hidden_dim: 64
  scalar_output_dim: 64
action_space:
  preset: "custom"
  enabled_actions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 15, 17, 19, 20, 21, 23, 24]
dqn:
  num_actions: null
  learning_rate: 0.0001
  gamma: 0.99
  batch_size: 64
  gradient_clip: 5
  replay_buffer:
    capacity: 40000
    min_size: 4000
  exploration:
    epsilon_start: 0.95
    epsilon_end: 0.05
    epsilon_decay_steps: 40000
  target_update:
    method: "hard"
    tau: 0.005
    hard_update_freq: 400
  prioritized_replay:
    enabled: true
    alpha: 0.6
    beta_start: 0.4
    beta_end: 1.0
training:
  num_episodes: 4000
  train_freq: 5
  log_freq: 5
  save_freq: 50
  eval_freq: 50
  eval_episodes: 5
  grad_cam_freq: 50
  env_recreation_interval: 25
  checkpoint_dir: "checkpoints"
  log_dir: "runs"
  grad_cam_checkpoint_path: "checkpoints/best_model_dqn.pt"
  video_recording:
    enabled: false
    save_dir: "videos"
    fps: 10
ppo:
  learning_rate: 0.0001
  gamma: 0.99
  gae_lambda: 0.95
  clip_epsilon: 0.2
  entropy_coef: 0.02
  value_coef: 0.5
  max_grad_norm: 0.5
  n_steps: 1024
  n_epochs: 6
  batch_size: 64
algorithm: "ppo"
rewards:
  wood_value: 5.0
  step_penalty: -0.001
  axe_reward: 15.0
  plank_reward: 20.0
  stick_reward: 10.0
  waste_penalty: -4.0
device: "auto"
seed: null
grad_cam:
  attack_action_index: 6
  output_filename: "grad_cam_visualization.jpg"


Overwriting config.yaml


In [9]:
# Download the Pre-trained Model
import os
import sys

!{sys.executable} -m pip install gdown -q

import gdown

file_id = '1pjkPA4y1_P7QsrSCGhXzmEnBJ4gvliEm'
url = f'https://drive.google.com/uc?id={file_id}'
output = 'checkpoint_ppo_ep3000.pt'

if not os.path.exists(output):
    print(f"Downloading model from Google Drive...")
    try:
        gdown.download(url, output, quiet=False)
        print("\nDownload complete!")
    except Exception as e:
        print(f"Error downloading: {e}")
else:
    print("Model file already exists.")


Model file already exists.


In [10]:
# [Cell] - Define and Register Custom Environment
import gym
from gym.envs.registration import register
from minerl.herobraine.env_specs.human_controls import HumanControlEnvSpec
from minerl.herobraine.hero import handlers
from minerl.herobraine.hero import mc as MC

# 1. Define Base Treechop Spec
class Treechop(HumanControlEnvSpec):
    def __init__(self, *args, **kwargs):
        if 'name' not in kwargs: kwargs['name'] = 'MineRLTreechop-v0'
        if 'max_episode_steps' not in kwargs: kwargs['max_episode_steps'] = 8000
        super().__init__(*args, reward_threshold=64.0, **kwargs)
        
    def create_rewardables(self):
        return [handlers.RewardForCollectingItems([dict(type="log", amount=1, reward=1.0)])]
    
    def create_agent_start(self):
        return super().create_agent_start() + [handlers.SimpleInventoryAgentStart([dict(type="oak_log", quantity=3)])]

    def create_agent_handlers(self):
        return [handlers.AgentQuitFromPossessingItem([dict(type="log", amount=64)])]
    
    def create_actionables(self):
        acts = [handlers.KeybasedCommandAction(k, v) for k, v in MC.KEYMAP.items()]
        acts.append(handlers.CameraAction())
        return acts

    def create_server_world_generators(self):
        opts = """{"coordinateScale":684.412,"heightScale":684.412,"lowerLimitScale":512.0,"upperLimitScale":512.0,"depthNoiseScaleX":200.0,"depthNoiseScaleZ":200.0,"depthNoiseScaleExponent":0.5,"mainNoiseScaleX":80.0,"mainNoiseScaleY":160.0,"mainNoiseScaleZ":80.0,"baseSize":8.5,"stretchY":12.0,"biomeDepthWeight":0.0,"biomeDepthOffset":0.0,"biomeScaleWeight":0.0,"biomeScaleOffset":0.0,"seaLevel":1,"useCaves":false,"useDungeons":false,"dungeonChance":8,"useStrongholds":false,"useVillages":false,"useMineShafts":false,"useTemples":false,"useMonuments":false,"useMansions":false,"useRavines":false,"useWaterLakes":false,"waterLakeChance":4,"useLavaLakes":false,"lavaLakeChance":80,"useLavaOceans":false,"fixedBiome":11,"biomeSize":4,"riverSize":1,"dirtSize":33,"dirtCount":10,"dirtMinHeight":0,"dirtMaxHeight":256,"gravelSize":33,"gravelCount":8,"gravelMinHeight":0,"gravelMaxHeight":256,"graniteSize":33,"graniteCount":10,"graniteMinHeight":0,"graniteMaxHeight":80,"dioriteSize":33,"dioriteCount":10,"dioriteMinHeight":0,"dioriteMaxHeight":80,"andesiteSize":33,"andesiteCount":10,"andesiteMinHeight":0,"andesiteMaxHeight":80,"coalSize":17,"coalCount":20,"coalMinHeight":0,"coalMaxHeight":128,"ironSize":9,"ironCount":20,"ironMinHeight":0,"ironMaxHeight":64,"goldSize":9,"goldCount":2,"goldMinHeight":0,"goldMaxHeight":32,"redstoneSize":8,"redstoneCount":8,"redstoneMinHeight":0,"redstoneMaxHeight":16,"diamondSize":8,"diamondCount":1,"diamondMinHeight":0,"diamondMaxHeight":16,"lapisSize":7,"lapisCount":1,"lapisCenterHeight":16,"lapisSpread":16}"""
        return [handlers.DefaultWorldGenerator(force_reset="true", generator_options=opts)]

    def create_server_quit_producers(self):
        return [handlers.ServerQuitFromTimeUp(8000 * 50), handlers.ServerQuitWhenAnyAgentFinishes()]

    def create_server_initial_conditions(self):
        return [handlers.TimeInitialCondition(allow_passage_of_time=False), handlers.SpawningInitialCondition(allow_spawning=False)]

    # --- MISSING METHODS ADDED HERE TO FIX CRASH ---
    def create_server_decorators(self):
        return []

    def determine_success_from_rewards(self, rewards: list) -> bool:
        return sum(rewards) >= self.reward_threshold
    # -----------------------------------------------

    def is_from_folder(self, folder): return folder == 'survivaltreechop'
    def get_docstring(self): return ""

# 2. Define Custom Configurable Environment
class custom_treechop(Treechop):
    def __init__(self, *args, **kwargs):
        if 'name' not in kwargs: kwargs['name'] = 'MineRLcustom_treechop-v0'
        super().__init__(*args, **kwargs)

    def create_agent_start(self):
        with_logs = 5 
        with_axe = False 
        
        # Use grandparent method to skip Treechop's hardcoded inventory
        base_handlers = HumanControlEnvSpec.create_agent_start(self)
        
        inventory = []
        if with_logs > 0: inventory.append(dict(type="oak_log", quantity=with_logs))
        if with_axe: inventory.append(dict(type="wooden_axe", quantity=1))
        
        if inventory: base_handlers.append(handlers.SimpleInventoryAgentStart(inventory))
        base_handlers.append(handlers.AgentStartNear([dict(type="log", distance=5)]))
        return base_handlers

# 3. Register the Environment
def make_fist_treechop_env():
    spec = custom_treechop(resolution=(640, 360))
    return spec.make()

try:
    register(
        id='MineRLcustom_treechop-v0',
        entry_point=make_fist_treechop_env,
        max_episode_steps=1510
    )
    print("✅ Custom environment 'MineRLcustom_treechop-v0' registered.")
except Exception as e:
    # If already registered, we just print the error but continue
    print(f"Environment registration skipped: {e}")

✅ Custom environment 'MineRLcustom_treechop-v0' registered.


  logger.warn(f"Overriding environment {id}")


In [11]:
# [Cell] - Define Wrappers, Network, and Agent Class
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import gym
from collections import deque
import cv2
from gym import ActionWrapper
from gym.spaces import Discrete

# ==========================================
# 1. WRAPPERS (hold_attack, reward, vision)
# ==========================================

class HoldAttackWrapper(gym.Wrapper):
    """Extends a single attack action into a held attack sequence."""
    def __init__(self, env, hold_steps=35, yaw_per_tick=0.0, fwd_jump_ticks=0,
                 lock_aim=True, pass_through_move=False, gui_cooldown=4):
        super().__init__(env)
        self.hold_steps = hold_steps
        self.yaw_per_tick = yaw_per_tick
        self.fwd_jump_ticks = fwd_jump_ticks
        self.lock_aim = lock_aim
        self.pass_through_move = pass_through_move
        self.gui_cooldown = gui_cooldown
        self._attack_left = 0
        self._gui_open = False
        self._hold_block = 0
        self._suppress = False

    def set_hold_suppressed(self, flag=True):
        self._suppress = bool(flag)
        self._attack_left = 0
        self._hold_block = self.gui_cooldown if flag else 0

    def step(self, action):
        if self._suppress: return self.env.step(action)
        
        # Simplified GUI detection
        if action.get("inventory", 0) == 1:
            self._attack_left = 0
            self._hold_block = self.gui_cooldown
            return self.env.step(action)

        if self._hold_block > 0: self._hold_block -= 1
        
        # Start holding attack
        if self._hold_block == 0 and self._attack_left == 0 and action.get('attack', 0) == 1:
            self._attack_left = self.hold_steps

        # Continue holding attack
        if self._attack_left > 0 and self._hold_block == 0:
            action['attack'] = 1
            if self.lock_aim: action['camera'] = np.array([0.0, 0.0], dtype=np.float32)
            self._attack_left -= 1

        return self.env.step(action)

class StackAndProcessWrapper(gym.Wrapper):
    def __init__(self, env, shape=(84, 84)):
        super().__init__(env)
        self.shape = shape
        self.frame_stack = deque(maxlen=4)
        self.observation_space.spaces['pov'] = gym.spaces.Box(
            low=0, high=255, shape=(4, shape[0], shape[1]), dtype=np.uint8)

    def _preprocess(self, frame):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        return cv2.resize(frame, self.shape, interpolation=cv2.INTER_AREA)

    def _get_stacked_obs(self): return np.stack(self.frame_stack, axis=0)

    def reset(self):
        obs = self.env.reset()
        f = self._preprocess(obs['pov'])
        for _ in range(4): self.frame_stack.append(f)
        obs['pov'] = self._get_stacked_obs()
        return obs

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.frame_stack.append(self._preprocess(obs['pov']))
        obs['pov'] = self._get_stacked_obs()
        return obs, reward, done, info

class ObservationWrapper(gym.Wrapper):
    def __init__(self, env, max_episode_steps=1200):
        super().__init__(env)
        self.max_episode_steps = max_episode_steps
        self.current_episode_step = 0
        self.yaw = 0.0; self.pitch = 0.0

    def reset(self):
        obs = self.env.reset()
        self.current_episode_step = 0; self.yaw = 0.0; self.pitch = 0.0
        return self._add_scalars(obs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.current_episode_step += 1
        return self._add_scalars(obs), reward, done, info

    def _add_scalars(self, obs):
        time_norm = max(0.0, (self.max_episode_steps - self.current_episode_step)/self.max_episode_steps)
        extended_obs = dict(obs) if isinstance(obs, dict) else {'pov': obs}
        
        # Simplified scalar injection for demo purposes
        extended_obs['time_left'] = np.array([time_norm], dtype=np.float32)
        extended_obs['yaw'] = np.array([self.yaw/180.0], dtype=np.float32)
        extended_obs['pitch'] = np.array([self.pitch/90.0], dtype=np.float32)
        extended_obs['place_table_safe'] = np.array([1.0], dtype=np.float32)
        
        # Inventory Placeholders (requires parsing 'inventory' dict if available)
        inv_keys = ['inv_logs', 'inv_planks', 'inv_sticks', 'inv_table', 'inv_axe']
        for k in inv_keys: extended_obs[k] = np.array([0.0], dtype=np.float32)
        
        return extended_obs

# ==========================================
# 2. ACTIONS.PY (Simplified for Notebook)
# ==========================================
# In a real deployment, paste the full actions.py content in this Cell.
# Here we implement the wrapper to map index -> action dict
class ConfigurableActionWrapper(ActionWrapper):
    def __init__(self, env, enabled_actions):
        super().__init__(env)
        self.enabled_actions = enabled_actions
        self.action_space = Discrete(len(enabled_actions))
        
        # Define Primitives (Subset for demo)
        self.primitives = [
            {}, {'forward': 1}, {'back': 1}, {'right': 1}, {'left': 1}, 
            {'jump': 1, 'forward': 1}, {'attack': 1}, 
            # Camera (simplified)
            {'camera': np.array([0, -7.5])}, {'camera': np.array([0, -11.25])}, 
            {'camera': np.array([0, 7.5])}, {'camera': np.array([0, 11.25])}, 
            {'camera': np.array([-3.0, 0])}, {'camera': np.array([3.0, 0])} 
        ]

    def action(self, action_index):
        # Map configured index to actual logic
        # For this demo, we handle basic movement. 
        # Full mapping requires crafting logic from actions.py
        if action_index < len(self.primitives):
            act = self.env.action_space.no_op()
            for k, v in self.primitives[action_index].items(): act[k] = v
            return act
        return self.env.action_space.no_op()

# ==========================================
# 3. NETWORK ARCHITECTURE (Attention, CNN, Policy)
# ==========================================
class CBAM(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        max_pool = torch.max(x, dim=1, keepdim=True)[0]
        avg_pool = torch.mean(x, dim=1, keepdim=True)
        attention = self.sigmoid(self.conv(torch.cat([max_pool, avg_pool], dim=1)))
        return x * attention

class MediumCNN(nn.Module):
    def __init__(self, input_channels=4):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(input_channels, 32, 8, 4), nn.ReLU(),
            nn.Conv2d(32, 64, 4, 2), nn.ReLU(),
            nn.Conv2d(64, 128, 3, 1), nn.ReLU()
        )
        self.fc = nn.Sequential(nn.Linear(128*7*7, 512), nn.ReLU())
        self._output_dim = 512
    def forward(self, x):
        if x.max() > 1.0: x = x.float() / 255.0
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

class ScalarNetwork(nn.Module):
    def __init__(self, num_scalars=9):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(num_scalars, 64), nn.ReLU(), nn.Linear(64, 64), nn.ReLU())
    def forward(self, x): return self.net(x)

class ActorCriticNetwork(nn.Module):
    def __init__(self, num_actions, input_channels=4, num_scalars=9):
        super().__init__()
        self.cnn = MediumCNN(input_channels)
        self.attention = CBAM(128)
        self.scalar_network = ScalarNetwork(num_scalars)
        self.actor = nn.Sequential(nn.Linear(512+64, 512), nn.ReLU(), nn.Linear(512, num_actions))
        self.critic = nn.Sequential(nn.Linear(512+64, 512), nn.ReLU(), nn.Linear(512, 1))

    def get_action(self, obs):
        pov = obs['pov']
        conv = self.cnn.conv(pov if pov.max() <= 1.0 else pov.float()/255.0)
        attn = self.attention(conv)
        cnn_feat = self.cnn.fc(attn.view(attn.size(0), -1))
        
        scalars = torch.stack([obs[k].view(-1) for k in [
            'time_left', 'yaw', 'pitch', 'place_table_safe', 'inv_logs',
            'inv_planks', 'inv_sticks', 'inv_table', 'inv_axe']], dim=1)
        scalar_feat = self.scalar_network(scalars)
        
        feat = torch.cat([cnn_feat, scalar_feat], dim=1)
        logits = self.actor(feat)
        return torch.argmax(logits, dim=-1).item()

In [None]:
print("LAUNCHING MINECRAFT... Please wait 1-2 minutes.")

import yaml
import os
import torch
import numpy as np
import matplotlib.pyplot as plt  
from IPython.display import clear_output, display

# --- CRITICAL MAC FIXES ---
# Prevents a common hanging issue on Mac
os.environ['MINERL_MOCK_FILE_OPEN'] = 'true'
# Forces the game to render without a window (prevents graphics driver crashes)
os.environ['MINERL_HEADLESS'] = 'true' 
# --------------------------

# 1. Load Configuration
with open("config.yaml", "r") as f: 
    config = yaml.safe_load(f)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. Initialize Network
# Matching config enabled_actions length
enabled_actions = config['action_space']['enabled_actions'] 
model = ActorCriticNetwork(num_actions=len(enabled_actions)).to(device)

# 3. Load Weights
try:
    checkpoint = torch.load("checkpoint_ppo_ep3000.pt", map_location=device)
    if 'network' in checkpoint: 
        state_dict = checkpoint['network']['policy_state_dict']
    else: 
        state_dict = checkpoint
    
    # Strict=False allows us to load partial weights if the architecture isn't perfect
    model.load_state_dict(state_dict, strict=False)
    model.eval()
    print("✓ Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")

# 4. Create Environment Pipeline
print("Creating environment pipeline...")
try:
    # We set a distinct port (5555) to avoid conflicts with previous runs
    base_env = gym.make('MineRLcustom_treechop-v0')
    
    env_vision = StackAndProcessWrapper(base_env)
    env_hold = HoldAttackWrapper(env_vision, hold_steps=35, lock_aim=True, pass_through_move=False)
    env_obs = ObservationWrapper(env_hold)
    env = ConfigurableActionWrapper(env_obs, enabled_actions=enabled_actions)

    # 5. Run Loop
    print("Resetting environment (this takes about 45-60 seconds)...")
    obs = env.reset()
    print("✓ Environment reset successful! Running inference loop...")
    
    total_reward = 0
    
    # Run for 500 steps (approx 2 minutes of gameplay)
    for i in range(10):
        # Prepare Tensors
        state = {k: torch.from_numpy(v).unsqueeze(0).to(device) for k, v in obs.items() if isinstance(v, np.ndarray)}
        # Ensure POV is float tensor
        if 'pov' not in state: 
            state['pov'] = torch.from_numpy(obs['pov']).unsqueeze(0).float().to(device)

        # Get Action from Model
        with torch.no_grad():
            action_idx = model.get_action(state)
        
        # Step Environment
        obs, reward, done, info = env.step(action_idx)
        total_reward += reward
        
        # --- VISUALIZATION (Since Headless Mode hides the window) ---
        if i % 20 == 0:
            clear_output(wait=True) # Clear previous image to make an animation
            
            # Get the most recent frame (84x84 grayscale)
            frame = obs['pov'][-1] 
            
            plt.figure(figsize=(4, 4))
            plt.imshow(frame, cmap='gray', vmin=0, vmax=255)
            plt.axis('off')
            plt.title(f"Step: {i} | Action: {action_idx} | Reward: {total_reward:.2f}")
            plt.show()
            print(f"Processing Step {i}...") # Print to confirm loop is alive
        # -------------------------------------------------------------
        
        if done:
            print("Episode finished.")
            break
            
    env.close()
    print("Demo complete.")

except Exception as e:
    print("\n❌ CRASH DETECTED.")
    print(f"Error details: {e}")
    print("Tip: If you see 'address already in use', restart the kernel.")

⚠️ LAUNCHING MINECRAFT... Please wait 1-2 minutes.
Using device: cpu
✓ Model loaded successfully.
Creating environment pipeline...
Resetting environment (this takes about 45-60 seconds)...


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.



❌ CRASH DETECTED.
Error details: /Users/brandon/Documents/envs/minerl-env/lib/python3.9/site-packages/minerl/env/../MCP-Reborn
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
[19:42:23] [Render thread/INFO]: Environment: authHost='https://authserver.mojang.com', accountsHost='https://api.mojang.com', sessionHost='https://sessionserver.mojang.com', servicesHost='https://api.minecraftservices.com', name='PROD'
[19:42:24] [Render thread/INFO]: Setting user: Player97
[19:42:24] [Render thread/INFO]: [STDERR]: [LWJGL] Failed to load a library. Possible solutions:
	a) Add the directory that contains the shared library to -Djava.library.path or -Dorg.lwjgl.librarypath.
	b) Add the JAR that contains the shared library to the classpath.
[19:42:24] [Render thread/INFO]: [STDERR]: [LWJGL] Enable debug mode with -Dorg.lwjgl.util.Debug=tr