In [3]:
import torch
import torch.nn as nn
import pickle
from agent import MineRLAgent


class CustomIMPALA(nn.Module):
    """
    Custom IMPALA CNN for processing image inputs.
    """
    def __init__(self, input_shape, chans, width):
        super(CustomIMPALA, self).__init__()
        self.layers = nn.ModuleList()
        in_channels = input_shape[2]  # Number of input channels (e.g., 3 for RGB)

        for out_channels in chans:
            self.layers.append(
                nn.Sequential(
                    nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                    nn.ReLU(),
                    nn.MaxPool2d(2)
                )
            )
            in_channels = out_channels

        self.fc = nn.Linear(chans[-1] * (input_shape[0] // (2 ** len(chans))) * (input_shape[1] // (2 ** len(chans))), width)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        x = x.reshape(x.size(0), -1)  # Flatten for FC layer
        return self.fc(x)


class CustomTransformer(nn.Module):
    """
    Transformer block for recurrence and attention.
    """
    def __init__(self, input_dim, hidsize, num_heads, num_layers, memory_size):
        super(CustomTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, hidsize)
        self.positional_encoding = nn.Parameter(torch.randn(1, memory_size, hidsize))

        self.layers = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=hidsize,
                nhead=num_heads,
                dim_feedforward=4 * hidsize,
                activation="relu"
            ) for _ in range(num_layers)
        ])

    def forward(self, x):
        x = self.embedding(x)
        x = x + self.positional_encoding[:, :x.size(1), :]
        for layer in self.layers:
            x = layer(x)
        return x


class MinecraftPolicy(nn.Module):
    """
    Custom PyTorch model to replicate the `.model` file.
    """
    def __init__(self, args):
        super(MinecraftPolicy, self).__init__()
        img_shape = args["img_shape"]
        impala_chans = args["impala_chans"]
        impala_width = args["impala_width"]
        hidsize = args["hidsize"]
        attention_heads = args["attention_heads"]
        attention_memory_size = args["attention_memory_size"]
        n_recurrence_layers = args["n_recurrence_layers"]

        # IMPALA CNN for image processing
        self.impala = CustomIMPALA(img_shape, impala_chans, impala_width)

        # Transformer-based recurrent layers
        self.recurrence = CustomTransformer(
            input_dim=impala_width,
            hidsize=hidsize,
            num_heads=attention_heads,
            num_layers=n_recurrence_layers,
            memory_size=attention_memory_size
        )

        # Output head
        self.policy_head = nn.Linear(hidsize, args.get("output_dim", 10))  # Example output size; adjust as needed

    def forward(self, img, timesteps=None):
        x = self.impala(img)
        if timesteps is not None:
            x = x.unsqueeze(1).repeat(1, timesteps, 1)  # Add time dimension
        x = self.recurrence(x)
        return self.policy_head(x[:, -1, :])  # Use the last timestep


# Example instantiation:
def create_minecraft_policy(model_args):
    return MinecraftPolicy(model_args)


# Load model args from the .model file
with open(r'F:\16831_RL\Proj\MCRL-Proj\Model_Weights\2x_pre\2x.model', 'rb') as f:
    model_data = pickle.load(f)

# Extract arguments for the policy
model_args = model_data['model']['args']['net']['args']

# Create the PyTorch model
custom_model = create_minecraft_policy(model_args)

# Create a MineRLAgent and attach the custom model
agent = MineRLAgent(None, policy_kwargs=model_args)
agent.model = custom_model

# Load weights into the model
weights_path = r'F:\16831_RL\Proj\MCRL-Proj\Model_Weights\2x_pre\rl-from-house-2x.weights'
agent.load_weights(weights_path)

# Move the model to shared memory for multiprocessing
agent.model.share_memory()


MinecraftPolicy(
  (impala): CustomIMPALA(
    (layers): ModuleList(
      (0): Sequential(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (1): Sequential(
        (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (2): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (fc): Linear(in_features=8192, out_features=8, bias=True)
  )
  (recurrence): CustomTransformer(
    (embedding): Linear(in_features=8, out_features=2048, bias=True)
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAtte

In [4]:
# Test the model with dummy inputs
dummy_img = torch.randn(1, *model_args["img_shape"])  # Batch size of 1, shape [1, 128, 128, 3]
dummy_img = dummy_img.permute(0, 3, 1, 2)  # Change to [batch_size, channels, height, width]
output = custom_model(dummy_img)
print("Output shape:", output.shape)


Output shape: torch.Size([1, 10])


In [32]:
import numpy as np
import os
obs = np.load(os.path.join(r"F:\16831_RL\Proj\MC_RL\Model_Weights\pre_log\session_11_11_24_21-55\obs_rew", "obs.npy"), allow_pickle=True)
print(obs.shape)
print(obs[0]['pov'].shape)


action = agent.get_action(obs[0])
print("Action:", action)
action = agent.get_action(obs[1])
print("Action:", action)
action = agent.get_action(obs[2])
print("Action:", action)


(5,)
(360, 640, 3)
Action: {'attack': array([0]), 'back': array([0]), 'forward': array([0]), 'jump': array([0]), 'left': array([0]), 'right': array([0]), 'sneak': array([0]), 'sprint': array([0]), 'use': array([0]), 'drop': array([0]), 'inventory': array([0]), 'hotbar.1': array([0]), 'hotbar.2': array([0]), 'hotbar.3': array([0]), 'hotbar.4': array([0]), 'hotbar.5': array([0]), 'hotbar.6': array([0]), 'hotbar.7': array([0]), 'hotbar.8': array([0]), 'hotbar.9': array([0]), 'camera': array([[-5.80948313, -5.80948313]])}
Action: {'attack': array([1]), 'back': array([0]), 'forward': array([0]), 'jump': array([0]), 'left': array([0]), 'right': array([0]), 'sneak': array([0]), 'sprint': array([0]), 'use': array([0]), 'drop': array([0]), 'inventory': array([0]), 'hotbar.1': array([0]), 'hotbar.2': array([0]), 'hotbar.3': array([0]), 'hotbar.4': array([0]), 'hotbar.5': array([0]), 'hotbar.6': array([0]), 'hotbar.7': array([0]), 'hotbar.8': array([0]), 'hotbar.9': array([0]), 'camera': array([[

In [1]:
import pickle

model_path = r'F:\16831_RL\Proj\MC_RL\Model_Weights\2x_pre\2x.model'
with open(model_path, 'rb') as f:
    agent_parameters = pickle.load(f)

print(agent_parameters.keys())
print(agent_parameters['model']['args'].keys())
print(agent_parameters['model']['args']['net']['args'])


dict_keys(['version', 'model', 'extra_args'])
dict_keys(['net', 'pi_head_opts'])
{'active_reward_monitors': {'craft_stats': {'args': {'collapse_var': True, 'items': ['planks', 'stick', 'crafting_table', 'wooden_pickaxe', 'stone_pickaxe', 'furnace', 'iron_ingot', 'iron_pickaxe', 'diamond_pickaxe', 'torch']}, 'weight': 0}, 'mine_stats': {'args': {'collapse_var': True, 'items': ['log', 'coal_ore', 'stone', 'iron_ore', 'diamond_ore', 'obsidian']}, 'weight': 0}, 'order_invariant_curriculum': {'args': {'curriculum': {'coal': [5, 0.4], 'cobblestone': [11, 0.09090909090909091], 'crafting_table': [1, 1], 'diamond': [10000, 2.6666666666666665], 'diamond_pickaxe': [10000, 8], 'furnace': [1, 1], 'iron_ingot': [3, 1.3333333333333333], 'iron_ore': [3, 1.3333333333333333], 'iron_pickaxe': [1, 4], 'log': [8, 0.125], 'obsidian': [10000, 16], 'planks': [20, 0.05], 'stick': [16, 0.0625], 'stone_pickaxe': [1, 1], 'torch': [16, 0.125], 'wooden_pickaxe': [1, 1]}}, 'weight': 1}, 'pickup_stats': {'args': {'co

In [13]:
import torch
weights_path = r'F:\16831_RL\Proj\MCRL-Proj\Model_Weights\2x_pre\rl-from-house-2x.weights'
weights = torch.load(weights_path, map_location=torch.device('cpu'))
print(weights.keys())
# print(weights)
print(len(weights.keys()))
# print(weights['net.img_process.cnn.stacks.0.firstconv.layer.weight'].shape)

for key, value in weights.items():
    print(f"{key}: {value.shape if hasattr(value, 'shape') else 'Scalar'}")


odict_keys(['net.img_process.cnn.stacks.0.firstconv.layer.weight', 'net.img_process.cnn.stacks.0.firstconv.layer.bias', 'net.img_process.cnn.stacks.0.n.weight', 'net.img_process.cnn.stacks.0.n.bias', 'net.img_process.cnn.stacks.0.blocks.0.conv0.norm.weight', 'net.img_process.cnn.stacks.0.blocks.0.conv0.norm.bias', 'net.img_process.cnn.stacks.0.blocks.0.conv0.layer.weight', 'net.img_process.cnn.stacks.0.blocks.0.conv1.norm.weight', 'net.img_process.cnn.stacks.0.blocks.0.conv1.norm.bias', 'net.img_process.cnn.stacks.0.blocks.0.conv1.layer.weight', 'net.img_process.cnn.stacks.0.blocks.1.conv0.norm.weight', 'net.img_process.cnn.stacks.0.blocks.1.conv0.norm.bias', 'net.img_process.cnn.stacks.0.blocks.1.conv0.layer.weight', 'net.img_process.cnn.stacks.0.blocks.1.conv1.norm.weight', 'net.img_process.cnn.stacks.0.blocks.1.conv1.norm.bias', 'net.img_process.cnn.stacks.0.blocks.1.conv1.layer.weight', 'net.img_process.cnn.stacks.1.firstconv.norm.weight', 'net.img_process.cnn.stacks.1.firstconv.no

  weights = torch.load(weights_path, map_location=torch.device('cpu'))


In [15]:
# Open the file containing MineRLAgent
with open('agent.py', 'r') as file:
    for i, line in enumerate(file, 1):
        if 'lambda' in line:
            print(f"Line {i}: {line.strip()}")

weights_path = r'F:\16831_RL\Proj\MCRL-Proj\Model_Weights\2x_pre\rl-from-house-2x.weights'
weights = torch.load(weights_path, map_location=torch.device('cpu'))

for key, value in weights.items():
    if callable(value):  # Check if any value is a function
        print(f"Key {key} contains a callable value: {value}")



  weights = torch.load(weights_path, map_location=torch.device('cpu'))


In [16]:
import tempfile

# Save weights to a temporary file
with tempfile.NamedTemporaryFile(delete=True) as temp_file:
    temp_file.write(shared_weights["weights"])
    temp_file.flush()
    agent.load_weights(temp_file.name)


NameError: name 'shared_weights' is not defined