This notebook explores key characteristics of the image-based and vector-based PPO-trained models.

### Import Required Libraries and Modules

In [1]:
import os
import sys
from stable_baselines3 import PPO
import numpy as np




##

### Loading Trained Models

In [2]:
# Checkpoint locations, given relative to the notebook's working directory.
IMAGE_MODEL_PATH = "image_model/image_model"
VECTOR_MODEL_PATH = "vector_model/vector_model"

# Restore both trained PPO agents from disk.
# NOTE(review): the recorded output of this cell shows deserialization
# exceptions ("code expected at least 16 arguments, got 15") and missing
# state_dict keys. Presumably the checkpoints were saved under a different
# Python / stable-baselines3 version than this kernel -- TODO confirm and
# pin the versions used for training.
image_model = PPO.load(IMAGE_MODEL_PATH)
vector_model = PPO.load(VECTOR_MODEL_PATH)

Exception: code expected at least 16 arguments, got 15
Exception: code expected at least 16 arguments, got 15
Exception: code expected at least 16 arguments, got 15
	Missing key(s) in state_dict: "pi_features_extractor.cnn.0.weight", "pi_features_extractor.cnn.0.bias", "pi_features_extractor.cnn.2.weight", "pi_features_extractor.cnn.2.bias", "pi_features_extractor.cnn.4.weight", "pi_features_extractor.cnn.4.bias", "pi_features_extractor.linear.0.weight", "pi_features_extractor.linear.0.bias", "vf_features_extractor.cnn.0.weight", "vf_features_extractor.cnn.0.bias", "vf_features_extractor.cnn.2.weight", "vf_features_extractor.cnn.2.bias", "vf_features_extractor.cnn.4.weight", "vf_features_extractor.cnn.4.bias", "vf_features_extractor.linear.0.weight", "vf_features_extractor.linear.0.bias".  
Exception: code expected at least 16 arguments, got 15
Exception: code expected at least 16 arguments, got 15
Exception: code expected at least 16 arguments, got 15


### Actor-Critic Policy Network Architecture

In [3]:
# Keep a handle on the image-based model's policy network; later cells reuse
# `image_policy` to count its parameters.
image_policy = image_model.policy
# Bare expression so the notebook renders the module's layer-by-layer repr.
image_policy

ActorCriticCnnPolicy(
  (features_extractor): NatureCNN(
    (cnn): Sequential(
      (0): Conv2d(15, 32, kernel_size=(8, 8), stride=(4, 4))
      (1): ReLU()
      (2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))
      (3): ReLU()
      (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
      (5): ReLU()
      (6): Flatten(start_dim=1, end_dim=-1)
    )
    (linear): Sequential(
      (0): Linear(in_features=3136, out_features=512, bias=True)
      (1): ReLU()
    )
  )
  (pi_features_extractor): NatureCNN(
    (cnn): Sequential(
      (0): Conv2d(15, 32, kernel_size=(8, 8), stride=(4, 4))
      (1): ReLU()
      (2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))
      (3): ReLU()
      (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
      (5): ReLU()
      (6): Flatten(start_dim=1, end_dim=-1)
    )
    (linear): Sequential(
      (0): Linear(in_features=3136, out_features=512, bias=True)
      (1): ReLU()
    )
  )
  (vf_features_extractor): NatureCNN(
    (cnn)

In [4]:
# Keep a handle on the vector-based model's policy network; later cells reuse
# `vector_policy` to count its parameters.
vector_policy = vector_model.policy
# Bare expression so the notebook renders the module's layer-by-layer repr.
vector_policy

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (pi_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (vf_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (policy_net): Sequential(
      (0): Linear(in_features=470, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=64, bias=True)
      (3): ReLU()
    )
    (value_net): Sequential(
      (0): Linear(in_features=470, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=64, bias=True)
      (3): ReLU()
    )
  )
  (action_net): Linear(in_features=64, out_features=2, bias=True)
  (value_net): Linear(in_features=64, out_features=1, bias=True)
)

### Action and Observation Spaces

In [5]:
# Display the action space stored on the loaded image-based model.
image_model.action_space 

Box([-1.  0.], 1.0, (2,), float32)

In [6]:
# Display the action space stored on the loaded vector-based model.
vector_model.action_space

Box([-1.  0.], 1.0, (2,), float32)

In [7]:
# Display the observation space stored on the loaded image-based model.
image_model.observation_space

Box(0, 255, (15, 84, 84), uint8)

In [8]:
# Display the observation space stored on the loaded vector-based model.
vector_model.observation_space

Box(-inf, inf, (470,), float32)

### Number of Trainable Parameters

In [9]:
# Total number of scalar weights in the image-based policy that receive
# gradients (parameters with requires_grad=False are excluded).
image_trainable_total = sum(
    param.numel()
    for param in image_policy.parameters()
    if param.requires_grad
)
print("Image Model Trainable Parameters:", image_trainable_total)

Image Model Trainable Parameters: 2002531


In [10]:
# Per-module breakdown of the image-based policy's trainable parameters.
# Only modules that expose a `weight` attribute are listed. Within each one,
# only parameters with requires_grad=True are counted, so every line uses the
# same "trainable" definition as the overall total.
# NOTE(review): the recorded per-module counts add up to 128 fewer than the
# recorded overall total. Presumably the remainder is a parameter registered
# directly on the policy rather than inside a weight-bearing submodule --
# TODO confirm (e.g. via image_model.policy.named_parameters(recurse=False)).
print(f"Breakdown of Image-Based Model Trainable Parameters:\n{'-'*52}")
for name, module in image_model.policy.named_modules():
    if hasattr(module, "weight"):
        params = sum(p.numel() for p in module.parameters() if p.requires_grad)
        print(f"{name}: {params}")


Breakdown of Image-Based Model Trainable Parameters:
----------------------------------------------------
features_extractor.cnn.0: 30752
features_extractor.cnn.2: 32832
features_extractor.cnn.4: 36928
features_extractor.linear.0: 1606144
mlp_extractor.policy_net.0: 131328
mlp_extractor.policy_net.2: 16448
mlp_extractor.value_net.0: 131328
mlp_extractor.value_net.2: 16448
action_net: 130
value_net: 65


In [11]:
# Total number of scalar weights in the vector-based policy that receive
# gradients (parameters with requires_grad=False are excluded).
vector_trainable_total = sum(
    param.numel()
    for param in vector_policy.parameters()
    if param.requires_grad
)
print("Vector Model Trainable Parameters:", vector_trainable_total)

Vector Model Trainable Parameters: 274371


In [12]:
# Per-module breakdown of the vector-based policy's trainable parameters.
# Only modules that expose a `weight` attribute are listed. Within each one,
# only parameters with requires_grad=True are counted, so every line uses the
# same "trainable" definition as the overall total.
# NOTE(review): the recorded per-module counts add up to 128 fewer than the
# recorded overall total. Presumably the remainder is a parameter registered
# directly on the policy rather than inside a weight-bearing submodule --
# TODO confirm (e.g. via vector_model.policy.named_parameters(recurse=False)).
print(f"Breakdown of Vector-Based Model Trainable Parameters:\n{'-'*53}")
for name, module in vector_model.policy.named_modules():
    if hasattr(module, "weight"):
        params = sum(p.numel() for p in module.parameters() if p.requires_grad)
        print(f"{name}: {params}")

Breakdown of Vector-Based Model Trainable Parameters:
-----------------------------------------------------
mlp_extractor.policy_net.0: 120576
mlp_extractor.policy_net.2: 16448
mlp_extractor.value_net.0: 120576
mlp_extractor.value_net.2: 16448
action_net: 130
value_net: 65


### Size of Model Input

In [13]:
# Number of scalar values in a single image observation: the product of all
# dimensions of the policy's observation-space shape.
image_obs_space = image_model.policy.observation_space
image_obs_shape = image_obs_space.shape
input_dim = np.prod(image_obs_shape)
print("Size of Image Input:", input_dim)


Size of Image Input: 105840


In [14]:
# Number of scalar values in a single vector observation: the product of all
# dimensions of the policy's observation-space shape.
vector_obs_space = vector_model.policy.observation_space
vector_obs_shape = vector_obs_space.shape
input_dim = np.prod(vector_obs_shape)
print("Size of Vector Input:", input_dim)

Size of Vector Input: 470
