In [2]:
# %%
import os, json, math, re, pathlib, itertools
from typing import Dict, Tuple, Optional
import numpy as np
import torch
import torch.nn.functional as F
from safetensors.torch import load_file as load_safetensors

# This notebook only reads weights, so gradient tracking is switched off globally.
torch.set_grad_enabled(False)

# <<< EDIT IF NEEDED >>>
# Path to the safetensors checkpoint to read, and the directory that will
# receive the generated files (created if it does not exist yet).
model_weights = "../weights/downloads/model.safetensors"  # <= your path
out_dir = "./headers"
os.makedirs(out_dir, exist_ok=True)


1. Load safetensors and index keys

In [3]:
# Load the checkpoint into `state`; later cells iterate it with .items() and
# read .shape / .dtype from each value.
state = load_safetensors(model_weights)


2. Helper functions for printing out and understanding model weights

In [4]:
def make_dictionary_tree(dict, split='.'):
    """Turn a flat mapping with delimited keys into a nested dictionary.

    A key such as ``"a.b.c"`` becomes ``tree["a"]["b"]["c"]``. At every level
    whose keys are all decimal integers, the entries are ordered numerically
    (so ``"2"`` comes before ``"10"``); otherwise first-seen order is kept.

    Args:
        dict: Mapping from delimited string keys to leaf values. (The name
            shadows the builtin; it is kept for caller compatibility.)
        split: Delimiter separating the path components of a key.

    Returns:
        A nested dictionary whose leaves are the original values.

    Raises:
        ValueError: If one name is used both as a leaf and as a prefix of
            another key (e.g. ``"a"`` and ``"a.b"``). Previously this either
            dropped data silently or failed with an unrelated error.
    """
    tree = {}
    # Names of the entries that are subtrees created here (as opposed to
    # leaf values that came from the caller).
    branch_keys = set()

    for key, value in dict.items():
        head, separator, remainder = key.partition(split)

        if not separator:
            # Plain leaf at this level.
            if head in tree:
                raise ValueError(
                    f"Key '{head}' is used both as a leaf and as a prefix of other keys"
                )
            tree[head] = value
            continue

        if head not in tree:
            tree[head] = {}
            branch_keys.add(head)
        elif head not in branch_keys:
            raise ValueError(
                f"Key '{head}' is used both as a leaf and as a prefix of other keys"
            )
        tree[head][remainder] = value

    # if all items are numbers, sort them by numeric order
    if all(re.match(r'^\d+$', str(k)) for k in tree):
        tree = {k: v for k, v in sorted(tree.items(), key=lambda item: int(item[0]))}

    # Expand the remaining delimited keys one level deeper. `Dict` from typing
    # is used because the builtin name is shadowed by the parameter.
    for key in tree.keys():
        if isinstance(tree[key], Dict):
            tree[key] = make_dictionary_tree(tree[key], split=split)

    return tree

def dictionary_tree_as_string(tree, prefix='', key_prefix=''):
    """Render a nested dictionary of tensors as a box-drawing tree.

    Sub-dictionaries are printed as branches, followed by their full dotted
    path (below the top level only) and the list of their child keys. Every
    other value is printed as a leaf with its ``.shape`` and ``.dtype``.

    Args:
        tree: Nested dictionary; non-dict values must expose ``shape`` and
            ``dtype`` attributes.
        prefix: Text prepended to every emitted line (carries the vertical
            guides of the enclosing levels).
        key_prefix: Dotted path of the enclosing branch, or ``''`` at the root.

    Returns:
        The rendered tree, one entry per line, each terminated by a newline.
        An empty ``tree`` yields an empty string.
    """
    items = list(tree.items())
    rendered = []
    last_index = len(items) - 1  # -1 for an empty tree, so the loop is skipped

    for index, (key, value) in enumerate(items):
        is_last = index == last_index

        if not isinstance(value, dict):
            connector = "└────" if is_last else "├────"
            rendered.append(f"{prefix}{connector} {key}: {value.shape} [{value.dtype}]\n")
            continue

        connector = "└───┬" if is_last else "├───┬"
        # The last branch has no sibling below it, so no vertical guide is drawn.
        child_prefix = f"{prefix}    " if is_last else f"{prefix}│   "
        # Avoid a leading dot when this branch sits at the root.
        path = f"{key_prefix}.{key}" if key_prefix else f"{key}"

        rendered.append(f"{prefix}{connector} {key}\n")
        if key_prefix:
            rendered.append(f"{child_prefix}├─ in: {path}\n")
        rendered.append(f"{child_prefix}├─ children: {list(value.keys())}\n")
        rendered.append(dictionary_tree_as_string(value, child_prefix, path))

    return "".join(rendered)


3. Get the different dimensions of the model weights

In [5]:
# Map every distinct axis size to the names of the tensors that have an axis
# of that size (a tensor appears under each of its axis sizes).
dimensions = {}

for name, param in state.items():
    for dim_size in param.shape:
        dimensions.setdefault(dim_size, set()).add(name)

print("Model weight dimensions:")
for dim, names in sorted(dimensions.items()):
    print(f"  {dim}: {len(names)} tensors")
    # Sets of strings iterate in an order that can change between kernel
    # sessions; sort so the listing is reproducible.
    for n in sorted(names):
        print(f"    - {n}")


Model weight dimensions:
  3: 1 tensors
    - model.vlm_with_expert.vlm.model.vision_model.embeddings.patch_embedding.weight
  16: 1 tensors
    - model.vlm_with_expert.vlm.model.vision_model.embeddings.patch_embedding.weight
  32: 4 tensors
    - model.action_in_proj.weight
    - model.state_proj.weight
    - model.action_out_proj.bias
    - model.action_out_proj.weight
  320: 64 tensors
    - model.vlm_with_expert.lm_expert.layers.7.self_attn.v_proj.weight
    - model.vlm_with_expert.lm_expert.layers.4.self_attn.v_proj.weight
    - model.vlm_with_expert.vlm.model.text_model.layers.14.self_attn.v_proj.weight
    - model.vlm_with_expert.lm_expert.layers.10.self_attn.k_proj.weight
    - model.vlm_with_expert.lm_expert.layers.13.self_attn.v_proj.weight
    - model.vlm_with_expert.vlm.model.text_model.layers.4.self_attn.v_proj.weight
    - model.vlm_with_expert.vlm.model.text_model.layers.1.self_attn.v_proj.weight
    - model.vlm_with_expert.lm_expert.layers.8.self_attn.k_proj.weight
    

In [41]:
class CppCode:
    def __init__(self):
        self.pragmas = []
        self.imports: list[str] = []
        self.lines: list[str] = []
        self.namespaces: dict[str, CppCode] = {}

    def add_pragma(self, pragma: str):
        if pragma not in self.pragmas:
            self.pragmas.append(pragma)

    def get_namespace(
        self, 
        namespace: list[str] | str
    ) -> 'CppCode':
        if isinstance(namespace, str):
            namespace = [namespace]
            
        if namespace:
            name = namespace[0]

            # if name is just a number, prefix with underscore
            if re.match(r'^\d+$', name):
                name = f"_{name}"

            if name not in self.namespaces:
                self.namespaces[name] = CppCode()
            
            subspace = self.namespaces[name]
            
            return subspace.get_namespace(
                namespace=namespace[1:]
            )
        else:
            return self

    def add_import(self, import_line: str):
        for line in import_line.splitlines():
            self.imports.append(line.strip())

    def add_line(self, line: str):
        for line in line.splitlines():
            self.lines.append(line.strip())
            
    def write_string(
        self,
        indent: str = '',
    ) -> str:
        result = ""
        if self.pragmas:
            for pragma in self.pragmas:
                result += f"{indent}{pragma}\n"
            result += "\n"
        if self.imports:
            for imp in self.imports:
                result += f"{indent}{imp}\n"
            result += "\n"
        if self.lines:
            for line in self.lines:
                result += f"{indent}{line}\n"
        for name, subspace in self.namespaces.items():
            result += f"{indent}namespace {name} {{\n"
            result += subspace.write_string(indent + '    ')
            result += f"{indent}}} // namespace {name}\n\n"
        return result
        

In [49]:
# Build model_weights.h: type aliases, one named constant per axis size, and
# one declaration per tensor, nested in namespaces that mirror the tensor name.
header = CppCode()

header.add_pragma("#pragma once")
# std::size_t is declared by <cstddef>; <cstdint> is not guaranteed to provide it.
header.add_import("#include <cstddef>")
header.add_import("#include <cstdint>")

smolvla = header.get_namespace('smolvla')

smolvla.add_line("""
// === Type Definitions ===
using size_t = std::size_t; // Standard size type (for shape)

using float32_t = float;    // 32-bit floating point
using bfloat16_t = float;   // 16-bit bfloat16 floating point (TODO: implement proper bfloat16 type)
""")

# Axis size -> name of the C++ constant used for that size in array bounds.
# Every axis size that occurs in `state` must have an entry here.
# NOTE(review): 12208 differs from 12288 by a single digit — possibly a typo
# or leftover entry; confirm against the dimension listing.
size_to_constant_name = {
    3: "PATCH_EMBED_DIM_SMALL",
    16: "PATCH_EMBED_DIM_LARGE",
    768: "VISION_HIDDEN_DIM",
    3072: "VISION_FFN_DIM",
    1024: "VISION_POS_EMBED_DIM",
    12288: "VISION_CONNECTOR_DIM",
    32: "ACTION_STATE_DIM",
    1440: "ACTION_TIME_MLP_IN_DIM",
    720: "HIDDEN_DIM",
    960: "TEXT_EMBED_DIM",
    2560: "TEXT_FFN_DIM",
    2048: "EXPERT_FFN_DIM",
    320: "ATTENTION_PROJ_DIM",
    49280: "TOKEN_EMBED_DIM",
    12208: "CONNECTOR_PROJ_DIM",
}

smolvla.add_line("""
// === Model Dimension Constants ===
""")
for dim, name in sorted(size_to_constant_name.items()):
    smolvla.add_line(f"constexpr size_t {name} = {dim};")

smolvla.add_line("""
// === Model Weight Declarations ===
""")

for name, param in state.items():
    param_array = name.split('.')

    # All but the last component become nested namespaces; the last one is
    # the variable name.
    namespace = smolvla.get_namespace(param_array[:-1])
    var_name = param_array[-1]

    if param.dtype == torch.float32:
        dtype_str = "float32_t"
    elif param.dtype == torch.bfloat16:
        dtype_str = "bfloat16_t"
    else:
        raise ValueError(f"Unsupported dtype: {param.dtype} for {name}")

    try:
        dim_names = [size_to_constant_name[s] for s in param.shape]
    except KeyError as err:
        # Name the offending tensor so the missing table entry is easy to add.
        raise KeyError(
            f"No dimension constant defined for size {err.args[0]} (tensor {name})"
        ) from err

    shape_str = f"[{']['.join(dim_names)}]"
    # A namespace-scope `const` object without an initializer is ill-formed
    # C++ unless it is declared `extern`; the definition lives in a .cpp file.
    namespace.add_line(f"extern const {dtype_str} {var_name}{shape_str};")

header_text = header.write_string()
with open(os.path.join(out_dir, "model_weights.h"), 'w') as f:
    f.write(header_text)



In [48]:
def create_cpp_code_structure(
    state_tree,
    namespace: CppCode
):
    """Emit C++ array definitions for every tensor in ``state_tree``.

    Sub-dictionaries become nested namespaces of ``namespace``; every other
    value is written as a ``const`` array whose bounds are the constant names
    looked up in ``size_to_constant_name`` and whose initializer holds the
    values converted to float32.

    Raises:
        ValueError: If a tensor's dtype is neither float32 nor bfloat16.
    """
    values_per_line = 10

    for key, value in state_tree.items():
        # Branch: descend into the matching nested namespace.
        if isinstance(value, dict):
            create_cpp_code_structure(
                state_tree=value,
                namespace=namespace.get_namespace(key)
            )
            continue

        # Leaf: pick the C++ element type alias.
        if value.dtype == torch.float32:
            dtype_str = "float32_t"
        elif value.dtype == torch.bfloat16:
            dtype_str = "bfloat16_t"
        else:
            raise ValueError(f"Unsupported dtype: {value.dtype} for {key}")

        bounds = "][".join(size_to_constant_name[s] for s in value.shape)
        namespace.add_line(f"const {dtype_str} {key}[{bounds}] = {{")

        # add values after converting to float32 to numpy
        values = value.to(torch.float32).cpu().numpy()

        if len(values.shape) == 2:
            # Two dimensions: one braced row per line.
            for row in values:
                joined_row = ', '.join(f"{v}" for v in row)
                namespace.add_line(f"    {{ {joined_row} }},")
        else:
            # Any other rank: flat list, ten values per line.
            flat_values = values.flatten()
            for start in range(0, len(flat_values), values_per_line):
                chunk = flat_values[start:start + values_per_line]
                namespace.add_line("    " + "".join(f"{v}, " for v in chunk))

        namespace.add_line("};\n")
        

def save_selected_layer(
    state_tree,
    layer,
    root_namespace: str = 'smolvla'
):
    """Write the weights below one dotted path to ``WEIGHTS_<path>.cpp``.

    The generated file includes ``model_weights.h`` and defines the tensors
    inside ``root_namespace`` followed by one namespace per path component.
    The header written by this notebook places its type aliases, dimension
    constants and weight declarations in ``smolvla``, so the definitions must
    live in that namespace as well for those names to resolve.

    Args:
        state_tree: Nested dictionary of tensors (see ``make_dictionary_tree``).
        layer: Dotted path of the subtree to export, e.g. ``"model.a.layers.3"``.
        root_namespace: Outermost namespace for the definitions. Pass an empty
            string to emit them at global scope.

    Returns:
        None. If the path cannot be resolved, a message is printed and no
        file is written.
    """
    source = CppCode()
    source.add_import('#include "model_weights.h"')

    namespace = source.get_namespace(root_namespace) if root_namespace else source
    selected_layer = state_tree
    for key in layer.split('.'):
        # Stop cleanly when the path is missing, or when it tries to descend
        # below a leaf tensor.
        if not isinstance(selected_layer, dict) or key not in selected_layer:
            print (f"Layer {layer} not found in state tree. (stopped at key '{key}')")
            if isinstance(selected_layer, dict):
                print ("Available keys at this level:", list(selected_layer.keys()))
            return

        namespace = namespace.get_namespace(key)
        selected_layer = selected_layer[key]

    create_cpp_code_structure(
        state_tree=selected_layer,
        namespace=namespace
    )
    source_text = source.write_string()
    layer_filename = "WEIGHTS_" + layer.replace('.', '_') + ".cpp"
    with open(os.path.join(out_dir, layer_filename), 'w') as f:
        f.write(source_text)

# Dotted paths of the subtrees to export; each one is written to its own
# WEIGHTS_<path>.cpp file by save_selected_layer.
layers_to_save = [
    'model.vlm_with_expert.vlm.model.vision_model.encoder.layers.6',
    'model.vlm_with_expert.vlm.model.text_model.layers.3',
    'model.vlm_with_expert.lm_expert.layers.2',
]

# Nest the flat checkpoint keys so a whole layer can be selected by its path.
state_tree = make_dictionary_tree(state)

for layer in layers_to_save:
    save_selected_layer(
        state_tree,
        layer=layer
    )

