In [3]:
from pathlib import Path

import torch
import torch.nn as nn
import yaml

from model_builder import GenericModelParser

In [2]:
# Build a model from its YAML description (here: resnet18.yaml, 10 output classes).
parser = GenericModelParser('resnet18.yaml', num_classes=10)
model = parser.build()

# Smoke-test the forward pass with a single random 3x224x224 input.
# Needs the top-level `torch` name; `import torch.nn as nn` alone only binds `nn`.
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():  # shape check only, so skip autograd bookkeeping
    out = model(x)
print(f"Input shape: {x.shape}")
print(f"Output shape: {out.shape}")
print("\nModel summary:")
print(model)

Building model from ResNet18
Input shape: [3, 224, 224]

Layer 0: Conv (repeat=1)
  Before: [3, 224, 224]
  After: [64, 112, 112]

Layer 1: BatchNorm (repeat=1)
  Before: [64, 112, 112]
  After: [64, 112, 112]

Layer 2: ReLU (repeat=1)
  Before: [64, 112, 112]
  After: [64, 112, 112]

Layer 3: MaxPool (repeat=1)
  Before: [64, 112, 112]
  After: [64, 56, 56]

Layer 4: ResBlock (repeat=2)
  Before: [64, 56, 56]
  After: [64, 56, 56]

Layer 5: ResBlock (repeat=1)
  Before: [64, 56, 56]
  After: [128, 28, 28]

Layer 6: ResBlock (repeat=1)
  Before: [128, 28, 28]
  After: [128, 28, 28]

Layer 7: ResBlock (repeat=1)
  Before: [128, 28, 28]
  After: [256, 14, 14]

Layer 8: ResBlock (repeat=1)
  Before: [256, 14, 14]
  After: [256, 14, 14]

Layer 9: ResBlock (repeat=1)
  Before: [256, 14, 14]
  After: [512, 7, 7]

Layer 10: ResBlock (repeat=1)
  Before: [512, 7, 7]
  After: [512, 7, 7]

Head 0: AdaptiveAvgPool
  Before: [512, 7, 7]
  After: [512, 1, 1]

Head 1: Flatten
  Before: [512, 1, 1]
 

In [5]:
# Name -> factory lookup for a few standard PyTorch layers.
# Every factory accepts the layer's positional-argument list, even when it
# ignores it, so all entries are called the same way.
modules = {}
modules['ReLU'] = lambda args: nn.ReLU(inplace=True)
modules['Sigmoid'] = lambda args: nn.Sigmoid()
modules['AvgPool'] = lambda args: nn.AvgPool2d(*args)

# Last expression: show the factory registered under "ReLU".
modules.get("ReLU")

<function __main__.<lambda>(args)>

In [13]:
# Load the model definition from YAML and list its top-level entries.
model_cfg_path = 'model1.yaml'

# Decode as ASCII; errors="ignore" drops undecodable bytes instead of raising.
with open(model_cfg_path, encoding="ascii", errors="ignore") as cfg_file:
    model = yaml.safe_load(cfg_file)  # parsed config (a dict)

# One "key value" line per top-level entry.
for entry in model.items():
    print(*entry)

# Backbone layer specs followed by head layer specs, as one list.
all_layers = [*model['backbone'], *model['head']]

nc 80
depth_multiple 0.33
width_multiple 0.25
anchors [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
backbone [[-1, 1, 'Conv', [64, 6, 2, 2]], [-1, 1, 'Conv', [128, 3, 2]], [-1, 3, 'C3', [128]], [-1, 1, 'Conv', [256, 3, 2]], [-1, 6, 'C3', [256]], [-1, 1, 'Conv', [512, 3, 2]], [-1, 9, 'C3', [512]], [-1, 1, 'Conv', [1024, 3, 2]], [-1, 3, 'C3', [1024]], [-1, 1, 'SPPF', [1024, 5]]]
head [[-1, 1, 'Conv', [512, 1, 1]], [-1, 1, 'Upsample', ['None', 2, 'nearest']], [[-1, 6], 1, 'Concat', [1]], [-1, 3, 'C3', [512, False]], [-1, 1, 'Conv', [256, 1, 1]], [-1, 1, 'Upsample', ['None', 2, 'nearest']], [[-1, 4], 1, 'Concat', [1]], [-1, 3, 'C3', [256, False]], [-1, 1, 'Conv', [256, 3, 2]], [[-1, 14], 1, 'Concat', [1]], [-1, 3, 'C3', [512, False]], [-1, 1, 'Conv', [512, 3, 2]], [[-1, 10], 1, 'Concat', [1]], [-1, 3, 'C3', [1024, False]], [[17, 20, 23], 1, 'Detect', ['nc', 'anchors']]]


In [16]:
# Print each layer spec with its index; a spec is (from, repeats, module, args).
for i, (from_idx, num_repeats, module_name, args) in enumerate(all_layers):
    print(i, from_idx, num_repeats, module_name, args)

0 -1 1 Conv [64, 6, 2, 2]
1 -1 1 Conv [128, 3, 2]
2 -1 3 C3 [128]
3 -1 1 Conv [256, 3, 2]
4 -1 6 C3 [256]
5 -1 1 Conv [512, 3, 2]
6 -1 9 C3 [512]
7 -1 1 Conv [1024, 3, 2]
8 -1 3 C3 [1024]
9 -1 1 SPPF [1024, 5]
10 -1 1 Conv [512, 1, 1]
11 -1 1 Upsample ['None', 2, 'nearest']
12 [-1, 6] 1 Concat [1]
13 -1 3 C3 [512, False]
14 -1 1 Conv [256, 1, 1]
15 -1 1 Upsample ['None', 2, 'nearest']
16 [-1, 4] 1 Concat [1]
17 -1 3 C3 [256, False]
18 -1 1 Conv [256, 3, 2]
19 [-1, 14] 1 Concat [1]
20 -1 3 C3 [512, False]
21 -1 1 Conv [512, 3, 2]
22 [-1, 10] 1 Concat [1]
23 -1 3 C3 [1024, False]
24 [17, 20, 23] 1 Detect ['nc', 'anchors']


In [None]:
# Reference only (never executed): commented-out `parse_model` for YOLOv5-style configs.
# def parse_model(d, ch):
#     """Parses a YOLOv5 model from a dict `d`, configuring layers based on input channels `ch` and model architecture."""
#     anchors, nc, gd, gw, act, ch_mul = (
#         d["anchors"],
#         d["nc"],
#         d["depth_multiple"],
#         d["width_multiple"],
#         d.get("activation"),
#         d.get("channel_multiple"),
#     )
#     if act:
#         Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
#     if not ch_mul:
#         ch_mul = 8
#     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
#     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

#     layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
#     for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
#         m = eval(m) if isinstance(m, str) else m  # eval strings
#         for j, a in enumerate(args):
#             with contextlib.suppress(NameError):
#                 args[j] = eval(a) if isinstance(a, str) else a  # eval strings

#         n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
#         if m in {
#             Conv,
#             GhostConv,
#             Bottleneck,
#             GhostBottleneck,
#             SPP,
#             SPPF,
#             DWConv,
#             MixConv2d,
#             Focus,
#             CrossConv,
#             BottleneckCSP,
#             C3,
#             C3TR,
#             C3SPP,
#             C3Ghost,
#             nn.ConvTranspose2d,
#             DWConvTranspose2d,
#             C3x,
#         }:
#             c1, c2 = ch[f], args[0]
#             if c2 != no:  # if not output
#                 c2 = make_divisible(c2 * gw, ch_mul)

#             args = [c1, c2, *args[1:]]
#             if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
#                 args.insert(2, n)  # number of repeats
#                 n = 1
#         elif m is nn.BatchNorm2d:
#             args = [ch[f]]
#         elif m is Concat:
#             c2 = sum(ch[x] for x in f)
#         # TODO: channel, gw, gd
#         elif m in {Detect, Segment}:
#             args.append([ch[x] for x in f])
#             if isinstance(args[1], int):  # number of anchors
#                 args[1] = [list(range(args[1] * 2))] * len(f)
#             if m is Segment:
#                 args[3] = make_divisible(args[3] * gw, ch_mul)
#         elif m is Contract:
#             c2 = ch[f] * args[0] ** 2
#         elif m is Expand:
#             c2 = ch[f] // args[0] ** 2
#         else:
#             c2 = ch[f]

#         m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
#         t = str(m)[8:-2].replace("__main__.", "")  # module type
#         np = sum(x.numel() for x in m_.parameters())  # number params
#         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
#         LOGGER.info(f"{i:>3}{str(f):>18}{n_:>3}{np:10.0f}  {t:<40}{str(args):<30}")  # print
#         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
#         layers.append(m_)
#         if i == 0:
#             ch = []
#         ch.append(c2)
#     return nn.Sequential(*layers), sorted(save)


In [2]:
# Wildcard import kept as-is; presumably this is what provides `ModelParser`.
from model_parser import *

# Parse model1.yaml into a model plus the indices of the layers whose outputs are saved.
yaml_path = 'model1.yaml'
input_channels = 3
parser = ModelParser(yaml_path)
model, save_indices = parser.parse(input_channels)

25

Layer   From        Module              Out Channels   Params      
--------------------------------------------------------------------------------
0       -1          Conv                64             7,040       
1       -1          Conv                128            73,984      
2       -1          C3                  128            156,928     
3       -1          Conv                256            295,424     
4       -1          C3                  256            1,118,208   
5       -1          Conv                512            1,180,672   
6       -1          C3                  512            6,433,792   
7       -1          Conv                1024           4,720,640   
8       -1          C3                  1024           9,971,712   
9       -1          SPPF                1024           2,624,512   
10      -1          Conv                512            525,312     
11      -1          Upsample            512            0           
12      [-1, 6]     Concat     

In [3]:
# Display `model`; as the cell's last expression, its repr becomes the cell output.
model

YOLOModel(
  (layers): ModuleList(
    (0): Conv(
      (conv): Conv2d(3, 64, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (1): Conv(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (2): C3(
      (cv1): Conv(
        (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (cv2): Conv(
        (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (cv

In [4]:
# Join the backbone and head layer specs from the parsed config into one list,
# then display it as the cell output.
all_layers = [*parser.config['backbone'], *parser.config['head']]
all_layers

[[-1, 1, 'Conv', [64, 6, 2, 2]],
 [-1, 1, 'Conv', [128, 3, 2]],
 [-1, 3, 'C3', [128]],
 [-1, 1, 'Conv', [256, 3, 2]],
 [-1, 6, 'C3', [256]],
 [-1, 1, 'Conv', [512, 3, 2]],
 [-1, 9, 'C3', [512]],
 [-1, 1, 'Conv', [1024, 3, 2]],
 [-1, 3, 'C3', [1024]],
 [-1, 1, 'SPPF', [1024, 5]],
 [-1, 1, 'Conv', [512, 1, 1]],
 [-1, 1, 'nn.Upsample', ['None', 2, 'nearest']],
 [[-1, 6], 1, 'Concat', [1]],
 [-1, 3, 'C3', [512, False]],
 [-1, 1, 'Conv', [256, 1, 1]],
 [-1, 1, 'nn.Upsample', ['None', 2, 'nearest']],
 [[-1, 4], 1, 'Concat', [1]],
 [-1, 3, 'C3', [256, False]],
 [-1, 1, 'Conv', [256, 3, 2]],
 [[-1, 14], 1, 'Concat', [1]],
 [-1, 3, 'C3', [512, False]],
 [-1, 1, 'Conv', [512, 3, 2]],
 [[-1, 10], 1, 'Concat', [1]],
 [-1, 3, 'C3', [1024, False]],
 [[17, 20, 23], 1, 'Detect', ['nc', 'anchors']]]

In [None]:
# Look up the 'nc' entry of the parsed config.
# NOTE(review): the saved output (`int`) looks stale, presumably from an earlier
# `type(...)` call; re-run the cell to refresh it.
parser.config['nc']

int