In [16]:
import numpy as np
import torch
import torch.nn as nn

In [26]:
# Define two models to compare the number of parameters
widenet = nn.Sequential(
    nn.Linear(6, 10),  # Hidden layer
    nn.Linear(10, 4)   # Output layer
)

deepnet = nn.Sequential(
    nn.Linear(6, 8),  # Hidden layer
    nn.Linear(8, 6),  # Hidden layer
    nn.Linear(6, 4)   # Output layer
)
# Print the models to inspect their structure
print(widenet)
print('\n')
print(deepnet)

Sequential(
  (0): Linear(in_features=6, out_features=10, bias=True)
  (1): Linear(in_features=10, out_features=4, bias=True)
)


Sequential(
  (0): Linear(in_features=6, out_features=8, bias=True)
  (1): Linear(in_features=8, out_features=6, bias=True)
  (2): Linear(in_features=6, out_features=4, bias=True)
)


In [27]:
# Count the number of biases (nodes) in the wide network
numNodesInWide = sum(p.numel() for p in widenet.parameters() if p.requires_grad)

# Count the number of biases (nodes) in the deep network
numNodesInDeep = sum(p.numel() for p in deepnet.parameters() if p.requires_grad)

print('There are %s nodes in the wide network.' % numNodesInWide)
print('There are %s nodes in the deep network.' % numNodesInDeep)

There are 114 nodes in the wide network.
There are 138 nodes in the deep network.


In [28]:
# Print the parameters of the wide network
for name, param in widenet.named_parameters():
    print(name, param)

0.weight Parameter containing:
tensor([[ 0.3617,  0.0190,  0.0511,  0.1237, -0.0405, -0.1512],
        [-0.0906, -0.1181,  0.2420,  0.2817,  0.3622,  0.0441],
        [-0.4007,  0.0491,  0.3492,  0.0062, -0.1736, -0.1652],
        [ 0.0647,  0.3880, -0.2943, -0.3545, -0.0525,  0.1718],
        [-0.1505,  0.0195,  0.1825,  0.3658,  0.2103,  0.2748],
        [-0.1087,  0.4016,  0.0731,  0.1401, -0.0723,  0.0038],
        [ 0.1771, -0.3969, -0.3625, -0.0867,  0.0092, -0.2916],
        [ 0.3182, -0.2620, -0.2798, -0.2757, -0.1379, -0.1063],
        [-0.0224,  0.0034,  0.3558, -0.1499,  0.3684,  0.0063],
        [ 0.4018, -0.3360,  0.1765,  0.2499,  0.1631, -0.2423]],
       requires_grad=True)
0.bias Parameter containing:
tensor([-0.1024,  0.3655, -0.0129, -0.3152,  0.2717,  0.1233, -0.0327, -0.0045,
        -0.1086,  0.2348], requires_grad=True)
1.weight Parameter containing:
tensor([[-1.4077e-01,  2.0826e-01, -1.7282e-01, -1.9528e-01,  2.4538e-01,
         -1.8342e-01,  1.0808e-03,  2.90

In [29]:
# Print the parameters of the wide network
for name, param in deepnet.named_parameters():
    print(name, param)

0.weight Parameter containing:
tensor([[-0.1918, -0.0190,  0.0625, -0.0880,  0.0681,  0.1938],
        [ 0.1927, -0.2761,  0.2764,  0.0564, -0.0566,  0.0436],
        [ 0.0438,  0.2571, -0.1932, -0.1191,  0.3540, -0.0059],
        [ 0.2446,  0.0856, -0.1290, -0.2660, -0.1106, -0.1092],
        [ 0.3321, -0.3976,  0.0893,  0.0338, -0.3517, -0.3642],
        [-0.1111,  0.0355, -0.1798, -0.2871, -0.3330, -0.1848],
        [-0.1086,  0.2165,  0.3698, -0.2812,  0.3744,  0.0384],
        [ 0.2973, -0.2562,  0.0520, -0.0010,  0.3618,  0.2814]],
       requires_grad=True)
0.bias Parameter containing:
tensor([-0.2975, -0.3674, -0.3556, -0.0107, -0.0246, -0.0125,  0.3245,  0.1049],
       requires_grad=True)
1.weight Parameter containing:
tensor([[-0.2180, -0.1562,  0.3264, -0.1760,  0.2553, -0.2047,  0.0274, -0.2684],
        [-0.1740,  0.1235, -0.0850, -0.1184,  0.3088,  0.0992,  0.1018,  0.1520],
        [-0.3200,  0.1623, -0.0667, -0.1419, -0.3287,  0.3516, -0.2363, -0.0400],
        [ 0.089

In [30]:
# Count the total number of trainable parameters in the wide network
nparams = sum(p.numel() for p in widenet.parameters() if p.requires_grad)
print('\nTotal number of parameters in the wide network: %s' % nparams)

# Count the total number of trainable parameters in the deep network
nparams = sum(p.numel() for p in deepnet.parameters() if p.requires_grad)
print('Total number of parameters in the deep network: %s' % nparams)


Total number of parameters in the wide network: 114
Total number of parameters in the deep network: 138
