# Building default model

In [1]:
# idea: in simple proof of concept notebook a 2 layer mlp was used to make a default model.
# replicate this by specifying the number of parameters in a funciton and the number of layers

In [2]:
import torch

## Result implementation

In [3]:
from etnn.nn.baseline import create_baseline_model, calc_params

In [4]:
model, should_use = create_baseline_model(
    n_params=5000,
    input_dim=12*50,
    n_layer=2,
    output_dim=1
)
print(f"model: {model}, should_use: {should_use}, model_params: {calc_params(model)}")

model: Sequential(
  (0): Linear(in_features=600, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
), should_use: True, model_params: 4817


In [5]:
model, should_use = create_baseline_model(
    n_params=5000,
    input_dim=12*50,
    n_layer=3,
    output_dim=1
)
print(f"model: {model}, should_use: {should_use}, model_params: {calc_params(model)}")

model: Sequential(
  (0): Linear(in_features=600, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=2, bias=True)
  (3): ReLU()
  (4): Linear(in_features=2, out_features=1, bias=True)
), should_use: True, model_params: 4829


## Simple first attempt - 2 layer

In [6]:
input_dim_elem = 10
n_elem = 50
output_dim = 1

In [7]:
num_parameters = 1000

In [8]:
# a linear layer [torch.nn.Linear(n,m)] has n*m parameters.
# if I say I want to have a 2 layer mlp this means I have m*x + x*n parameters. In other words: num_par = x * (m+n)
# let's confirm it

In [9]:
n = output_dim
m = input_dim_elem * n_elem

In [10]:
hidden_dim = int(num_parameters/(m + n) + 1)
hidden_dim

2

In [11]:
hidden_dim = 3

In [12]:
module1 = torch.nn.Sequential(
    torch.nn.Linear(m, hidden_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_dim, n)
)

In [13]:
[p.numel() for p in module1.parameters()]

[1500, 3, 3, 1]

In [14]:
sum([p.numel() for p in module1.parameters()])

1507

## Get a specific number of layers

In [15]:
# good that worked quite well
# now we do not have only have 2 layers but more.
# meaning: m*x1 + x1*x2 + ... + xn*n = n_param
# ... which is kind of difficult. If we say that x_i = factor * x_(i-1) then it becomes easier
# the formula turns into: n_param = m*factor^(n_layer-1) + ... + factor*n

In [16]:
n_layer = 4

In [17]:
for factor in range(2, 20, 1):
    params = 0
    for i in range(n_layer):
        if i == 0:
            print(n, factor)
            params += n*factor
        elif i == (n_layer-1):
            print(m, factor**i)
            params += m*(factor ** (i))
        else:
            print(factor**i, factor**(i+1))
            params += (factor ** i) * (factor ** (i+1))

    if params >= num_parameters:
        break
print(f"params: {params}, factor:{factor}")

1 2
2 4
4 8
500 8
params: 4042, factor:2


In [18]:
# build layers
layers = []
for idx in range(n_layer):
    i = n_layer-idx
    print(i)

    if idx == 0:
        layers += [torch.nn.Linear(m, (factor ** (i-1)))]
        layers += [torch.nn.ReLU()]
    elif idx == (n_layer-1):
        layers += [torch.nn.Linear(factor, n)]
    else:
        temp = (factor ** (i-1))
        layers += [torch.nn.Linear(temp*factor, temp)]
        layers += [torch.nn.ReLU()]

4
3
2
1


In [19]:
# build model
module2 = torch.nn.Sequential(*layers)
module2

Sequential(
  (0): Linear(in_features=500, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=4, bias=True)
  (3): ReLU()
  (4): Linear(in_features=4, out_features=2, bias=True)
  (5): ReLU()
  (6): Linear(in_features=2, out_features=1, bias=True)
)

In [20]:
print([p.numel() for p in module2.parameters()])
print(sum([p.numel() for p in module2.parameters()]))

[4000, 8, 32, 4, 8, 2, 2, 1]
4057


In [21]:
q = 0
for i in range(n_layer):
    if i == 0:
        q += n*factor
    elif i == (n_layer-1):
        q += m*(factor ** i)
    else:
        q += (factor ** i) * (factor ** (i+1))

In [22]:
q

4042

In [23]:
hidden_dim = int(num_parameters/q + 1)

In [24]:
hidden_dim

1

In [25]:
# build layers
layers = []
for idx in range(n_layer):
    i = n_layer-idx+1

    if idx == 0:
        layers += [torch.nn.Linear(m, hidden_dim*(factor ** i))]
        layers += [torch.nn.ReLU()]
    elif idx == (n_layer-1):
        layers += [torch.nn.Linear(hidden_dim*factor, n)]
    else:
        temp = hidden_dim*(factor ** (i-1))
        layers += [torch.nn.Linear(temp*factor, temp)]
        layers += [torch.nn.ReLU()]

In [26]:
layers

[Linear(in_features=500, out_features=32, bias=True),
 ReLU(),
 Linear(in_features=16, out_features=8, bias=True),
 ReLU(),
 Linear(in_features=8, out_features=4, bias=True),
 ReLU(),
 Linear(in_features=2, out_features=1, bias=True)]

In [27]:
# build model
module2 = torch.nn.Sequential(*layers)

In [28]:
print([p.numel() for p in module2.parameters()])
print(sum([p.numel() for p in module2.parameters()]))

[16000, 32, 128, 8, 32, 4, 2, 1]
16207
