## Things to keep in mind when using `deepSI_lite`



## Normalization

Neural networks and many other parameterizations (e.g. polynomials) work best when their inputs and outputs are normalized. Because most activation functions and initialization strategies assume zero-mean, unit-standard-deviation (std = 1) inputs, I highly recommend that you either:

1. Only work with normalized data
2. Or include normalization components in `f`, `h`, and `encoder`.

These normalization components are:

* $f(x,u) \rightarrow f(x,(u-u_{\text{mean}})/u_{\text{std}})$ which can be added by wrapping a function with `norm.f`
* $h(x) \rightarrow h(x) y_\text{std} + y_\text{mean}$ which can be added by wrapping a function with `norm.h`
* $\text{encoder}(u_\text{past}, y_\text{past}) \rightarrow \text{encoder}((u_\text{past}-u_{\text{mean}})/u_{\text{std}}, (y_\text{past}-y_{\text{mean}})/y_{\text{std}})$ which can be added by wrapping a function with `norm.encoder`

In [4]:
from deepSI_lite.networks import MLP_res_net
from torch import nn
from deepSI_lite.models import SUBNET
from deepSI_lite import get_nu_ny_and_auto_norm
from nonlinear_benchmarks import Cascaded_Tanks
# Load the benchmark; the normalization below is derived from the training split.
train, test = Cascaded_Tanks()

# nx is used as the state size; nb / na are paired with nu / ny in the encoder input.
nx, nb, na = 2, 5, 5
nu, ny, norm = get_nu_ny_and_auto_norm(train)

# State transition: f(x, u) -> x (with batch), wrapped by norm.f.
f_net = MLP_res_net(
    input_size=[nx, nu],
    output_size=nx,
    activation=nn.Sigmoid,
)
f = norm.f(f_net)

# Output map: h(x, u) -> y with batch if feedthrough = True, else h(x) -> y,
# wrapped by norm.h.
h_net = MLP_res_net(
    input_size=nx,
    output_size=ny,
    n_hidden_layers=5,
    n_hidden_nodes=10,
)
h = norm.h(h_net)

# Encoder: encoder(upast, ypast) -> x, wrapped by norm.encoder.
encoder_net = MLP_res_net(
    input_size=[(nb, nu), (na, ny)],
    output_size=nx,
    n_hidden_layers=1,
    n_hidden_nodes=15,
)
encoder = norm.encoder(encoder_net)

# Show the three wrapped components, one after the other.
print(f, h, encoder, sep="\n")

# Show the full model assembled from the custom components.
print(
    SUBNET(
        nu,
        ny,
        norm,
        nx=nx,
        nb=nb,
        na=na,
        f=f,
        h=h,
        encoder=encoder,
    )
)

IO_normalization_f(
  (fun): MLP_res_net(
    (net_res): Linear(in_features=3, out_features=2, bias=True)
    (net_nonlin): Sequential(
      (0): Linear(in_features=3, out_features=64, bias=True)
      (1): Sigmoid()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Sigmoid()
      (4): Linear(in_features=64, out_features=2, bias=True)
    )
  )
)
IO_normalization_h(
  (fun): MLP_res_net(
    (net_res): Linear(in_features=2, out_features=1, bias=True)
    (net_nonlin): Sequential(
      (0): Linear(in_features=2, out_features=10, bias=True)
      (1): Tanh()
      (2): Linear(in_features=10, out_features=10, bias=True)
      (3): Tanh()
      (4): Linear(in_features=10, out_features=10, bias=True)
      (5): Tanh()
      (6): Linear(in_features=10, out_features=10, bias=True)
      (7): Tanh()
      (8): Linear(in_features=10, out_features=10, bias=True)
      (9): Tanh()
      (10): Linear(in_features=10, out_features=1, bias=True)
    )
  )
)
IO_normalization_