In [None]:
!pip install git+https://github.com/murtylab/comp-neuro.git

In [13]:
import torch.nn as nn
import torch
from compneuro.training import train_model
from compneuro.mlp import build_mlp_model, visualize_mlp
from compneuro.utils.video import show_video

# Folder handed to train_model as `video_frames_folder` via the shared training config.
save_dir = "results/vis/single_neuron"

In [None]:
# Keyword arguments shared by every train_model(...) call in this notebook;
# each experiment unpacks them with **default_train_config.
default_train_config = {
    "learning_rate": 0.03,
    "batch_size": 32,
    "num_epochs": 100,
    "num_data_points": 50,
    "test_data_fraction": 0.3,
    "visualize_every_nth_step": 1,
    "video_frames_folder": save_dir,
    "save_video_as": "training.mp4",
    # Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
    "device": "cuda" if torch.cuda.is_available() else "cpu",
}

# Train a single neuron to fit a line

In [None]:
# A 1 -> 1 network (size_sequence=[1, 1]): the single neuron of this section.
model = build_mlp_model(size_sequence=[1, 1])

# Show the first entry of model[0]'s weight tensor before any training happens.
print(
    f"Weight before training: {model[0].weight.data[0].item()}"
)

In [None]:
# Train the model built above on the "line_through_zero" dataset with the shared config.
test_loss = train_model(
    dataset_name="line_through_zero",  ## other options: "sine_wave", "line", "zigzag_line"
    model=model,
    **default_train_config
)

In [None]:
# Display "training.mp4" (the name set as `save_video_as` in the training config).
show_video(filename="training.mp4", width=600)

# Now what if the line does not go through the origin? Would it still work?

In [None]:
# A fresh 1 -> 1 model, this time trained on the "line" dataset.
# NOTE(review): no `bias` argument is passed, so this cell relies on
# build_mlp_model's default — presumably bias=False; confirm.
model = build_mlp_model(size_sequence=[1, 1])
test_loss = train_model(
    dataset_name="line",  ## other options: "sine_wave", "zigzag_line"
    model=model,
    **default_train_config
)
show_video(filename="training.mp4", width=600)

# This is why we need a bias: it lets the neuron fit a line that does not pass through the origin.

In [None]:
# Same 1 -> 1 model, now built with bias=True, trained on the same "line" dataset.
model = build_mlp_model(
    size_sequence=[1, 1],
    bias=True
)
test_loss = train_model(
    model=model,
    dataset_name="line", ## other options: "sine_wave", "zigzag_line"
    **default_train_config
)
show_video(filename="training.mp4", width = 600)

# Can a single-neuron model learn something more complex?

In [None]:
# Two stacked units with biases but no activation, trained on the "elbow" dataset.
model = build_mlp_model(
    bias=True,
    size_sequence=[1, 1, 1],  ## stack more neurons here in any way you want, it'll still not fit the "elbow" data
)

test_loss = train_model(
    dataset_name="elbow",
    model=model,
    **default_train_config
)
show_video(filename="training.mp4", width=600)

## Note that stacking more neurons (without activation functions) is the same as having one neuron with a different weight and bias

Say we have a 2-neuron model:  

$$
y = w_2 (w_1 x + b_1) + b_2
$$

This can be rewritten as:  

$$
y = w_2 w_1 x + w_2 b_1 + b_2
$$

$$
y = W x + B
$$

where  

$$
W = w_2 w_1, \quad B = w_2 b_1 + b_2.
$$

Thus, the composition of two neurons is equivalent to a single neuron with an effective weight $W$ and bias $B$.

<!-- Or let's say we have a model with 2 hidden neurons:

$$
y_1 = w_1 x + b_1 \quad \text{and} \quad y_2 = w_2 x + b_2
$$ -->

<!-- Now, if we have these two neurons connected to a single output neuron with weights $w_3$ and $w_4$ and bias $b_3$, the output can be written as:

$$
y = w_3 y_1 + w_4 y_2 + b_3
$$

Substitute $y_1$ and $y_2$ from earlier:

$$
y = w_3 (w_1 x + b_1) + w_4 (w_2 x + b_2) + b_3
$$

Distribute the weights:

$$
y = (w_3 w_1 + w_4 w_2) x + (w_3 b_1 + w_4 b_2 + b_3)
$$

This can be rewritten as:

$$
y = W x + B
$$

Where:

$$
W = w_3 w_1 + w_4 w_2, \quad B = w_3 b_1 + w_4 b_2 + b_3
$$ -->


# To fit wedges, we need activation functions

In [None]:
# Same 1 -> 1 -> 1 layout as the linear stack, now with a ReLU activation.
model = build_mlp_model(
    activation=nn.ReLU(),
    bias=True,
    size_sequence=[1, 1, 1],  ## the extra 1 in the middle makes room for the activation function
)

test_loss = train_model(
    dataset_name="elbow",
    model=model,
    **default_train_config
)
show_video(filename="training.mp4", width=600)

# Let's see what happens in a more complex dataset

In [None]:
# The 1 -> 1 -> 1 ReLU model again, this time on the "zigzag_line" dataset.
model = build_mlp_model(
    activation=nn.ReLU(),
    bias=True,
    size_sequence=[1, 1, 1],
)

test_loss = train_model(
    dataset_name="zigzag_line",
    model=model,
    **default_train_config
)
show_video(filename="training.mp4", width=600)

In [None]:
# Widen the middle layer to 10 units and retrain on "zigzag_line".
model = build_mlp_model(
    activation=nn.ReLU(),
    bias=True,
    size_sequence=[1, 10, 1],
)

test_loss = train_model(
    dataset_name="zigzag_line",
    model=model,
    **default_train_config
)
show_video(filename="training.mp4", width=600)

## Does this mean we can fit almost any function given enough neurons in a shallow network?

Yes. But there's a catch. You'd need TONS of neurons! Too many of them sometimes.

# Universal Approximation Theorem

A neural network with a single hidden layer can approximate any continuous function arbitrarily well, provided that you have _unlimited_ hidden units.

In [None]:
# A wide, shallow model: one middle layer of 120 units, trained on "zigzag_line".
model = build_mlp_model(
    size_sequence=[1, 120, 1],
    bias=True,
    activation=nn.ReLU(),
)
test_loss = train_model(
    dataset_name="zigzag_line",  ## other options: "sine_wave", "line"
    model=model,
    **default_train_config
)
show_video(filename="training.mp4", width=600)

## Depth makes neural nets more expressive without adding too many parameters

The network shown below has far fewer neurons, but still works very well. This is by virtue of depth.

In [None]:
# A deeper, narrower model: two middle layers of 20 units each, trained on "zigzag_line".
model = build_mlp_model(
    size_sequence=[1, 20, 20, 1],
    bias=True,
    activation=nn.ReLU(),
)
test_loss = train_model(
    dataset_name="zigzag_line",  ## other options: "sine_wave", "line"
    model=model,
    **default_train_config
)
show_video(filename="training.mp4", width=600)

## Data in real life is not perfect. It is noisy. But neural networks do not know the difference between signal and noise.

In [None]:
# The wide 1 -> 120 -> 1 ReLU model, trained on "noisy_line" with dataset_noise=0.3.
model = build_mlp_model(
    activation=nn.ReLU(),
    bias=True,
    size_sequence=[1, 120, 1],
)

test_loss = train_model(
    dataset_name="noisy_line",
    noisy_data=True,
    dataset_noise=0.3,
    model=model,
    **default_train_config
)
show_video(filename="training.mp4", width=600)

In [None]:
# Back to a 1 -> 1 model, trained on "noisy_line" with dataset_noise=0.2.
# NOTE(review): size_sequence=[1, 1] has no middle layer, so where (or whether)
# build_mlp_model applies the ReLU here is not visible in this notebook — confirm.
model = build_mlp_model(
    activation=nn.ReLU(),
    bias=True,
    size_sequence=[1, 1],
)

test_loss = train_model(
    dataset_name="noisy_line",
    dataset_noise=0.2,
    noisy_data=True,
    model=model,
    **default_train_config
)
show_video(filename="training.mp4", width=600)