In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from graphviz import Digraph
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets

In [None]:
# Create x and y values for our function over a range of values.
# The end is 2.1 rather than 2 because arange excludes its end point;
# that is what lets the grid reach (approximately) 2.0.
X = torch.arange(-2, 2.1, step=.1)

def f(x):
    """Return the square of ``x`` (element-wise when ``x`` is a tensor)."""
    squared = x ** 2
    return squared

# Evaluate f on the whole grid; (X, Y) is the curve drawn by the plot below
Y = f(X)

def plot_derivative_at(point, ax):
    """Draw the tangent line of ``y = x**2`` at ``point`` on ``ax``.

    Args:
        point: ``(x0, y0)`` pair (scalar tensors or plain floats) on the curve.
        ax: Axes-like object; only its ``scatter`` and ``plot`` methods are used.

    The slope is hard-coded as ``2 * x0`` (the derivative of ``x**2``), so the
    drawing is only meaningful for that curve.
    """
    x0, y0 = point
    slope = 2 * x0  # d/dx of x**2, evaluated at x0
    # Tangent line: y = y0 + slope * (x - x0) = slope * x - (slope * x0 - y0)
    offset = slope * x0 - y0
    xs = torch.arange(-2, 2.1, .1)  # local grid, so no reliance on the global X
    ys = slope * xs - offset
    ax.scatter(x0, y0, c='r', label=f'({float(x0):.02f}, {float(y0):.02f})', linewidth=8)
    ax.plot(xs, ys, label=f'dy/dx = {float(slope):.02f}', linewidth=3, ls='--')

@widgets.interact(
    x=widgets.FloatSlider(value=1.4, min=-2, max=2),
    # `fixed` passes the value through without creating a control for it.
    curve=widgets.fixed(f),
)
def show_derivative_at(x, curve=f):
    """Plot the curve and its tangent at the slider position ``x``.

    Args:
        x: Position chosen on the slider (a Python float).
        curve: Function used to compute the highlighted point. It is bound
            when this cell runs, so redefining the global ``f`` later in the
            notebook does not change what the slider callback evaluates.
    """
    x = torch.tensor(x).float()
    point = (x, curve(x))
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.plot(X, Y, label='f(x)', linewidth=3)
    ax.set_xlim(-2, 2)
    ax.set_ylim(0, 4)
    plot_derivative_at(point, ax)
    ax.legend()
    plt.show()

In [None]:
# Scalar tensor holding 2.0; requires_grad=True asks autograd to track it
x = torch.tensor(2.0, dtype=torch.float32, requires_grad=True)

In [None]:
# Build y = (x + 2)**2 + 3 one operation at a time
a = x + 2
b = a.pow(2)
y = b + 3
y

In [None]:
# Calculate the gradients: backpropagate from y, which fills in x.grad
y.backward()

In [None]:
# Observe the gradients: dy/dx = 2 * (x + 2), which is 8 at x = 2
x.grad

In [None]:
# Declare a tensor x with value 2.0.
# This is a brand-new tensor, so its .grad starts out empty (None).
x = torch.tensor(2., requires_grad=True)

In [None]:
# The same computation as the step-by-step example, wrapped in a function
def f(x):
    """Return ``(x + 2)**2 + 3``."""
    shifted = x + 2
    return shifted ** 2 + 3

In [None]:
# Calculate f(x); autograd records the operations performed inside f
y = f(x)

In [None]:
# Observe the output: (2 + 2)**2 + 3 = 19
y

In [None]:
# Calculate the gradients: backpropagate from y, which fills in x.grad
y.backward()

In [None]:
# Observe the gradients: 2 * (x + 2) = 8 at x = 2
x.grad

In [None]:
# Write your own function
def g(x):
    """Exercise placeholder: replace the body with your own function of ``x``."""
    raise NotImplementedError

In [None]:
# Complete this cell to find the derivative of y with respect to x.

In [None]:
# Define a tensor of numbers: [0., 1., 2.], tracked by autograd
x = torch.arange(3, dtype=torch.float32, requires_grad=True)
x

In [None]:
# Define a function that returns a single value.
# We will want to find the gradient of this value with respect to every
# value in `x`.
def f(x):
    """Return the mean of ``exp(x) / (1 + exp(x))`` over the elements of ``x``.

    Args:
        x: Tensor of any shape.

    Returns:
        A scalar tensor, so ``backward()`` can be called on it directly.
    """
    a = torch.exp(x)
    # The parentheses matter: `a / 1 + torch.exp(x)` parses as
    # `(a / 1) + torch.exp(x)`, which is just 2 * exp(x).
    b = a / (1 + a)
    return b.mean()

In [None]:
# Let's pass x through f to obtain y (a single scalar, since f ends with .mean())
y = f(x)

In [None]:
# Observe y (a scalar tensor carrying a grad_fn)
y

In [None]:
# Calculate the gradients of y with respect to every element of x
y.backward()

In [None]:
# Observe the gradients: x.grad has the same shape as x, one partial
# derivative of y per element
x.grad

In [None]:
# Sample the input range once, then draw every activation on the same axes
rng = torch.arange(-5, 5.01, 0.05)
fig, ax = plt.subplots(figsize=(14, 8))

# (legend label, activation, extra line-style arguments), in drawing order
activation_curves = [
    ('ReLU', F.relu, {}),
    ('tanh', torch.tanh, {}),
    ('sigmoid', torch.sigmoid, {}),
    ('leaky ReLU', lambda t: F.leaky_relu(t, negative_slope=0.01), {'ls': '--'}),
    ('gelu', F.gelu, {'ls': ':'}),
    ('swish', F.silu, {'ls': 'dashdot'}),
    ('mish', F.mish, {}),
]
for curve_label, curve_fn, curve_style in activation_curves:
    ax.plot(rng, curve_fn(rng), label=curve_label, **curve_style)

ax.set_ylim(-1.1, 2)
ax.set_title('Common activation functions')
ax.legend()

In [None]:
# Small grid with both negative and positive inputs for the checks below
# (this replaces the wider `rng` used for the plot)
rng = torch.arange(-1, 1, .2)

In [None]:
# Inspect the sample inputs
rng

In [None]:
# ReLU by hand: element-wise maximum of each input and zero
torch.max(rng, torch.tensor(0.))

In [None]:
# The same operation through the functional API
F.relu(rng)

In [None]:
# ...and through the module API: build the layer, then call it on the inputs
nn.ReLU()(rng)

In [None]:
# Slope applied to negative inputs in the leaky ReLU cells below
leak = 0.1

In [None]:
# Leaky ReLU by hand: element-wise maximum of x and leak * x
torch.max(rng, leak * rng)

In [None]:
# The same operation through the functional API (second argument is the negative slope)
F.leaky_relu(rng, leak)

In [None]:
# ...and through the module API
nn.LeakyReLU(leak)(rng)

In [None]:
# Make our fake activations: 10 rows (samples) by 5 columns (features).
# A locally seeded generator gives the same values on every run without
# touching NumPy's global random state.
activations = torch.tensor(
    np.random.default_rng(0).normal(loc=1.5, scale=7, size=(10, 5))
)
activations

In [None]:
# Per-feature mean, taken over the rows (dim=0)
ub = activations.mean(dim=0)
ub

In [None]:
# Per-feature variance, taken over the rows (dim=0).
# Batch-norm style normalisation uses the biased (divide-by-N) estimate,
# so switch off the Bessel correction that var() applies by default.
vb = activations.var(dim=0, unbiased=False)
vb

In [None]:
# Normalise each feature: subtract its mean and divide by its standard
# deviation. The 1e-5 keeps the division safe when a variance is close to 0.
xhat = (activations - ub) / torch.sqrt(vb + 1e-5)
xhat

In [None]:
# Sanity check: after normalisation every "feature" (column) should average to ~0
feature_means = xhat.mean(dim=0)
np.allclose(feature_means, 0)

In [None]:
# Fake per-feature scale and shift parameters (one value per column).
# Both are drawn from one locally seeded generator so they are the same on
# every run; `size=5` is what the original `size=(5)` actually meant.
affine_gen = np.random.default_rng(1)
scale = torch.tensor(affine_gen.normal(loc=0.1, scale=1.2, size=5))
shift = torch.tensor(affine_gen.normal(loc=0.1, scale=1.2, size=5))

In [None]:
# Scale and shift the normalised activations; the per-feature vectors
# broadcast across the rows of xhat
outputs = xhat * scale + shift
outputs