## Setup

In [None]:
%matplotlib qt
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import os

In [None]:
# All figures are written relative to the working directory, so switch into a
# dedicated 'Plots' folder. The guard keeps this cell idempotent: re-running it
# while already inside 'Plots' must not create and enter a nested 'Plots/Plots'.
if Path.cwd().name != 'Plots':
    Path('Plots').mkdir(exist_ok=True)
    os.chdir('Plots')

## 1 Activation Functions

### 1.1 Activation

In [None]:
# Sample the three activation functions on a shared grid over [-3, 3].
x = np.linspace(-3.0, 3.0, num=300)
y_relu = np.where(x > 0, x, 0.0)    # ReLU(x) = max(0, x)
y_tanh = np.tanh(x)
y_sigm = 1.0 / (1.0 + np.exp(-x))   # logistic sigmoid

In [None]:
# Draw the three activation curves on a single set of axes and export to PDF.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
for values, legend_entry in (
    (y_relu, r'$ReLU(x)$'),
    (y_tanh, r'$tanh(x)$'),
    (y_sigm, r'$\sigma(x)$'),
):
    ax.plot(x, values, label=legend_entry)

ax.grid()
ax.set_xlabel('x')
ax.set_ylabel('Activation')
ax.legend(loc='upper left', framealpha=1.0)

fig.savefig('Activations.pdf', bbox_inches='tight', pad_inches=0.25)

### 1.2 Derivatives

In [None]:
# Analytic derivatives of the activations, sampled on the wider grid [-5, 5].
x = np.linspace(-5.0, 5.0, num=500)

# d/dx ReLU: unit step, taking the value 0 at x == 0.
dy_relu = np.where(x > 0, 1.0, 0.0)

# d/dx tanh = 1 / cosh(x)^2
cosh_x = np.cosh(x)
dy_tanh = 1 / (cosh_x * cosh_x)

# d/dx sigma = exp(-x) / (1 + exp(-x))^2
exp_neg_x = np.exp(-x)
denominator = 1 + exp_neg_x
dy_sigm = exp_neg_x / (denominator * denominator)

In [None]:
# Draw the three derivative curves on a single set of axes and export to PDF.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
for values, legend_entry in (
    (dy_relu, r'$\frac{d}{dx}ReLU(x)$'),
    (dy_tanh, r'$\frac{d}{dx}tanh(x)$'),
    (dy_sigm, r'$\frac{d}{dx}\sigma(x)$'),
):
    ax.plot(x, values, label=legend_entry)

ax.grid()
ax.set_xlabel('x')
ax.set_ylabel('Derivative')
ax.set_xticks(range(-5, 6))   # one tick per integer in [-5, 5]
ax.legend(loc='upper left', framealpha=1.0)

fig.savefig('Derivatives.pdf', bbox_inches='tight', pad_inches=0.25)

## 2 Gradient Descent

In [None]:
# Sample the quadratic toy loss J(theta) = theta^2 on [-3, 3].
x = np.linspace(-3.0, 3.0, 100)
y = x**2

In [None]:
# Loss curve with a straight segment standing in for the derivative at the
# marked point; the axes are left without ticks.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(x, y, label=r'$\mathsf{J}(\theta)$')

# Segment from (-3, 8) to (-1, 0); it passes through the marked point (-2, 4).
segment_x = [-3, -1]
segment_y = [8, 0]
ax.plot(segment_x, segment_y, 'orange', label=r'$\frac{d}{d\theta} \mathsf{J}(\theta)$')
ax.plot(-2.0, 4.0, 'orange', marker='o')

ax.set_xticks([])
ax.set_yticks([])
ax.set_xlabel(r'$\theta$')
ax.set_ylabel(r'$\mathsf{J}(\theta)$')
ax.legend(loc='upper left', framealpha=1.0)
fig.savefig('1D-Loss.pdf', bbox_inches='tight')

### 2.1 Weight updates

In [None]:
def create_update_path(eta, x0, N):
    """Trace gradient-descent iterates on the loss J(theta) = theta**2.

    Starting from ``x0``, every step applies
    ``theta <- theta - eta * dJ/dtheta`` with ``dJ/dtheta = 2 * theta``.

    Parameters
    ----------
    eta : float
        Learning rate (step size).
    x0 : float
        Starting value of theta.
    N : int
        Number of points on the path, including the starting point.

    Returns
    -------
    path_x, path_y : numpy.ndarray
        Arrays of length ``N`` holding the iterates and their loss values.
        Both are empty when ``N`` is 0.
    """
    path_x = np.zeros(N)
    path_y = np.zeros(N)
    if N == 0:
        # Nothing to trace; writing the start point into an empty array
        # would raise IndexError.
        return path_x, path_y

    path_x[0] = x0
    path_y[0] = x0**2
    for i in range(N - 1):
        dy = 2 * path_x[i]                      # gradient at the current iterate
        path_x[i + 1] = path_x[i] - eta * dy    # gradient-descent step
        path_y[i + 1] = path_x[i + 1]**2
    return path_x, path_y

In [None]:
# Overlay gradient-descent paths for several learning rates on the loss curve.
x0 = -2
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(x, y, '#1f77b4')

# One row per learning-rate regime, drawn in this order:
# (eta, number of path points, colour, line width, legend label)
regimes = [
    (1.04, 6, 'crimson', 0.5, r'$\eta \gg \eta_{crit}$'),   # unstable
    (0.85, 10, 'purple', 0.5, r'$\eta > \eta_{crit}$'),     # high
    (0.5, 2, 'green', 0.5, r'$\eta = \eta_{crit}$'),        # critical
    (0.2, 10, 'orange', 1, r'$\eta < \eta_{crit}$'),        # low
]
for eta, n_points, colour, width, legend_entry in regimes:
    path_x, path_y = create_update_path(eta, x0, n_points)
    ax.plot(path_x, path_y, colour, linewidth=width, label=legend_entry)

ax.set_xticks([])
ax.set_yticks([])
ax.set_xlabel(r'$\theta$')
ax.set_ylabel(r'$\mathsf{J}(\theta)$')
ax.legend(loc='upper left', framealpha=1.0)
fig.savefig('1D-Weight-Update.pdf', bbox_inches='tight')

## 3 Overfitting

In [None]:
SEED = 279923  # https://youtu.be/nWSFlqBOgl8?t=86  -  I love this song
np.random.seed(SEED + 2)

# Noisy samples of the quadratic 0.6*x^2 + 0.2 on [0, 1]. The uniform noise
# from np.random.rand is scaled by 0.1 * (1 + x), so its spread grows with x.
N = 20  # number of sample points, shared by the grid and the noise draw
x_sample = np.linspace(0, 1, N)
y_sample = 0.6*x_sample**2 + 0.2 + (1 + x_sample) * 0.1 * np.random.rand(N)

In [None]:
# Least-squares polynomial fits of degree 1, 2 and 14 to the noisy samples.
p_underfit, p_good, p_overfit = (
    np.polyfit(x_sample, y_sample, degree) for degree in (1, 2, 14)
)

# Evaluate each fitted polynomial on a dense grid for smooth curves.
x = np.linspace(0, 1, num=400)
y_underfit, y_good, y_overfit = (
    np.polyval(coefficients, x)
    for coefficients in (p_underfit, p_good, p_overfit)
)

In [None]:
def plot_fit(x, y, i):
    """Draw panel ``i`` of the 1x3 fitting comparison.

    The panel is titled 'Underfit', 'Good fit' or 'Overfit' for ``i`` equal
    to 1, 2 or 3. It shows the module-level samples ``x_sample``/``y_sample``
    as dots with the curve ``(x, y)`` drawn over them; axis ticks are hidden.
    """
    titles = ('Underfit', 'Good fit', 'Overfit')
    title = titles[i - 1]
    panel = plt.subplot(1, 3, i)
    panel.set_title(title)
    panel.plot(x_sample, y_sample, 'o', markersize=3)
    panel.plot(x, y, linewidth=1)
    panel.set_xticks([])
    panel.set_yticks([])

In [None]:
# Three side-by-side panels: underfit, good fit and overfit.
fig = plt.figure(figsize=(12, 4))
fitted_curves = (y_underfit, y_good, y_overfit)
for panel_index, y_fit in enumerate(fitted_curves, start=1):
    plot_fit(x, y_fit, panel_index)
fig.savefig('fitting.pdf', bbox_inches='tight')