In [1]:
%matplotlib inline

import os
import pickle
import torch
import numpy as np
import math

from typing import Dict, Tuple

# For verifying how polynomial order affects fitting

## Generate Random Data

Given a variable $x$, use the following third-order polynomial to generate the training and test data:
$$
y = 5 + 1.2x - 3.4\frac{x^{2}}{2!} + 5.6\frac{x^{3}}{3!} + \epsilon\ \text{where}\ \epsilon \sim \mathcal{N}(0, {0.1}^2)
$$
- Noise term $\epsilon$ obeys a normal distribution with a mean of $0$ and a standard deviation of $0.1$;
- During optimization it is desirable to avoid very large gradient or loss values, so each feature is rescaled from $x^i$ to $\frac{x^i}{i!}$; dividing by $i!$ keeps the feature values from blowing up when $i$ is large;

In [2]:
# Configuration for the polynomial dataset
max_degree: int = 160   # number of polynomial feature columns
train_size = 50
test_size = 100

# Ground-truth weights: only the first four entries (the cubic from the
# formula above) are non-zero; every higher-order weight stays at 0.
true_w: np.ndarray = np.zeros(max_degree)
true_w[:4] = [5, 1.2, -3.4, 5.6]

In [3]:
# Draw the raw scalar inputs x for the train and test samples together, as one column
poly_features: np.ndarray = np.random.normal(size=(train_size + test_size, 1))
# Permute the rows in place (the draws are i.i.d., so their distribution is unchanged)
np.random.shuffle(poly_features)

# Broadcast the single column against the exponents 0 .. max_degree - 1 so that
# column i holds x^i, then scale column i by 1 / i!  (math.gamma(i + 1) == i!).
factorials = np.array([math.gamma(degree + 1) for degree in range(max_degree)])
poly_features = np.power(poly_features, np.arange(max_degree)[np.newaxis, :]) / factorials

# Labels: the noiseless polynomial plus Gaussian noise with standard deviation 0.1
labels: np.ndarray = poly_features.dot(true_w)
labels = labels + np.random.normal(scale=1e-1, size=labels.shape)

In [4]:
# Rebind each NumPy array to a float32 torch tensor under the same name.
# NOTE: the float32 cast flushes the tiniest high-degree features to zero
# (visible in the printed feature row below).
true_w, poly_features, labels = (
    torch.tensor(array, dtype=torch.float32)
    for array in (true_w, poly_features, labels)
)

In [5]:
# Show the first sample's feature row and the first two labels, one per line
print(poly_features[0], labels[:2], sep='\n')

tensor([ 1.0000e+00, -1.2040e+00,  7.2485e-01, -2.9092e-01,  8.7569e-02,
        -2.1087e-02,  4.2316e-03, -7.2786e-04,  1.0955e-04, -1.4655e-05,
         1.7646e-06, -1.9314e-07,  1.9379e-08, -1.7949e-09,  1.5437e-10,
        -1.2391e-11,  9.3243e-13, -6.6040e-14,  4.4175e-15, -2.7994e-16,
         1.6853e-17, -9.6626e-19,  5.2882e-20, -2.7684e-21,  1.3888e-22,
        -6.6888e-24,  3.0975e-25, -1.3813e-26,  5.9398e-28, -2.4661e-29,
         9.8977e-31, -3.8443e-32,  1.4465e-33, -5.2775e-35,  1.8689e-36,
        -6.4293e-38,  2.1503e-39, -6.9974e-41,  2.2169e-42, -6.8664e-44,
         1.4013e-45, -0.0000e+00,  0.0000e+00, -0.0000e+00,  0.0000e+00,
        -0.0000e+00,  0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00,
         0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00,  0.0000e+00,
        -0.0000e+00,  0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00,
         0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00,  0.0000e+00,
        -0.0000e+00,  0.0000e+00, -0.0000e+00,  0.0

## Save Data

In [6]:
# Bundle the generated features and labels together with `max_degree` and
# `train_size`. The values are a mix of plain ints and tensors, so the value
# type is `object` rather than `torch.Tensor`.
data: Dict[str, object] = {
    'max_degree': max_degree,
    'poly_features': poly_features,
    'labels': labels,
    'train_size': train_size,
}

# The target directory is relative to the current working directory and is
# created if it does not exist yet.
save_path: str = os.path.join('..', '..', 'data', 'weight_decay')
os.makedirs(save_path, exist_ok=True)
with open(os.path.join(save_path, 'polynomials_order_data.pkl'), 'wb') as f:
    pickle.dump(data, f)

# For verifying regularization technique -- weight decay

## Generate Random Data

Given an input vector $\mathbf{x} = (x_1, \dots, x_d)$, use the following linear model to generate the training and test data:
$$
y = 0.05 + \sum\limits_{i=1}^{d} {0.01}{x_{i}} + \epsilon\ \text{where}\ \epsilon \sim \mathcal{N}(0, {0.01}^{2})
$$
- Noise term $\epsilon$ obeys a normal distribution with mean of $0$ and standard deviation of $0.01$;

In [19]:
def generate_random_data(W: torch.Tensor, b: torch.Tensor, num_samples: int) -> Tuple[torch.Tensor, torch.Tensor]:
    """Sample a synthetic linear-regression dataset ``y = XW + b + noise``.

    Args:
        W: Weight tensor; ``len(W)`` sets the number of feature columns.
        b: Bias added to ``X @ W`` (the notebook passes a plain Python float).
        num_samples: Number of rows to draw.

    Returns:
        A pair ``(X, y)``. ``X`` has shape ``(num_samples, len(W))`` with
        standard-normal entries; ``y`` is ``X @ W + b`` plus Gaussian noise
        (standard deviation 0.01), reshaped to a single column.
    """
    num_features = len(W)
    features: torch.Tensor = torch.normal(0, 1, (num_samples, num_features))
    targets: torch.Tensor = features @ W + b
    # The noise is drawn after the features, so the RNG call order is preserved.
    noise: torch.Tensor = torch.normal(0, 1e-2, targets.shape)
    return features, (targets + noise).reshape((-1, 1))

In [20]:
# Dataset dimensions: 20 training rows and 100 test rows, each with 200 input features
train_size = 20
test_size = 100
num_inputs = 200

# Ground truth from the formula above: every weight is 0.01 and the bias is 0.05
true_w = 1e-2 * torch.ones((num_inputs, 1))
true_b = 5e-2

In [21]:
# Draw both splits from the same ground-truth model; each is an (X, y) pair.
# The training split is generated first, then the test split.
train_data, test_data = (
    generate_random_data(true_w, true_b, split_size)
    for split_size in (train_size, test_size)
)

## Save Data

In [22]:
# Bundle both splits with the input dimension. The values are (X, y) tuples
# and a plain int, so the value type is `object` rather than `torch.Tensor`.
data: Dict[str, object] = {
    'train_data': train_data,
    'test_data': test_data,
    'num_inputs': num_inputs,
}

# The target directory is relative to the current working directory and is
# created if it does not exist yet.
save_path: str = os.path.join('..', '..', 'data', 'weight_decay')
os.makedirs(save_path, exist_ok=True)
with open(os.path.join(save_path, 'weight_decay.pkl'), 'wb') as f:
    pickle.dump(data, f)