In [1]:
import numpy as np
from typing import Callable
import matplotlib
import matplotlib.pyplot as plt
from tinygrad.tensor import Tensor
from tinygrad import nn
from tinygrad.nn.optim import Optimizer

# from tinygrad.extra.lr_scheduler import LR_Scheduler, ReduceLROnPlateau

In [4]:
def plot(x: np.ndarray) -> Callable[[np.ndarray,str,str], None]:
    """
    Bind the x-axis values once and hand back a line-drawing function.

    The returned callable takes ``(y, label, color)`` and forwards them,
    together with the captured ``x``, to ``plt.plot``. It returns ``None``.
    """
    def draw(y: np.ndarray, label: str, color: str) -> None:
        # x comes from the enclosing scope; only y and the styling vary per call.
        plt.plot(x, y, label=label, color=color)
    return draw


def plt_setup(xlim:tuple = (0,1), ylim:tuple = (0,1), title:str = "getting $a*(X^2)+b$ from Relus is the goal") -> None:
    """
    Decorate the current axes: grid, black lines at y=0 and x=0, limits, title.

    Args:
        xlim: limits applied to the x-axis.
        ylim: limits applied to the y-axis.
        title: text used as the axes title.

    Side effect: selects matplotlib's 'dark_background' style as the very
    last step.
    NOTE(review): the style is selected after everything above has been drawn,
    so it presumably affects later figures more than the current one --
    confirm that this ordering is intended.
    """
    ax = plt.gca()  # the axes the pyplot-level helpers would act on
    ax.grid(True)
    ax.axhline(y=0, color='k')
    ax.axvline(x=0, color='k')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_title(title)
    plt.style.use('dark_background')

# Colormap object; calling it with a float (e.g. color_cycle(0.5)) yields the
# color passed to the plotting helper.
color_cycle = matplotlib.colormaps["Spectral"]

# Shared x-axis samples: start 0, stop 1 (exclusive), step 0.001.
X=np.arange(0,1,0.001)
# Line-drawing helper with X already bound as the x-axis.
p = plot(X)

# from tinygrad.extra
class LR_Scheduler:
  """
  Base class for learning-rate schedules that write into an optimizer's ``lr``.

  Subclasses override ``get_lr``; ``step`` increments the internal counter and
  assigns the value returned by ``get_lr`` to ``optimizer.lr``.
  """
  def __init__(self, optimizer: Optimizer):
    self.optimizer = optimizer
    # Step counter stored as a non-differentiable tensor on the optimizer's device.
    self.epoch_counter = Tensor([0], requires_grad=False, device=self.optimizer.device)

  def get_lr(self) -> Tensor:
    """Return the learning rate for the current ``epoch_counter``; must be overridden."""
    # Fail loudly: an implicit None here would be handed straight to
    # ``optimizer.lr.assign`` by step().
    raise NotImplementedError(f"{type(self).__name__} must implement get_lr()")

  def step(self) -> None:
    """Advance the counter by one, then refresh the optimizer's learning rate."""
    self.epoch_counter.assign(self.epoch_counter + 1).realize()
    self.optimizer.lr.assign(self.get_lr()).realize()

class OneCycleLR(LR_Scheduler):
  """
  One-cycle schedule built from two linear segments.

  The rate rises from ``max_lr/div_factor`` to ``max_lr`` while the step
  counter is below ``total_steps*pct_start``, then falls linearly towards
  ``initial_lr/final_div_factor`` over the remaining steps.
  """
  def __init__(self, optimizer: Optimizer, max_lr: float, div_factor: float, final_div_factor: float, total_steps: int, pct_start: float):
    super().__init__(optimizer)
    self.max_lr = max_lr
    self.initial_lr = max_lr / div_factor
    self.min_lr = self.initial_lr / final_div_factor
    self.total_steps = total_steps
    self.pct_start = pct_start
    # Replace the optimizer's current lr with the schedule's value at step 0.
    starting_lr = self.get_lr()
    self.optimizer.lr.assign(starting_lr).realize()

  @staticmethod
  def _annealing_linear(start: float, end: float, pct: Tensor) -> Tensor:
    """Linear interpolation: ``start`` at pct=0, ``end`` at pct=1."""
    delta = end - start
    return pct*delta + start

  def get_lr(self) -> Tensor:
    """Learning rate for the current ``epoch_counter``, as a tensor."""
    warmup_steps = self.total_steps*self.pct_start
    decay_steps = self.total_steps*(1-self.pct_start)
    in_warmup = self.epoch_counter < warmup_steps
    rising = self._annealing_linear(self.initial_lr, self.max_lr, self.epoch_counter/warmup_steps)
    falling = self._annealing_linear(self.max_lr, self.min_lr, (self.epoch_counter-warmup_steps)/decay_steps)
    # Element-wise choice between the two segments.
    return in_warmup.where(rising, falling)
    
class Model:
  """
  Small ReLU network: 1 input feature, width-3 hidden layers, 1 output.

  Layout: Linear(1,3) -> relu -> [Linear(3,3) -> relu] x ``layers`` -> Linear(3,1).
  """
  def __init__(self, layers:int = 3):
    """
    Args:
      layers: number of hidden Linear(3,3)+relu pairs; values <= 0 give none.
    """
    self.layers = [nn.Linear(1, 3), Tensor.relu]
    # Create a fresh Linear per hidden layer. Multiplying a list such as
    # ``[nn.Linear(3, 3), Tensor.relu]`` by ``layers`` repeats references to a
    # single Linear object, which would tie the weights of all hidden layers.
    for _ in range(layers):
      self.layers += [nn.Linear(3, 3), Tensor.relu]
    self.layers.append(nn.Linear(3, 1))

  def __call__(self, x:Tensor) -> Tensor:
    """Run ``x`` through ``self.layers`` via ``Tensor.sequential``."""
    return x.sequential(self.layers)

  def L1(self) -> Tensor:
    """Sum of absolute values of the weights and biases of every Linear layer."""
    return sum(l.weight.abs().sum() + l.bias.abs().sum() for l in self.layers if isinstance(l, nn.Linear))

  def L2(self) -> Tensor:
    """Sum of squared weights and biases of every Linear layer."""
    return sum(l.weight.square().sum() + l.bias.square().sum() for l in self.layers if isinstance(l, nn.Linear))


def train_step(x:Tensor, y:Tensor, model: Model, opt: nn.optim.LAMB, lr_schedule: LR_Scheduler) -> Tensor:
    """
    Perform one optimisation step on the batch ``(x, y)``.

    Computes the mean squared error between ``model(x)`` and ``y``, clears the
    gradients, backpropagates, steps the optimizer and then the learning-rate
    schedule.

    Returns:
        The loss tensor of this step (no regularisation term is added).
    """
    residual = model(x) - y
    mse = residual.square().mean()
    opt.zero_grad()
    mse.backward()
    opt.step()
    lr_schedule.step()
    return mse


def train_model(model, lr:float = 0.01, steps:int = 1001, bs:int = 32768, max_lr:float = 0.1) -> Model:
    """
    Train ``model`` on freshly sampled batches with Adam and a one-cycle LR schedule.

    Each iteration draws inputs with ``Tensor.rand(bs, 1)``, labels them with the
    global function ``target`` (not defined in this section; it must exist in
    the notebook namespace before this is called) and runs ``train_step``.

    Args:
        model: network to optimise; its parameters are collected with
            ``nn.state.get_parameters``.
        lr: learning rate handed to the Adam constructor. OneCycleLR assigns
            its own starting value to ``opt.lr`` when it is constructed, so
            this argument does not determine the rate used during training.
        steps: number of training iterations, also the schedule's ``total_steps``.
        bs: batch size (rows sampled per iteration).
        max_lr: peak learning rate of the one-cycle schedule.

    Returns:
        The same ``model`` object, trained in place.
    """
    opt = nn.optim.Adam(nn.state.get_parameters(model), lr)
    lr_schedule = OneCycleLR(opt, max_lr=max_lr, div_factor=100, final_div_factor=100, total_steps=steps, pct_start=0.5)
    for i in range(steps):
        samples = Tensor.rand(bs, 1).realize()
        y = target(samples)
        loss = train_step(samples, y, model, opt, lr_schedule)
        # Report progress every 100th iteration.
        if i%100 == 0:
            print(f"lr = {opt.lr.numpy()[0]}")
            print(f"loss at train_step = {i} : {loss.numpy()}")
    return model

def plot_model(model, title:str = "neural network learning versus the takagi curve") -> None:
    """
    Evaluate ``model`` on the global grid ``X`` and show the resulting curve.

    Args:
        model: callable taking a tensor of column-vector inputs.
        title: figure title forwarded to ``plt_setup``.
    """
    # float32 copy of the grid, reshaped to one input per row.
    grid = Tensor(X.astype(np.float32)).reshape(-1,1)
    prediction = model(grid).numpy()
    # Disabled overlays kept for reference:
    #p(target(Tensor(X.astype(np.float32)).reshape(-1,1)).numpy(), "target", color=color_cycle(0.0))
    p(prediction, "model", color=color_cycle(0.5))
    #p(T_1(X)+T_2(X)+T_3(X), "Takagi 3", color=color_cycle(0.75))
    plt_setup(xlim=(0,1), ylim=(0,1/2+0.1), title=title)
    plt.legend(loc='best')
    plt.show()

In [None]:
# Plan: write the quintic polynomial P(z) in factored form,
#   P(z) = (z-z1)(z-z2)(z-z3)(z-z4)(z-z5)

# Step 1: sample the roots (five random values, shape (5, 1)).
R = Tensor.rand(5, 1).realize()

# Step 2: train a neural net to learn the roots.

# Step 3: apply it to a quintic polynomial.

# Step 4: use Newton's method to find the roots, starting from the initial
# guess provided by the neural network.