In [None]:
"""
Time complexity (abuse of terminology) implementation
"""

In [85]:
%load_ext autoreload
%autoreload 2
import ObjectiveFunction as of
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
import helper_funcs as hf
from math import sqrt
from scipy.stats import uniform, norm

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
def cart2polar(x):
    """
    Return the polar angle (in radians) of a 2-D Cartesian vector.

    Parameters
    ----------
    x : array-like
        Sequence whose first element is the x-coordinate and whose second
        element is the y-coordinate.

    Returns
    -------
    The angle ``arctan2(y, x)``, which lies in ``[-pi, pi]``.
    """
    coords = np.array(x)
    x_comp, y_comp = coords[0], coords[1]
    angle = np.arctan2(y_comp, x_comp)
    return angle

In [36]:
def powlaw_samp(x_min, alpha, size=1):
    """
    Samples from a continuous power-law distribution with minimum value x_min.

    Uses inverse-transform sampling: for ``r`` uniform on ``[0, 1)``,
    ``x = x_min * (1 - r) ** (-1 / (alpha - 1))``.

    Parameters
    ----------
    x_min : float
        Lower cut-off of the distribution; every sample is >= ``x_min``
        (for positive ``x_min``).
    alpha : float
        Power-law exponent. Must be strictly greater than 1, otherwise the
        transform above is undefined (``alpha == 1``) or no longer produces
        values above ``x_min`` (``alpha < 1``).
    size : int or tuple of ints, optional
        Output shape passed to ``np.random.random``. Defaults to 1, which
        yields an array of shape ``(1,)``.

    Returns
    -------
    numpy.ndarray
        Array of samples with the requested shape.

    Raises
    ------
    ValueError
        If ``alpha <= 1``.

    References
    ----------
    https://stats.stackexchange.com/questions/173242/random-sample-from-power-law-distribution
    https://arxiv.org/pdf/0706.1062.pdf
    """
    if np.any(np.asarray(alpha) <= 1):
        raise ValueError(f"alpha must be > 1 for a power-law sample, got {alpha!r}")

    # np.random.random draws from [0, 1), so (1 - r) is in (0, 1] and never zero.
    r = np.random.random(size=size)
    return x_min * (1 - r) ** (-1 / (alpha - 1))

In [128]:
"""
Doing SGD with Levy-Flight Noise properly
"""

from torch.optim.optimizer import Optimizer, required
import torch
from typing import List, Optional

class SGD_LevyPow(Optimizer):
    """
    SGD with momentum followed by an isotropic power-law ("Levy flight") kick.

    Each call to ``step`` does, for every parameter that has a gradient:

    1. updates a per-parameter momentum buffer
       (``buf = momentum * buf + grad``; the first step seeds it with ``grad``),
    2. moves the parameter by ``-lr * buf``,
    3. draws a step length from ``powlaw_samp(x_min=lr * 0.01, alpha=alpha)``,
       scales it by the gradient norm, picks a direction uniformly on the
       circle, and moves the parameter by ``-lr`` times that noise vector,
    4. passes the parameter through ``func.apply_period``.

    The noise direction is built from a single angle, so each parameter tensor
    must hold exactly two elements (a 2-D position).

    Parameters
    ----------
    params : iterable
        Parameters (or parameter groups) to optimise.
    func : object
        Object exposing ``apply_period(tensor)``. Presumably this wraps the
        position back into the periodic domain of the objective -- confirm
        against the ObjectiveFunction module.
    lr : float
        Learning rate; also scales the noise kick and sets its minimum length.
    alpha : float
        Power-law exponent forwarded to ``powlaw_samp``.
    momentum : float, optional
        Momentum factor (default 0).
    """

    def __init__(self,
                 params,
                 func=required,
                 lr: float = required,
                 alpha: float = required,
                 momentum: float = 0
    ):
        self.func = func
        defaults = dict(lr=lr,
                        alpha=alpha,
                        momentum=momentum)
        super().__init__(params, defaults)


    @torch.no_grad()
    def step(self, closure=None):
        """
        Perform a single optimisation step.

        Parameters
        ----------
        closure : callable, optional
            Re-evaluates the model and returns the loss; it is called with
            gradients enabled.

        Returns
        -------
        The value returned by ``closure``, or ``None`` if no closure was given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            lr = group['lr']
            for p in group['params']:
                grad = p.grad
                if grad is None:
                    # Parameter did not take part in the backward pass.
                    continue

                # Optimizer.state is keyed by parameter so that every tensor
                # owns its own momentum buffer.
                state = self.state[p]
                if 'momentum_buffer' not in state:
                    state['momentum_buffer'] = grad.detach().clone()
                else:
                    state['momentum_buffer'].mul_(group['momentum']).add_(grad, alpha=1)

                mom_grad = state['momentum_buffer']
                p.add_(mom_grad, alpha=-lr)

                # Levy Flight noise: power-law step length scaled by the raw
                # gradient norm. powlaw_samp returns a length-1 array, so
                # extract the scalar explicitly.
                sample = np.asarray(powlaw_samp(x_min=lr * 0.01, alpha=group['alpha']))
                levy_r = float(sample.item()) * torch.norm(grad)
                # Direction is uniform on the circle (no bias towards the gradient).
                theta = float(uniform.rvs(loc=0, scale=2 * np.pi))
                direction = torch.tensor([np.cos(theta), np.sin(theta)],
                                         dtype=p.dtype, device=p.device)
                levy_noise = levy_r * direction
                p.add_(levy_noise, alpha=-lr)

                # Rebinding the loop variable cannot change the parameter, so
                # copy the result back when apply_period returns a new tensor.
                # NOTE(review): assumes apply_period returns a tensor with the
                # same shape as p -- confirm against its definition.
                wrapped = self.func.apply_period(p)
                if isinstance(wrapped, torch.Tensor) and wrapped is not p:
                    p.copy_(wrapped)

        return loss


In [134]:
# Objective from the project's ObjectiveFunction module, created with
# start=[2.5, 2] and bounds=10.
m = of.AlpineN1(start=[2.5, 2], bounds=10)
# Pair the objective with the Levy power-law optimiser. The objective is also
# passed as `func` because SGD_LevyPow.step calls func.apply_period on each
# parameter after updating it.
alpline_LevyPow = of.OptimisationProblem(
    m,
    SGD_LevyPow(m.parameters(), func=m,
                lr=0.01, momentum=0.996,
                alpha=2),
    n_epochs = 2000
)

# NOTE(review): run() presumably returns the per-epoch losses, parameter
# trajectory and predictions -- confirm against OptimisationProblem.
losses, params, preds = alpline_LevyPow.run()
# NOTE(review): the positional arguments look like x-range, y-range and grid
# step, matching bounds=10 above -- confirm against OptimisationProblem.visualise.
fig1 = alpline_LevyPow.visualise((-10, 10), (-10, 10), 0.1, render="contour")
fig1.show()

In [135]:
# NOTE(review): assumes params[0] and params[1] are the x- and y-coordinate
# trajectories returned by the optimisation run -- confirm the layout of `params`.
x = params[0]
y = params[1]

# Colour ramp handed to plotly's 2-D density figure factory.
colorscale = ['#7A4579', '#D56073', 'rgb(236,158,105)', (1, 1, 0.2), (0.98,0.98,0.98)]

# 2-D density plot of the (x, y) samples.
fig2 = ff.create_2d_density(
    x, y, colorscale=colorscale, point_size=3
)

fig2.show()

In [133]:
# Export both figures through the project helper.
# NOTE(review): presumably writes them into one HTML file with this name -- verify in helper_funcs.
hf.figures_to_html([fig1, fig2], 'cool_levypow_alpha2_mom996_lr0-01--2.html')

In [None]:
# AGENDA
# Get KDE plots of the visited positions to demonstrate that the optimiser spends longer in deeper wells
# Build an adaptive-alpha algorithm based on discrete box counts (as below)

In [None]:
from torch.optim.optimizer import Optimizer, required
import torch
from typing import List, Optional

class SGD_TC(Optimizer):

    """
    Divide region into discrete boxes (based on bounds).
    Count number of times in each region - PHASE 1.
    Change diffusion behaviour based on count of current box - PHASE 2.
        Sub diffusive if low count, super diffusive if high count.

    NOTE(review): the box counting described above is not implemented yet.
    What the code currently does:

    * ``step`` performs SGD with momentum and then adds a Levy-distributed
      kick whose direction is drawn from a normal distribution centred on the
      direction of the momentum buffer (2-element parameters only).
    * ``_metric`` adds a sum of Gaussian bumps, centred on previously visited
      positions, to a prediction.

    Parameters
    ----------
    params : iterable
        Parameters (or parameter groups) to optimise.
    height : float
        Multiplier applied to the summed Gaussian bias in ``_metric``.
    width : float
        Gaussian width; the exponent used is ``-0.5 * (1 / width) ** 2 * |v|^2``.
    lr : float
        Learning rate; also scales the noise kick.
    alpha : float
        First argument forwarded to ``hf.levyrandom``.
    angle_spread : float
        Standard deviation of the noise direction around the momentum direction.
    levy_spread : float
        Passed to ``hf.levyrandom`` as ``sigma``.
    bounds : float
        Stored on the instance; not used by the current implementation.
    momentum : float, optional
        Momentum factor (default 0).
    """

    def __init__(self,
                 params,
                 height: float = required,
                 width: float = required,
                 lr: float = required,
                 alpha:float = required,
                 angle_spread: float = required,
                 levy_spread: float = required,
                 bounds: float = required,
                 momentum: float = 0
    ):
        self.step_count = 0
        self.bounds = bounds

        self.height = height
        # Pre-computed Gaussian exponent factor.
        self.width_denom = -0.5*(1/width)**2
        defaults = dict(height=height,
                        width=width,
                        lr=lr,
                        alpha=alpha,
                        angle_spread=angle_spread,
                        levy_spread=levy_spread,
                        momentum=momentum)
        super().__init__(params, defaults)

    def _metric(self, pred):
        """
        Return ``pred`` plus a Gaussian bias built from the visit history.

        For every parameter, the last history entry is compared with each
        earlier entry and ``height * exp(width_denom * |last - earlier|^2)`` is
        accumulated. The last entry is the live parameter appended by ``step``,
        so the bias can carry gradients; afterwards it is replaced by a
        detached copy so that later in-place updates do not change it.

        Before the first ``step`` that sees a gradient (empty optimizer state)
        ``pred`` is returned unchanged.
        """
        if not self.state:
            print('Initialised.')
            return pred

        Vbias = 0

        for state in self.state.values():
            history = state.get('history')
            if not history:
                continue

            last_ph = history[-1]
            for ph in history[:-1]:
                # Flatten so the squared distance is defined for any shape.
                v = (last_ph - ph).reshape(-1)
                Vbias = Vbias + torch.exp(self.width_denom * torch.dot(v, v))

            # update detachment of tensors
            history[-1] = last_ph.detach().clone()

        Vbias = self.height * Vbias

        return pred + Vbias


    @torch.no_grad()
    def step(self, closure=None):
        """
        Perform a single optimisation step.

        Parameters
        ----------
        closure : callable, optional
            Re-evaluates the model and returns the loss; it is called with
            gradients enabled.

        Returns
        -------
        The value returned by ``closure``, or ``None`` if no closure was given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                grad = p.grad
                if grad is None:
                    # Parameter did not take part in the backward pass.
                    continue

                # Optimizer.state is keyed by parameter so that every tensor
                # owns its own history and momentum buffer.
                state = self.state[p]

                history = state.setdefault('history', [])
                if history and history[-1] is p:
                    # _metric was not called since the previous step: freeze
                    # the previous entry before p is modified in place, so the
                    # history never holds two aliases of the live parameter.
                    history[-1] = p.detach().clone()
                # The live parameter is appended on purpose; _metric reads it
                # and then replaces it with a detached copy.
                history.append(p)

                if 'momentum_buffer' not in state:
                    state['momentum_buffer'] = grad.detach().clone()
                else:
                    state['momentum_buffer'].mul_(group['momentum']).add_(grad, alpha=1)
                grad = state['momentum_buffer']
                p.add_(grad, alpha=-group['lr'])

                # NALF: Levy-distributed step length scaled by the norm of the
                # momentum buffer.
                alpha = group['alpha']
                levy_r = hf.levyrandom(alpha, beta=1, mu=0.0, sigma=group['levy_spread']) * torch.norm(grad)
                # Noise direction is spread around the momentum direction.
                cur_dir = cart2polar(grad.detach().cpu().numpy())
                theta = float(norm.rvs(loc=cur_dir, scale=group['angle_spread']))
                direction = torch.tensor([np.cos(theta), np.sin(theta)],
                                         dtype=p.dtype, device=p.device)
                levy_noise = levy_r * direction
                p.add_(levy_noise, alpha=-group['lr'])

        self.step_count += 1

        return loss