In [None]:
"""
Time complexity (abuse of terminology) implementation
"""

In [9]:
%load_ext autoreload
%autoreload 2
import ObjectiveFunction as of
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
import helper_funcs as hf
from math import sqrt
from scipy.stats import uniform, norm

In [3]:
def cart2polar(x):
    x = np.array(x)
    return np.arctan2(x[1], x[0])

In [10]:
def powlaw_samp(x_min, alpha, size=1):
    """
    Samples from powerlaw dist with min value x_min.
    """
    r = np.random.random(size=size)
    return  x_min * (1 - r) ** (1 / (1-alpha))

    # https://stats.stackexchange.com/questions/173242/random-sample-from-power-law-distribution
    # https://arxiv.org/pdf/0706.1062.pdf

In [5]:
"""
Doing SGD with Levy-Flight Noise properly
"""

from torch.optim.optimizer import Optimizer, required
import torch
from typing import List, Optional

class SGD_LevyPow(Optimizer):

    def __init__(self,
                 params,
                 func: float = required,
                 lr: float = required,
                 alpha:float = required,
                 momentum: float = 0
    ):
        self.func = func
        defaults = dict(lr=lr,
                        alpha=alpha,
                        momentum=momentum)
        super().__init__(params, defaults)


    @torch.no_grad()
    def step(self, closure=None):

        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                grad = p.grad

                if 'momentum_buffer' not in self.state:
                    self.state['momentum_buffer'] = grad.detach().clone()
                else:
                    self.state['momentum_buffer'].mul_(group['momentum']).add_(grad, alpha=1)
                
                mom_grad = self.state['momentum_buffer']
                p.add_(mom_grad, alpha=-group['lr'])

                # Levy Flight noise
                levy_r = float(powlaw_samp(x_min=group['lr']*0.01, alpha=group['alpha'])) * torch.norm(grad)
                #cur_dir = cart2polar(grad)
                #theta = float(norm.rvs(loc=cur_dir, scale=group['angle_spread']))
                theta = float(uniform.rvs(loc=0, scale=2*np.pi))
                dir = np.array([np.cos(theta), np.sin(theta)])
                levy_noise = levy_r * torch.Tensor(dir)
                p.add_(levy_noise, alpha=-group['lr'])
                p = self.func.apply_period(p)

        return loss


In [6]:
m = of.AlpineN1(start=[2.5, 2], bounds=10)
alpline_LevyPow = of.OptimisationProblem(
    m,
    SGD_LevyPow(m.parameters(), func=m,
                lr=0.01, momentum=0.996,
                alpha=2.5),
    n_epochs = 2000
)

losses, params, preds = alpline_LevyPow.run()
fig1 = alpline_LevyPow.visualise((-10, 10), (-10, 10), 0.1, render="contour")
fig1.show()

0
200
400
600
800
1000
1200
1400
1600
1800


AttributeError: 'SGD_LevyPow' object has no attribute 'record'

In [187]:
bounds = 5
m = of.Ackley(start=[2.5, 2], bounds=bounds)
ackley_LevyPow = of.OptimisationProblem(
    m,
    SGD_LevyPow(m.parameters(), func=m,
                lr=0.02, momentum=0.996,
                alpha=2.5),
    n_epochs = 2000
)

losses, params, preds = ackley_LevyPow.run()
fig1 = ackley_LevyPow.visualise((-bounds, bounds), (-bounds, bounds), 0.1, render="contour")
fig1.show()

In [185]:
x = params[0]
y = params[1]

colorscale = ['#7A4579', '#D56073', 'rgb(236,158,105)', (1, 1, 0.2), (0.98,0.98,0.98)]

fig2 = ff.create_2d_density(
    x, y, colorscale=colorscale, point_size=3
)

fig2.show()

In [173]:
hf.figures_to_html([fig1, fig2], 'ackley_levypow_alpha2_mom996_lr0-01--2.html')

In [None]:
# AGENDA
# Get the KDE Plots of position to demonstrate it spends longer in deeper wells
# Build adaptive alpha algorithm based on discrete box counts (as below) OR continuous metadynamics bias potentials
"""
The motion of bubbles in our model is surprisingly complex—an ensemble averaged mean-squared displacement (Fig. 1b) 
shows that the motion is a super-diffusive power law at long lag times: 〈Δr2(τ)〉 ∼ τ^α with a non-trivial exponent 
α ≈ 1.37 ± 0.03. Super-diffusive random walks appear to violate the central limit theorem, and are a consequence of 
either random steps having a suitable non-Gaussian size distribution (called Lévy-type super-diffusion42), steps 
having long-time correlations in their direction (non-Lévy super-diffusion) or a mixture of the two. In Fig. 1c, we 
observe that the step size distribution (or van Hove function) of bubble displacements in one time step shows a 
power-law tail, P(Δx(τ = 1)) ∼ Δx^−μ. The exponent we observe μ ≈ 2.5, is indeed consistent with the Lévy walk model 
that requires [[[[μ = 4 - α]]]. Close examination of the van Hove function in Fig. 1c shows the power-law tail is cut
off at large values and becomes more so at larger lag times. Such behaviour resembles a truncated Lévy walk, which 
displays super-diffusive behaviour over a wide range of timescales  https://www.nature.com/articles/nmat4663

power law index = 4 - MSD time growth exponent (alpha - swapped below)
alpha > 1 --> superdiffusion --> power law index < 3 (heavy tailed)
alpha < 1 --> subdiffusion --> power law index > 3
""" 

In [7]:
from torch.optim.optimizer import Optimizer, required
import torch
from typing import List, Optional

class SGD_TC(Optimizer):

    def __init__(self,
                 params,
                 func: float = required,
                 lr: float = required,
                 height: float = required,
                 width: float = required,
                 phase_switch_count: int = required,
                 momentum: float = 0
    ):       
        self.record = [lr, height, width, phase_switch_count, momentum]
        self.height = height
        if height != 1:
            print("Warning: given height is not compatible with a counting scheme.")
        self.width_denom = -0.5*(1/width)**2    # how large our count regions are
        self.step_count = 0
        self.phase_switch_count = phase_switch_count
        self.alpha = 2.5

        self.func = func
        defaults = dict(lr=lr,
                        momentum=momentum,
                        phase_switch_count=phase_switch_count)
        super().__init__(params, defaults)
    
    def _metric(self, pred):

        if not self.state:
            return pred
        
        history = self.state['history'][0:-1]
        last_ph = self.state['history'][-1]
        
        Vbias = 0

        for ph in history:
            v = last_ph - ph
            Vbias += torch.exp(self.width_denom * torch.dot(v, v.T))
        
        Vbias = self.height * Vbias

        # update detachment of tensors
        self.state['history'][-1] = self.state['history'][-1].detach().clone()

        # Adapt alpha - after the explore phase (after self.phase_switch_count many steps)
        if self.step_count > self.phase_switch_count:
            self.adapt_alpha(Vbias, pred)
        
        return pred
    
    def adapt_alpha(self, Vbias, pred):
        """
        Adaptive alpha function - use Vbias to change self.alpha
        Current scheme - look at Vbias as a fraction of total explore steps --> linear map to decrease alpha.
                       - 2.5 (super) < power law index < 3.5 (sub)
                       - if p = Vbias/phase_switch_count > 0.5 --> sub < 0.5 --> super
        Future ideas - look at Vbias as fraction of pred (height becomes objective dependent, rather than counting units)
        """

        p = float(Vbias/self.phase_switch_count)
        
        if p > 1:
            raise ValueError("""Should not have this much bias. 
                            Probably irregular walker behaviour - check the trajectory plots.""")

        self.alpha = 2.5 + p

    @torch.no_grad()
    def step(self, closure=None):

        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                
                grad = p.grad
                
                if 'history' not in self.state:
                    self.state['history'] = [p]
                elif self.step_count < self.phase_switch_count:
                    self.state['history'].append(p)

                if 'momentum_buffer' not in self.state:
                    self.state['momentum_buffer'] = grad.detach().clone()
                else:
                    self.state['momentum_buffer'].mul_(group['momentum']).add_(grad, alpha=1)
                
                mom_grad = self.state['momentum_buffer']
                p.add_(mom_grad, alpha=-group['lr'])

                # Levy Flight noise
                levy_r = float(powlaw_samp(x_min=group['lr']*0.01, alpha=self.alpha)) * torch.norm(grad)
                theta = float(uniform.rvs(loc=0, scale=2*np.pi))
                dir = np.array([np.cos(theta), np.sin(theta)])
                levy_noise = levy_r * torch.Tensor(dir)
                p.add_(levy_noise, alpha=-group['lr'])
                print(self.alpha)
                
                # Periodic Boundary Conditions (INBUILT TO OPTIMISER TO ENSURE COORDINATES ARE BOUNDED)
                p = self.func.apply_period(p)

        self.step_count += 1

        return loss

In [8]:
bounds = 10
phase_switch_count = 2000
m = of.AlpineN1(start=[2.5, 2], bounds=bounds)
alpline_TC = of.OptimisationProblem(
    m,
    SGD_TC(m.parameters(), func=m,
                lr=0.01, momentum=0.996,
                height=1, width=bounds/50, phase_switch_count=phase_switch_count),
    n_epochs = 3000
)

losses, params, preds = alpline_TC.run()
fig1 = alpline_TC.visualise((-bounds, bounds), (-bounds, bounds), 0.1, render="contour")
fig1.show()

2.5
0
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.

KeyboardInterrupt: 

In [16]:
x_explore = params[0][0:phase_switch_count]
y_explore = params[1][0:phase_switch_count]
x_diffuse = params[0][phase_switch_count:]
y_diffuse = params[1][phase_switch_count:]

colorscale = ['#7A4579', '#D56073', 'rgb(236,158,105)', (1, 1, 0.2), (0.98,0.98,0.98)]
fig2 = make_subplots(rows=1, cols=2)
fig2 = ff.create_2d_density(
    x_explore, y_explore, colorscale=colorscale, point_size=3
)
fig3 = ff.create_2d_density(
    x_diffuse, y_diffuse, colorscale=colorscale, point_size=3
)

fig2.show()
fig3.show()

In [17]:
bounds = 10
phase_switch_count = 2000
m = of.Ackley(start=[2.5, 2], bounds=bounds)
ackley_TC = of.OptimisationProblem(
    m,
    SGD_TC(m.parameters(), func=m,
                lr=0.01, momentum=0.996,
                height=1, width=bounds/50, phase_switch_count=phase_switch_count),
    n_epochs = 3000
)

losses, params, preds = ackley_TC.run()
fig1 = ackley_TC.visualise((-bounds, bounds), (-bounds, bounds), 0.1, render="contour")
fig1.show()

2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5
2.5


In [19]:
x_explore = params[0][0:phase_switch_count]
y_explore = params[1][0:phase_switch_count]
x_diffuse = params[0][phase_switch_count:]
y_diffuse = params[1][phase_switch_count:]

colorscale = ['#7A4579', '#D56073', 'rgb(236,158,105)', (1, 1, 0.2), (0.98,0.98,0.98)]
fig2 = make_subplots(rows=1, cols=2)
fig2 = ff.create_2d_density(
    x_explore, y_explore, colorscale=colorscale, point_size=3
)
fig3 = ff.create_2d_density(
    x_diffuse, y_diffuse, colorscale=colorscale, point_size=3
)

fig2.show()
fig3.show()

In [40]:
bounds = 2
phase_switch_count = 2000
m = of.Rosenbrock(start=[0.5, -1.5], bounds=bounds)
rosenbrock_TC = of.OptimisationProblem(
    m,
    SGD_TC(m.parameters(), func=m,
                lr=0.000001, momentum=0.996,
                height=1, width=bounds/50, phase_switch_count=phase_switch_count),
    n_epochs = 3000
)

losses, params, preds = rosenbrock_TC.run()
fig1 = rosenbrock_TC.visualise((-bounds, bounds), (-bounds, bounds), 0.1, render="contour")
fig1.show()

NameError: name 'SGD_TC' is not defined

In [24]:
x_explore = params[0][0:phase_switch_count]
y_explore = params[1][0:phase_switch_count]
x_diffuse = params[0][phase_switch_count:]
y_diffuse = params[1][phase_switch_count:]

colorscale = ['#7A4579', '#D56073', 'rgb(236,158,105)', (1, 1, 0.2), (0.98,0.98,0.98)]
fig2 = make_subplots(rows=1, cols=2)
fig2 = ff.create_2d_density(
    x_explore, y_explore, colorscale=colorscale, point_size=3
)
fig3 = ff.create_2d_density(
    x_diffuse, y_diffuse, colorscale=colorscale, point_size=3
)

fig2.show()
fig3.show()

In [25]:
hf.figures_to_html([fig1, fig2, fig3], 'levyTC_ackley_lr0-01_mom996_b50_psc2000.html')

In [34]:
from torch.optim.optimizer import Optimizer, required
import torch
from typing import List, Optional

class SGD_TC2(Optimizer):

    def __init__(self,
                 params,
                 func: float = required,
                 lr: float = required,
                 height: float = required,
                 width: float = required,
                 momentum: float = 0
    ):
        self.record = {'lr': lr, 'height': height, 'width': width, 'momentum': momentum}
        self.alpha_record = []
        self.height = height
        if height != 1:
            print("Warning: given height is not compatible with a counting scheme.")
        self.width_denom = -0.5*(1/width)**2    # how large our count regions are
        self.step_count = 0
        self.alpha = 2.5

        self.func = func
        defaults = dict(lr=lr,
                        momentum=momentum)
        super().__init__(params, defaults)
    
    def _metric(self, pred):

        if not self.state:
            return pred
        
        history = self.state['history'][0:-1]
        last_ph = self.state['history'][-1]
        
        Vbias = 0

        for ph in history:
            v = last_ph - ph
            Vbias += torch.exp(self.width_denom * torch.dot(v, v.T))
        
        Vbias = self.height * Vbias

        # update detachment of tensors
        self.state['history'][-1] = self.state['history'][-1].detach().clone()

        # Adapt alpha - no phase preference
        self.adapt_alpha(Vbias, pred)
        
        return pred
    
    def adapt_alpha(self, Vbias, pred):

        p = float(Vbias/self.step_count)
        
        if p > 1:
            raise ValueError("""Should not have this much bias. 
                            Probably irregular walker behaviour - check the trajectory plots.""")

        self.alpha = 2.5 + p

    @torch.no_grad()
    def step(self, closure=None):

        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                
                grad = p.grad
                
                if 'history' not in self.state:
                    self.state['history'] = [p]
                else:
                    self.state['history'].append(p)

                if 'momentum_buffer' not in self.state:
                    self.state['momentum_buffer'] = grad.detach().clone()
                else:
                    self.state['momentum_buffer'].mul_(group['momentum']).add_(grad, alpha=1)
                
                mom_grad = self.state['momentum_buffer']
                p.add_(mom_grad, alpha=-group['lr'])

                # Levy Flight noise
                levy_r = float(powlaw_samp(x_min=0.01, alpha=self.alpha)) * torch.norm(grad)
                theta = float(uniform.rvs(loc=0, scale=2*np.pi))
                dir = np.array([np.cos(theta), np.sin(theta)])
                levy_noise = levy_r * torch.Tensor(dir)
                p.add_(levy_noise, alpha=-group['lr'])
                self.alpha_record.append(self.alpha)
                
                # Periodic Boundary Conditions (INBUILT TO OPTIMISER TO ENSURE COORDINATES ARE BOUNDED)
                p = self.func.apply_period(p)

        self.step_count += 1

        return loss

def powlaw_samp(x_min, alpha, size=1):
    """
    Samples from powerlaw dist with min value x_min.
    """
    r = np.random.random(size=size)
    return  x_min * (1 - r) ** (1 / (1-alpha))

    # https://stats.stackexchange.com/questions/173242/random-sample-from-power-law-distribution
    # https://arxiv.org/pdf/0706.1062.pdf

In [65]:
from torch.optim.optimizer import Optimizer, required
import torch
from typing import List, Optional

class SGD_TC2_5(Optimizer):

    def __init__(self,
                 params,
                 func: float = required,
                 lr: float = required,
                 height: float = required,
                 width: float = required,
                 momentum: float = 0
    ):
        self.record = {'lr': lr, 'height': height, 'width': width, 'momentum': momentum}
        self.alpha_record = []
        self.height = height
        if height != 1:
            print("Warning: given height is not compatible with a counting scheme.")
        self.width_denom = -0.5*(1/width)**2    # how large our count regions are
        self.step_count = 0
        self.alpha = 2.5

        self.func = func
        defaults = dict(lr=lr,
                        momentum=momentum)
        super().__init__(params, defaults)
    
    def _metric(self, pred):

        if not self.state:
            return pred
        
        history = self.state['history'][0:-1]
        last_ph = self.state['history'][-1]
        
        Vbias = 0

        for ph in history:
            v = last_ph - ph
            Vbias += torch.exp(self.width_denom * torch.dot(v, v.T))
        
        Vbias = self.height * Vbias

        # update detachment of tensors
        self.state['history'][-1] = self.state['history'][-1].detach().clone()

        # Adapt alpha - no phase preference
        self.adapt_alpha(Vbias, pred)
        
        return pred
    
    def adapt_alpha(self, Vbias, pred):

        p = float(Vbias/self.step_count)
        
        if p > 1:
            raise ValueError("""Should not have this much bias. 
                            Probably irregular walker behaviour - check the trajectory plots.""")

        self.alpha = 2.5 + p

    @torch.no_grad()
    def step(self, closure=None):

        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                
                grad = p.grad
                
                if 'history' not in self.state:
                    self.state['history'] = [p]
                else:
                    self.state['history'].append(p)

                if 'momentum_buffer' not in self.state:
                    self.state['momentum_buffer'] = grad.detach().clone()
                else:
                    self.state['momentum_buffer'].mul_(group['momentum']).add_(grad, alpha=1)
                
                mom_grad = self.state['momentum_buffer']
                p.add_(mom_grad, alpha=-group['lr'])

                # Levy Flight noise
                levy_r = float(powlaw_samp(x_min=1, alpha=self.alpha)) * torch.norm(grad)
                theta = float(uniform.rvs(loc=0, scale=2*np.pi))
                dir = np.array([np.cos(theta), np.sin(theta)])
                levy_noise = levy_r * torch.Tensor(dir)
                p.add_(levy_noise, alpha=-group['lr'])
                self.alpha_record.append(self.alpha)
                
                # Periodic Boundary Conditions (INBUILT TO OPTIMISER TO ENSURE COORDINATES ARE BOUNDED)
                p = self.func.apply_period(p)

        self.step_count += 1

        return loss

def powlaw_samp(x_min, alpha, size=1):
    """
    Samples from powerlaw dist with min value x_min.
    """
    r = np.random.random(size=size)
    return  x_min * (1 - r) ** (1 / (1-alpha))

    # https://stats.stackexchange.com/questions/173242/random-sample-from-power-law-distribution
    # https://arxiv.org/pdf/0706.1062.pdf

In [1]:
def run_experiment(model, opt_alg, start, bounds, opt_params, n_epochs):
    m = model(start=start, bounds=bounds)
    opt_problem = of.OptimisationProblem(
        m,
        opt_alg(params=m.parameters(), func=m, **opt_params),
        n_epochs = n_epochs
    )

    losses, params, preds = opt_problem.run()
    fig1 = opt_problem.visualise((-bounds, bounds), (-bounds, bounds), 0.1, render="contour")
    fig2 = hf.create_density_plot(params)
    fig3 = px.scatter(opt_problem.opt.alpha_record)
    return [fig1, fig2, fig3]

In [73]:
bounds = 10
n_epochs = 3000
figs = run_experiment(of.AlpineN1, SGD_TC2_5, start=[2.5,2], bounds=bounds,
                      opt_params={'lr': 0.01, 'momentum': 0, 'height': 1, 'width': bounds/50},
                      n_epochs=n_epochs
                      )
figs.extend(run_experiment(of.Ackley, SGD_TC2_5, start=[2.5,2], bounds=bounds,
                      opt_params={'lr': 0.0135, 'momentum': 0, 'height': 1, 'width': bounds/50},
                      n_epochs=n_epochs
                      )
            )
figs.extend(run_experiment(of.Rosenbrock, SGD_TC2_5, start=[0.5,-1.5], bounds=3,
                      opt_params={'lr': 0.05, 'momentum': 0, 'height': 1, 'width': bounds/50},
                      n_epochs=n_epochs
                      )
            )
hf.figures_to_html(figs, 'TC2_5_results8.html')

0
300
600
900
1200
1500
1800
2100
2400
2700
0
300
600
900
1200
1500
1800
2100
2400
2700
0
300
600
900
1200
1500
1800
2100
2400
2700


In [None]:
"""
TC2_results3 revealed that the levy noise has been doing almost nothing - all explorary behaviour came from the momentum term

Createad TC2_5 with bigger x_min (up from 0.01) to combat this - can try different x_min values
    TC_2_5_results1 : x_min = 0.1 --> not much difference from TC2_results3
    TC_2_5_results2 : x_min = 1 --> much better (but very lr dependent?)
    TC_2_5_results3 : x_min = 1, customise lr --> much better convergence to global minimum for Rosenbrock, Ackley stays superdiffusive regime throughout bc lr = 0.03 too high
    TC_2_5_results4 : x_min = 1, lr = 0.02 Ackley and lr = 0.01 Rosenbrock  --> Ackley too high for convergence, but got good kde statistics
    TC_2_5_results5 : same as 4, but added 0.996 momentum --> really bad, kde plot nearly uncorrelated to global minimum
    TC_2_5_results6 : x_min = 1, lr = 0.015 Ackley and lr = 0.1 Rosenbrock --> both lr too high for convergence, kde statistics worse for Ackley, Rosenbrock statistics ok but bad
    TC_2_5_results7 : x_min = 1, lr = 0.012 Ackley and lr = 0.05 Rosenbrock --> 

TC3 attempts to resolve the forgetful p problem
    TC3_results1 + 2 :  x _min = 0.01 
    TC_results3 : x_min = 1, lr from TC_2_5results7 --> 

TC_Control checks the effect of SGD on the dynamics versus pure Radial Levy Motion
    TC_Control_results3 : x_min = 1 --> need SGD to get good statistics from kde plot + convergence.


remove scaling of Levy noise
"""

In [15]:
history = [torch.Tensor([1,1]), torch.Tensor([2,2])]
history

[tensor([1., 1.]), tensor([2., 2.])]

In [22]:
v = torch.Tensor([1,2])
torch.dot(v,v)

tensor(5.)

In [22]:
px.histogram(powlaw_samp(x_min=1, alpha=2.5, size=3000))

In [23]:
px.histogram(powlaw_samp(x_min=1, alpha=3.5, size=3000))

In [30]:
1500 + 750 + 375 + 187 + 93 + 46 + 23 + 11 + 5 + 2 + 1

2993

In [29]:
93//2

46