In [1]:
%load_ext autoreload
%autoreload 2
import ObjectiveFunction as of
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
import helper_funcs as hf
from math import sqrt
from scipy.stats import uniform, norm

In [3]:
def run_experiment(model, opt_alg, start, bounds, opt_params, n_epochs):
    """Run one optimisation experiment and return its three figures.

    Args:
        model: callable invoked as ``model(start=start, bounds=bounds)`` to
            build the objective.
        opt_alg: optimiser class, invoked with the objective's parameters,
            ``func=<objective>`` and the entries of ``opt_params``.
        start: starting point forwarded to ``model``.
        bounds: forwarded to ``model``; also used as the symmetric plotting
            range ``(-bounds, bounds)`` on both axes.
        opt_params: extra keyword arguments for ``opt_alg``.
        n_epochs: forwarded to ``of.OptimisationProblem``.

    Returns:
        ``[contour figure, parameter density plot, scatter of alpha_record]``.
    """
    objective = model(start=start, bounds=bounds)
    optimiser = opt_alg(params=objective.parameters(), func=objective, **opt_params)
    problem = of.OptimisationProblem(objective, optimiser, n_epochs=n_epochs)

    # run() yields (losses, params, preds); only the parameters are plotted.
    _, params, _ = problem.run()

    axis_range = (-bounds, bounds)
    return [
        problem.visualise(axis_range, axis_range, 0.1, render="contour"),
        hf.create_density_plot(params),
        px.scatter(problem.opt.alpha_record),
    ]

def powlaw_samp(x_min, alpha, size=1):
    """
    Samples from powerlaw dist with min value x_min.

    Inverse-transform sampling: with r drawn uniformly from [0, 1),
    ``x_min * (1 - r) ** (1 / (1 - alpha))`` is returned.

    Args:
        x_min: minimum value of the distribution (scale of the samples).
        alpha: power-law exponent; must be greater than 1.
        size: number (or shape) of samples, passed to ``np.random.random``.

    Returns:
        numpy array of samples with the requested ``size``.

    Raises:
        ValueError: if ``alpha <= 1``. ``alpha == 1`` divides by zero and
            ``alpha < 1`` makes the exponent positive, which would return
            values below ``x_min`` instead of a power-law tail.

    References:
        https://stats.stackexchange.com/questions/173242/random-sample-from-power-law-distribution
        https://arxiv.org/pdf/0706.1062.pdf
    """
    if np.any(np.asarray(alpha) <= 1):
        raise ValueError("alpha must be greater than 1 to sample from a power law")

    r = np.random.random(size=size)
    return x_min * (1 - r) ** (1 / (1 - alpha))

In [83]:
from torch.optim.optimizer import Optimizer, required
import torch
from typing import List, Optional

class SGD_TC_Control(Optimizer):
    """Control optimiser: parameters move by Levy-flight noise only.

    No gradient step is taken; the gradient is used only through its norm,
    which scales the noise radius. The radius is drawn from a power law whose
    exponent ``alpha`` is adapted in ``_metric`` from a Gaussian-kernel count
    of how often earlier positions lie near the latest one.

    Args:
        params: iterable of parameters to optimise. The noise direction is a
            2-element vector and ``_metric`` uses ``torch.dot``, so each
            parameter is expected to be a 1-D tensor with two entries.
        func: objective object; only its ``apply_period`` method is used here.
        lr: step size multiplying the noise.
        height: kernel height; any value other than 1 prints a warning.
        width: kernel width (size of the count region).
    """

    def __init__(self,
                 params,
                 func=required,
                 lr: float = required,
                 height: float = required,
                 width: float = required,
    ):
        # Hyper-parameters kept for later inspection.
        self.record = {'lr': lr, 'height': height, 'width': width}
        # Value of alpha used at every parameter update.
        self.alpha_record = []
        self.height = height
        if height != 1:
            print("Warning: given height is not compatible with a counting scheme.")
        self.width_denom = -0.5*(1/width)**2    # how large our count regions are
        self.step_count = 0
        self.alpha = 2.5

        self.func = func
        defaults = dict(lr=lr)
        super().__init__(params, defaults)

    def _metric(self, pred):
        """Adapt ``alpha`` from the visit history and return ``pred`` unchanged.

        Does nothing until ``step`` has recorded at least one position.
        """
        if not self.state:
            return pred

        history = self.state['history'][0:-1]
        last_ph = self.state['history'][-1]

        Vbias = 0

        for ph in history:
            v = last_ph - ph
            # Squared distance; `v.T` on a 1-D tensor is deprecated and was a no-op.
            Vbias += torch.exp(self.width_denom * torch.dot(v, v))

        Vbias = self.height * Vbias

        # `step` stores the live parameter; freeze it here so that later
        # in-place updates do not rewrite the recorded position.
        self.state['history'][-1] = self.state['history'][-1].detach().clone()

        # Adapt alpha - no phase preference
        self.adapt_alpha(Vbias, pred)

        return pred

    def adapt_alpha(self, Vbias, pred):
        """Set ``alpha = 2.5 + Vbias / step_count``.

        Raises:
            ValueError: if the ratio exceeds 1.
        """
        p = float(Vbias/self.step_count)

        if p > 1:
            raise ValueError("""Should not have this much bias. 
                            Probably irregular walker behaviour - check the trajectory plots.""")

        self.alpha = 2.5 + p

    @torch.no_grad()
    def step(self, closure=None):
        """Apply one Levy-noise update to every parameter that has a gradient.

        Args:
            closure: optional callable that re-evaluates the model and returns
                the loss.

        Returns:
            The closure's loss, or ``None`` when no closure is given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:

                grad = p.grad
                if grad is None:
                    # Nothing to scale the noise with; skip like the built-in optimisers.
                    continue

                if 'history' not in self.state:
                    self.state['history'] = [p]
                else:
                    self.state['history'].append(p)

                # Levy Flight noise
                radius = np.asarray(powlaw_samp(x_min=1, alpha=self.alpha)).item()
                levy_r = radius * torch.norm(grad)
                theta = float(uniform.rvs(loc=0, scale=2*np.pi))
                direction = np.array([np.cos(theta), np.sin(theta)])
                levy_noise = levy_r * torch.Tensor(direction)
                p.add_(levy_noise, alpha=-group['lr'])
                self.alpha_record.append(self.alpha)

                # Periodic Boundary Conditions (INBUILT TO OPTIMISER TO ENSURE COORDINATES ARE BOUNDED)
                # NOTE(review): apply_period is not visible here. If it returns a
                # new tensor instead of editing `p` in place, rebinding the loop
                # variable would discard the wrap, so copy the result back.
                wrapped = self.func.apply_period(p)
                if isinstance(wrapped, torch.Tensor) and wrapped is not p:
                    p.copy_(wrapped)

        self.step_count += 1

        return loss

In [86]:
# Control Experiments
bounds = 10
n_epochs = 3000

# One row per run: (objective, start point, bounds passed to the run, learning rate).
# The kernel width is derived from the notebook-level `bounds` (10) for every
# run, including the Rosenbrock run whose own bounds are 3.
control_runs = [
    (of.AlpineN1, [2.5, 2], bounds, 0.01),
    (of.Ackley, [2.5, 2], bounds, 0.03),
    (of.Rosenbrock, [0.5, -1.5], 3, 0.001),  # 0.996 old momentum setting
]

figs = []
for objective, start_point, run_bounds, lr in control_runs:
    figs.extend(
        run_experiment(objective, SGD_TC_Control, start=start_point, bounds=run_bounds,
                       opt_params={'lr': lr, 'height': 1, 'width': bounds/50},
                       n_epochs=n_epochs)
    )
hf.figures_to_html(figs, 'TC_Control_results3.html')

0
300
600
900
1200
1500
1800
2100
2400
2700
0
300
600
900
1200
1500
1800
2100
2400
2700
0
300
600
900
1200
1500
1800
2100
2400
2700


In [78]:
from torch.optim.optimizer import Optimizer, required
import torch
from typing import List, Optional

class SGD_TC2_5(Optimizer):
    """SGD with momentum plus Levy-flight noise with an adaptive exponent.

    Each update applies the momentum-accumulated gradient and then a noise
    kick whose radius is drawn from a power law with exponent ``alpha``.
    ``alpha`` is adapted in ``_metric`` from a Gaussian-kernel count of how
    often earlier positions lie near the latest one, divided by the number of
    steps taken.

    Args:
        params: iterable of parameters to optimise. The noise direction is a
            2-element vector and ``_metric`` uses ``torch.dot``, so each
            parameter is expected to be a 1-D tensor with two entries.
        func: objective object; only its ``apply_period`` method is used here.
        lr: step size for both the gradient step and the noise.
        height: kernel height; any value other than 1 prints a warning.
        width: kernel width (size of the count region).
        momentum: multiplier applied to the momentum buffer before the new
            gradient is added.
    """

    def __init__(self,
                 params,
                 func=required,
                 lr: float = required,
                 height: float = required,
                 width: float = required,
                 momentum: float = 0
    ):
        # Hyper-parameters kept for later inspection.
        self.record = {'lr': lr, 'height': height, 'width': width, 'momentum': momentum}
        # Value of alpha used at every parameter update.
        self.alpha_record = []
        self.height = height
        if height != 1:
            print("Warning: given height is not compatible with a counting scheme.")
        self.width_denom = -0.5*(1/width)**2    # how large our count regions are
        self.step_count = 0
        self.alpha = 2.5

        self.func = func
        defaults = dict(lr=lr,
                        momentum=momentum)
        super().__init__(params, defaults)

    def _metric(self, pred):
        """Adapt ``alpha`` from the visit history and return ``pred`` unchanged.

        Does nothing until ``step`` has recorded at least one position.
        """
        if not self.state:
            return pred

        history = self.state['history'][0:-1]
        last_ph = self.state['history'][-1]

        Vbias = 0

        for ph in history:
            v = last_ph - ph
            # Squared distance; `v.T` on a 1-D tensor is deprecated and was a no-op.
            Vbias += torch.exp(self.width_denom * torch.dot(v, v))

        Vbias = self.height * Vbias

        # `step` stores the live parameter; freeze it here so that later
        # in-place updates do not rewrite the recorded position.
        self.state['history'][-1] = self.state['history'][-1].detach().clone()

        # Adapt alpha - no phase preference
        self.adapt_alpha(Vbias, pred)

        return pred

    def adapt_alpha(self, Vbias, pred):
        """Set ``alpha = 2.5 + Vbias / step_count``.

        Raises:
            ValueError: if the ratio exceeds 1.
        """
        p = float(Vbias/self.step_count)

        if p > 1:
            raise ValueError("""Should not have this much bias. 
                            Probably irregular walker behaviour - check the trajectory plots.""")

        self.alpha = 2.5 + p

    @torch.no_grad()
    def step(self, closure=None):
        """Apply one momentum + Levy-noise update to every parameter with a gradient.

        Args:
            closure: optional callable that re-evaluates the model and returns
                the loss.

        Returns:
            The closure's loss, or ``None`` when no closure is given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:

                grad = p.grad
                if grad is None:
                    # No gradient to follow or to scale the noise with; skip
                    # like the built-in optimisers.
                    continue

                if 'history' not in self.state:
                    self.state['history'] = [p]
                else:
                    self.state['history'].append(p)

                # The buffer is shared through a single state key, i.e. it is
                # not kept per parameter.
                if 'momentum_buffer' not in self.state:
                    self.state['momentum_buffer'] = grad.detach().clone()
                else:
                    self.state['momentum_buffer'].mul_(group['momentum']).add_(grad, alpha=1)

                mom_grad = self.state['momentum_buffer']
                p.add_(mom_grad, alpha=-group['lr'])

                # Levy Flight noise
                radius = np.asarray(powlaw_samp(x_min=1, alpha=self.alpha)).item()
                levy_r = radius * torch.norm(grad)
                theta = float(uniform.rvs(loc=0, scale=2*np.pi))
                direction = np.array([np.cos(theta), np.sin(theta)])
                levy_noise = levy_r * torch.Tensor(direction)
                p.add_(levy_noise, alpha=-group['lr'])
                self.alpha_record.append(self.alpha)

                # Periodic Boundary Conditions (INBUILT TO OPTIMISER TO ENSURE COORDINATES ARE BOUNDED)
                # NOTE(review): apply_period is not visible here. If it returns a
                # new tensor instead of editing `p` in place, rebinding the loop
                # variable would discard the wrap, so copy the result back.
                wrapped = self.func.apply_period(p)
                if isinstance(wrapped, torch.Tensor) and wrapped is not p:
                    p.copy_(wrapped)

        self.step_count += 1

        return loss

In [88]:
from torch.optim.optimizer import Optimizer, required
import torch
from typing import List, Optional

class SGD_TC3(Optimizer):
    """SGD with momentum plus Levy-flight noise; saturating adaptation of alpha.

    Same update as a momentum SGD step followed by a Levy-noise kick, but the
    visit count is mapped to ``alpha`` through ``1 - exp(-m * Vbias)`` instead
    of being divided by the step count.

    Args:
        params: iterable of parameters to optimise. The noise direction is a
            2-element vector and ``_metric`` uses ``torch.dot``, so each
            parameter is expected to be a 1-D tensor with two entries.
        func: objective object; only its ``apply_period`` method is used here.
        lr: step size for both the gradient step and the noise.
        height: kernel height; any value other than 1 prints a warning.
        width: kernel width (size of the count region).
        half_life: visit count at which the alpha offset reaches 0.5.
        momentum: multiplier applied to the momentum buffer before the new
            gradient is added.
    """

    # Discard DIRECT probability interpretation of p
    def __init__(self,
                 params,
                 func=required,
                 lr: float = required,
                 height: float = required,
                 width: float = required,
                 half_life: float = required,
                 momentum: float = 0
    ):
        # Hyper-parameters kept for later inspection (half_life as supplied).
        self.record = {'lr': lr, 'height': height, 'width': width,
                       'momentum': momentum, 'half_life': half_life}
        # Value of alpha used at every parameter update.
        self.alpha_record = []
        # Stored as the reciprocal so adapt_alpha can multiply by it.
        self.half_life = 1/half_life
        self.height = height
        if height != 1:
            print("Warning: given height is not compatible with a counting scheme.")
        self.width_denom = -0.5*(1/width)**2    # how large our count regions are
        self.step_count = 0
        self.alpha = 2.5

        self.func = func
        defaults = dict(lr=lr,
                        momentum=momentum)
        super().__init__(params, defaults)

    def _metric(self, pred):
        """Adapt ``alpha`` from the visit history and return ``pred`` unchanged.

        Does nothing until ``step`` has recorded at least one position.
        """
        if not self.state:
            return pred

        history = self.state['history'][0:-1]
        last_ph = self.state['history'][-1]

        Vbias = 0

        for ph in history:
            v = last_ph - ph
            # Squared distance; `v.T` on a 1-D tensor is deprecated and was a no-op.
            Vbias += torch.exp(self.width_denom * torch.dot(v, v))

        Vbias = self.height * Vbias

        # `step` stores the live parameter; freeze it here so that later
        # in-place updates do not rewrite the recorded position.
        self.state['history'][-1] = self.state['history'][-1].detach().clone()

        # Adapt alpha - no phase preference
        self.adapt_alpha(Vbias, pred)

        return pred

    def adapt_alpha(self, Vbias, pred):
        """Set ``alpha = 2.5 + p`` with ``p = 1 - exp(-ln(2) * Vbias / half_life)``.

        For non-negative ``Vbias`` the offset ``p`` lies in [0, 1), so no
        range check is needed.
        """
        p = 1 - np.exp(-np.log(2) * self.half_life * float(Vbias)) # 0 < f(x) = 1 - e^(-mx) < 1

        self.alpha = 2.5 + p

    @torch.no_grad()
    def step(self, closure=None):
        """Apply one momentum + Levy-noise update to every parameter with a gradient.

        Args:
            closure: optional callable that re-evaluates the model and returns
                the loss.

        Returns:
            The closure's loss, or ``None`` when no closure is given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:

                grad = p.grad
                if grad is None:
                    # No gradient to follow or to scale the noise with; skip
                    # like the built-in optimisers.
                    continue

                if 'history' not in self.state:
                    self.state['history'] = [p]
                else:
                    self.state['history'].append(p)

                # The buffer is shared through a single state key, i.e. it is
                # not kept per parameter.
                if 'momentum_buffer' not in self.state:
                    self.state['momentum_buffer'] = grad.detach().clone()
                else:
                    self.state['momentum_buffer'].mul_(group['momentum']).add_(grad, alpha=1)

                mom_grad = self.state['momentum_buffer']
                p.add_(mom_grad, alpha=-group['lr'])

                # Levy Flight noise
                radius = np.asarray(powlaw_samp(x_min=1, alpha=self.alpha)).item()
                levy_r = radius * torch.norm(grad)
                theta = float(uniform.rvs(loc=0, scale=2*np.pi))
                direction = np.array([np.cos(theta), np.sin(theta)])
                levy_noise = levy_r * torch.Tensor(direction)
                p.add_(levy_noise, alpha=-group['lr'])
                self.alpha_record.append(self.alpha)

                # Periodic Boundary Conditions (INBUILT TO OPTIMISER TO ENSURE COORDINATES ARE BOUNDED)
                # NOTE(review): apply_period is not visible here. If it returns a
                # new tensor instead of editing `p` in place, rebinding the loop
                # variable would discard the wrap, so copy the result back.
                wrapped = self.func.apply_period(p)
                if isinstance(wrapped, torch.Tensor) and wrapped is not p:
                    p.copy_(wrapped)

        self.step_count += 1

        return loss

In [4]:
bounds = 10
n_epochs = 3000

# One row per run: (objective, start point, bounds passed to the run, learning rate).
# The kernel width is derived from the notebook-level `bounds` (10) for every
# run, including the Rosenbrock run whose own bounds are 3.
tc2_5_runs = [
    (of.AlpineN1, [2.5, 2], bounds, 0.01),
    (of.Ackley, [2.5, 2], bounds, 0.02),
    (of.Rosenbrock, [0.5, -1.5], 3, 0.01),  # 0.996 old momentum setting
]

figs = []
for objective, start_point, run_bounds, lr in tc2_5_runs:
    figs.extend(
        run_experiment(objective, SGD_TC2_5, start=start_point, bounds=run_bounds,
                       opt_params={'lr': lr, 'momentum': 0.996, 'height': 1, 'width': bounds/50},
                       n_epochs=n_epochs)
    )
hf.figures_to_html(figs, 'TC2_5_results5.html')

NameError: name 'SGD_TC2_5' is not defined

In [89]:
bounds = 10
n_epochs = 3000

# One row per run: (objective, start point, bounds passed to the run, learning rate).
# The kernel width is derived from the notebook-level `bounds` (10) for every
# run, including the Rosenbrock run whose own bounds are 3.
tc3_runs = [
    (of.AlpineN1, [2.5, 2], bounds, 0.01),
    (of.Ackley, [2.5, 2], bounds, 0.0135),
    (of.Rosenbrock, [0.5, -1.5], 3, 0.05),  # 0.996 old momentum setting
]

figs = []
for objective, start_point, run_bounds, lr in tc3_runs:
    figs.extend(
        run_experiment(objective, SGD_TC3, start=start_point, bounds=run_bounds,
                       opt_params={'lr': lr, 'momentum': 0, 'height': 1,
                                   'width': bounds/50, 'half_life': 300},
                       n_epochs=n_epochs)
    )
hf.figures_to_html(figs, 'TC3_results3.html')

0
300
600
900
1200
1500
1800
2100
2400
2700
0
300
600
900
1200
1500
1800
2100
2400
2700
0
300
600
900
1200
1500
1800
2100
2400
2700


In [6]:
def levy_flight(x0, x_min, alpha, size):
    """Simulate a 2-D Levy flight that starts at ``x0``.

    Step lengths are drawn with ``powlaw_samp(x_min, alpha)`` and step
    directions uniformly from [0, 2*pi).

    Args:
        x0: starting point; ``x0[0]`` and ``x0[1]`` are the x and y coordinates.
        x_min: minimum step length.
        alpha: power-law exponent of the step-length distribution.
        size: number of steps.

    Returns:
        ``([xs, ys], rs)`` where ``xs`` and ``ys`` are lists of ``size + 1``
        coordinates (the start point first) and ``rs`` is the array of step
        lengths.
    """
    rs = powlaw_samp(x_min, alpha, size=size)
    thetas = uniform.rvs(loc=0, scale=2*np.pi, size=size)

    # Offset the cumulative displacements by the start point; without the
    # offset the path would jump from x0 back to origin-relative coordinates
    # whenever x0 is not (0, 0).
    xs = list(x0[0] + np.cumsum(rs*np.cos(thetas)))
    ys = list(x0[1] + np.cumsum(rs*np.sin(thetas)))
    xs.insert(0, x0[0])
    ys.insert(0, x0[1])

    return [xs, ys], rs

In [11]:
# tmsd analysis of a single 10000-step Levy flight from the origin, alpha = 2.5.
path, rs = levy_flight(x0=np.array([0, 0]), x_min=0.01, alpha=2.5, size=10000)
fig, taus, tmsds = hf.tmsd_analysis(path, max_tau_ratio=0.5)
fig.show()

In [12]:
# tmsd analysis of a single 10000-step Levy flight from the origin, alpha = 3.0.
path, rs = levy_flight(x0=np.array([0, 0]), x_min=0.01, alpha=3.0, size=10000)
fig, taus, tmsds = hf.tmsd_analysis(path, max_tau_ratio=0.5)
fig.show()

In [13]:
# tmsd analysis of a single 10000-step Levy flight from the origin, alpha = 3.5.
path, rs = levy_flight(x0=np.array([0, 0]), x_min=0.01, alpha=3.5, size=10000)
fig, taus, tmsds = hf.tmsd_analysis(path, max_tau_ratio=0.5)
fig.show()

In [14]:
# tmsd analysis of a single 10000-step Levy flight from the origin, alpha = 3.1.
path, rs = levy_flight(x0=np.array([0, 0]), x_min=0.01, alpha=3.1, size=10000)
fig, taus, tmsds = hf.tmsd_analysis(path, max_tau_ratio=0.5)
fig.show()

In [54]:
from scipy.signal import savgol_filter

# Noisy ramp and its Savitzky-Golay smoothing (window of 11 samples, linear fit).
x = 10*np.random.rand(100) + np.arange(100)
xf = savgol_filter(x, window_length=11, polyorder=1)

# Overlay the raw and the smoothed series against their sample index.
fig = go.Figure()
for series in (x, xf):
    fig.add_trace(go.Scatter(x=np.arange(len(series)), y=series))
fig

In [12]:
# emsds
# Build an ensemble of 1000 independent Levy flights (alpha = 2.5, 1000 steps
# each, all starting at the origin), keeping every path and its step lengths.
paths = []
rss = []
# A named loop variable is used because the index is read below, and because
# binding `_` at notebook top level shadows IPython's last-output variable.
for trial in range(1000):
    path, rs = levy_flight(np.array([0, 0]), 0.01, alpha=2.5, size=1000)
    paths.append(path)
    rss.append(rs)
    if trial % 100 == 0:
        print(trial)

0
100
200
300
400
500
600
700
800
900


In [13]:
# Run the helper's emsd analysis on the `paths` ensemble with tau=20 and show the returned figure.
fig, ts, emsds = hf.emsd_analysis(paths, tau=20)
fig.show()

In [None]:
# TODO: look at the same problems as Adam first, then at other problems.

# TODO: literature review - identify the knowledge gap in the literature and how this work will fill it.

# TODO: get theoretical convergence-condition bounds for the algorithm - do it empirically first, then the theory.

# 
