In [1]:
import importlib
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import torch
import torch.nn as nn
import torch.optim as optim
from plotly.subplots import make_subplots
from torch.utils.data import Dataset
from tqdm.notebook import tqdm

from levy.__init__ import fit_levy
import adaptive_algos as aa
import helper_funcs as hf
import ObjectiveFunction as of

In [2]:
class PointDataset(Dataset):
    """Fixed set of random points drawn uniformly from a centred box.

    Every coordinate is sampled uniformly from ``[-bounds/2, bounds/2)``.

    Args:
        seed: Passed to ``np.random.seed`` before sampling. This reseeds
            NumPy's *global* generator, exactly as the original code did.
        dim: Number of coordinates per point.
        size: Number of points to generate (``0`` yields an empty dataset).
        bounds: Side length of the sampling box; a scalar, or anything that
            broadcasts against a ``(size, dim)`` array.

    Attributes:
        data: ``torch.float64`` tensor of shape ``(size, dim)``.
    """

    def __init__(self, seed, dim, size, bounds):
        np.random.seed(seed)
        # A single (size, dim) draw consumes the generator in the same order
        # as `size` successive `rand(dim)` calls, so the points are identical
        # to the former list-of-tensors + vstack construction, without the
        # Python-level loop and without failing on size == 0.
        samples = np.random.rand(size, dim) * bounds - 0.5 * bounds
        self.data = torch.tensor(samples)

    def __len__(self):
        """Return the number of points."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the point(s) selected by ``idx`` (a row or rows of ``data``)."""
        return self.data[idx]

def cyclic_loader(dataloader):
    """Yield items from ``dataloader`` forever, restarting it when exhausted.

    A new iteration over ``dataloader`` is started on every pass instead of
    replaying cached items, so whatever the iterable does when iterated
    (e.g. reshuffling) happens again each time round.
    """
    while True:
        yield from dataloader

In [3]:
# Single-run smoke test: plain SGD on a small 1-D point set, recording the
# full-dataset loss and the parameter value after every optimisation step.
start = [1.0]
bounds = 10
pointdata = PointDataset(seed=1, dim=1, size=56, bounds=bounds)
batchsize = 4
# shuffle=False: batches arrive in a fixed order; cyclic_loader restarts the
# loader whenever it is exhausted (56 points / 4 per batch = 14 batches per pass).
dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=False)
train_cdl = cyclic_loader(dataloader)

opt_params={'lr': 0.05}
model_params={'start':start, 'bounds':bounds}
n_iterations = 100

model = of.Hodgkinson(**model_params)
opt = torch.optim.SGD(params=model.parameters(), **opt_params)
# Entry 0 of both histories is the state before any update.
losses = [model(data=pointdata.data).item()]
# NOTE(review): this initial parameter is stored raw, whereas the per-step
# values below are passed through model.apply_period first - confirm intended.
params = [list(model.parameters())[0].detach().clone().numpy()]

for i in range(n_iterations):
    model.train()
    batch = next(train_cdl)
    opt.zero_grad()
    # The gradient step uses the mini-batch loss only.
    loss = model(data=batch)
    loss.backward()
    opt.step()

    # Track progress on the whole dataset rather than on the noisy batch loss.
    full_loss = model(data=pointdata.data)
    losses.append(full_loss.item())
    param = list(model.parameters())[0].detach().clone()
    param = model.apply_period(param)
    params.append(param.numpy())

In [4]:
# Plotting code
def plot_prelim_results(bounds, res, model, pointdata, final_params):
    """Overlay a histogram of final parameters on the objective landscape.

    The landscape is drawn on the secondary y-axis (orange line) and the
    histogram of ``final_params`` on the primary y-axis.

    Args:
        bounds: Width of the parameter box; the landscape is sampled on
            ``[-bounds/2 - 1, bounds/2 + 1)``.
        res: Positive grid spacing used to sample the landscape. (Previously
            this argument was overwritten with a hard-coded 0.01 and ignored.)
        model: Object whose ``forward(data=..., X=...)`` returns a value that
            ``float()`` accepts.
        pointdata: Dataset exposing the full point tensor as ``.data``.
        final_params: Final parameter values, one per run, passed straight to
            ``px.histogram``.

    Returns:
        The assembled plotly figure.
    """
    # Sample one unit beyond the box on either side.
    grid = np.arange(-bounds/2 - 1, bounds/2 + 1, res)
    landscape = [float(model.forward(data=pointdata.data, X=torch.Tensor([x]))) for x in grid]

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(
        go.Scatter(x=grid, y=landscape, line=dict(color='orange')),
        secondary_y=True
    )
    hist_fig = px.histogram(final_params, opacity=0.4,
                            nbins=120, histnorm='probability', cumulative=False)
    fig.add_trace(
        hist_fig.data[0],
        secondary_y=False
    )
    # Axis titles are given as title=dict(text=..., font=...): the `titlefont`
    # shorthand is deprecated and no longer accepted by recent plotly releases.
    fig.update_layout(
        width=800,
        height=800,
        showlegend=False,
        xaxis=dict(showline=True, mirror=True, linewidth=2, linecolor='black',
                   tickfont=dict(size=20),
                   title=dict(text='Parameter', font=dict(size=25))),
        yaxis=dict(showline=True, mirror=True, linewidth=2, linecolor='black',
                   tickfont=dict(size=20),
                   title=dict(text='Probability', font=dict(size=25))),
        yaxis2=dict(showtickprefix='all', tickprefix=" ",
                    tickfont=dict(size=20),
                    title=dict(text='Function Value', font=dict(size=25))),
        plot_bgcolor='rgba(0,0,0,0)',
        font=dict(size=25)
    )

    return fig

def plot_landscape(bounds, res, model, pointdata):
    """Plot the objective value as a function of the (1-D) parameter.

    Args:
        bounds: Width of the parameter box; the landscape is sampled on
            ``[-bounds/2 - 1, bounds/2 + 1)``.
        res: Positive grid spacing used to sample the landscape. (Previously
            this argument was overwritten with a hard-coded 0.01 and ignored.)
        model: Object whose ``forward(data=..., X=...)`` returns a value that
            ``float()`` accepts.
        pointdata: Dataset exposing the full point tensor as ``.data``.

    Returns:
        The plotly figure containing the landscape curve.
    """
    # Sample one unit beyond the box on either side.
    grid = np.arange(-bounds/2 - 1, bounds/2 + 1, res)
    landscape = [float(model.forward(data=pointdata.data, X=torch.Tensor([x]))) for x in grid]

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=grid, y=landscape, line=dict(color='orange'))
    )
    # Axis titles are given as title=dict(text=..., font=...): the `titlefont`
    # shorthand is deprecated and no longer accepted by recent plotly releases.
    fig.update_layout(
        width=800,
        height=800,
        showlegend=False,
        xaxis=dict(showline=True, mirror=True, linewidth=2, linecolor='black',
                   tickfont=dict(size=15),
                   title=dict(text='Parameter', font=dict(size=20))),
        yaxis=dict(showline=True, mirror=True, linewidth=2, linecolor='black',
                   tickfont=dict(size=15),
                   title=dict(text='Function Value', font=dict(size=20))),
        plot_bgcolor='rgba(0,0,0,0)',
        font=dict(size=20)
    )

    return fig

def plot_avg_loss(total_losses):
    """Line plot of the loss curve averaged element-wise over all runs.

    Args:
        total_losses: Non-empty sequence of per-run loss arrays; they are
            added together with the built-in ``sum`` and divided by the
            number of runs.

    Returns:
        A plotly express line figure with the iteration index on x.
    """
    run_count = len(total_losses)
    mean_curve = sum(total_losses) / run_count
    iteration_index = np.arange(0, len(mean_curve))
    return px.line(x=iteration_index, y=mean_curve)

In [14]:
# Shared parameters
# Everything below is read by the SGD / Adam / SGD_TC experiment cells that follow.
seed = 22 # determines function
n_runs = 5000          # independent optimisation runs per optimiser
max_iterations = 200   # optimiser steps per run
dim = 1
size = 1000            # number of points in the dataset

batchsize = 32
start = [-2.84]        # initial parameter value handed to the model
lr = 1.0               # base learning rate shared by all optimisers

bounds = 20
res = 0.01             # grid spacing passed to the plotting helpers
dataset_params= {'seed': seed, 'dim':dim, 'size':size, 'bounds':bounds}
pointdata = PointDataset(**dataset_params)
model_params = {'start':start, 'bounds':bounds}
model = of.Hodgkinson(**model_params)
    
# Parameter interval the experiment cells test against to decide that a run
# has reached the target basin (their `found_global_min` flag).
basin_bounds = (4.09, 7.1)

# Last expression of the cell: the returned figure is displayed as cell output.
plot_landscape(bounds, res, model, pointdata)

In [15]:
######################################## SGD ########################################
# n_runs independent SGD runs with a linearly decaying learning rate. For each
# run we keep the per-step batch losses, the final (period-wrapped) parameter
# and the first iteration at which the parameter lay inside basin_bounds.
opt_params = {'lr': lr}
model_params = {'start':start, 'bounds':bounds}
final_params_sgd = []
total_loss_sgd = []
cutoffs_sgd = []

def lr_annealer(opt, progress, lr):
    """Set the learning rate of ``opt``'s first parameter group to ``lr * (1 - progress)``."""
    opt.param_groups[0]['lr'] = lr * (1-progress)

for i in tqdm(range(n_runs)):

    # Shuffle false gives all the same results as expected
    # (so shuffling is enabled, and torch is reseeded with a different value per run).
    torch.manual_seed(seed+i)
    dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=True)
    train_cdl = cyclic_loader(dataloader)
    model = of.Hodgkinson(**model_params)
    opt = torch.optim.SGD(params=model.parameters(), **opt_params)
    losses = []
    found_global_min = False

    for j in range(max_iterations):
        batch = next(train_cdl)
        opt.zero_grad()
        loss = model(data=batch)
        loss.backward()
        opt.step()
        losses.append(float(loss.detach().clone()))
        # The rate set here applies to the *next* step, so steps 0 and 1 both
        # run at the full `lr`.
        lr_annealer(opt, j/max_iterations, lr)
        
        # Record only the first iteration inside the basin. The raw parameter
        # is compared here (model.apply_period is not applied before the test).
        if basin_bounds[0] < list(model.parameters())[0] < basin_bounds[1] and not found_global_min:
            cutoffs_sgd.append(j)
            found_global_min = True
    
    # Sentinel for runs that never entered the basin.
    if not found_global_min:
        cutoffs_sgd.append(max_iterations)
    
    total_loss_sgd.append(np.array(losses))
    final_param = list(model.parameters())[0].detach().clone()
    final_param = model.apply_period(final_param).numpy()  
    final_params_sgd.append(final_param)

cutoffs_sgd = np.array(cutoffs_sgd)

  0%|          | 0/5000 [00:00<?, ?it/s]

In [16]:
# Histogram of the SGD runs' final parameters over the landscape. `model` is
# whichever instance the most recently executed cell left in scope.
fig1 = plot_prelim_results(bounds, res, model, pointdata, final_params_sgd)
fig1.show()

In [38]:
# Batch-loss curve averaged over all SGD runs.
fig2 = plot_avg_loss(total_loss_sgd)
fig2.show()

In [9]:
######################################## ADAM ########################################
# n_runs independent Adam runs (constant learning rate). For each run we keep
# the per-step batch losses, the final (period-wrapped) parameter and the
# first iteration at which the parameter lay inside basin_bounds.

opt_params = {'lr': lr}
model_params = {'start':start, 'bounds':bounds}
final_params_adam = []
total_loss_adam = []
cutoffs_adam = []

for i in tqdm(range(n_runs)):

    # With shuffle=False every run would give the same result, so shuffling is
    # enabled and torch is reseeded with a different value per run.
    torch.manual_seed(seed+i)
    dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=True)
    train_cdl = cyclic_loader(dataloader)
    model = of.Hodgkinson(**model_params)
    opt = torch.optim.Adam(params=model.parameters(), **opt_params)
    losses = []
    found_global_min = False

    for j in range(max_iterations):
        batch = next(train_cdl)
        opt.zero_grad()
        loss = model(data=batch)
        loss.backward()
        opt.step()
        losses.append(float(loss.detach().clone()))

        # Record only the FIRST iteration inside the basin. Without the
        # `not found_global_min` guard one entry was appended for every
        # in-basin iteration, so cutoffs_adam did not hold one value per run
        # (unlike cutoffs_sgd / cutoffs_levy_ann).
        if not found_global_min and basin_bounds[0] < list(model.parameters())[0] < basin_bounds[1]:
            cutoffs_adam.append(j)
            found_global_min = True

    # Sentinel for runs that never entered the basin.
    if not found_global_min:
        cutoffs_adam.append(max_iterations)

    total_loss_adam.append(np.array(losses))
    final_param = list(model.parameters())[0].detach().clone()
    final_param = model.apply_period(final_param).numpy()
    final_params_adam.append(final_param)

# Same post-processing as the SGD and annealed SGD_TC cells.
cutoffs_adam = np.array(cutoffs_adam)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [10]:
# Histogram of the Adam runs' final parameters over the landscape. `model` is
# whichever instance the most recently executed cell left in scope.
fig3 = plot_prelim_results(bounds, res, model, pointdata, final_params_adam)
fig3.show()

In [11]:
# Batch-loss curve averaged over all Adam runs.
fig4 = plot_avg_loss(total_loss_adam)
fig4.show()

## SGD TC TESTS

In [12]:
########################################## SGD_TC #######################################
# n_runs independent runs with the project's SGD_TC optimiser and a constant
# scale (scale_annealer always returns 1). Only the final parameters are kept.
# Reload so that edits to adaptive_algos are picked up without a kernel restart.
importlib.reload(aa)

scale_annealer = lambda progress: 1
# NOTE(review): the meaning of 'height', 'width', 'scale_annealer' and
# 'n_epochs' is defined by aa.SGD_TC, which is not visible here - confirm there.
opt_params={'lr': lr, 'height': 1.0, 'width': bounds/8,
            'scale_annealer': scale_annealer, 'n_epochs': max_iterations}
model_params = {'start':start, 'bounds':bounds}
final_params_levy = []

for i in tqdm(range(n_runs)):

    # Shuffle false gives all the same results as expected
    # (so shuffling is enabled, and torch is reseeded with a different value per run).
    torch.manual_seed(seed+i)
    dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=True)
    train_cdl = cyclic_loader(dataloader)
    model = of.Hodgkinson(**model_params)
    opt = aa.SGD_TC(params=model.parameters(), **opt_params)

    for j in range(max_iterations):
        batch = next(train_cdl)
        opt.zero_grad()
        loss = model(data=batch)
        loss.backward()
        opt.step()

    final_param = list(model.parameters())[0].detach().clone()
    final_param = model.apply_period(final_param).numpy()  
    final_params_levy.append(final_param)

# `model` here is the instance from the last run of the loop above.
fig5 = plot_prelim_results(bounds, res, model, pointdata, final_params_levy)
fig5.show()

  0%|          | 0/1000 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Reads `alpha_record` from whichever optimiser is currently bound to `opt`,
# i.e. the one left behind by the most recently executed training cell.
px.scatter(opt.alpha_record) # SGD_TC ALPHA RECORD LAST EXAMPLE

In [17]:
##################################### SGD_TC with annealing ###############################################
# n_runs independent runs with aa.SGD_TC where both the learning rate and the
# scale passed as `scale_annealer` decay linearly. For each run we keep the
# per-step batch losses, the final (period-wrapped) parameter and the first
# iteration at which the parameter lay inside basin_bounds.
# Reload so that edits to adaptive_algos are picked up without a kernel restart.
importlib.reload(aa)
# NOTE(review): redefines lr_annealer with the same body as the earlier SGD cell.
def lr_annealer(opt, progress, lr):
    """Set the learning rate of ``opt``'s first parameter group to ``lr * (1 - progress)``."""
    opt.param_groups[0]['lr'] = lr * (1-progress)

scale_annealer = lambda progress: (1 - progress)

# Earlier configuration, kept for reference (wider 'width', no 'adjust_dir'):
# opt_params={'lr': lr, 'height': 1.0, 'width': bounds/10,
#             'scale_annealer': scale_annealer, 'n_epochs': max_iterations}
# NOTE(review): the meaning of 'height', 'width', 'scale_annealer', 'n_epochs'
# and 'adjust_dir' is defined by aa.SGD_TC, which is not visible here.
opt_params={'lr': lr, 'height': 1.0, 'width': bounds/20,
            'scale_annealer': scale_annealer, 'n_epochs': max_iterations, 'adjust_dir':True}
model_params = {'start':start, 'bounds':bounds}
final_params_levy_ann = []
total_loss_levy_ann = []
cutoffs_levy_ann = []

for i in tqdm(range(n_runs)):

    # Shuffle false gives all the same results as expected
    # (so shuffling is enabled, and torch is reseeded with a different value per run).
    torch.manual_seed(seed+i)
    dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=True)
    train_cdl = cyclic_loader(dataloader)
    model = of.Hodgkinson(**model_params)
    opt = aa.SGD_TC(params=model.parameters(), **opt_params)
    losses = []
    found_global_min = False
    # NOTE(review): old_batch is only consumed by the commented-out
    # opt.step(batch=old_batch, model=model) call below; the active code never reads it.
    old_batch = None

    for j in range(max_iterations):
        batch = next(train_cdl)
        if j == 0:
            old_batch = batch
        opt.zero_grad()
        loss = model(data=batch)
        loss.backward()
        opt.step()
        #opt.step(batch=old_batch, model=model)
        losses.append(float(loss.detach().clone()))
        # Progress is taken from the optimiser's own attributes here, not from j.
        # Presumably SGD_TC maintains step_count itself - TODO confirm in adaptive_algos.
        lr_annealer(opt, opt.step_count/opt.n_epochs, lr)
        old_batch = batch
        
        # Record only the first iteration inside the basin. The raw parameter
        # is compared here (model.apply_period is not applied before the test).
        if basin_bounds[0] < list(model.parameters())[0] < basin_bounds[1] and not found_global_min:
            cutoffs_levy_ann.append(j)
            found_global_min = True
    
    # Sentinel for runs that never entered the basin.
    if not found_global_min:
        cutoffs_levy_ann.append(max_iterations)
    
    total_loss_levy_ann.append(np.array(losses))
    final_param = list(model.parameters())[0].detach().clone()
    final_param = model.apply_period(final_param).numpy()  
    final_params_levy_ann.append(final_param)

cutoffs_levy_ann = np.array(cutoffs_levy_ann)

  0%|          | 0/5000 [00:00<?, ?it/s]

In [18]:
# Histogram of the annealed SGD_TC runs' final parameters over the landscape.
# `model` is whichever instance the most recently executed cell left in scope.
fig6 = plot_prelim_results(bounds, res, model, pointdata, final_params_levy_ann)
fig6.show()

In [41]:
# Batch-loss curve averaged over all annealed SGD_TC runs.
fig7 = plot_avg_loss(total_loss_levy_ann)
fig7.show()

In [13]:
# Reads `alpha_record` from whichever optimiser is currently bound to `opt`,
# i.e. the one left behind by the most recently executed training cell.
px.scatter(opt.alpha_record) # SGD_TC_ANNEALED ALPHA RECORD LAST EXAMPLE

In [43]:
# Hands the listed figures and an output file name to hf.figures_to_html.
# NOTE(review): fig3 is only defined once the Adam results cell has been run;
# the NameError recorded below this cell shows it had not been run in that
# kernel session. Run the Adam cells first, or confirm which figures belong here.
hf.figures_to_html([fig1, fig2, fig3], 
                    'proof_of_concepts_batch12.html')


NameError: name 'fig3' is not defined

In [None]:
# Fixed alpha - demonstrate exploration
# Bigger alpha - converge

"""
- proof of concept on 1d and 2d landscapes, statistics of convergence to minima
        - different batch size?
        - different lr?
- linear regression
- different alphas

- comparison with metadynamics/tempering, real problems

"""


In [None]:
# escape time analysis

# create artificial dataset of points concentrated somewhere (well - depth scales with points)
# gaussian distributed points, so we can also vary width
# first escape time measurements?

"""
The cutoffs collected above are now used for this instead --> convergence time analysis
"""

In [11]:
# Convergence-time comparison: first iteration at which each run entered the
# basin, for SGD versus annealed SGD_TC. (pandas is imported in the notebook's
# top import cell.)
cutoffs = pd.DataFrame({'SGD': cutoffs_sgd, 'Levy SGD': cutoffs_levy_ann})
# Runs that never entered the basin were recorded as `max_iterations`; the
# boolean mask turns those entries into NaN so they drop out of the statistics
# and the histogram below.
cutoffs = cutoffs[cutoffs < max_iterations]
print(cutoffs.mean(), cutoffs.median())
fig = px.histogram(cutoffs, barmode="overlay", cumulative=True)
# Axis titles are given as title=dict(text=..., font=...): the `titlefont`
# shorthand is deprecated and no longer accepted by recent plotly releases.
fig.update_layout(
    height=800,
    width=1600,
    legend=dict(x=0,y=1, font=dict(size=20), title_text=''),
    xaxis=dict(tickfont=dict(size=25),
               title=dict(text='Iteration Number to Reach Widest Basin', font=dict(size=25))),
    yaxis=dict(tickfont=dict(size=25),
               title=dict(text='Count', font=dict(size=25)))
)
fig.show()


SGD         88.929412
Levy SGD    75.000000
dtype: float64 SGD         82.0
Levy SGD    66.0
dtype: float64


In [12]:
# Number of runs per optimiser that reached the basin, i.e. the entries of
# `cutoffs` that are not NaN. Counting them directly avoids the hard-coded
# 5000, which silently went wrong whenever n_runs was changed.
cutoffs.notna().sum()

SGD         425
Levy SGD    793
dtype: int64