In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
import numpy as np

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm
import itertools
import importlib

from levy.__init__ import fit_levy
import adaptive_algos as aa
import helper_funcs as hf
import ObjectiveFunction as of

In [2]:
class PointDataset(Dataset):
    """Fixed set of random points drawn uniformly from a centred interval.

    Every coordinate is sampled as ``rand * bounds - bounds / 2`` and the
    points are stored in ``self.data`` as a float64 tensor of shape
    ``(size, dim)``.

    Args:
        seed: Value passed to ``np.random.seed``; the same seed gives the
            same points. Note that this reseeds NumPy's global generator.
        dim: Number of coordinates per point.
        size: Number of points to generate (``0`` gives an empty dataset).
        bounds: Full width of the sampling interval.
    """

    def __init__(self, seed, dim, size, bounds):
        np.random.seed(seed)
        # One (size, dim) draw consumes the random stream in the same order as
        # `size` successive rand(dim) calls, so the points are unchanged, while
        # avoiding the per-point Python loop and also supporting size == 0.
        points = np.random.rand(size, dim) * bounds - 0.5 * bounds
        self.data = torch.tensor(points)

    def __len__(self):
        """Return the number of stored points."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the point (a row of ``self.data``) at position ``idx``."""
        return self.data[idx]

def cyclic_loader(dataloader):
    """Yield batches from ``dataloader`` forever, restarting it when exhausted.

    Each pass starts a fresh iteration over ``dataloader``.

    Args:
        dataloader: Any re-iterable collection of batches.

    Yields:
        The items of ``dataloader``, cycling indefinitely.

    Raises:
        ValueError: If a complete pass over ``dataloader`` produces nothing.
            Without this check the generator would loop forever without ever
            yielding a value.
    """
    while True:
        produced = False
        for data in dataloader:
            produced = True
            yield data
        if not produced:
            raise ValueError("cyclic_loader received an empty dataloader")

In [3]:
# --- Single SGD run on a small 1-D problem, recording the full trajectory ---
start = [1.0]
bounds = 10
batchsize = 4
n_iterations = 100
opt_params = dict(lr=0.05)
model_params = dict(start=start, bounds=bounds)

pointdata = PointDataset(seed=1, dim=1, size=56, bounds=bounds)
dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=False)
train_cdl = cyclic_loader(dataloader)

model = of.Hodgkinson(**model_params)
opt = optim.SGD(params=model.parameters(), **opt_params)

# Both histories start with the state before any update.
losses = [model(data=pointdata.data).item()]
params = [list(model.parameters())[0].detach().clone().numpy()]

for step in range(n_iterations):
    model.train()

    # One minibatch update.
    opt.zero_grad()
    batch_loss = model(data=next(train_cdl))
    batch_loss.backward()
    opt.step()

    # Log the loss on the full dataset and the parameter after apply_period.
    losses.append(model(data=pointdata.data).item())
    current = list(model.parameters())[0].detach().clone()
    params.append(model.apply_period(current).numpy())

In [3]:
# Plotting code
def plot_prelim_results(bounds, res, model, pointdata, final_params, title=None):
    """Overlay a histogram of final parameters on the objective curve.

    The objective is evaluated with ``model.forward(data=pointdata.data, X=x)``
    on a grid covering ``[-bounds/2 - 1, bounds/2 + 1)`` and drawn in orange on
    a secondary y-axis whose tick labels are hidden. The histogram of
    ``final_params`` (120 bins, normalised to probabilities) uses the primary
    y-axis.

    Args:
        bounds: Width of the parameter domain; the grid extends one unit
            beyond it on each side.
        res: Grid spacing used to sample the objective.
        model: Object whose ``forward(data=..., X=...)`` returns a value
            convertible to ``float``.
        pointdata: Dataset whose ``data`` attribute is passed to the model.
        final_params: Values to histogram.
        title: Optional figure title; no title is set when ``None``.

    Returns:
        The assembled plotly figure.
    """
    # The grid spacing comes from the caller-supplied `res`.
    lower, upper = -bounds / 2 - 1, bounds / 2 + 1
    grid = np.arange(lower, upper, res)
    objective = [float(model.forward(data=pointdata.data, X=torch.Tensor([x]))) for x in grid]

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(
        go.Scatter(x=grid, y=objective, line=dict(color='orange')),
        secondary_y=True
    )
    hist_fig = px.histogram(final_params, opacity=0.4,
                            nbins=120, histnorm='probability', cumulative=False)
    fig.add_trace(
        hist_fig.data[0],
        secondary_y=False
    )
    fig.update_layout(
        width=800,
        height=800,
        showlegend=False,
        xaxis=dict(showline=True, mirror=True, linewidth=2, linecolor='black'),
        yaxis=dict(showline=True, mirror=True, linewidth=2, linecolor='black'),
        yaxis2=dict(showticklabels=False),
        plot_bgcolor='rgba(0,0,0,0)',
        yaxis_title='Probability',
        xaxis_title='Parameter'
    )
    if title is not None:
        fig.update_layout(title=title)

    return fig

def plot_avg_loss(total_losses):
    """Plot the per-iteration loss averaged over all runs.

    Args:
        total_losses: Sequence of per-run loss arrays that can be added
            elementwise.

    Returns:
        A plotly line figure of the mean loss against the iteration index.
    """
    run_count = len(total_losses)
    mean_curve = sum(total_losses) / run_count
    iterations = np.arange(len(mean_curve))
    return px.line(x=iterations, y=mean_curve)

In [34]:
# Shared parameters for all experiments below
seed = 22  # determines function
dim, size = 1, 1000
bounds, res = 20, 0.01

n_runs, max_iterations = 1000, 200
batchsize = 10
lr = 0.05
start = [-1.32]

dataset_params = dict(seed=seed, dim=dim, size=size, bounds=bounds)
model_params = dict(start=start, bounds=bounds)
pointdata = PointDataset(**dataset_params)
model = of.Hodgkinson(**model_params)

# Preview: objective curve together with a histogram of the sampled data points.
plot_prelim_results(bounds, res, model, pointdata, pointdata.data)

In [35]:
######################################## SGD ########################################
# Optimizer/model settings and the per-run result collectors for plain SGD.
model_params = dict(start=start, bounds=bounds)
opt_params = dict(lr=lr)
total_loss_sgd, final_params_sgd = [], []

def lr_annealer(opt, progress, lr, threshold=0.5):
    """Linearly decay the learning rate once ``progress`` reaches ``threshold``.

    Below ``threshold`` the optimizer is left untouched. From ``threshold``
    onwards the learning rate of the first parameter group is set to
    ``lr * (1 - progress)``.

    Args:
        opt: Optimizer exposing ``param_groups``; only group 0 is modified.
        progress: Fraction of the run completed so far.
        lr: Base learning rate that the decay factor is applied to.
        threshold: Progress at which annealing starts (default 0.5).
    """
    if progress >= threshold:
        opt.param_groups[0]['lr'] = lr * (1 - progress)

for run in tqdm(range(n_runs)):

    # A different torch seed per run changes the shuffle order;
    # with shuffle=False all runs give the same results.
    torch.manual_seed(seed + run)
    dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=True)
    train_cdl = cyclic_loader(dataloader)
    model = of.Hodgkinson(**model_params)
    opt = optim.SGD(params=model.parameters(), **opt_params)
    losses = []

    for step in range(max_iterations):
        opt.zero_grad()
        loss = model(data=next(train_cdl))
        loss.backward()
        opt.step()
        losses.append(loss.item())
        # Annealing runs after the update, so a new rate applies from the next step.
        lr_annealer(opt, step / max_iterations, lr)

    # Keep this run's minibatch-loss curve and its final parameter after apply_period.
    total_loss_sgd.append(np.array(losses))
    last_param = list(model.parameters())[0].detach().clone()
    final_params_sgd.append(model.apply_period(last_param).numpy())

  0%|          | 0/1000 [00:00<?, ?it/s]

In [36]:
# Histogram of the SGD final parameters over the objective curve
# (the curve is evaluated with the model left over from the last run).
fig1 = plot_prelim_results(bounds, res, model, pointdata, final_params_sgd)
fig1.show()

In [7]:
# SGD minibatch loss per iteration, averaged over all runs.
fig2 = plot_avg_loss(total_loss_sgd)
fig2.show()

In [125]:
######################################## SGD Annealed ########################################
# Figure title; records which function seed the experiment used.
title = f"SGD Annealed with Function Seed {seed}"

def lr_annealer(opt, progress, lr, threshold=0.8):
    """Linearly decay the learning rate once ``progress`` reaches ``threshold``.

    Below ``threshold`` the optimizer is left untouched. From ``threshold``
    onwards the learning rate of the first parameter group is set to
    ``lr * (1 - progress)``.

    Args:
        opt: Optimizer exposing ``param_groups``; only group 0 is modified.
        progress: Fraction of the run completed so far.
        lr: Base learning rate that the decay factor is applied to.
        threshold: Progress at which annealing starts (default 0.8).
    """
    if progress >= threshold:
        # Alternative schedule that was tried here: lr * (1 - progress**2).
        opt.param_groups[0]['lr'] = lr * (1 - progress)
opt_params = {'lr': lr}
model_params = {'start': start, 'bounds': bounds}
final_params = []

for i in tqdm(range(n_runs)):

    # A different torch seed per run changes the shuffle order;
    # with shuffle=False all runs give the same results.
    torch.manual_seed(seed+i)
    dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=True)
    train_cdl = cyclic_loader(dataloader)
    model = of.Hodgkinson(**model_params)
    opt = torch.optim.SGD(params=model.parameters(), **opt_params)

    for j in range(max_iterations):
        batch = next(train_cdl)
        opt.zero_grad()
        loss = model(data=batch)
        loss.backward()
        opt.step()
        # Uses the lr_annealer defined in this cell.
        lr_annealer(opt, j/max_iterations, lr)

    # Final parameter of this run after the model's apply_period mapping.
    final_param = list(model.parameters())[0].detach().clone()
    final_param = model.apply_period(final_param).numpy()
    final_params.append(final_param)

# Dedicated figure name: `fig2` is the SGD average-loss figure that the HTML
# export cell reads, so it must not be rebound here.
fig_sgd_annealed = plot_prelim_results(bounds, res, model, pointdata, final_params)
# The title is applied to the returned figure, so this works with the
# five-argument call used by the other cells.
fig_sgd_annealed.update_layout(title=title)
fig_sgd_annealed.show()

  0%|          | 0/1000 [00:00<?, ?it/s]

In [41]:
######################################## ADAM ########################################

# Optimizer/model settings and the per-run result collectors for Adam.
model_params = dict(start=start, bounds=bounds)
opt_params = dict(lr=lr)
total_loss_adam, final_params_adam = [], []

for run in tqdm(range(n_runs)):

    # A different torch seed per run changes the shuffle order;
    # with shuffle=False all runs give the same results.
    torch.manual_seed(seed + run)
    dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=True)
    train_cdl = cyclic_loader(dataloader)
    model = of.Hodgkinson(**model_params)
    opt = optim.Adam(params=model.parameters(), **opt_params)
    losses = []

    for step in range(max_iterations):
        opt.zero_grad()
        loss = model(data=next(train_cdl))
        loss.backward()
        opt.step()
        losses.append(loss.item())

    # Keep this run's minibatch-loss curve and its final parameter after apply_period.
    total_loss_adam.append(np.array(losses))
    last_param = list(model.parameters())[0].detach().clone()
    final_params_adam.append(model.apply_period(last_param).numpy())

  0%|          | 0/1000 [00:00<?, ?it/s]

In [None]:
# Histogram of the Adam final parameters over the objective curve
# (the curve is evaluated with the model left over from the last run).
fig3 = plot_prelim_results(bounds, res, model, pointdata, final_params_adam)
fig3.show()

In [10]:
# Adam minibatch loss per iteration, averaged over all runs.
fig4 = plot_avg_loss(total_loss_adam)
fig4.show()

## SGD TC TESTS

In [17]:
########################################## SGD_TC #######################################
# Re-import adaptive_algos so the current version of the module is used.
importlib.reload(aa)

# Constant scale: the annealer returns 1 regardless of progress.
scale_annealer = lambda progress: 1
opt_params = dict(
    lr=lr,
    height=1.0,
    width=bounds/8,
    scale_annealer=scale_annealer,
    n_epochs=max_iterations,
)
model_params = dict(start=start, bounds=bounds)
final_params_levy = []

for run in tqdm(range(n_runs)):

    # A different torch seed per run changes the shuffle order;
    # with shuffle=False all runs give the same results.
    torch.manual_seed(seed + run)
    dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=True)
    train_cdl = cyclic_loader(dataloader)
    model = of.Hodgkinson(**model_params)
    opt = aa.SGD_TC(params=model.parameters(), **opt_params)

    for step in range(max_iterations):
        opt.zero_grad()
        loss = model(data=next(train_cdl))
        loss.backward()
        opt.step()

    # Final parameter of this run after the model's apply_period mapping.
    last_param = list(model.parameters())[0].detach().clone()
    final_params_levy.append(model.apply_period(last_param).numpy())

fig5 = plot_prelim_results(bounds, res, model, pointdata, final_params_levy)
fig5.show()

  0%|          | 0/1000 [00:00<?, ?it/s]

In [18]:
# `opt` is whichever optimizer the most recently executed training cell left
# behind, so this shows the alpha record of a single run only.
px.scatter(opt.alpha_record) # SGD_TC ALPHA RECORD LAST EXAMPLE

In [39]:
##################################### SGD_TC with annealing ###############################################
# Re-import adaptive_algos so the current version of the module is used.
importlib.reload(aa)
def lr_annealer(opt, progress, lr, threshold=0.5):
    """Linearly decay the learning rate once ``progress`` reaches ``threshold``.

    Below ``threshold`` the optimizer is left untouched. From ``threshold``
    onwards the learning rate of the first parameter group is set to
    ``lr * (1 - progress)``.

    Args:
        opt: Optimizer exposing ``param_groups``; only group 0 is modified.
        progress: Fraction of the run completed so far.
        lr: Base learning rate that the decay factor is applied to.
        threshold: Progress at which annealing starts (default 0.5).
    """
    if progress >= threshold:
        opt.param_groups[0]['lr'] = lr * (1 - progress)

# Scale shrinks linearly from 1 towards 0 as progress goes from 0 to 1.
scale_annealer = lambda progress: (1 - progress)

opt_params = dict(
    lr=lr,
    height=1.0,
    width=bounds/10,
    scale_annealer=scale_annealer,
    n_epochs=max_iterations,
)
model_params = dict(start=start, bounds=bounds)
final_params_levy_ann, total_loss_levy_ann = [], []

for run in tqdm(range(n_runs)):

    # A different torch seed per run changes the shuffle order;
    # with shuffle=False all runs give the same results.
    torch.manual_seed(seed + run)
    dataloader = torch.utils.data.DataLoader(dataset=pointdata, batch_size=batchsize, shuffle=True)
    train_cdl = cyclic_loader(dataloader)
    model = of.Hodgkinson(**model_params)
    opt = aa.SGD_TC(params=model.parameters(), **opt_params)
    losses = []

    for step in range(max_iterations):
        opt.zero_grad()
        loss = model(data=next(train_cdl))
        loss.backward()
        opt.step()
        losses.append(loss.item())
        # Progress is read from the optimizer's own step counter here.
        lr_annealer(opt, opt.step_count/opt.n_epochs, lr)

    # Keep this run's minibatch-loss curve and its final parameter after apply_period.
    total_loss_levy_ann.append(np.array(losses))
    last_param = list(model.parameters())[0].detach().clone()
    final_params_levy_ann.append(model.apply_period(last_param).numpy())

  0%|          | 0/1000 [00:00<?, ?it/s]

In [40]:
# Histogram of the annealed SGD_TC final parameters over the objective curve
# (the curve is evaluated with the model left over from the last run).
fig6 = plot_prelim_results(bounds, res, model, pointdata, final_params_levy_ann)
fig6.show()

In [13]:
# Annealed SGD_TC minibatch loss per iteration, averaged over all runs.
fig7 = plot_avg_loss(total_loss_levy_ann)
fig7.show()

In [20]:
# `opt` is whichever optimizer the most recently executed training cell left
# behind, so this shows the alpha record of a single run only.
px.scatter(opt.alpha_record) # SGD_TC_ANNEALED ALPHA RECORD LAST EXAMPLE

In [9]:
# Hand three of the figures built above to the project helper together with an
# output file name (presumably it writes them into that HTML file — confirm in
# helper_funcs).
# NOTE(review): figure names are notebook globals; confirm fig1-fig3 still hold
# the intended plots before exporting.
hf.figures_to_html([fig1, fig2, fig3], 
                    'proof_of_concepts_batch12.html')


In [None]:
# Fixed alpha - demonstrate exploration
# Bigger alpha - converge

"""
- proof of concept 1d and 2d landscapes, statistics of convergence of minima
        - different batch size?
        - different lr?
- linear regression
- different alphas

- comparison with metadynamics/tempering, real problems

"""




In [None]:
# escape time analysis

# create artificial dataset of points concentrated somewhere (well - depth scales with points)
# gaussian distributed points, so we can also vary width
# first escape time measurements?