In [1]:
# Use SGD_TC to solve linear regression problem
import ObjectiveFunction as of
import helper_funcs as hf
import numpy as np
import torch
from torch.utils.data import Dataset
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from tqdm.notebook import tqdm
import itertools
import importlib
import copy
from ADAM_TC import ADAM_TC
import adaptive_algos as aa

# Seed handed to LinearRegressionDataset below, which uses it to seed both
# NumPy's and torch's global random number generators.
seed = 1

In [2]:
class LinearRegressionDataset(Dataset):
    """
    Synthetic data on a noisy line: y = mtrue * x + btrue + sigma * noise.

    Inputs are drawn uniformly from [-bounds/2, bounds/2) with NumPy, one
    `dim`-sized row per sample; the noise is one standard-normal draw per
    sample from torch, broadcast across the `dim` columns. Both global RNGs
    are seeded with `seed`, so a given seed always produces the same data.

    Attributes:
        Xdata: tensor of inputs, one row per sample.
        ydata: tensor of noisy targets, same shape as Xdata.
        data:  the tuple (Xdata, ydata).
    """
    def __init__(self, seed, dim, size, bounds, mtrue, btrue, sigma):
        # Seeding the *global* generators is a side effect visible to the caller.
        np.random.seed(seed)
        torch.manual_seed(seed)

        # One uniform draw of length `dim` per sample, recentred around zero.
        samples = []
        for _ in range(size):
            uniform_draw = np.random.rand(dim)
            samples.append(torch.tensor(uniform_draw*bounds - 0.5*bounds))
        self.Xdata = torch.vstack(samples)

        # Noise-free line first, then the noise column is added in place.
        n_rows = len(self.Xdata)
        self.ydata = mtrue * self.Xdata + btrue
        self.ydata += sigma*torch.randn(size=(n_rows, 1))
        self.data = (self.Xdata, self.ydata)

    def __len__(self):
        """Number of samples (rows of Xdata)."""
        return self.Xdata.shape[0]

    def __getitem__(self, idx):
        """Return the (input, target) pair stored at `idx`."""
        return (self.Xdata[idx], self.ydata[idx])

    def shuffle_data(self, seed):
        """
        Reorder the samples in place with a permutation determined by `seed`.

        Re-seeds torch's global generator; inputs and targets are permuted
        together so each pair stays aligned.
        """
        torch.manual_seed(seed)
        order = torch.randperm(len(self))
        self.Xdata, self.ydata = self.Xdata[order], self.ydata[order]
        self.data = (self.Xdata, self.ydata)

def cyclic_loader(dataloader):
    """
    Yield items from `dataloader` forever, restarting it whenever it runs out.

    The iterable is re-iterated on every pass (nothing is cached), so a loader
    that produces a different order each time it is iterated keeps doing so.
    """
    while True:
        yield from dataloader

In [7]:
# Ground-truth line y = mtrue * x + btrue with noise scale sigma; inputs span
# a window of total width `databounds` centred on zero.
databounds = 20
mtrue = 3.0
btrue = 4.0
sigma = 3.0
dataset = LinearRegressionDataset(
    seed=seed,
    dim=1,
    size=1000,
    bounds=databounds,
    mtrue=mtrue,
    btrue=btrue,
    sigma=sigma,
)

In [8]:
# Visual sanity check of the synthetic data: inputs on x, noisy targets on y.
px.scatter(x=dataset.Xdata.flatten(), y=dataset.ydata.flatten())

In [9]:
# Baseline run: plain mini-batch SGD (torch.optim.SGD) on the MSE loss.
# The globals left behind (losses, params, model, opt, metric, filename) are
# what the plotting cell reads.

# Re-import the project modules so edits to them are picked up without a kernel restart.
importlib.reload(of)
importlib.reload(aa)
filename = 'SGD_LinearRegression_new.html'  # passed to hf.figures_to_html by the plotting cell

batchsize = 4
# shuffle=False: batches come in the dataset's stored order; cyclic_loader
# restarts the loader each time it is exhausted.
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batchsize, shuffle=False)
train_cdl = cyclic_loader(dataloader)

max_iterations = 500  # number of optimiser steps (one mini-batch each), not passes over the data
start = [5,-6]  # handed to of.LinearRegression; presumably the initial (slope, intercept) - TODO confirm
opt_params = {'lr': 0.01}
metric = torch.nn.MSELoss()

model = of.LinearRegression(start=start, bounds=databounds) # bounds superfluous here
opt = torch.optim.SGD(params=model.parameters(), **opt_params)

# losses: one mini-batch loss per step.
# params: snapshot of the model's first parameter tensor, starting with the
# initial value, then one snapshot after every step.
losses, params = [], []
params.append(list(model.parameters())[0].detach().clone().numpy())
pbar = tqdm(range(max_iterations))

for i in pbar:
    model.train()
    Xdata_batch, ydata_batch = next(train_cdl)
    opt.zero_grad()
    ypred = model(Xdata_batch)
    loss = metric(ypred, ydata_batch)    
    loss.backward()
    opt.step()

    losses.append(float(loss))
    params.append(list(model.parameters())[0].detach().clone().numpy())
    # NOTE: `i` counts optimiser steps; the label says "Epoch".
    pbar.set_description(f'Epoch {i}: {loss}')

# Transpose the history: from one snapshot per step to one list per entry of
# the parameter tensor (iterating along its first axis).
params = [list(_) for _ in list(zip(*params))]

  0%|          | 0/500 [00:00<?, ?it/s]

In [41]:
# Loss recorded at each optimiser step of the most recent training run.
px.scatter(losses)

In [46]:
# Full-batch run with the project's aa.Metadynamics optimiser: every step
# evaluates the loss on the entire dataset (no DataLoader is used here).
# The globals left behind (losses, params, model, opt, metric, filename) are
# what the plotting cell reads.

# Re-import the project modules so edits to them are picked up without a kernel restart.
importlib.reload(aa)
importlib.reload(of)
filename = 'MD_LinearRegression_new.html'  # passed to hf.figures_to_html by the plotting cell

# MD Params
# NOTE(review): the meaning of height / width / n_epochs / scale_annealer is
# defined in adaptive_algos, which is not visible here - confirm there.
lr = 0.05
height = 1.0    
width = 1
max_iterations = 2000
# Quadratic decay from 1 at progress=0 to 0 at progress=1; presumably
# `progress` is the fraction of n_epochs completed - TODO confirm.
scale_annealer = lambda progress: (1 - progress)**2
opt_params={'lr': lr, 'height': height, 'width': width, 'n_epochs': max_iterations, 
            'scale_annealer': scale_annealer}

start = [5,-6]  # handed to of.LinearRegression; presumably the initial (slope, intercept) - TODO confirm
metric = torch.nn.MSELoss()

model = of.LinearRegression(start=start, bounds=databounds) # bounds superfluous here
# Unlike torch.optim.SGD, this optimiser is also given the model itself via `func`.
opt = aa.Metadynamics(params=model.parameters(), func=model, **opt_params)

# losses: one full-dataset loss per step.
# params: snapshot of the model's first parameter tensor, starting with the
# initial value, then one snapshot after every step.
losses, params = [], []
params.append(list(model.parameters())[0].detach().clone().numpy())
pbar = tqdm(range(max_iterations))

for i in pbar:
    model.train()
    opt.zero_grad()
    Xdata = dataset.Xdata
    ydata = dataset.ydata
    ypred = model(Xdata)
    loss = metric(ypred, ydata)    
    loss.backward()
    opt.step()

    losses.append(float(loss))
    params.append(list(model.parameters())[0].detach().clone().numpy())
    pbar.set_description(f'Epoch {i}: {loss}')

# Transpose the history: from one snapshot per step to one list per entry of
# the parameter tensor (iterating along its first axis).
params = [list(_) for _ in list(zip(*params))]

  0%|          | 0/2000 [00:00<?, ?it/s]

In [11]:
# Mini-batch run with the project's aa.SGD_TC optimiser, using the same batch
# size, learning rate and step count as the plain-SGD cell.
# The globals left behind (losses, params, model, opt, metric, filename) are
# what the plotting cell reads.

# Re-import the project modules so edits to them are picked up without a kernel restart.
importlib.reload(aa)
importlib.reload(of)
filename = 'SGD_TC_LinearRegression_new.html'  # passed to hf.figures_to_html by the plotting cell

batchsize = 4
# shuffle=False: batches come in the dataset's stored order; cyclic_loader
# restarts the loader each time it is exhausted.
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batchsize, shuffle=False)
train_cdl = cyclic_loader(dataloader)

# SGD_TC Params
# NOTE(review): the meaning of height / width / n_epochs / scale_annealer is
# defined in adaptive_algos, which is not visible here - confirm there.
lr = 0.01
height = 1.0
width = 1
max_iterations = 500  # number of optimiser steps (one mini-batch each)
# Quadratic decay from 1 at progress=0 to 0 at progress=1; presumably
# `progress` is the fraction of n_epochs completed - TODO confirm.
scale_annealer = lambda progress: (1 - progress)**2
start = [5.0,-6.0]  # handed to of.LinearRegression; presumably the initial (slope, intercept) - TODO confirm
opt_params={'lr': lr, 'height': height, 'width': width, 'n_epochs': max_iterations, 
            'scale_annealer': scale_annealer}
metric = torch.nn.MSELoss()

model = of.LinearRegression(start=start, bounds=databounds)
# Unlike torch.optim.SGD, this optimiser is also given the model itself via `func`.
opt = aa.SGD_TC(params=model.parameters(), func=model, **opt_params)

# losses: one mini-batch loss per step.
# params: snapshot of the model's first parameter tensor, starting with the
# initial value, then one snapshot after every step.
losses, params = [], []
params.append(list(model.parameters())[0].detach().clone().numpy())

pbar = tqdm(range(max_iterations))
for i in pbar:
    model.train()
    Xdata_batch, ydata_batch = next(train_cdl)
    opt.zero_grad()
    ypred = model(Xdata_batch)
    loss = metric(ypred, ydata_batch)    
    loss.backward()
    opt.step()

    losses.append(float(loss))
    params.append(list(model.parameters())[0].detach().clone().numpy())
    # NOTE: `i` counts optimiser steps; the label says "Epoch".
    pbar.set_description(f'Epoch {i}: {loss}')

# Transpose the history: from one snapshot per step to one list per entry of
# the parameter tensor (iterating along its first axis).
params = [list(_) for _ in list(zip(*params))]

  0%|          | 0/500 [00:00<?, ?it/s]

In [12]:
def calc_contours(Z, divisions):
    """
    Build the `contours` setting for a Plotly contour trace.

    The levels run from the smallest to the largest value in `Z`, split into
    `divisions` equal steps.

    Returns:
        dict with keys 'start', 'end' and 'size'.
    """
    lowest = np.min(Z)
    highest = np.max(Z)
    step = (highest - lowest)/divisions
    return {'start': lowest, 'end': highest, 'size': step}

# Build the report for the most recent training run and write it to `filename`:
#   fig1 - loss curve (left) and the optimiser's path over the loss landscape (right)
#   fig2 - density plot of the visited parameters (hf.create_density_plot)
#   fig3 - data with the fitted line
#   fig4 - opt.alpha_record, only when the optimiser exposes it
# Reads the globals left by the training cell: model, opt, metric, losses, params, filename.
fig1 = make_subplots(rows=1,cols=2)
bounds = 20
xlim, ylim, res = (-bounds, bounds), (-bounds, bounds), 500

# Evaluate the full-dataset loss on a res x res grid of candidate (m, b) pairs.
# itertools.product(m, b) varies b fastest, so grid[k] = (m[k // len(b)], b[k % len(b)]).
m = np.linspace(xlim[0], xlim[1], res)
b = np.linspace(ylim[0], ylim[1], res)
grid = [np.array(_) for _ in itertools.product(m,b)]
Z = [model.forward(dataset.Xdata, w=torch.tensor(_)) for _ in grid]
Z = [metric(ypred, dataset.ydata) for ypred in Z]
# loss_grid[i_m, i_b] is laid out in the same order as `grid`, so its flat
# argmin indexes `grid` directly. (Previously the argmin was taken on the
# transposed array, which only pointed at the right entry because the m and b
# axes happened to be identical, and the plot compensated by swapping x and y.)
loss_grid = np.array(Z).reshape(len(m), len(b))
gminimum_est = grid[np.argmin(loss_grid)]  # (m, b) of the lowest loss on the grid
# Plotly's Contour expects z[row = y index][col = x index], hence the transpose.
Z = loss_grid.T

fig1.add_trace(go.Scatter(x=np.arange(len(losses)), y=losses), row=1, col=1)
fig1.add_trace(
    go.Contour(x=m, y=b, z=Z, contours=calc_contours(Z, 50)),
    row=1, col=2,
)
# Optimiser trajectory; hover text shows (loss, step index).
fig1.add_trace(
    go.Scatter(x=params[0], 
               y=params[1],
               text=list(zip(losses, list(np.arange(0, len(losses)))))),
    row=1, col=2
)
# Gold star: the parameters the data was generated with.
fig1.add_trace(
    go.Scatter(x=[mtrue], y=[btrue], mode='markers',
               marker=dict(symbol='star', opacity=0.5, size=20, color='gold')),
    row=1, col=2
)
# Black star: the grid point with the lowest loss.
fig1.add_trace(
    go.Scatter(x=[gminimum_est[0]], y=[gminimum_est[1]], mode='markers',
               marker=dict(symbol='star', opacity=0.8, size=20, color='black')),
    row=1, col=2
)
fig1.update_layout(showlegend=False)
# Optimisers that expose a `record` attribute get it as the title; others get a generic label.
title = 'ADAM/SGD' if not hasattr(opt, 'record') else str(opt.record)
title = title + str(list(model.parameters()))

fig1.update_layout(
    autosize=False,
    width=1600,
    height=800,
    title=title
)
# Flatten everything to 1-D for the data-vs-fit plot.
X_data = dataset.Xdata.reshape(1,-1).squeeze()
y_true = dataset.ydata.reshape(1,-1).squeeze()
y_pred = model(dataset.Xdata).detach().reshape(1,-1).squeeze()

fig2 = hf.create_density_plot(params)
fig3 = go.Figure()
fig3.add_trace(
    go.Scatter(x=X_data, y=y_true, mode='markers')
)
fig3.add_trace(
    go.Scatter(x=X_data, y=y_pred)
)
fig3.update_layout(showlegend=False)
if hasattr(opt, 'alpha_record'):
    fig4 = px.scatter(opt.alpha_record)
    figs = [fig1, fig2, fig4, fig3]
else:
    figs = [fig1, fig2, fig3]

hf.figures_to_html(figs, filename)

In [None]:
"""
Main questions
- levy noise - power law vs levy-stable distribution
    - do we want levy flight or specifically this brand of levy noise
    https://link.springer.com/referenceworkentry/10.1007/978-0-387-30440-3_310
    https://www.pnas.org/doi/full/10.1073/pnas.2001548117
- what problems to tackle next:
- non-convex problem next (e.g. quadratic polynomial fitting)
- apply to LSTM deep learning network
- compare with metadynamics + Parisi (tempering/simulated annealing)
- collect convergence statistics
"""

In [8]:
# Convergence statistics (convex optimisation)
# Goal: find the first iteration at which the loss drops below a given bound.
# Re-import the project modules so edits to them are picked up without a kernel restart.
importlib.reload(of)
importlib.reload(aa)

<module 'adaptive_algos' from 'c:\\Users\\karls\\Documents\\GitHub\\honours\\adaptive_algos.py'>

In [16]:
def train_model_linear_regression(dataset, experiment_num, batch_size, max_iterations, start, 
                                  opt_algo, opt_params, store=True, convergence_test=False):
    """
    Train a linear-regression model on a reshuffled copy of `dataset`.

    Args:
        dataset: a LinearRegressionDataset; it is deep-copied, never modified.
        experiment_num: seed for shuffling the copy, so each experiment sees
            the mini-batches in a different order.
        batch_size: mini-batch size for the DataLoader.
        max_iterations: maximum number of optimiser steps (one mini-batch each).
        start: `start` argument for of.LinearRegression.
        opt_algo: optimiser class, called as opt_algo(params=..., **opt_params).
        opt_params: keyword arguments for the optimiser.
        store: in training mode, whether to record the loss/parameter history.
        convergence_test: False (or any falsy value, including 0.0) selects
            training mode. A truthy number selects convergence mode: stop at
            the first step whose mini-batch loss is below
            convergence_test * (initial full-dataset loss).

    Returns:
        Convergence mode: the 0-based index of the first step below the
            threshold, or max_iterations*2 as a "did not converge" sentinel.
        Training mode, store=True: (losses, params, model, opt), where losses
            are floats (initial full-dataset loss first) and params holds one
            list per entry of the model's first parameter tensor.
        Training mode, store=False: (model, opt).

    NOTE(review): the model is built with the notebook-global `databounds`.
    NOTE(review): the standalone cells construct aa.SGD_TC / aa.Metadynamics
    with func=model; no `func` is passed here unless it is in opt_params -
    confirm that is intended.
    """
    # Shuffle a private copy so the caller's dataset keeps its order.
    new_dataset = copy.deepcopy(dataset)
    new_dataset.shuffle_data(experiment_num)
    # Fix: train on the shuffled copy with the `batch_size` argument. The loader
    # used to be built from the unshuffled `dataset` and the global `batchsize`,
    # so neither `experiment_num` nor `batch_size` had any effect on training.
    dataloader = torch.utils.data.DataLoader(dataset=new_dataset, batch_size=batch_size, shuffle=False)
    
    train_cdl = cyclic_loader(dataloader)
    metric = torch.nn.MSELoss()
    model = of.LinearRegression(start=start, bounds=databounds) # bounds superfluous here
    opt = opt_algo(params=model.parameters(), **opt_params)
    # Stored as a plain float so the history is homogeneous and does not keep
    # an autograd graph alive.
    initial_loss = float(metric(model(new_dataset.Xdata), new_dataset.ydata))
    convergence_threshold = convergence_test * initial_loss

    if store and not convergence_test:
        losses, params = [], []
        losses.append(initial_loss)
        params.append(list(model.parameters())[0].detach().clone().numpy())
    
    # No progress bar in convergence mode: the caller runs many experiments
    # and shows its own bar.
    if convergence_test:
        pbar = range(max_iterations)
    else:
        pbar = tqdm(range(max_iterations))

    for i in pbar:
        model.train()
        Xdata_batch, ydata_batch = next(train_cdl)
        opt.zero_grad()
        ypred = model(Xdata_batch)
        loss = metric(ypred, ydata_batch)
        loss.backward()
        opt.step()

        if convergence_test:
            # Compares the current mini-batch loss against a threshold derived
            # from the full-dataset initial loss.
            if loss < convergence_threshold:
                return i
        else:
            pbar.set_description(f'Epoch {i}: {loss}')
            if store:
                losses.append(float(loss))
                params.append(list(model.parameters())[0].detach().clone().numpy())
    
    if convergence_test:
        # Sentinel well outside the valid range [0, max_iterations).
        return max_iterations*2
    if store:
        # Transpose: one list per entry of the parameter tensor.
        params = [list(_) for _ in list(zip(*params))]
        return losses, params, model, opt
    else:
        return model, opt

In [17]:
# Shared settings for the convergence experiments below.
n_experiments = 1000     # number of independent runs; the run index is also the shuffle seed
convergence_test = 0.05  # a run stops once the batch loss is below this fraction of the initial loss
max_iterations = 500     # step budget per run
start = [5.0, -6.0]      # `start` argument for of.LinearRegression

In [18]:
# SGD Params
batch_size = 4
lr = 0.01
opt_params = {'lr': lr}

# One convergence run per experiment index; collect the step at which each
# run first dropped below the threshold.
cutoff_iterations_sgd = []
experiments = tqdm(range(n_experiments))

for experiment in experiments:
    i = train_model_linear_regression(
        dataset=dataset,
        experiment_num=experiment,
        batch_size=batch_size,
        max_iterations=max_iterations,
        start=start,
        opt_algo=torch.optim.SGD,
        opt_params=opt_params,
        store=False,
        convergence_test=convergence_test,
    )
    cutoff_iterations_sgd.append(i)
    experiments.set_description(f'Cutoff Iteration {i}')

  0%|          | 0/1000 [00:00<?, ?it/s]

In [19]:
# SGD_TC Params
batch_size = 4
lr = 0.01
height = 1.0
width = 1
max_iterations = 500
scale_annealer = lambda progress: (1 - progress)**2
opt_params = {
    'lr': lr,
    'height': height,
    'width': width,
    'n_epochs': max_iterations,
    'scale_annealer': scale_annealer,
}

# One convergence run per experiment index; collect the step at which each
# run first dropped below the threshold.
cutoff_iterations_sgdtc = []
experiments = tqdm(range(n_experiments))

for experiment in experiments:
    i = train_model_linear_regression(
        dataset=dataset,
        experiment_num=experiment,
        batch_size=batch_size,
        max_iterations=max_iterations,
        start=start,
        opt_algo=aa.SGD_TC,
        opt_params=opt_params,
        store=False,
        convergence_test=convergence_test,
    )
    cutoff_iterations_sgdtc.append(i)
    experiments.set_description(f'Cutoff Iteration {i}')

  0%|          | 0/1000 [00:00<?, ?it/s]

In [20]:
# Distribution of the cutoff iteration over all plain-SGD runs. Runs that never
# met the threshold were recorded with the sentinel max_iterations*2.
px.histogram(cutoff_iterations_sgd, nbins=100)

In [21]:
# Distribution of the cutoff iteration over all SGD_TC runs. Runs that never
# met the threshold were recorded with the sentinel max_iterations*2.
px.histogram(cutoff_iterations_sgdtc, nbins=100)