In [1]:
# Use ADAM_TC to solve linear regression problem
%load_ext autoreload
%autoreload 2
import ObjectiveFunction as of
import helper_funcs as hf
import numpy as np
import torch
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import itertools
from ADAM_TC import ADAM_TC

In [7]:
def linear_data_wnoise(mtrue, btrue, sigma, n):

    Xdata_r = torch.arange(-n//2, n//2).float()
    ydata_r = mtrue * Xdata_r + btrue + sigma*torch.randn(size=(len(Xdata_r),))
    Xdata = Xdata_r[:,None]
    ydata = ydata_r[:,None]

    return Xdata, ydata, Xdata_r, ydata_r

mtrue, btrue, sigma = 3.0, 4.0, 3.0
Xdata, ydata, Xdata_r, ydata_r = linear_data_wnoise(mtrue, btrue, sigma, 10)
px.scatter(x=Xdata_r, y=ydata_r)


In [48]:
filename = 'ADAM_TC_LinearRegression.html'
n_epochs = 600
start, bounds = torch.tensor([5.0,-6.0], requires_grad=True), 20
opt_params = {'lr': 0.001, 'height': 1, 'width': bounds/10, 'n_epochs': n_epochs}
metric = torch.nn.MSELoss()

m = of.LinearRegression(start=start, bounds=bounds, Xdata=Xdata)
opt = ADAM_TC(params=m.parameters(), func=m, **opt_params)

losses, params = [], []
params.append(list(m.parameters())[0].detach().clone().numpy())

for i in range(n_epochs):
    
    opt.zero_grad()
    ypred = m()
    loss = metric(ypred, ydata)
    print(f'Epoch {i}: {loss}')
    
    loss.backward()
    opt.step()

    losses.append(float(loss))
    params.append(list(m.parameters())[0].detach().clone().numpy())
    
    if i % (n_epochs//10) == 0: print(i)

params = [list(_) for _ in list(zip(*params))]


Epoch 0: 212.0286102294922
ADAM ratio: tensor(28.5571) 0
Epoch 1: 151.8529510498047
ADAM ratio: tensor(0.0494) Epoch 2: 586.8972778320312
ADAM ratio: tensor(0.0138) Epoch 3: 126.48628997802734
ADAM ratio: tensor(0.1199) Epoch 4: 284.658203125
ADAM ratio: tensor(0.0420) Epoch 5: 358.282958984375
ADAM ratio: tensor(0.0128) Epoch 6: 373.6379699707031
ADAM ratio: tensor(0.0105) Epoch 7: 172.78030395507812
ADAM ratio: tensor(0.0843) Epoch 8: 168.89260864257812
ADAM ratio: tensor(0.1224) Epoch 9: 702.145751953125
ADAM ratio: tensor(0.0033) Epoch 10: 853.93359375
ADAM ratio: tensor(0.0051) Epoch 11: 845.3489990234375
ADAM ratio: tensor(0.0005) Epoch 12: 833.4680786132812
ADAM ratio: tensor(0.0023) Epoch 13: 795.8807373046875
ADAM ratio: tensor(0.0159) Epoch 14: 382.6225280761719
ADAM ratio: tensor(0.0108) Epoch 15: 386.0341491699219
ADAM ratio: tensor(0.0106) Epoch 16: 479.7774963378906
ADAM ratio: tensor(0.0053) Epoch 17: 390.78204345703125
ADAM ratio: tensor(0.0610) Epoch 18: 32.04328155517

In [37]:
filename = 'ADAM_LinearRegression.html'
n_epochs = 600
start, bounds = torch.tensor([5.0,-6.0], requires_grad=True), 20
opt_params = {'lr': 1}
metric = torch.nn.MSELoss()

m = of.LinearRegression(start=start, bounds=bounds, Xdata=Xdata)
opt = torch.optim.Adam(params=m.parameters(), **opt_params)

losses, params = [], []
params.append(list(m.parameters())[0].detach().clone().numpy())

for i in range(n_epochs):
    
    opt.zero_grad()
    ypred = m()
    loss = metric(ypred, ydata)
    print(f'Epoch {i}: {loss}')
    
    loss.backward()
    opt.step()

    losses.append(float(loss))
    params.append(list(m.parameters())[0].detach().clone().numpy())
    
    if i % (n_epochs//10) == 0: print(i)

params = [list(_) for _ in list(zip(*params))]


Epoch 0: 212.0286102294922
0
Epoch 1: 150.38243103027344
Epoch 2: 110.27364349365234
Epoch 3: 89.3809814453125
Epoch 4: 81.21058654785156
Epoch 5: 76.57160186767578
Epoch 6: 69.29267120361328
Epoch 7: 58.40891647338867
Epoch 8: 45.770809173583984
Epoch 9: 33.78730010986328
Epoch 10: 24.399633407592773
Epoch 11: 18.67844009399414
Epoch 12: 16.67998504638672
Epoch 13: 17.530141830444336
Epoch 14: 19.799087524414062
Epoch 15: 22.079830169677734
Epoch 16: 23.48187255859375
Epoch 17: 23.796977996826172
Epoch 18: 23.34785270690918
Epoch 19: 22.691585540771484
Epoch 20: 22.331806182861328
Epoch 21: 22.520763397216797
Epoch 22: 23.186594009399414
Epoch 23: 23.99410057067871
Epoch 24: 24.510087966918945
Epoch 25: 24.398942947387695
Epoch 26: 23.558603286743164
Epoch 27: 22.143091201782227
Epoch 28: 20.479232788085938
Epoch 29: 18.9288330078125
Epoch 30: 17.75737762451172
Epoch 31: 17.0570011138916
Epoch 32: 16.746591567993164
Epoch 33: 16.640710830688477
Epoch 34: 16.55031967163086
Epoch 35: 16

In [49]:
def calc_contours(Z, divisions):
    m, M = np.min(Z), np.max(Z)
    size = (M - m)/divisions
    contours = dict(start=m, end=M, size=size)
    return contours

fig1 = make_subplots(rows=1,cols=2)
xlim, ylim, res = (-bounds, bounds), (-bounds, bounds), 0.1

X = np.arange(xlim[0], xlim[1], res)
Y = np.arange(ylim[0], ylim[1], res)
grid = [np.array(_) for _ in itertools.product(X,Y)]
Z = [m.forward(torch.Tensor(_)) for _ in grid]
Z = [metric(ypred, ydata) for ypred in Z]
Z = np.array(Z).reshape(len(X), len(Y)).T
gminimum_est = grid[np.argmin(Z)]

fig1.add_trace(go.Scatter(x=np.arange(len(losses)), y=losses), row=1, col=1)
fig1.add_trace(
    go.Contour(x=X, y=Y, z=Z, contours=calc_contours(Z, 50)),
    row=1, col=2,
)
fig1.add_trace(
    go.Scatter(x=params[0], 
               y=params[1],
               text=list(zip(losses, list(np.arange(0, len(losses)))))),
    row=1, col=2
)
fig1.add_trace(
    go.Scatter(x=[mtrue], y=[btrue], mode='markers',
               marker=dict(symbol='star', opacity=0.5, size=20, color='gold')),
    row=1, col=2
)
fig1.add_trace(
    go.Scatter(x=[gminimum_est[1]], y=[gminimum_est[0]], mode='markers',
               marker=dict(symbol='star', opacity=0.8, size=20, color='black')),
    row=1, col=2
)
title = 'ADAM/SGD' if not hasattr(opt, 'record') else str(opt.record)
title = title + str(list(m.parameters()))

fig1.update_layout(
    autosize=False,
    width=1600,
    height=800,
    title=title
)
X_in = Xdata.reshape(1,-1).squeeze()
y_in = ydata.reshape(1,-1).squeeze()
y_in2 = m().detach().reshape(1,-1).squeeze()

fig2 = hf.create_density_plot(params)
fig3 = go.Figure()
fig3.add_trace(
    go.Scatter(x=X_in, y=y_in, mode='markers')
)
fig3.add_trace(
    go.Scatter(x=X_in, y=y_in2)
)

if hasattr(opt, 'alpha_record'):
    fig4 = px.scatter(opt.alpha_record)
    figs = [fig1, fig2, fig4, fig3]
else:
    figs = [fig1, fig2, fig3]

hf.figures_to_html(figs, filename)

In [12]:
X = torch.tensor([[0,-1,1], [1,1,1], [2,2,3]])
sweights = torch.tensor([1,2,3,10])
print(X, sweights[0:-1])
print(torch.matmul(X, sweights[0:-1]) + sweights[-1]) 

tensor([[ 0, -1,  1],
        [ 1,  1,  1],
        [ 2,  2,  3]]) tensor([1, 2, 3])
tensor([11, 16, 25])


In [None]:
"""
Main questions
- levy noise - power law vs levy-stable distribution
    - do we want levy flight or specifically this brand of levy noise
    https://link.springer.com/referenceworkentry/10.1007/978-0-387-30440-3_310
    https://www.pnas.org/doi/full/10.1073/pnas.2001548117
- what problems next: 
- non-convex problem next (e.g. quadratic polynomial fitting)
- apply to LSTM deep learning network
- compare with metadynamics + Parisi (tempering/simulated annealing)
- collect convergence statistics
"""