In [1]:
# Install the third-party packages into the notebook environment.
# torchsde provides BrownianInterval and sdeint, which are imported in the next cell.
# NOTE(review): optuna is not imported in any of the visible cells - confirm it is still needed.
!pip install optuna
!pip install torchsde

Collecting optuna
  Downloading optuna-3.0.1-py3-none-any.whl (348 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m348.1/348.1 KB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting sqlalchemy>=1.1.0
  Downloading SQLAlchemy-1.4.41-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m31m47.4 MB/s[0m eta [36m0:00:01[0m
Collecting cliff
  Downloading cliff-4.0.0-py3-none-any.whl (80 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.0/81.0 KB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
Collecting alembic
  Downloading alembic-1.8.1-py3-none-any.whl (209 kB)
[2K     [38;2;114;156;31

In [38]:
import numpy as np
import torch
from torch import nn
from torchsde import BrownianInterval, sdeint

# Run on the GPU when one is visible to torch, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Ensemble layout: number of simulated cells, degrees of freedom per cell,
# and number of independent noise sources per cell.
batch_size = 25
state_size = 6
brownian_size = 2

# Total simulated duration and the spacing between recorded samples.
t_max = 1000
t_eval = 1

# SDE solver settings: integration scheme, calculus convention, internal time step.
methodSDE = 'euler'
isIto = True
stepSDE = 2e-3

# Lags (in units of t_eval) at which the velocity autocorrelation is evaluated.
delaystep = np.arange(50)

# Bin edges of the speed histogram: vmin, vmin + vbinwidth, ..., vmax.
vbinwidth = 0.1
vmin = 0
vmax = 5
vbins = torch.tensor(np.arange(vmin, vmax+vbinwidth, vbinwidth), dtype=torch.float, device=device)

# Time points at which the solver output is recorded, and how many there are.
ts = torch.arange(0, t_max+t_eval, t_eval, device=device)
Nts = ts.shape[0]


In [39]:
# Inspect the histogram bin edges built in the configuration cell.
print(vbins)

tensor([0.0000, 0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
        0.9000, 1.0000, 1.1000, 1.2000, 1.3000, 1.4000, 1.5000, 1.6000, 1.7000,
        1.8000, 1.9000, 2.0000, 2.1000, 2.2000, 2.3000, 2.4000, 2.5000, 2.6000,
        2.7000, 2.8000, 2.9000, 3.0000, 3.1000, 3.2000, 3.3000, 3.4000, 3.5000,
        3.6000, 3.7000, 3.8000, 3.9000, 4.0000, 4.1000, 4.2000, 4.3000, 4.4000,
        4.5000, 4.6000, 4.7000, 4.8000, 4.9000, 5.0000], device='cuda:0')


In [40]:
import os

# Root directory that holds the input data and receives the results.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
dirName = "/home/uwamichi/miura_kamiya/"

# Directory containing the trajectory CSV files.
csvdir = dirName + "graph_frameOut_yflip0.142/"


def loadcsv(prefix, surfix):
    """Load one CSV table from ``csvdir``.

    The file name is ``prefix + surfix``. The file is read as comma-separated
    numbers with its first row skipped.

    Returns:
        The table as a NumPy array.
    """
    csv_path = csvdir + prefix + surfix
    return np.loadtxt(csv_path, delimiter=',', skiprows=1)


# File-name pieces of the trajectory tables; the prefix embeds the sampling interval.
csv_surfix = '.csv'
csv_prefix = 'L2_skip{}_'.format(str(t_eval))

# Output directory for this notebook, created on first use.
savedir = dirName + 'torchSDE_Adam/'
os.makedirs(savedir, exist_ok=True)

In [41]:
class betasigma(nn.Module):
    """Learnable quadratic polynomial ``p0 + p1*x + p2*x**2``.

    The constant and linear terms are held by ``layer1`` (bias and weight);
    the quadratic coefficient is the weight of the bias-free ``layer3``,
    which is applied to ``x**2``.

    Args:
        params: indexable with at least three entries ``(p0, p1, p2)`` used
            as the initial coefficient values.
    """

    def __init__(self, params):
        super().__init__()

        # Affine part p0 + p1*x, then the quadratic part p2*x**2.
        self.layer1 = nn.Linear(1, 1, bias=True, device=device)
        self.layer3 = nn.Linear(1, 1, bias=False, device=device)

        # Overwrite the default random initialisation with the given coefficients.
        nn.init.constant_(self.layer1.bias, params[0])
        nn.init.constant_(self.layer1.weight, params[1])
        nn.init.constant_(self.layer3.weight, params[2])

    def forward(self, x):
        """Evaluate the polynomial element-wise; the last dimension of ``x`` must be 1."""
        affine_term = self.layer1(x)
        quadratic_term = self.layer3(torch.square(x))
        return affine_term + quadratic_term


class SDE(nn.Module):
    """Drift and diffusion of the six-component state integrated by ``sdeint``.

    Per row of ``y`` the columns are used as: 0-1 position, 2-3 the velocity
    called ``vsmall``, 4-5 the second velocity-like variable called ``vlarge``.
    ``alpha`` and ``gamma`` are scalar parameters; ``beta`` and ``sigma`` are
    ``betasigma`` polynomials of the speed ``|vsmall|``.

    Every parameter carries a hook that prints its gradient during backward.
    """

    noise_type = 'general'
    sde_type = 'ito' if isIto else 'stratonovich'

    def __init__(self, alpha, beta, gamma, sigma):
        """
        Args:
            alpha, gamma: initial scalar values.
            beta, sigma: initial coefficient triples passed to ``betasigma``.
        """
        super().__init__()
        self.alpha = nn.Parameter(torch.tensor([[alpha]], requires_grad=True, device=device))
        self.gamma = nn.Parameter(torch.tensor([[gamma]], requires_grad=True, device=device))
        self.beta = betasigma(beta)
        self.sigma = betasigma(sigma)

        # Debug aid: print each parameter's gradient whenever it is computed.
        watched = (
            ('alpha grad', self.alpha),
            ('gamma grad', self.gamma),
            ('beta0 grad', self.beta.layer1.bias),
            ('beta1 grad', self.beta.layer1.weight),
            ('beta2 grad', self.beta.layer3.weight),
            ('sigma0 grad', self.sigma.layer1.bias),
            ('sigma1 grad', self.sigma.layer1.weight),
            ('sigma2 grad', self.sigma.layer3.weight),
        )
        for label, param in watched:
            # Bind the label as a default argument so each hook keeps its own text.
            param.register_hook(lambda grad, label=label: print(label, grad))

    # Drift
    def f(self, t, y):
        """Return the drift, shape (batch_size, state_size)."""
        vsmall, vlarge = y[:, 2:4], y[:, 4:]

        # Speed-dependent friction coefficient beta(|vsmall|).
        speed = torch.norm(vsmall, dim=1, keepdim=True)
        betas = self.beta(speed)

        dvsmall = -betas*vsmall + self.alpha * vlarge
        dvlarge = self.alpha * vsmall - self.gamma * vlarge

        # Positions advance with vsmall.
        return torch.cat((vsmall, dvsmall, dvlarge), 1)

    # Diffusion
    def g(self, t, y):
        """Return the diffusion matrix, shape (batch_size, 6, 2).

        Only the two ``vsmall`` rows are non-zero: they form
        ``sigma(|vsmall|)`` times the 2x2 identity.
        """
        speed = torch.norm(y[:, 2:4], dim=1, keepdim=True).unsqueeze(2)  # (batch, 1, 1)
        amplitude = self.sigma(speed)
        nothing = torch.zeros_like(amplitude)

        row_vx = torch.cat((amplitude, nothing), 2)   # noise source 0 drives vx
        row_vy = torch.cat((nothing, amplitude), 2)   # noise source 1 drives vy
        block_v = torch.cat((row_vx, row_vy), 1)      # (batch, 2, 2)

        # Position rows and vlarge rows receive no noise.
        block_zero = torch.zeros_like(block_v)
        return torch.cat((block_zero, block_v, block_zero), 1)


In [42]:
class moduleSDE(nn.Module):
    """Trainable simulator: integrates ``SDE`` and adds scaled noise to the positions.

    Args:
        params: mapping with the keys 'alpha', 'beta0', 'beta1', 'beta2',
            'gamma', 'sigma0', 'sigma1', 'sigma2' and 'sigmaX' giving the
            initial parameter values.
    """

    def __init__(self, params):
        super().__init__()

        beta_coeffs = [params['beta0'], params['beta1'], params['beta2']]
        sigma_coeffs = [params['sigma0'], params['sigma1'], params['sigma2']]
        self.sde = SDE(params['alpha'], beta_coeffs, params['gamma'], sigma_coeffs)

        # Scale of the additive noise applied to the output positions.
        self.sigmaX = nn.Parameter(torch.tensor([[[params['sigmaX']]]], requires_grad=True, device=device))

        # Debug aid: print the gradient of sigmaX during backward.
        self.sigmaX.register_hook(lambda grad: print('sigmaX grad', grad))

    def forward(self, yInit, bm, rn):
        """Simulate from ``yInit`` and return the noisy positions.

        Args:
            yInit: initial state handed to ``sdeint``.
            bm: Brownian motion object handed to ``sdeint``.
            rn: noise added to the positions after scaling by ``sigmaX``;
                must broadcast against the position output.

        Returns:
            First two state components at every time in ``ts`` plus ``sigmaX * rn``.
        """
        trajectory = sdeint(self.sde, yInit, ts, bm=bm, dt=stepSDE, method=methodSDE)
        positions = trajectory[..., :2]
        return positions + self.sigmaX * rn

In [43]:
# Smoke test: build an SDE with distinct, easily recognisable coefficients and
# list its parameters to check that each value ends up in the expected tensor.
model = SDE(0.0, [1,2,3], 4.0, [5,6,7])
print(list(model.parameters()))

[Parameter containing:
tensor([[0.]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([[4.]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([[2.]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([1.], device='cuda:0', requires_grad=True), Parameter containing:
tensor([[3.]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([[6.]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([5.], device='cuda:0', requires_grad=True), Parameter containing:
tensor([[7.]], device='cuda:0', requires_grad=True)]


In [44]:
def binnedSum(y, x, bins):
    """Differentiable (soft-edged) binned sum of ``y`` keyed by ``x``.

    Approximates ``out[i] = sum_j(y[j] | bins[i] <= x[j] < bins[i+1])``.
    Bin membership is the difference of two sigmoids centred on the lower and
    upper edge, each with a transition width of 1/1000 of the bin width, so
    the result stays differentiable with respect to ``x``.

    Args:
        y: values to accumulate.
        x: coordinates that select the bin; same shape as ``y``.
        bins: 1-D tensor of bin edges.

    Returns:
        Tensor with one entry per bin (squeezed). Pairs where ``x`` or ``y``
        is not finite are ignored.
    """
    finite = torch.isfinite(x) & torch.isfinite(y)
    x_col = x[finite].view(-1, 1)
    y_row = y[finite].view(1, -1)

    lower = bins[:-1].view(1, -1)
    upper = bins[1:].view(1, -1)
    edge_width = (upper - lower) / 1e3

    # ~1 inside [lower, upper), ~0 outside, with a narrow smooth transition.
    membership = torch.special.expit((x_col - lower)/edge_width) - torch.special.expit((x_col - upper)/edge_width)

    return torch.squeeze(y_row @ membership)

def calc_velocity(xy):
    """Forward finite-difference velocity along axis 0, using the sampling interval ``t_eval``.

    The result has one entry fewer than ``xy`` along axis 0.
    """
    displacement = xy[1:] - xy[:-1]
    return displacement / t_eval

def calc_acceleration(v):
    """Forward finite-difference acceleration along axis 0, using the sampling interval ``t_eval``.

    The result has one entry fewer than ``v`` along axis 0.
    """
    velocity_change = v[1:] - v[:-1]
    return velocity_change / t_eval

def calc_v_histogram(vabs):
    """Probability density of the speeds ``vabs`` over the bins defined by ``vbins``.

    Uses the soft counts from ``binnedSum``, rescales them to sum to one
    (L1 normalisation) and divides by the bin widths.
    """
    unit_weights = torch.ones_like(vabs)
    soft_counts = binnedSum(unit_weights, vabs, vbins)
    fractions = nn.functional.normalize(soft_counts, p=1, dim=0)
    bin_widths = vbins[1:] - vbins[:-1]
    return fractions / bin_widths

def calc_v_autocorrelation(v, delays=None):
    """Velocity autocorrelation averaged over cells and over start times.

    For every pair of time steps (i, j) the dot product ``v[i] . v[j]`` is
    averaged over the cells (axis 1); the result for a lag ``d`` is the mean
    of those averages over all pairs with ``j - i == d``.

    Samples with a non-finite component are excluded as a whole: both of their
    components are replaced by zero and they are left out of the per-pair cell
    count. The input tensor itself is never modified.

    Args:
        v: velocities of shape (time, cell, 2).
        delays: iterable of non-negative integer lags in time steps.
            Defaults to the notebook-level ``delaystep``.

    Returns:
        1-D float64 tensor on ``v``'s device with one entry per lag; an entry
        is NaN when no valid pair exists for that lag.
    """
    if delays is None:
        delays = delaystep

    n_steps, n_cells = v.size()[0], v.size()[1]

    # A sample is usable only when both of its components are finite.
    valid = torch.all(torch.isfinite(v), dim=2)  # (time, cell)

    if bool(torch.all(valid)):
        Ncor = n_cells
    else:
        # Out-of-place masking keeps the caller's tensor (and autograd history) intact.
        v = torch.where(valid.unsqueeze(2), v, torch.zeros_like(v))
        # Number of cells that are valid at both time steps of each pair.
        Ncor = torch.count_nonzero(
            torch.logical_and(valid.view(n_steps, 1, n_cells), valid.view(1, n_steps, n_cells)), 2)

    # (time, time) matrix of cell-averaged dot products; 0/0 -> NaN where no cell is valid.
    v_cor = ((v[:,:,0] @ v[:,:,0].T) + (v[:,:,1] @ v[:,:,1].T)) / Ncor

    if len(delays) == 0:
        return torch.zeros([0], dtype=torch.float64, device=v.device)

    # The d-th upper diagonal holds all pairs separated by d steps.
    per_lag = [torch.nanmean(torch.diag(v_cor, int(dt))) for dt in delays]
    return torch.stack(per_lag).to(torch.float64)

def calc_va4(v_normalized, vabs, a, bin_width=None, v_lo=None, v_hi=None):
    """Speed-binned statistics of the acceleration relative to the velocity direction.

    Each sample's acceleration is decomposed into the component parallel to
    the unit velocity and the (signed) perpendicular component. Samples are
    grouped by speed into bins of width ``bin_width`` covering
    ``[v_lo, v_hi)``, and the mean and sample standard deviation (n - 1
    denominator) of both components are computed per bin.

    Fixes relative to the previous version:
      * The bin count is ``round((v_hi - v_lo) / bin_width)``. The former
        ``np.divmod(vmax, vbinwidth)[0]`` is a floating-point floor division
        that yields 49 for 5 / 0.1, silently dropping the last bin.
      * Bin indices are measured from ``v_lo`` rather than from 0.
      * Bins with fewer than two samples report NaN for the standard
        deviation. Empty bins used to report ``sqrt(0 / -1) = -0.0``, which
        looks like valid data to ``isfinite``-based filters.

    Args:
        v_normalized: unit velocity vectors, shape (time, cell, 2).
        vabs: speeds, shape (time, cell).
        a: accelerations, shape (time, cell, 2).
        bin_width, v_lo, v_hi: binning of the speed axis. Default to the
            notebook-level ``vbinwidth``, ``vmin`` and ``vmax``.

    Returns:
        ``(a_para_mean, a_perp_mean, a_para_std, a_perp_std)``, each a float64
        tensor of shape (Nbins, 1) on ``vabs``'s device. Means are NaN for
        empty bins; standard deviations are NaN for bins with < 2 samples.
    """
    if bin_width is None:
        bin_width = vbinwidth
    if v_lo is None:
        v_lo = vmin
    if v_hi is None:
        v_hi = vmax

    # round() instead of floor division: 5 // 0.1 == 49.0 in floating point.
    Nbins = int(round((v_hi - v_lo) / bin_width))

    def bin_sum(bin_index, values):
        # Accumulate `values` into their bins in float64; differentiable w.r.t. `values`.
        total = torch.zeros([Nbins, 1], dtype=torch.float64, device=vabs.device)
        return total.index_add(0, bin_index, values.to(torch.float64).view(-1, 1))

    i_bins = (vabs - v_lo).div(bin_width, rounding_mode="floor").to(torch.int64)

    # Keep samples whose speed falls inside the binned range and whose vectors
    # are finite. NaN speeds fail the first comparison and are dropped here.
    flg_inrange = (
        (vabs >= v_lo) & (i_bins < Nbins)
        & torch.all(torch.isfinite(a), 2)
        & torch.all(torch.isfinite(v_normalized), 2)
    )

    # Dot product and 2-D cross product with the unit velocity.
    a_para = v_normalized[:,:,0] * a[:,:,0] + v_normalized[:,:,1] * a[:,:,1]
    a_perp = v_normalized[:,:,0] * a[:,:,1] - v_normalized[:,:,1] * a[:,:,0]

    idx = i_bins[flg_inrange]
    a_para_sel = a_para[flg_inrange]
    a_perp_sel = a_perp[flg_inrange]

    count_vabs = bin_sum(idx, torch.ones_like(a_para_sel))

    # 0/0 -> NaN marks empty bins.
    a_para_mean = bin_sum(idx, a_para_sel) / count_vabs
    a_perp_mean = bin_sum(idx, a_perp_sel) / count_vabs

    # Deviation of every sample from the mean of its own bin.
    d_a_para = a_para_sel - a_para_mean[:, 0][idx]
    d_a_perp = a_perp_sel - a_perp_mean[:, 0][idx]

    # A sample standard deviation needs at least two samples in the bin.
    enough = count_vabs >= 2
    keep = enough[:, 0][idx]
    not_available = torch.full_like(count_vabs, float('nan'))

    a_para_var = bin_sum(idx[keep], torch.square(d_a_para[keep])) / (count_vabs - 1)
    a_perp_var = bin_sum(idx[keep], torch.square(d_a_perp[keep])) / (count_vabs - 1)

    a_para_std = torch.where(enough, torch.sqrt(a_para_var), not_available)
    a_perp_std = torch.where(enough, torch.sqrt(a_perp_var), not_available)

    return a_para_mean, a_perp_mean, a_para_std, a_perp_std



In [45]:
# Reference statistics of the measured trajectories. No gradients are needed
# here, so everything is computed with autograd disabled.
with torch.no_grad():

    # Coordinate tables; calc_velocity differentiates along the first axis.
    x_csv = loadcsv(csv_prefix, 'x'+csv_surfix)
    y_csv = loadcsv(csv_prefix, 'y'+csv_surfix)

    # Stack x and y along a new last axis.
    xy_csv = torch.tensor(np.stack((x_csv, y_csv), axis=2), dtype=torch.float, device=device)

    v_csv = calc_velocity(xy_csv)
    a_csv = calc_acceleration(v_csv)

    vabs_csv = torch.norm(v_csv, dim=2)
    v_normalized_csv = nn.functional.normalize(v_csv, dim=2)

    # Target statistics that the simulation is fitted to.
    hist_csv = calc_v_histogram(vabs_csv)
    vac_csv = calc_v_autocorrelation(v_csv)
    # The acceleration has one time step fewer than the velocity, hence [:-1].
    va4_csv = torch.concat(calc_va4(v_normalized_csv[:-1], vabs_csv[:-1], a_csv), 1)

    print(hist_csv)
    print(vac_csv)
    print(va4_csv)

    # Magnitudes of the reference statistics, printed for inspection.
    hist_norm = torch.nansum(hist_csv**2)
    print(hist_norm)

    vac_norm = torch.abs(vac_csv)
    print(vac_norm)

    va4_norm = torch.nansum(va4_csv**2, dim=0)
    print(va4_norm)


tensor([7.7382e-02, 1.9442e-01, 3.1161e-01, 3.9839e-01, 5.0349e-01, 5.6932e-01,
        6.3120e-01, 7.1836e-01, 7.8103e-01, 8.0358e-01, 7.9743e-01, 7.4059e-01,
        6.6378e-01, 5.5074e-01, 4.8309e-01, 3.6264e-01, 2.9296e-01, 2.4451e-01,
        1.8147e-01, 1.4667e-01, 1.1110e-01, 8.5229e-02, 7.8882e-02, 6.4705e-02,
        4.6304e-02, 4.0165e-02, 2.9394e-02, 2.3630e-02, 1.5841e-02, 1.0321e-02,
        8.7670e-03, 6.7941e-03, 1.9412e-03, 4.2059e-03, 3.5588e-03, 3.5588e-03,
        2.5882e-03, 1.9412e-03, 1.6176e-03, 1.6177e-03, 6.4706e-04, 3.2353e-04,
        1.6176e-03, 0.0000e+00, 3.2353e-04, 6.4706e-04, 9.7059e-04, 0.0000e+00,
        0.0000e+00, 6.4706e-04], device='cuda:0')
tensor([1.4615, 1.2273, 1.1752, 1.1293, 1.0923, 1.0602, 1.0255, 0.9928, 0.9759,
        0.9502, 0.9306, 0.9140, 0.8981, 0.8798, 0.8652, 0.8493, 0.8334, 0.8222,
        0.8067, 0.7953, 0.7796, 0.7705, 0.7567, 0.7428, 0.7337, 0.7200, 0.7034,
        0.6944, 0.6840, 0.6693, 0.6632, 0.6571, 0.6506, 0.6373, 0.6277

In [46]:
# Confirm that the reference trajectories are not tracked by autograd.
print(xy_csv.requires_grad)

False


In [47]:
def compare_v_histogram(vabs):
    """Relative squared error between the speed histogram of ``vabs`` and ``hist_csv``.

    Only bins where the difference is finite contribute. The summed squared
    difference is divided by the summed squared reference over the same bins.
    """
    residual = calc_v_histogram(vabs) - hist_csv
    usable = torch.isfinite(residual)

    reference_power = torch.nansum(hist_csv[usable]**2)
    squared_error = torch.sum(torch.square(residual[usable]))

    return squared_error / reference_power

def compare_v_autocorrelation(v):
    """Mean relative absolute error between the autocorrelation of ``v`` and ``vac_csv``.

    Lags where the difference is not finite are skipped; each remaining lag
    contributes ``|difference| / |reference|``.
    """
    deviation = calc_v_autocorrelation(v) - vac_csv
    usable = torch.isfinite(deviation)
    relative_error = torch.abs(deviation[usable]) / torch.abs(vac_csv[usable])
    return torch.mean(relative_error)

def compare_acceleration(v_normalized, vabs, a):
    """Relative L1 errors of the four binned acceleration statistics against ``va4_csv``.

    The statistics come from ``calc_va4`` in the order parallel mean,
    perpendicular mean, parallel std, perpendicular std, and are compared
    with the matching column of ``va4_csv``. For each statistic only bins
    where both the computed and the reference value are finite are used; the
    summed absolute difference is divided by the summed absolute reference
    over those bins.

    Returns:
        Tuple ``(dif_para_mean, dif_perp_mean, dif_para_std, dif_perp_std)``.
    """
    a_para_mean, a_perp_mean, a_para_std, a_perp_std = calc_va4(v_normalized, vabs, a)

    zero = torch.tensor(0.0, device=device)
    relative_errors = []

    for column, statistic in enumerate((a_para_mean, a_perp_mean, a_para_std, a_perp_std)):
        reference = va4_csv[:, column:column+1]
        usable = torch.logical_and(torch.isfinite(statistic), torch.isfinite(reference))

        # Replace unusable entries by zero on both sides before subtracting,
        # so no NaN enters the difference.
        difference = torch.where(usable, statistic, zero) - torch.where(usable, reference, zero)

        reference_norm = torch.sum(torch.abs(reference[usable]))
        relative_errors.append(torch.sum(torch.abs(difference[usable])) / reference_norm)

    return tuple(relative_errors)

def eval_function(xy):
    """Score trajectories ``xy`` against the reference statistics.

    Returns a 1-D tensor with five entries, in this order: speed-histogram
    error, velocity-autocorrelation error, and the errors of the parallel
    mean, parallel std and perpendicular std of the binned acceleration.
    The perpendicular-mean error is computed but not included.
    """
    v = calc_velocity(xy)
    a = calc_acceleration(v)

    vabs = torch.norm(v, dim=2)
    v_normalized = nn.functional.normalize(v, dim=2)

    j_hist = compare_v_histogram(vabs)
    j_vac = compare_v_autocorrelation(v)
    # The acceleration has one time step fewer than the velocity, hence [:-1].
    j_para_mean, _j_perp_mean, j_para_std, j_perp_std = compare_acceleration(v_normalized[:-1], vabs[:-1], a)

    # warning: if a member of concat has NaN grad, others also show NaN grad even if its grad was finite before concat.
    terms = (j_hist, j_vac, j_para_mean, j_para_std, j_perp_std)
    return torch.concat([term.view(1) for term in terms])

def treatOuts(x):
    """Squash ``x`` into (-1, 1) via ``(2/pi) * arctan(6 * x**3)``."""
    cubed_and_scaled = 6*(x**3)
    return torch.atan(cubed_and_scaled)*2/np.pi


In [48]:
class customLoss(nn.Module):
    """Scalar training loss: weighted sum of the five terms of ``eval_function``."""

    def __init__(self):
        super().__init__()
        # No state to initialise.

    def forward(self, outputs):
        """Compute the loss.

        Args:
            outputs: prediction of the network (simulated trajectories). The
                targets are the reference statistics used inside
                ``eval_function``.

        Returns:
            Sum of the five error terms, with the second term (velocity
            autocorrelation) weighted by 10 and all others by 1. The
            unweighted terms are printed.
        """
        error_terms = eval_function(outputs)
        print(error_terms)

        term_weights = torch.tensor([1.0, 10.0, 1.0, 1.0, 1.0], device=device)
        return torch.sum(error_terms * term_weights)

In [49]:
# Sanity check: score the reference trajectories against their own statistics.
# Every term should be (numerically) zero.
J_csv = eval_function(xy_csv)

print(J_csv)

tensor([0.0000e+00, 0.0000e+00, 0.0000e+00, 1.4556e-16, 1.9564e-16],
       device='cuda:0', dtype=torch.float64)


In [50]:
def printparams(model):
    """Print the nine current parameter values of a ``moduleSDE`` on two lines."""

    def as_numpy(tensor):
        # Detached CPU copy, independent of device and autograd.
        return tensor.cpu().detach().numpy()

    sde = model.sde

    first_line = (
        as_numpy(sde.alpha[0,0]),
        as_numpy(sde.beta.layer1.bias[0]),
        as_numpy(sde.beta.layer1.weight[0,0]),
        as_numpy(sde.beta.layer3.weight[0,0]),
    )
    second_line = (
        as_numpy(sde.gamma[0,0]),
        as_numpy(sde.sigma.layer1.bias[0]),
        as_numpy(sde.sigma.layer1.weight[0,0]),
        as_numpy(sde.sigma.layer3.weight[0,0]),
        as_numpy(model.sigmaX[0,0,0]),
    )

    print('alpha: {}, beta0: {}, beta1: {}, beta2: {}, '.format(*first_line))
    print('gamma: {}, sigma0: {}, sigma1: {}, sigma2: {}, sigmaX: {}'.format(*second_line))


In [None]:
import matplotlib.pyplot as plt
import gc

#datadirName = "/content/drive/MyDrive/trajectories_miura/"

# Initial guesses for the model parameters (keys match those read by moduleSDE).
alpha_init = 0.0741
beta0_init = 0.116
beta1_init = 0.0
beta2_init = 0.0
gamma_init = 0.0641
sigma0_init = 0.266
sigma1_init = 0.0
sigma2_init = 0.0
sigmaX_init = 0.155

initIns = {'alpha': alpha_init,
           'beta0': beta0_init,
           'beta1': beta1_init,
           'beta2': beta2_init,
           'gamma': gamma_init,
           'sigma0': sigma0_init,
           'sigma1': sigma1_init,
           'sigma2': sigma2_init,
           'sigmaX': sigmaX_init}

model = moduleSDE(initIns)

# Show the starting point.
printparams(model)

lossfunc = customLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)#, weight_decay=5e-4)

# Loss value of every epoch.
loss_log = []

# learning loop
for epoch in range(200):
    # Drop the previous epoch's output and loss (and the autograd graph they
    # hold) before freeing cached GPU memory.
    if 'out' in globals():
        del out
    if 'loss' in globals():
        del loss
        
    torch.cuda.empty_cache()
    gc.collect()

    optimizer.zero_grad()

    # Fresh Brownian path for this epoch, spanning the whole output time range.
    bm = BrownianInterval(t0=ts[0], 
                          t1=ts[-1], 
                          size=(batch_size, brownian_size),
                          dt=stepSDE,
                          device=device)

    # Initial state: random heading in [0, 1) turns, converted to a unit velocity.
    y0 = torch.rand((batch_size,1), device=device)
    y0 = torch.concat((torch.zeros_like(y0), torch.zeros_like(y0),          #x,y = 0,0
                     torch.cos(y0*(2*np.pi)), torch.sin(y0*(2*np.pi)),    #|v| = 1
                     torch.zeros_like(y0), torch.zeros_like(y0)), 1)      #|V| = 0

    # Standard-normal noise that the model scales by sigmaX and adds to the positions.
    rnoise = torch.randn((Nts, batch_size, 2), device=device)

    #with torch.autograd.detect_anomaly():
    out = model(y0, bm, rnoise)
    #print('out', out)
    print('forward done')

    #out.register_hook(lambda grad: print('trajectory back', grad))#(grad != grad).any().item()))

    loss = lossfunc(out)
    #print('loss {}'.format(str(loss)))

    # The gradient hooks registered on the parameters print during this call.
    loss.backward()
    optimizer.step()
    
    print('Epoch %d | Loss: %.4f' % (epoch, loss.item()))
    
    loss_log.append(loss.item())

    printparams(model)


alpha: 0.07410000264644623, beta0: 0.11599999666213989, beta1: 0.0, beta2: 0.0, 
gamma: 0.0640999972820282, sigma0: 0.26600000262260437, sigma1: 0.0, sigma2: 0.0, sigmaX: 0.1550000011920929
forward done
tensor([0.0085, 0.0304, 0.4758, 0.5312, 0.3572], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX grad tensor([[[-1.6259]]], device='cuda:0')
sigma2 grad tensor([[-59.7364]], device='cuda:0')
sigma0 grad tensor([-48.5446], device='cuda:0')
sigma1 grad tensor([[-48.2326]], device='cuda:0')
gamma grad tensor([[246.0461]], device='cuda:0')
alpha grad tensor([[-510.3936]], device='cuda:0')
beta2 grad tensor([[611.8146]], device='cuda:0')
beta0 grad tensor([257.2993], device='cuda:0')
beta1 grad tensor([[374.8483]], device='cuda:0')
Epoch 0 | Loss: 1.6772
alpha: 0.07510000467300415, beta0: 0.11499999463558197, beta1: -0.0009999999310821295, beta2: -0.0009999999310821295, 
gamma: 0.06309999525547028, sigma0: 0.2669999897480011, sigma1: 0.0010000000474974513, sigma2:

forward done
tensor([0.0170, 0.1048, 0.5544, 0.4901, 0.3332], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX grad tensor([[[0.9699]]], device='cuda:0')
sigma2 grad tensor([[-73.7192]], device='cuda:0')
sigma0 grad tensor([-60.0082], device='cuda:0')
sigma1 grad tensor([[-57.8827]], device='cuda:0')
gamma grad tensor([[383.8922]], device='cuda:0')
alpha grad tensor([[-757.1809]], device='cuda:0')
beta2 grad tensor([[862.1444]], device='cuda:0')
beta0 grad tensor([361.3021], device='cuda:0')
beta1 grad tensor([[522.0949]], device='cuda:0')
Epoch 10 | Loss: 2.4426
alpha: 0.07490062713623047, beta0: 0.1150565966963768, beta1: 4.415980583871715e-05, beta2: 0.0008641349268145859, 
gamma: 0.06349893659353256, sigma0: 0.2679418921470642, sigma1: 0.000602503539994359, sigma2: -0.00042852171463891864, sigmaX: 0.15291202068328857
forward done
tensor([0.0111, 0.0459, 0.3986, 0.5393, 0.3953], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX gr

forward done
tensor([0.0159, 0.1637, 0.5103, 0.5236, 0.3549], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX grad tensor([[[0.2187]]], device='cuda:0')
sigma2 grad tensor([[-60.1144]], device='cuda:0')
sigma0 grad tensor([-53.9573], device='cuda:0')
sigma1 grad tensor([[-50.1960]], device='cuda:0')
gamma grad tensor([[354.7244]], device='cuda:0')
alpha grad tensor([[-701.6175]], device='cuda:0')
beta2 grad tensor([[760.8355]], device='cuda:0')
beta0 grad tensor([333.9830], device='cuda:0')
beta1 grad tensor([[473.6198]], device='cuda:0')
Epoch 20 | Loss: 3.0419
alpha: 0.07523879408836365, beta0: 0.11450426280498505, beta1: 0.00012730581511277705, beta2: 0.0014610043726861477, 
gamma: 0.06342418491840363, sigma0: 0.2695074677467346, sigma1: 0.0013502816436812282, sigma2: -0.00032069094595499337, sigmaX: 0.1505415141582489
forward done
tensor([0.0097, 0.0345, 0.4786, 0.5317, 0.3620], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX 

forward done
tensor([0.0245, 0.2306, 0.5780, 0.5424, 0.3780], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX grad tensor([[[-1.5553]]], device='cuda:0')
sigma2 grad tensor([[-37.4139]], device='cuda:0')
sigma0 grad tensor([-47.1308], device='cuda:0')
sigma1 grad tensor([[-37.4502]], device='cuda:0')
gamma grad tensor([[358.5782]], device='cuda:0')
alpha grad tensor([[-686.2162]], device='cuda:0')
beta2 grad tensor([[656.1478]], device='cuda:0')
beta0 grad tensor([318.2199], device='cuda:0')
beta1 grad tensor([[429.7277]], device='cuda:0')
Epoch 30 | Loss: 3.8288
alpha: 0.07505779713392258, beta0: 0.11446943879127502, beta1: 0.0004068848502356559, beta2: 0.001975764986127615, 
gamma: 0.06382633745670319, sigma0: 0.2704099118709564, sigma1: 0.001877912669442594, sigma2: -6.554186984430999e-05, sigmaX: 0.15174244344234467
forward done
tensor([0.0104, 0.0281, 0.5187, 0.4614, 0.3255], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX gr

forward done
tensor([0.0100, 0.0304, 0.4837, 0.4990, 0.3663], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX grad tensor([[[-0.2304]]], device='cuda:0')
sigma2 grad tensor([[-10.1342]], device='cuda:0')
sigma0 grad tensor([-5.4110], device='cuda:0')
sigma1 grad tensor([[-6.5924]], device='cuda:0')
gamma grad tensor([[-53.7364]], device='cuda:0')
alpha grad tensor([[73.3772]], device='cuda:0')
beta2 grad tensor([[-41.0929]], device='cuda:0')
beta0 grad tensor([-17.8090], device='cuda:0')
beta1 grad tensor([[-25.2310]], device='cuda:0')
Epoch 40 | Loss: 1.6629
alpha: 0.07553163915872574, beta0: 0.1138174831867218, beta1: 0.00011194160470040515, beta2: 0.001991336466744542, 
gamma: 0.0635385513305664, sigma0: 0.271818608045578, sigma1: 0.002872378332540393, sigma2: 0.0005564770544879138, sigmaX: 0.15313850343227386
forward done
tensor([0.0153, 0.0690, 0.5456, 0.5079, 0.3430], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX grad tens

forward done
tensor([0.0147, 0.0752, 0.5087, 0.5146, 0.3552], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX grad tensor([[[-0.5943]]], device='cuda:0')
sigma2 grad tensor([[-55.6666]], device='cuda:0')
sigma0 grad tensor([-54.1112], device='cuda:0')
sigma1 grad tensor([[-48.0363]], device='cuda:0')
gamma grad tensor([[379.9392]], device='cuda:0')
alpha grad tensor([[-742.4689]], device='cuda:0')
beta2 grad tensor([[800.7454]], device='cuda:0')
beta0 grad tensor([349.3748], device='cuda:0')
beta1 grad tensor([[496.4950]], device='cuda:0')
Epoch 50 | Loss: 2.1454
alpha: 0.07546055316925049, beta0: 0.1136666089296341, beta1: 0.00019933721341658384, beta2: 0.0022715351078659296, 
gamma: 0.06383022665977478, sigma0: 0.27281826734542847, sigma1: 0.003617800073698163, sigma2: 0.001088950433768332, sigmaX: 0.15420202910900116
forward done
tensor([0.0073, 0.0725, 0.4740, 0.4783, 0.3343], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX gr

forward done
tensor([0.0138, 0.1242, 0.5367, 0.4968, 0.3634], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX grad tensor([[[-0.7234]]], device='cuda:0')
sigma2 grad tensor([[82.5785]], device='cuda:0')
sigma0 grad tensor([65.1563], device='cuda:0')
sigma1 grad tensor([[65.0885]], device='cuda:0')
gamma grad tensor([[-471.1457]], device='cuda:0')
alpha grad tensor([[922.1339]], device='cuda:0')
beta2 grad tensor([[-1117.5850]], device='cuda:0')
beta0 grad tensor([-435.0347], device='cuda:0')
beta1 grad tensor([[-656.1675]], device='cuda:0')
Epoch 60 | Loss: 2.6530
alpha: 0.07565483450889587, beta0: 0.11325407028198242, beta1: -3.60832905244024e-06, beta2: 0.002251430181786418, 
gamma: 0.06386087089776993, sigma0: 0.27401554584503174, sigma1: 0.004604792688041925, sigma2: 0.0018682318041101098, sigmaX: 0.15614348649978638
forward done
tensor([0.0140, 0.0320, 0.5641, 0.4785, 0.3506], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX g

forward done
tensor([0.0157, 0.1607, 0.5355, 0.4715, 0.3405], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX grad tensor([[[1.1294]]], device='cuda:0')
sigma2 grad tensor([[79.2512]], device='cuda:0')
sigma0 grad tensor([66.9544], device='cuda:0')
sigma1 grad tensor([[64.3517]], device='cuda:0')
gamma grad tensor([[-531.3256]], device='cuda:0')
alpha grad tensor([[1018.7272]], device='cuda:0')
beta2 grad tensor([[-1263.5509]], device='cuda:0')
beta0 grad tensor([-473.4440], device='cuda:0')
beta1 grad tensor([[-726.7317]], device='cuda:0')
Epoch 70 | Loss: 2.9699
alpha: 0.07600870728492737, beta0: 0.1127241998910904, beta1: -0.00033745658583939075, beta2: 0.0021000842098146677, 
gamma: 0.06369636207818985, sigma0: 0.2751151919364929, sigma1: 0.005468415562063456, sigma2: 0.002524198964238167, sigmaX: 0.15690790116786957
forward done
tensor([0.0251, 0.2256, 0.5298, 0.4727, 0.3297], device='cuda:0',
       dtype=torch.float64, grad_fn=<CatBackward0>)
sigmaX g

In [None]:
# Inspect the model output of the last training epoch.
print(out)

In [None]:
import datetime

now = datetime.datetime.now()

def extractParams(model):
    """Collect the nine parameter values of a ``moduleSDE`` into a dict.

    Returns:
        Dict keyed by 'alpha', 'beta0', 'beta1', 'beta2', 'gamma', 'sigma0',
        'sigma1', 'sigma2', 'sigmaX' (in that order); every value is a
        zero-dimensional NumPy array detached from autograd.
    """

    def as_numpy(tensor):
        # Detached CPU copy, independent of device and autograd.
        return tensor.cpu().detach().numpy()

    sde = model.sde
    beta, sigma = sde.beta, sde.sigma

    return {
        'alpha': as_numpy(sde.alpha[0,0]),
        'beta0': as_numpy(beta.layer1.bias[0]),
        'beta1': as_numpy(beta.layer1.weight[0,0]),
        'beta2': as_numpy(beta.layer3.weight[0,0]),
        'gamma': as_numpy(sde.gamma[0,0]),
        'sigma0': as_numpy(sigma.layer1.bias[0]),
        'sigma1': as_numpy(sigma.layer1.weight[0,0]),
        'sigma2': as_numpy(sigma.layer3.weight[0,0]),
        'sigmaX': as_numpy(model.sigmaX[0,0,0]),
    }

# Snapshot the current model parameters.
bestParams = extractParams(model)

# Write them to a time-stamped .npz file in savedir. np.savez only stores the
# arrays it is given, so the parameters are passed as keyword arguments (one
# archive entry per parameter name); the call without them wrote an empty archive.
np.savez(savedir+'bestParams_' + now.strftime('%Y%m%d_%H%M%S'), **bestParams)