In [4]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [5]:
import numpy as np
import matplotlib.pyplot as plt
import time
import scipy.io
import matplotlib.gridspec as gridspec
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.interpolate import griddata

import torch
from torch import nn
import torch.optim as optim
from torch.optim import lr_scheduler

import argparse
import random
import os
import math

os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [6]:
# CUDA support
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [7]:
def seed_torch(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed) # 为了禁止hash随机化，使得实验可复现
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    random.seed(seed)

def grad(outputs, inputs):
    """ compute the derivative of outputs associated with inputs

    Params
    ======
    outputs: (N, 1) tensor
    inputs: (N, D) tensor
    """
    return torch.autograd.grad(outputs, inputs,
                               grad_outputs=torch.ones_like(outputs),
                               create_graph=True)

def activation(name):
    if name in ['tanh', 'Tanh']:
        return nn.Tanh()
    elif name in ['relu', 'ReLU']:
        return nn.ReLU(inplace=True)
    elif name in ['leaky_relu', 'LeakyReLU']:
        return nn.LeakyReLU(inplace=True)
    elif name in ['sigmoid', 'Sigmoid']:
        return nn.Sigmoid()
    elif name in ['softplus', 'Softplus']:
        return nn.Softplus()
    else:
        raise ValueError(f'unknown activation function: {name}')

In [8]:
seed_torch(24)

# 网络模型

In [9]:
class MLP(nn.Module):
    """Deep Neural Network"""

    def __init__(self, L, M, dim_hidden, hidden_layers, dim_out,
                 act_name='tanh', init_name='xavier_normal'):
        super().__init__()
        
        dim_in = M * 2 + 2
        
        model = nn.Sequential()
        
        model.add_module('fc0', nn.Linear(dim_in, dim_hidden, bias=True))
        model.add_module('act0', activation(act_name))
        for i in range(1, hidden_layers):
            model.add_module(f'fc{i}', nn.Linear(dim_hidden, dim_hidden, bias=True))
            model.add_module(f'act{i}', activation(act_name))
        model.add_module(f'fc{hidden_layers}', nn.Linear(dim_hidden, dim_out, bias=True))
            
        self.model = model
        
        self.L = L
        self.M = M
        
        if init_name is not None:
            self.init_weight(init_name)

            
        self.k = nn.Parameter(torch.arange(1, self.M+1), requires_grad=False)
                    
    def init_weight(self, name):
        if name == 'xavier_normal':
            nn_init = nn.init.xavier_normal_
        elif name == 'xavier_uniform':
            nn_init = nn.init.xavier_uniform_
        elif name == 'kaiming_normal':
            nn_init = nn.init.kaiming_normal_
        elif name == 'kaiming_uniform':
            nn_init = nn.init.kaiming_uniform_
        else:
            raise ValueError(f'unknown initialization function: {name}')

        for param in self.parameters():
            if len(param.shape) > 1:
                nn_init(param)
                
#         for layer, param in enumerate(self.parameters()):
#             if layer % 2 == 1:
#                 nn.init.constant_(param, 0.0)
#             else:
#                 nn_init(param)
                
    def input_encoding(self, t, x):
        w = 2.0 * math.pi / self.L
        out = torch.cat([t, torch.ones_like(t), 
                            torch.cos(self.k * w * x), torch.sin(self.k * w * x)], dim = 1) 
        
        return out    
            
    def forward(self, H):
        t = H[:, 0:1]
        x = H[:, 1:2]
        
        H = self.input_encoding(t, x)
        H = self.model(H)
        
        return H
    
    def forward_test(self, x):
        print(f"{'input':<20}{str(x.shape):<40}")
        for name, module in self.model._modules.items():
            x = module(x)
            print(f"{name:<20}{str(x.shape):<40}")
        return x

    def model_size(self):
        n_params = 0
        for param in self.parameters():
            n_params += param.numel()
        return n_params
    
    def print(self):
        print(self.bias)

In [10]:
model = MLP(L=2.0, M=10, dim_hidden=128, hidden_layers=4, dim_out=1)

# PDE 部分

KDV方程：
$$
\left\{\begin{matrix}
&u_{t}+ \lambda_1 u u_{x} + \lambda_2 u_{xxx}=0  ,(t,x)\in (0,1)\times (-1,1),  \\
&u(t,-1)=u(t,1),t\in [0,1],  \\
&u_x(t,-1)=u_x(t,1),x\in [0,1].
\end{matrix}\right.
$$
$\lambda_1 = 1$, $\lambda_2 = 1$.

真解设置为:
$$u(x, t) = 12  \frac{ k_1^2 e^{\theta_1}  + k_2^2 e^{\theta_2} + 2 (k_2 - k_1)^2 e^{\theta_1 + \theta_2} + a^2 ( k_2^2 e^{\theta_1} + k_1^2 e^{\theta_2})e^{\theta_1 + \theta_2}}{(1 + e^{\theta_1} + e^{\theta_2} + a^2 e^{\theta_1 + \theta_2})^2}$$

$ k_1 = 0.4$, $k_2 = 0.6$, $a^2 = \left (  \frac{ k_1 - k_2 }{k_1 + k_2 } \right )^2 = \frac{1}{25}$, 


$\theta_1 = k_1 x - k_1^3 t + x_1 $, $\theta_2 = k_2 x - k_2^3 t + x_2$, 


$ x_1 = 4$, $x_2 = 15$. 

# Options

In [11]:
import argparse
class Options_KDV(object):
    def __init__(self):
        parser = argparse.ArgumentParser()
        parser.add_argument('--no_cuda', action='store_true', default=False, help='disable CUDA or not')
        parser.add_argument('--dim_hidden', type=int, default=12, help='neurons in hidden layers')     # 10 9
        parser.add_argument('--hidden_layers', type=int, default=9, help='number of hidden layers')    # 4  20
        parser.add_argument('--lam', type=float, default=1, help='weight in loss function')
        parser.add_argument('--lr', type=float, default=0.001, help='initial learning rate')
        parser.add_argument('--epochs_Adam', type=int, default=600000, help='epochs for Adam optimizer')
        parser.add_argument('--epochs_LBFGS', type=int, default=2500, help='epochs for LBFGS optimizer')
        parser.add_argument('--newton_iter', type=int, default=100, help='newton_iter for LBFGS optimizer')
        parser.add_argument('--step_size', type=int, default=10000, help='step size in lr_scheduler for Adam optimizer')
        parser.add_argument('--gamma', type=float, default=0.9, help='gamma in lr_scheduler for Adam optimizer')
        parser.add_argument('--tol', type=float, default=100, help='the annealing scheme')
        parser.add_argument('--resume', type=bool, default=False, help='resume or not')
        parser.add_argument('--sample_method', type=str, default='uniform', help='sample method')

        self.parser = parser

    def parse(self):
        arg = self.parser.parse_args(args=[])
        arg.load_model = False
        arg.cuda = not arg.no_cuda and torch.cuda.is_available()
        # arg.cuda = False
        arg.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(arg.cuda)
        print(arg.device)
        return arg

args = Options_KDV().parse()
print(args.hidden_layers)

def save_model(state, is_best=None, save_dir=None):
    last_model = os.path.join(save_dir, 'last_model.pth.tar')
    torch.save(state, last_model)
    if is_best:
        best_model = os.path.join(save_dir, 'best_model.pth.tar')
        shutil.copyfile(last_model, best_model)

True
cuda
9


In [12]:
model.to(device)
args.model=model

In [13]:
# 真解
def u(X):
    if X.ndim == 2 and X.shape[1] != 1:
        x = X[:, 1]
        t = X[:, 0]
    else :
        x = X
        t = np.zeros_like(x)
    k1, k2 = 0.4, 0.6
    a2 = 1./25
    x1, x2 = 4, 15
    theta1 = k1*x - k1**3 * t + x1
    theta2 = k2*x - k2**3 * t + x2
    up = 12*(k1 ** 2 * np.exp(theta1) + 
           k2 ** 2 * np.exp(theta2) + 
           2 * (k2 - k1) **2 * np.exp(theta1 + theta2)
           + a2 * (k2 ** 2 * np.exp(theta1) + k1**2 * np.exp(theta2)) * np.exp(theta1 + theta2))
    down = (1 + np.exp(theta1) + np.exp(theta2) + a2 * np.exp(theta1 + theta2)) ** 2
    u = up/down
    return u

# 数据集生成

In [17]:
class Trainset_KDV():
    '''
    时间上[0, 120] 划分1200段
    空间上[-40, 40] 取出4000个高斯点
    
    '''
    def __init__(self, *args):
        self.args = args
        self.shape = (self.args[0], self.args[1])
        
    def __call__(self):
        return self.data()
    
    def data(self):
        # 为了生成高斯积分点，n_x一定要是4的倍数
        gp = np.array([0.8611363116,-0.8611363116,0.3399810436,-0.3399810436])
        gc = np.array([0.3478548451,0.3478548451,0.6521451549,0.6521451549])
        n_t = self.args[0]
        n_x = self.args[1]
        n_ics = self.args[2]
        
        # 生成初值训练点
        t = np.linspace(0, 120, n_t)
        x = np.linspace(-40, 40, n_x)
        x, t = np.meshgrid(x, t)
        tx = np.hstack((t.reshape(-1,1), x.reshape(-1,1)))

        t_ics = np.zeros(n_ics)
        x_ics = np.linspace(-1, 1, n_ics)
        tx_ics = np.hstack([t_ics.reshape(-1,1),x_ics.reshape(-1,1)])
        
        u_ics = u(tx_ics)
        u_ics = u_ics.reshape(-1,1)
        M = np.triu(np.ones([n_t, n_t]),k=1).T
        
        # 生成tgx(高斯训练点)
        num_cell = int(n_x/4 + 1)
        l = np.linspace(-1, 1, num_cell)[:, None]
        l = np.hstack([l[:-1], l[1:]])
        c = (l[:, 1] - l[:, 0])/2
        c = c[:, None]
        gp = gp[None, :]
        d = ((l[:, 1] + l[:, 0])/2)
        d = d[:, None]
        n_p = c * gp + d  
        n_p = n_p.reshape(n_x, 1)
        gcl = c * gc      
        gcl = gcl.reshape(n_x, 1)
        t = np.linspace(0, 1, n_t)[:, None]
        x, t = np.meshgrid(n_p, t) 
        print(x.shape)
        txg = np.hstack([t.reshape(-1, 1), x.reshape(-1, 1)])
        # 计算一下初始能量
        E2 = u_ics ** 2 * gcl
        E2 = np.sum(E2)
        print(E2)
        
        tx = torch.from_numpy(tx).float().to(device)
        txg = torch.from_numpy(txg).float().to(device)
        
        tx_ics = torch.from_numpy(tx_ics).float().to(device)
        u_ics = torch.from_numpy(u_ics).float().to(device)
        M = torch.from_numpy(M).float().to(device)
        
        return tx, tx_ics, u_ics, M, gcl, txg, E2

In [18]:
nt = 1200
nx = 4000
n_ics = 4000

In [19]:
# tgx是高斯点的数据集， shape :(12800, 2) 第一列代表时间，第二列代表空间 ，前100个点表示t = 0 时刻上的采样点，第101个到200表示t = 1/nt 时刻的采样点， 依次类推
trainset = Trainset_KDV(nt, nx, n_ics)
args.trainset = trainset
tx, tx_ics, u_ics, M, gcl, txg, E2 = trainset()  # E2 是初始时刻的平方积分
trainset

(1200, 4000)
0.3393000694887256


In [28]:
class Trainer_Wave(object):
    def __init__(self, args):
        self.model = args.model
        self.lr = args.lr
        self.gamma = args.gamma
        self.trainset = args.trainset
        self.step_size = args.step_size
        self.model_name = self.model.__class__.__name__     
        self.optimizer_Adam = optim.Adam(self.model.parameters(), lr=self.lr, betas=(0.9, 0.999))
        self.scheduler = lr_scheduler.ExponentialLR(self.optimizer_Adam, gamma=self.gamma)
        self.epochs_Adam = args.epochs_Adam
        self.tol = args.tol
        
        # data
        self.tx, self.tx_ics, self.u_ics, self.M,  self.gcl, self.txg, self.E2 = self.trainset()
        self.gcl = torch.from_numpy(self.gcl).float().to(device)
        self.E2 = torch.tensor(self.E2).float().to(device)
        
        # Logger
        self.loss_log = []
        self.loss_ics_log = []
        self.loss_res_log = []
        self.W_log = []
        self.L_t_log = []
        self.epoch_log = []
        

    def net_r(self, tx):
        tx.requires_grad_(True).to(device)
        u = self.model(tx)
        grad_u = grad(u, tx)[0]
        u_t = grad_u[:,[0]]
        u_x = grad_u[:,[1]]
        u_xx = grad(u_x, tx)[0][:, [1]]
        u_xxx = grad(u_xx, tx)[0][:, [1]]

        residual = u_t  + u * u_x + 0.0025 * u_xxx

        return residual

    def net_u(self, tx):
        u = self.model(tx)  
        return u
    
    def residuals_and_weights(self):
        r_pred = self.net_r(self.txg) 
        r_pred = r_pred.reshape(100, 128) # :shape(100,100)
        
        q_pred = self.net_u(self.txg)
        q_pred = q_pred.reshape(100, 128)
        #print(q_pred.shape)
        q_pred = q_pred ** 2 * self.gcl
        q_pred = torch.sum(q_pred, axis=0)
        q_pred = (q_pred - self.E2) ** 2
        #print(q_pred.shape)
        #print("q:",torch.sum(q_pred))
        #print(q_pred.shape, min(q_pred). max(q_pred))
        L_t = torch.mean(r_pred**2, axis=0) 
        L_t = L_t + 10 * q_pred
        
        W = torch.exp(-self.tol * (self.M @ L_t.detach()))
        
        return L_t, W
    
    def loss_ics(self):
        u_pred = self.net_u(self.tx_ics)
        loss_ics = torch.mean((u_pred - self.u_ics)**2)
        return loss_ics
    
    def loss_res(self):
        r_pred = self.net_r(self.tx)
        loss_r = torch.mean(r_pred**2)
        return loss_r
    
    def loss(self):
        L0 = 100 * self.loss_ics()
        L_t, W = self.residuals_and_weights()
        loss = torch.mean(W * L_t) + L0        
        return loss
    
    def train(self):
        start = time.time()
        
        for epoch in range(self.epochs_Adam):
            self.optimizer_Adam.zero_grad()
            loss_value = self.loss()
            loss_value.backward()
            self.optimizer_Adam.step()
            
            if (epoch+1) % self.step_size == 0:
                self.scheduler.step()            
            
            if epoch % 1000 == 0:
                loss_value = self.loss()
                loss_ics_value = self.loss_ics()
                loss_res_value = self.loss_res()
                
                L_t_value, W_value = self.residuals_and_weights()
                
                self.loss_log.append(loss_value.detach().cpu())
                self.loss_ics_log.append(loss_ics_value.detach().cpu())
                self.loss_res_log.append(loss_res_value.detach().cpu())
                self.W_log.append(W_value.detach().cpu())
                self.L_t_log.append(L_t_value.detach().cpu())
                self.epoch_log.append(epoch)
                
                end = time.time()
                running_time = end - start
                start = time.time()
                
                print(f'Epoch #  {epoch}/{self.epochs_Adam}' + f'    time:{running_time:.2f}' + '\n' + \
                      f'loss:{loss_value:.2e}, loss_ics:{loss_ics_value:.2e}, loss_res:{loss_res_value:.2e},')

In [30]:
trainer = Trainer_Wave(args)
trainer.train()

trainset.shape (400, 1200)
(400, 1200) (400, 1200)


RuntimeError: CUDA out of memory. Tried to allocate 236.00 MiB (GPU 0; 10.76 GiB total capacity; 3.02 GiB already allocated; 71.56 MiB free; 3.06 GiB reserved in total by PyTorch)