# Improve on the deep learning approach

## Load model and dataset

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

In [None]:
## Code to reload the model
# NOTE(review): Net is not defined in the visible cells; it must be defined or
# imported before this cell runs.
net = Net()

# Load the saved weights. map_location='cpu' lets a checkpoint that was saved
# from GPU tensors load on a CPU-only machine; load_state_dict then copies the
# values into net's own (freshly constructed) parameters.
net.load_state_dict(torch.load('lorenz_model_win.pth', map_location='cpu'))

# New data generation and additional penalty

We choose an adaptive approach to enhance our model with new data: after a training phase, we take the datapoints whose predicted probability is close to 0.5 as an estimate of the decision boundary. We create new datapoints by adding small perturbations to such points with the following scheme:
1. For such a datapoint $x=(x_{0},y_{0},z_{0})$, we create $x'=(x_{0}+\delta_{0},y_{0}+\delta_{1},z_{0}+\delta_{2})$, where $\delta_{i} \sim \mathcal{N}(0, \sigma^{2})$ is Gaussian noise, and the variance $\sigma^{2}$ grows with the distance of the predicted probability of $x$ from 0.5.
2. We add the created datapoints to the dataset and train again.

Furthermore, we expect that at the basin boundary the direction of the dynamical system is perpendicular to the gradient of $f$ on the level set $f(x)=0$, so we add a penalty for violations of this condition.

Let the direction of the dynamical system at point $\mathbf{x}$ be $b(\mathbf{x})$; then we expect that $p = \nabla f(\mathbf{x})^{T}b(\mathbf{x})=0$. We add a penalty for violations, trying the following forms:

1. $\log{p}$ (logarithm penalty)
2. $p^{k}$ (polynomial penalty, we try linear, quadratic and cubic)
3. $e^{p}$ (exponential penalty)

Remark: When we compute $p$, both vectors are normalized because we are interested in the angular violation. Without normalization, $p$ may be large even if the two vectors are almost perpendicular, as long as one of them has a large norm.

In [None]:
def data_perturbation(data, probability, scale=2.0):
    """Return ``data`` plus zero-mean Gaussian noise scaled by boundary distance.

    The noise variance is ``scale * |probability - 0.5|``, so points whose
    predicted probability is exactly 0.5 are returned unchanged.

    Args:
        data: Floating-point tensor of points to perturb.
        probability: Predicted probability, either a Python number or a
            tensor. A tensor with fewer dimensions than ``data`` whose first
            dimension matches ``data.shape[0]`` is treated as one probability
            per sample.
        scale: Multiplier turning ``|probability - 0.5|`` into the noise
            variance (hyperparameter; the default keeps the original factor 2).

    Returns:
        A new tensor with the same shape as ``data``.
    """
    # as_tensor accepts floats and tensors alike and puts the probability on
    # data's dtype/device; detach keeps the noise scale out of any autograd
    # graph (torch.tensor(k) in the original also produced a detached copy).
    prob = torch.as_tensor(
        probability, dtype=data.dtype, device=data.device
    ).detach()

    # Per-sample probabilities such as shape (N,) need trailing singleton
    # dimensions to broadcast against data of shape (N, ...).
    missing_dims = data.dim() - prob.dim()
    if prob.dim() > 0 and missing_dims > 0 and prob.shape[0] == data.shape[0]:
        prob = prob.reshape(tuple(prob.shape) + (1,) * missing_dims)

    variance = scale * (prob - 0.5).abs()
    noise = torch.randn_like(data) * variance.sqrt()
    return data + noise

In [None]:
class CustomLossLinear(nn.Module):
    """BCE loss plus a linear penalty on gradient/direction misalignment.

    The penalty is the mean over samples of ``|cos(theta)|``, where ``theta``
    is the angle between the gradient of the network output with respect to
    the input and the direction ``v``. It is zero when the two vectors are
    perpendicular and one when they are parallel.

    NOTE(review): this file defines ``CustomLossLinear`` more than once; the
    last definition in the file is the one that stays bound to the name.
    """

    def __init__(self):
        # Zero-argument super(): the original referenced an undefined
        # ``CustomLoss`` name, so the class could not be instantiated.
        super().__init__()
        self.bce_loss = nn.BCELoss()

    def forward(self, net, x, y, v):
        """Return ``BCE(net(x), y)`` plus the linear alignment penalty.

        Args:
            net: Callable mapping ``x`` to predictions accepted by
                ``nn.BCELoss``.
            x: Input tensor; its last dimension is treated as the coordinate
                axis of each sample.
            y: Targets with the same shape as ``net(x)``.
            v: Direction tensor with as many elements as ``x``, or one that
                broadcasts against the input gradient.

        Returns:
            Scalar tensor: BCE loss plus the mean absolute cosine between the
            input gradient and ``v``.
        """
        # The input gradient is only defined if x takes part in autograd; use
        # a detached view so the caller's tensor is not modified.
        if not x.requires_grad:
            x = x.detach().requires_grad_(True)

        out = net(x)
        bce_loss = self.bce_loss(out, y)

        # autograd.grad (instead of out.backward + x.grad) leaves the .grad
        # fields of x and of the network parameters untouched, and
        # create_graph=True keeps the penalty differentiable with respect to
        # the network parameters so that it actually influences training.
        (grad,) = torch.autograd.grad(
            out, x, grad_outputs=torch.ones_like(out), create_graph=True
        )

        if v.numel() == grad.numel():
            v = v.reshape(grad.shape)

        # cosine_similarity normalizes both vectors per sample and guards
        # against division by zero for vanishing gradients.
        cosine = nn.functional.cosine_similarity(grad, v, dim=-1)
        penalty = cosine.abs().mean()

        return bce_loss + penalty
    
class CustomLossQuadratic(nn.Module):
    """BCE loss plus a quadratic penalty on gradient/direction misalignment.

    The penalty is the mean over samples of ``cos(theta) ** 2``, where
    ``theta`` is the angle between the gradient of the network output with
    respect to the input and the direction ``v``. It is zero when the two
    vectors are perpendicular and one when they are parallel.
    """

    def __init__(self):
        # Zero-argument super(): the original referenced an undefined
        # ``CustomLoss`` name, so the class could not be instantiated.
        super().__init__()
        self.bce_loss = nn.BCELoss()

    def forward(self, net, x, y, v):
        """Return ``BCE(net(x), y)`` plus the quadratic alignment penalty.

        Args:
            net: Callable mapping ``x`` to predictions accepted by
                ``nn.BCELoss``.
            x: Input tensor; its last dimension is treated as the coordinate
                axis of each sample.
            y: Targets with the same shape as ``net(x)``.
            v: Direction tensor with as many elements as ``x``, or one that
                broadcasts against the input gradient.

        Returns:
            Scalar tensor: BCE loss plus the mean squared cosine between the
            input gradient and ``v``.
        """
        # The input gradient is only defined if x takes part in autograd; use
        # a detached view so the caller's tensor is not modified.
        if not x.requires_grad:
            x = x.detach().requires_grad_(True)

        out = net(x)
        bce_loss = self.bce_loss(out, y)

        # autograd.grad (instead of out.backward + x.grad) leaves the .grad
        # fields of x and of the network parameters untouched, and
        # create_graph=True keeps the penalty differentiable with respect to
        # the network parameters so that it actually influences training.
        (grad,) = torch.autograd.grad(
            out, x, grad_outputs=torch.ones_like(out), create_graph=True
        )

        if v.numel() == grad.numel():
            v = v.reshape(grad.shape)

        # cosine_similarity normalizes both vectors per sample and guards
        # against division by zero for vanishing gradients.
        cosine = nn.functional.cosine_similarity(grad, v, dim=-1)
        penalty = cosine.pow(2).mean()

        return bce_loss + penalty
    
class CustomLossLinear(nn.Module):
    """BCE loss plus a linear penalty on gradient/direction misalignment.

    The penalty is the mean over samples of ``|cos(theta)|``, where ``theta``
    is the angle between the gradient of the network output with respect to
    the input and the direction ``v``.

    NOTE(review): this is a repeated definition of ``CustomLossLinear`` in the
    same file and it rebinds the name. Presumably it was meant to be a
    different penalty variant (the notes mention cubic, logarithmic and
    exponential penalties) — confirm with the author and rename.
    """

    def __init__(self):
        # Zero-argument super(): the original referenced an undefined
        # ``CustomLoss`` name, so the class could not be instantiated.
        super().__init__()
        self.bce_loss = nn.BCELoss()

    def forward(self, net, x, y, v):
        """Return ``BCE(net(x), y)`` plus the linear alignment penalty.

        Args:
            net: Callable mapping ``x`` to predictions accepted by
                ``nn.BCELoss``.
            x: Input tensor; its last dimension is treated as the coordinate
                axis of each sample.
            y: Targets with the same shape as ``net(x)``.
            v: Direction tensor with as many elements as ``x``, or one that
                broadcasts against the input gradient.

        Returns:
            Scalar tensor: BCE loss plus the mean absolute cosine between the
            input gradient and ``v``.
        """
        # The input gradient is only defined if x takes part in autograd; use
        # a detached view so the caller's tensor is not modified.
        if not x.requires_grad:
            x = x.detach().requires_grad_(True)

        out = net(x)
        bce_loss = self.bce_loss(out, y)

        # autograd.grad (instead of out.backward + x.grad) leaves the .grad
        # fields of x and of the network parameters untouched, and
        # create_graph=True keeps the penalty differentiable with respect to
        # the network parameters so that it actually influences training.
        (grad,) = torch.autograd.grad(
            out, x, grad_outputs=torch.ones_like(out), create_graph=True
        )

        if v.numel() == grad.numel():
            v = v.reshape(grad.shape)

        # cosine_similarity normalizes both vectors per sample and guards
        # against division by zero for vanishing gradients.
        cosine = nn.functional.cosine_similarity(grad, v, dim=-1)
        penalty = cosine.abs().mean()

        return bce_loss + penalty
    
class CustomLossLinear(nn.Module):
    """BCE loss plus a linear penalty on gradient/direction misalignment.

    The penalty is the mean over samples of ``|cos(theta)|``, where ``theta``
    is the angle between the gradient of the network output with respect to
    the input and the direction ``v``.

    NOTE(review): this is a repeated definition of ``CustomLossLinear`` in the
    same file; being the last one, it is the definition the name ends up bound
    to. Presumably it was meant to be a different penalty variant (the notes
    mention cubic, logarithmic and exponential penalties) — confirm with the
    author and rename.
    """

    def __init__(self):
        # Zero-argument super(): the original referenced an undefined
        # ``CustomLoss`` name, so the class could not be instantiated.
        super().__init__()
        self.bce_loss = nn.BCELoss()

    def forward(self, net, x, y, v):
        """Return ``BCE(net(x), y)`` plus the linear alignment penalty.

        Args:
            net: Callable mapping ``x`` to predictions accepted by
                ``nn.BCELoss``.
            x: Input tensor; its last dimension is treated as the coordinate
                axis of each sample.
            y: Targets with the same shape as ``net(x)``.
            v: Direction tensor with as many elements as ``x``, or one that
                broadcasts against the input gradient.

        Returns:
            Scalar tensor: BCE loss plus the mean absolute cosine between the
            input gradient and ``v``.
        """
        # The input gradient is only defined if x takes part in autograd; use
        # a detached view so the caller's tensor is not modified.
        if not x.requires_grad:
            x = x.detach().requires_grad_(True)

        out = net(x)
        bce_loss = self.bce_loss(out, y)

        # autograd.grad (instead of out.backward + x.grad) leaves the .grad
        # fields of x and of the network parameters untouched, and
        # create_graph=True keeps the penalty differentiable with respect to
        # the network parameters so that it actually influences training.
        (grad,) = torch.autograd.grad(
            out, x, grad_outputs=torch.ones_like(out), create_graph=True
        )

        if v.numel() == grad.numel():
            v = v.reshape(grad.shape)

        # cosine_similarity normalizes both vectors per sample and guards
        # against division by zero for vanishing gradients.
        cosine = nn.functional.cosine_similarity(grad, v, dim=-1)
        penalty = cosine.abs().mean()

        return bce_loss + penalty