In [1]:
import torch
from torch import Tensor                  
import torch.nn as nn                     

import matplotlib.pyplot as plt

import numpy as np
import time

# Create every new floating-point tensor and parameter as float64.
# torch.set_default_tensor_type is deprecated; set_default_dtype is the
# supported way to change the default floating-point precision.
torch.set_default_dtype(torch.float64)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from tqdm import tqdm
# Show which device was selected so the hardware used for the run is visible in the output.
print(device) 

cuda


In [2]:
class SNLE(nn.Module):
    """Fully connected tanh network trained to drive the residuals of ``eqn`` to zero.

    Each input row holds the parameters of the equation system and the network
    output is a candidate root. Training minimises the mean squared residual
    returned by the notebook-level ``eqn`` function, so no labelled data is used.

    Relies on these notebook-level names at run time: ``device`` (construction),
    ``eqn`` (``loss_func``), ``tqdm`` and, as a fallback only, ``x_train``
    (``closure``).

    Args:
        layers: Sequence of layer widths, input width first, output width last.
    """

    def __init__(self, layers):
        super().__init__()

        self.activation = nn.Tanh()
        self.loss_function = nn.MSELoss(reduction='mean')
        # One Linear per consecutive pair of widths, placed on the notebook's device.
        self.linears = nn.ModuleList(
            [nn.Linear(int(n_in), int(n_out)) for n_in, n_out in zip(layers[:-1], layers[1:])]
        ).to(device)

        # Xavier-normal weights and zero biases for every layer.
        for linear in self.linears:
            nn.init.xavier_normal_(linear.weight, gain=1.0)
            nn.init.zeros_(linear.bias)

    def forward(self, x):
        """Evaluate the network.

        Args:
            x: Tensor or array-like whose last dimension matches the first layer width.

        Returns:
            Tensor with the last dimension equal to the final layer width.
        """
        if not torch.is_tensor(x):
            x = torch.as_tensor(x)
        # Follow the model's own parameters instead of notebook globals, and cast
        # in a single step: `.type(torch.DoubleTensor)` would copy a CUDA tensor
        # to the CPU and back on every call.
        reference = self.linears[0].weight
        sigma = x.to(device=reference.device, dtype=reference.dtype)
        # The depth comes from the layers this instance owns, not a global `layers`.
        for linear in self.linears[:-1]:
            sigma = self.activation(linear(sigma))
        return self.linears[-1](sigma)

    #Modify the loss function as per the problem
    def loss_func(self, x_train):
        """Return the summed mean squared residuals of both equations.

        Args:
            x_train: Tensor of equation parameters, one row per sample.

        Returns:
            Scalar tensor: MSE of the first residual plus MSE of the second.
        """
        # No derivative of the output with respect to the inputs enters these
        # residuals, so the inputs are used as-is (no clone / requires_grad).
        u = self.forward(x_train)
        eqn1, eqn2 = eqn(u, x_train)
        # Same-shaped zero targets avoid the MSELoss size-mismatch broadcast warning.
        loss_f1 = self.loss_function(eqn1, torch.zeros_like(eqn1))
        loss_f2 = self.loss_function(eqn2, torch.zeros_like(eqn2))
        return loss_f1 + loss_f2

    def closure(self, steps, eps=1e-8, lr=1e-2, show=True, data=None):
        """Train with Adam on the full data set for at most ``steps`` iterations.

        Args:
            steps: Maximum number of optimisation steps.
            eps: Training stops early once the loss is at or below this value.
            lr: Initial learning rate; divided by 5 whenever ``i % (steps / 4) == 0``.
            show: When true, print the loss and learning rate at each decay.
            data: Training inputs. Defaults to the notebook-level ``x_train``.

        Side effects:
            Stores the latest (detached) loss in ``self.mse`` and prints progress.
        """
        if data is None:
            data = x_train  # notebook-level training set
        start = time.time()
        self.mse = None
        # Optimise this instance's parameters rather than a global model variable.
        optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        for i in tqdm(range(steps)):
            loss = self.loss_func(data)
            self.mse = loss.detach()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #Learning rate scheduling. It performs better using this even for Adam.
            # NOTE: the decay also fires at i == 0, and rebuilding the optimizer
            # resets Adam's moment estimates; both are kept as originally written.
            if i % (steps / 4) == 0:
                lr = lr / 5
                optimizer = torch.optim.Adam(self.parameters(), lr=lr)
                if show:
                    print('Iter: ', i, 'Loss: ', self.mse.cpu().numpy(), ' lr: ', lr)
            if self.mse <= eps:
                print('Converged !')
                break
        # With steps == 0 no loss was ever computed, so there is nothing to report.
        if self.mse is not None:
            print('MSE Loss: ', float(self.mse.cpu()))
        print('total time: ', time.time() - start)

# Equations :
$$\begin{aligned}x^3 + a y^2 + b &= 0\\ y^3 + c x + d &= 0\end{aligned}$$ <br>We aim to find the roots $(x, y)$ as functions of the parameters $a, b, c, d$.

In [3]:
def eqn(u, x_train):
    """Residuals of the two equations x^3 + a*y^2 + b and y^3 + c*x + d.

    Args:
        u: Candidate roots; column 0 is x and column 1 is y.
        x_train: Equation parameters; columns 0..3 are a, b, c, d.

    Returns:
        Tuple of two single-column residuals, one per equation.
    """
    # Width-one slices keep every operand as a column, so the products stay row-wise.
    root_x, root_y = u[:, 0:1], u[:, 1:2]
    coef_a, coef_b = x_train[:, 0:1], x_train[:, 1:2]
    coef_c, coef_d = x_train[:, 2:3], x_train[:, 3:4]

    first_residual = root_x**3 + coef_a * root_y**2 + coef_b
    second_residual = root_y**3 + coef_c * root_x + coef_d
    return first_residual, second_residual

In [4]:
# Each of the four parameters is sampled on the same 50-point grid over [-1, 1].
a = np.linspace(-1, 1, 50)
b = np.linspace(-1, 1, 50)
c = np.linspace(-1, 1, 50)
d = np.linspace(-1, 1, 50)

# Full Cartesian grid; every row of x_train is one (a, b, c, d) combination.
A, B, C, D = np.meshgrid(a, b, c, d)
x_train = torch.from_numpy(
    np.column_stack([A.ravel(), B.ravel(), C.ravel(), D.ravel()])
).to(device)
x_train.shape

torch.Size([6250000, 4])

In [5]:
# Network shape: 4 parameters in, four hidden layers of 50 units, 2 root components out.
layers = np.array([4, 50, 50, 50, 50, 2])
steps = 10000

# Build the model on the selected device and train it on the full grid.
inn = SNLE(layers).to(device)
inn.closure(steps, lr=0.1, show=True)

  return F.mse_loss(input, target, reduction=self.reduction)
  0%|                                       | 1/10000 [00:00<2:18:02,  1.21it/s]

Iter:  0 Loss:  0.6410734004533946  lr:  0.02


 25%|█████████                           | 2501/10000 [27:30<1:22:29,  1.52it/s]

Iter:  2500 Loss:  0.48756017135449436  lr:  0.004


 50%|███████████████████                   | 5001/10000 [54:59<55:06,  1.51it/s]

Iter:  5000 Loss:  0.001651383027345114  lr:  0.0008


 75%|███████████████████████████         | 7501/10000 [1:22:32<27:19,  1.52it/s]

Iter:  7500 Loss:  0.0009994071354628578  lr:  0.00016


100%|███████████████████████████████████| 10000/10000 [1:50:02<00:00,  1.51it/s]

MSE Loss:  0.0007202511308959526
total time:  6602.348175048828





In [6]:
# Four (a, b, c, d) parameter sets, wrapped in a leading axis of length one.
test = np.array([
    [0.5, 0.2, 0.6, 0.9],
    [0.4, -0.6, 0.2, -0.8],
    [0.2, 0.8, 0.5, -0.6],
    [-0.5, 0.3, 0.8, -0.2],
])[np.newaxis, :]
print(inn(test))  # roots (x, y)

tensor([[[-0.7830, -0.7431],
         [ 0.6653,  0.8684],
         [-1.0024,  1.0326],
         [-0.2743,  0.7591]]], device='cuda:0', grad_fn=<ViewBackward0>)
