In [4]:
import torch
from torch import Tensor                  
import torch.nn as nn                     

import matplotlib.pyplot as plt

import numpy as np
import time

torch.set_default_tensor_type(torch.DoubleTensor)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from tqdm import tqdm
print(device) 

cuda


In [21]:
class NNR(nn.Module):
    
    def __init__(self,layers):
        super().__init__() 
              
        self.activation = nn.Tanh()
        self.loss_function = nn.MSELoss(reduction ='mean')
        self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)]).to(device)
        
        for i in range(len(layers)-1):
            nn.init.xavier_normal_(self.linears[i].weight.data, gain=1.0)
            nn.init.zeros_(self.linears[i].bias.data)
            
    def forward(self,x):
            if torch.is_tensor(x) !=True:
                x= torch.from_numpy(x).to(device)
            sigma = x.type(torch.DoubleTensor).to(device)
            for i in range(len(layers)-2):
                z = self.linears[i](sigma)
                sigma = self.activation(z)
            sigma = self.linears[-1](sigma)
            return sigma
    
    #Modify the loss function as per the problem
    def loss_func(self, x_train):                     
        g = x_train.clone()
                        
        g.requires_grad = True
        
        u = self.forward(g)
        loss_f = self.loss_function(eqn(u,x_train),torch.tensor(0).type(torch.DoubleTensor).to(device))   
        return loss_f
                                           
    def closure(self,steps,eps=1e-8,lr=1e-2,show=True):
            start = time.time()
            optimizer = torch.optim.Adam(self.parameters(),lr=lr)
            for i in tqdm(range(steps)):
                loss = self.loss_func(x_train)
                self.mse = loss
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                #Learning rate scheduling. It performs better using this even for Adam.
                if i%(steps/4)==0:
                    lr=lr/5
                    optimizer = torch.optim.Adam(self.parameters(),lr=lr)
                    if show==True:
                        with torch.no_grad():
                            print('Iter: ',i,'Loss: ',loss.detach().cpu().numpy(),' lr: ',lr)
                if self.mse<=eps:
                    print('Converged !')
                    break
            print('MSE Loss: ',float(self.mse.detach().cpu()))
            print('total time: ',time.time()-start)

# Roots of Polynomials

In [12]:
def eqn(u,x_train):
    n=2
    pol=torch.pow(u,n)
    for i in range(n):
        pol=pol+x_train[:,[i]]*torch.pow(u,n-i-1)
    return pol

# Quadratic

In [13]:
# Make input data
b = np.linspace(0,10,100)
c = np.linspace(-5,0,100)
B,C = np.meshgrid(b,c)
x_train = torch.from_numpy(np.hstack(( B.flatten()[:,None],C.flatten()[:,None]))).to(device)
x_train.shape

torch.Size([10000, 2])

In [14]:
steps=5000
layers = np.array([2,20,20,20,20,20,20,1])
Root_NN2 = NNR(layers)
Root_NN2.to(device)
Root_NN2.closure(steps=steps,show=True)

  return F.mse_loss(input, target, reduction=self.reduction)
  1%|▎                                       | 32/5000 [00:00<00:31, 156.76it/s]

Iter:  0 Loss:  28.7037512120739  lr:  0.002


 25%|█████████▋                            | 1273/5000 [00:07<00:22, 165.68it/s]

Iter:  1250 Loss:  0.0007803828584463848  lr:  0.0004


 51%|███████████████████▎                  | 2534/5000 [00:15<00:14, 165.84it/s]

Iter:  2500 Loss:  0.00022263017886484235  lr:  8e-05


 76%|████████████████████████████▊         | 3784/5000 [00:22<00:06, 186.20it/s]

Iter:  3750 Loss:  0.0001253972708585716  lr:  1.6000000000000003e-05


100%|██████████████████████████████████████| 5000/5000 [00:29<00:00, 167.84it/s]

MSE Loss:  8.767221268003662e-05
total time:  29.792914628982544





In [16]:
test=np.array([1,-2])[None,:]
print(Root_NN2(test))

tensor([[0.9985]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [15]:
def roots(b,c):
    y1=(-b+np.sqrt(b*b-4*c))/2
    y2=(-b-np.sqrt(b*b-4*c))/2
    return [y1,y2]

In [16]:
b_test = np.linspace(1,6,6)
c_test = np.linspace(-4,-1,4)
B,C = np.meshgrid(b_test,c_test)
x_test = torch.from_numpy(np.hstack(( B.flatten()[:,None],C.flatten()[:,None]))).to(device)

In [27]:
y_test=Root_NN2(x_test).detach().cpu().numpy()
r = []
for i in range(len(x_test)):
    r.append(roots(x_test[i][0].detach().cpu().numpy(),x_test[i][1].detach().cpu().numpy()))
r

[[1.5615528128088303, -2.5615528128088303],
 [1.2360679774997898, -3.23606797749979],
 [1.0, -4.0],
 [0.8284271247461903, -4.82842712474619],
 [0.7015621187164243, -5.701562118716424],
 [0.6055512754639891, -6.60555127546399],
 [1.3027756377319946, -2.302775637731995],
 [1.0, -3.0],
 [0.7912878474779199, -3.79128784747792],
 [0.6457513110645907, -4.645751311064591],
 [0.5413812651491097, -5.541381265149109],
 [0.4641016151377544, -6.464101615137754],
 [1.0, -2.0],
 [0.7320508075688772, -2.732050807568877],
 [0.5615528128088303, -3.5615528128088303],
 [0.4494897427831779, -4.449489742783178],
 [0.3722813232690143, -5.372281323269014],
 [0.3166247903553998, -6.3166247903554],
 [0.6180339887498949, -1.618033988749895],
 [0.41421356237309515, -2.414213562373095],
 [0.30277563773199456, -3.302775637731995],
 [0.2360679774997898, -4.23606797749979],
 [0.19258240356725187, -5.192582403567252],
 [0.16227766016837952, -6.16227766016838]]

In [28]:
y_test

array([[1.56071533],
       [1.23653367],
       [0.99937612],
       [0.8277143 ],
       [0.70147704],
       [0.60648932],
       [1.30040095],
       [1.00081443],
       [0.78999625],
       [0.64562202],
       [0.54239672],
       [0.46404594],
       [1.00314632],
       [0.73346697],
       [0.56053545],
       [0.44942637],
       [0.37165446],
       [0.31601662],
       [0.61711061],
       [0.41211162],
       [0.30429733],
       [0.23738375],
       [0.19328428],
       [0.1626472 ]])

# Qintic

In [7]:
def eqn(u,x_train):
    n=5
    pol=torch.pow(u,n)
    for i in range(n):
        pol=pol+x_train[:,[i]]*torch.pow(u,n-i-1)
    return pol

In [12]:
b = np.linspace(0,1,20)
c = np.linspace(0,1,20)
d = np.linspace(0,1,20)
e = np.linspace(0,1,20)
f = np.linspace(0,1,20)

B,C,D,E,F = np.meshgrid(b,c,d,e,f)
x_train = torch.from_numpy(np.hstack(( B.flatten()[:,None],C.flatten()[:,None],
                                      D.flatten()[:,None],E.flatten()[:,None],F.flatten()[:,None]))).to(device)
x_train.shape

torch.Size([3200000, 5])

In [13]:
steps=5000
layers = np.array([5,20,20,20,20,20,20,1])
Root_NN = NNR(layers)
Root_NN.to(device)
Root_NN.closure(steps=steps,show=True)

  return F.mse_loss(input, target, reduction=self.reduction)
  0%|                                          | 1/5000 [00:00<34:44,  2.40it/s]

Iter:  0 Loss:  0.3832866064595788  lr:  0.002


 25%|█████████▊                             | 1251/5000 [07:44<23:10,  2.70it/s]

Iter:  1250 Loss:  0.0010456006389888508  lr:  0.0004


 50%|███████████████████▌                   | 2501/5000 [15:27<15:26,  2.70it/s]

Iter:  2500 Loss:  0.00040494094281800483  lr:  8e-05


 75%|█████████████████████████████▎         | 3751/5000 [23:13<07:47,  2.67it/s]

Iter:  3750 Loss:  0.00025406713089186773  lr:  1.6000000000000003e-05


100%|███████████████████████████████████████| 5000/5000 [31:00<00:00,  2.69it/s]

MSE Loss:  0.0002025833228648703
total time:  1860.3010640144348





In [14]:
test=np.array([0.5,0.5,0.5,0.5,0.5])[None,:]
print(Root_NN(test))

tensor([[-0.8233]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [32]:
b_test = np.linspace(0.1,0.8,5)
c_test = np.linspace(0.1,0.8,5)
d_test = np.linspace(0.1,0.8,5)
e_test = np.linspace(0.1,0.8,5)
f_test = np.linspace(0.1,0.8,5)

B_test,C_test,D_test,E_test,F_test = np.meshgrid(b_test,c_test,d_test,e_test,f_test)
x_test = torch.from_numpy(np.hstack(( B_test.flatten()[:,None],C_test.flatten()[:,None],
                                      D_test.flatten()[:,None],E_test.flatten()[:,None],
                                      F_test.flatten()[:,None]))).to(device)
x_test.shape

torch.Size([3125, 5])

In [40]:
test = x_test[np.random.randint(3125,size=6),:]
print(test)

tensor([[0.4500, 0.2750, 0.1000, 0.6250, 0.4500],
        [0.2750, 0.1000, 0.6250, 0.4500, 0.2750],
        [0.4500, 0.2750, 0.4500, 0.2750, 0.8000],
        [0.8000, 0.6250, 0.6250, 0.4500, 0.2750],
        [0.4500, 0.4500, 0.2750, 0.6250, 0.6250],
        [0.2750, 0.4500, 0.1000, 0.6250, 0.6250]], device='cuda:0')


In [50]:
for i in range(6):
    print("%1.6f"%Root_NN(test).detach().cpu().numpy()[i])

-0.628680
-0.837760
-1.040247
-0.793006
-0.765022
-0.690475


# Roots of Trancendental Equation:
$$x\sin(x)=b$$

In [7]:
def eqn(u,x_train):
    b=x_train[:,[0]]
    return u*torch.sin(u)-b

In [8]:
# Make input data
x_train = torch.linspace(1,3,10000)[:,None].to(device)
x_train.shape

torch.Size([10000, 1])

In [11]:
steps=10000
layers = np.array([1,50,50,50,50,1])
Root_NN = NNR(layers)
Root_NN.to(device)
Root_NN.closure(steps=steps,show=True,lr=0.1)

  0%|▏                                      | 44/10000 [00:00<00:45, 220.31it/s]

Iter:  0 Loss:  4.330035768668751  lr:  0.02


 25%|█████████▍                           | 2538/10000 [00:10<00:32, 230.43it/s]

Iter:  2500 Loss:  0.012518923408487617  lr:  0.004


 50%|██████████████████▌                  | 5033/10000 [00:21<00:21, 230.06it/s]

Iter:  5000 Loss:  7.499352665872822e-07  lr:  0.0008


 75%|███████████████████████████▊         | 7526/10000 [00:32<00:10, 229.79it/s]

Iter:  7500 Loss:  0.0003698029468645343  lr:  0.00016


100%|████████████████████████████████████| 10000/10000 [00:42<00:00, 232.74it/s]

MSE Loss:  1.1892242775821578e-06
total time:  42.96927881240845





In [12]:
test=np.array([[1.2],[1.8],[2.2],[2.5],[2.8]])[None,:]
print(Root_NN(test))

tensor([[[6.4695],
         [6.5610],
         [6.6217],
         [6.6674],
         [6.7132]]], device='cuda:0', grad_fn=<ViewBackward0>)
