# Libraries

In [1]:
import os
os.chdir('/leonardo_scratch/fast/IscrC_DL4EO/github/water-pinns/src')  # Provide the new path here

In [2]:
from operator import itemgetter
from tqdm import tqdm
import time
from datetime import datetime
import json

import numpy as np
import pandas as pd
import geopandas as gpd
import xarray
import rioxarray
import fiona

#import matplotlib
import matplotlib.pyplot as plt

#from rasterio.enums import Resampling

import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler
from torch.utils.data.sampler import RandomSampler
import torch.nn as nn
from torch.autograd import Variable

import wandb

import torchview
from torchview import draw_graph

from utils.prediction_plot_1d import *

from models.load_models_1d import *
from dataloaders.load_1d_meteo_wtd import ContinuousDataset
from subprocess import Popen

import deepxde

Using backend: pytorch
Other supported backends: tensorflow.compat.v1, tensorflow, jax, paddle.
paddle supports more examples now and is recommended.


In [3]:
prova = torch.tensor([0.,1.,2.], requires_grad=False)
print(prova)
print(prova.requires_grad)
prova.requires_grad_()
print(prova.requires_grad)
prova.requires_grad_(False)
print(prova.requires_grad)

tensor([0., 1., 2.])
False
True
False


# 1) real function

In [7]:
x = torch.tensor([[0.,1.,2.,3.,4.,5.],
                  [0.,1.,2.,3.,4.,6.]], requires_grad=True)
print("X:", x)
print(x.shape)

y = x**2

print("Y:", y)
print(y.shape)

print("\nGradients: \n", torch.autograd.grad(y, x, grad_outputs= torch.ones_like(y)))

X: tensor([[0., 1., 2., 3., 4., 5.],
        [0., 1., 2., 3., 4., 6.]], requires_grad=True)
torch.Size([2, 6])
Y: tensor([[ 0.,  1.,  4.,  9., 16., 25.],
        [ 0.,  1.,  4.,  9., 16., 36.]], grad_fn=<PowBackward0>)
torch.Size([2, 6])

Gradients: 
 (tensor([[ 0.,  2.,  4.,  6.,  8., 10.],
        [ 0.,  2.,  4.,  6.,  8., 12.]]),)


# 2) function of vectors 

torch.autograd is an engine for computing vector-Jacobian product. That is, given any vector v compute: J.T @ v

In [4]:
x = torch.tensor([[1.005,2.7,3.],
                  [1.080,2.5,4.],
                  [0.90,0.45,0.],
                  [-0.90,0.5,0.]], requires_grad=True)
print("X:", x)
print(x.shape)

y = torch.ones(4,4)
y[:, 0] = x[:, 0] + x[:, 1] + x[:, 2] # gradients = 1 + 1 + 1 
y[:, 1] = - x[:, 0] - x[:, 1] - x[:, 2] # gradients = - 1 - 1 - 1
y[:, 2] = 2*x[:, 0] + 2*x[:, 1] + x[:, 2]**2 # gradients = 2 + 2 + 2x
y[:, 3] = - x[:, 0] + x[:, 1] - 3*x[:, 2]

print("Y:", y)
print(y.shape)


# grad_mask = torch.tensor([[[1,0,0,0], # lavora sulle batch - 1 vettore riga per ogni istanza
#                           [1,0,0,0]],
                          
#                           [[0,1,0,0], # itero sul gradiente che voglio estrarre
#                           [0,1,0,0]],
                          
#                           [[0,0,1,0], # itero sul gradiente che voglio estrarre
#                           [0,0,1,0]],
                          
#                           [[0,0,0,1], # itero sul gradiente che voglio estrarre
#                           [0,0,0,1]]])

grad_mask = torch.eye(y.shape[-1])
grad_mask = grad_mask[:,None,:].expand(-1, y.shape[0], -1)

X: tensor([[ 1.0050,  2.7000,  3.0000],
        [ 1.0800,  2.5000,  4.0000],
        [ 0.9000,  0.4500,  0.0000],
        [-0.9000,  0.5000,  0.0000]], requires_grad=True)
torch.Size([4, 3])
Y: tensor([[  6.7050,  -6.7050,  16.4100,  -7.3050],
        [  7.5800,  -7.5800,  23.1600, -10.5800],
        [  1.3500,  -1.3500,   2.7000,  -0.4500],
        [ -0.4000,   0.4000,  -0.8000,   1.4000]], grad_fn=<CopySlices>)
torch.Size([4, 4])


In [4]:
def grad_bk_loop(y, x, x_features = [0,1,2]):
    gradients_bk = []
    for b in range(4):
        b_grad = []
        for i in range(4):
            y[b,i].backward(retain_graph=True)
            b_grad.append(x.grad[b,x_features].clone())
            x.grad.zero_() 
        gradients_bk.append(b_grad)
    
    return gradients_bk

In [10]:
grad_bk_loop(y, x)

[[tensor([1., 1., 1.]),
  tensor([-1., -1., -1.]),
  tensor([2., 2., 6.]),
  tensor([-1.,  1., -3.])],
 [tensor([1., 1., 1.]),
  tensor([-1., -1., -1.]),
  tensor([2., 2., 8.]),
  tensor([-1.,  1., -3.])],
 [tensor([1., 1., 1.]),
  tensor([-1., -1., -1.]),
  tensor([2., 2., 0.]),
  tensor([-1.,  1., -3.])],
 [tensor([1., 1., 1.]),
  tensor([-1., -1., -1.]),
  tensor([2., 2., 0.]),
  tensor([-1.,  1., -3.])]]

In [96]:
print("Gradien backprop loop: ")
for i in range(4):
    print(grad_bk_loop(y, x)[0][i])
    print(grad_bk_loop(y, x)[1][i])
    print(grad_bk_loop(y, x)[2][i])
    print(grad_bk_loop(y, x)[3][i])
    print("\n")

Gradien backprop loop: 
tensor([1., 1., 1.])
tensor([1., 1., 1.])
tensor([1., 1., 1.])
tensor([1., 1., 1.])


tensor([-1., -1., -1.])
tensor([-1., -1., -1.])
tensor([-1., -1., -1.])
tensor([-1., -1., -1.])


tensor([2., 2., 6.])
tensor([2., 2., 8.])
tensor([2., 2., 0.])
tensor([2., 2., 0.])


tensor([-1.,  1., -3.])
tensor([-1.,  1., -3.])
tensor([-1.,  1., -3.])
tensor([-1.,  1., -3.])




In [93]:
gradients = torch.autograd.grad(y, x, grad_outputs= grad_mask, is_grads_batched=True, retain_graph=True)

# Autograd: Compute and return the sum of gradients of outputs with respect to the inputs.
print("\nGradients - Autograd: \n", gradients) # grad_outputs should be a sequence of length matching output containing the “vector” in vector-Jacobian product (v^T@J)
gradients[0].shape


Gradients - Autograd: 
 (tensor([[[ 1.,  1.,  1.],
         [ 1.,  1.,  1.],
         [ 1.,  1.,  1.],
         [ 1.,  1.,  1.]],

        [[-1., -1., -1.],
         [-1., -1., -1.],
         [-1., -1., -1.],
         [-1., -1., -1.]],

        [[ 2.,  2.,  6.],
         [ 2.,  2.,  8.],
         [ 2.,  2.,  0.],
         [ 2.,  2.,  0.]],

        [[-1.,  1., -3.],
         [-1.,  1., -3.],
         [-1.,  1., -3.],
         [-1.,  1., -3.]]]),)


torch.Size([4, 4, 3])

In [29]:
deepxde.config.set_default_autodiff("reverse")
gradients_dxde = deepxde.gradients.gradients.jacobian(y, x, i = 3)
# Autograd: Compute and return the sum of gradients of outputs with respect to the inputs.
print("\nGradients - Autograd: \n", gradients_dxde) # grad_outputs should be a sequence of length matching output containing the “vector” in vector-Jacobian product (v^T@J)
gradients_dxde[0].shape

Set the default automatic differentiation to reverse mode.

Gradients - Autograd: 
 tensor([[-1.,  1., -3.],
        [-1.,  1., -3.],
        [-1.,  1., -3.],
        [-1.,  1., -3.]], grad_fn=<AddBackward0>)


torch.Size([3])

# 3) One dimensional input

In [6]:
x = torch.tensor([[1.005],
                  [1.080],
                  [0.90],
                  [-0.90]], requires_grad=True)
print("X:", x)
print(x.shape)

def y_fun(x):
    y = torch.ones(4,4)
    y[:, 0] = x[:, 0] + x[:, 0] + x[:, 0] # gradients = 1 + 1 + 1 
    y[:, 1] = - x[:, 0] - x[:, 0] - x[:, 0] # gradients = - 1 - 1 - 1
    y[:, 2] = 2*x[:, 0] + 2*x[:, 0] + x[:, 0]**2 # gradients = 2 + 2 + 2x
    y[:, 3] = - x[:, 0] + x[:, 0] - 3*x[:, 0]
    return y

y = y_fun(x)
print("Y:", y)
print(y.shape)


# grad_mask = torch.tensor([[[1,0,0,0], # lavora sulle batch - 1 vettore riga per ogni istanza
#                           [1,0,0,0]],
                          
#                           [[0,1,0,0], # itero sul gradiente che voglio estrarre
#                           [0,1,0,0]],
                          
#                           [[0,0,1,0], # itero sul gradiente che voglio estrarre
#                           [0,0,1,0]],
                          
#                           [[0,0,0,1], # itero sul gradiente che voglio estrarre
#                           [0,0,0,1]]])

grad_mask = torch.eye(y.shape[-1])
grad_mask = grad_mask[:,None,:].expand(-1, y.shape[0], -1)

X: tensor([[ 1.0050],
        [ 1.0800],
        [ 0.9000],
        [-0.9000]], requires_grad=True)
torch.Size([4, 1])
Y: tensor([[ 3.0150, -3.0150,  5.0300, -3.0150],
        [ 3.2400, -3.2400,  5.4864, -3.2400],
        [ 2.7000, -2.7000,  4.4100, -2.7000],
        [-2.7000,  2.7000, -2.7900,  2.7000]], grad_fn=<CopySlices>)
torch.Size([4, 4])


In [7]:
gradients = torch.autograd.grad(y, x, grad_outputs= grad_mask, is_grads_batched=True, retain_graph=True)

# Autograd: Compute and return the sum of gradients of outputs with respect to the inputs.
print("\nGradients - Autograd: \n", gradients) # grad_outputs should be a sequence of length matching output containing the “vector” in vector-Jacobian product (v^T@J)
gradients[0].shape


Gradients - Autograd: 
 (tensor([[[ 3.0000],
         [ 3.0000],
         [ 3.0000],
         [ 3.0000]],

        [[-3.0000],
         [-3.0000],
         [-3.0000],
         [-3.0000]],

        [[ 6.0100],
         [ 6.1600],
         [ 5.8000],
         [ 2.2000]],

        [[-3.0000],
         [-3.0000],
         [-3.0000],
         [-3.0000]]]),)


torch.Size([4, 4, 1])

In [36]:
x.shape

torch.Size([4, 1])

In [63]:
torch.func.jvp(y_fun, (x,), (torch.ones_like(x),))[1]

tensor([[ 3.0000, -3.0000,  6.0100, -3.0000],
        [ 3.0000, -3.0000,  6.1600, -3.0000],
        [ 3.0000, -3.0000,  5.8000, -3.0000],
        [ 3.0000, -3.0000,  2.2000, -3.0000]], grad_fn=<CopySlices>)

In [33]:
deepxde.config.set_default_autodiff("forward")
#gradients_dxde = deepxde.gradients.gradients.jacobian(y_fun(x), x, j = 0)

gradients_dxde = deepxde.gradients.gradients.hessian((y,), x, component=0 , j=0)
# Autograd: Compute and return the sum of gradients of outputs with respect to the inputs.
print("\nGradients - Autograd: \n", gradients_dxde) # grad_outputs should be a sequence of length matching output containing the “vector” in vector-Jacobian product (v^T@J)
gradients_dxde[0].shape

Set the default automatic differentiation to forward mode.


IndexError: tuple index out of range

In [10]:
from torch.utils.benchmark import Timer

#backward_loop = Timer(stmt="grad_bk_loop(y, x)", globals=globals())
autograd = Timer(stmt="torch.autograd.grad(y, x, grad_outputs= grad_mask, is_grads_batched=True, retain_graph=True)",globals=globals())
forward_ad = Timer(stmt="third_res = torch.func.jvp(y_fun, (x,), (torch.ones_like(x),))", globals=globals())
#backward_loop_timing = backward_loop.timeit(50)
autograd_timing = autograd.timeit(50)
forward_ad_timing = forward_ad.timeit(50)

#print(f'Per-sample-grads backward_loop {backward_loop_timing}')
print(f'Per-sample-grads autograd {autograd_timing}')
print(f'Per-sample-grads forward_ad {forward_ad_timing}')

Per-sample-grads autograd <torch.utils.benchmark.utils.common.Measurement object at 0x7fab48643990>
torch.autograd.grad(y, x, grad_outputs= grad_mask, is_grads_batched=True, retain_graph=True)
  629.52 us
  1 measurement, 50 runs , 1 thread
Per-sample-grads forward_ad <torch.utils.benchmark.utils.common.Measurement object at 0x7fab48624f50>
third_res = torch.func.jvp(y_fun, (x,), (torch.ones_like(x),))
  1.81 ms
  1 measurement, 50 runs , 1 thread


# ET Comparison

In [11]:
def get_perf(first, first_descriptor, second, second_descriptor):
    """takes torch.benchmark objects and compares delta of second vs first."""
    second_res = second.times[0]
    first_res = first.times[0]

    gain = (first_res-second_res)/first_res
    if gain < 0: gain *=-1
    final_gain = gain

    print(f"Performance delta: {final_gain:.4f} percent improvement with {first_descriptor} ")

from torch.utils.benchmark import Timer

backward_loop = Timer(stmt="grad_bk_loop(y, x)", globals=globals())
autograd = Timer(stmt="torch.autograd.grad(y, x, grad_outputs= grad_mask, is_grads_batched=True, retain_graph=True)",globals=globals())
backward_loop_timing = backward_loop.timeit(50)
autograd_timing = autograd.timeit(50)

print(f'Per-sample-grads backward_loop {backward_loop_timing}')
print(f'Per-sample-grads autograd {autograd_timing}')

get_perf(autograd_timing, "autograd", backward_loop_timing, "backprop loop")

Per-sample-grads backward_loop <torch.utils.benchmark.utils.common.Measurement object at 0x7f6692b42c90>
grad_bk_loop(y, x)
  5.75 ms
  1 measurement, 50 runs , 1 thread
Per-sample-grads autograd <torch.utils.benchmark.utils.common.Measurement object at 0x7f6692ab00d0>
torch.autograd.grad(y, x, grad_outputs= grad_mask, is_grads_batched=True, retain_graph=True)
  627.74 us
  1 measurement, 50 runs , 1 thread
Performance delta: 8.1520 percent improvement with autograd 
