In [1]:
import autograd.numpy as np
from autograd import grad, elementwise_grad

In [2]:
g = lambda x, w: 5 * x + 7 * w
dgdx = elementwise_grad(g, 1)

dgdx(1.0, 1.0)

7.0

In [3]:
g = lambda x: 5 * x[0][0] + 7 * x[1][1]
dgdx = elementwise_grad(g, 0)

dgdx(np.array([[1.0, 15.0], [10.0, 2.0]]))

array([[5., 0.],
       [0., 7.]])

In [8]:
g = lambda x: 5 * x[0][0] + 7 * x[1][1]
dgdx = elementwise_grad(g, 0)

dgdx((np.array([1.0, 15.0]), np.array([10.0, 2.0])))

(array([5., 0.]), array([0., 7.]))

In [3]:
g(2, 1)

17

In [4]:
f = lambda w, x: 3 * x + 6 * w + g(x, w)
f(2, 1)

34

In [5]:
dfdx = elementwise_grad(f, 1)

In [6]:
dfdx(1.0, 1.0)

8.0

In [10]:
import torch
from torch.autograd import grad

In [11]:
x = torch.tensor([2.0], requires_grad=True)
w = torch.tensor([6.0], requires_grad=True)
g = 5 * x - 7 * w

In [12]:
dgdx = grad(outputs=g, inputs=x)
print(dgdx)

(tensor([5.]),)


In [13]:
f = lambda x, w: 10 * x + 5 * w

dfdx = grad(outputs=f, inputs=x)
print(dfdx)

TypeError: 'function' object is not iterable

In [45]:
import torch.nn as nn

# Create some dummy data.
x = torch.Tensor([[3], [3]])
x.requires_grad = True
gt = torch.ones_like(x) * 16 - 0.5  # "ground-truths"

# We will use MSELoss as an example.
loss_fn = nn.MSELoss()

# Do some computations.
v = x + 2
y = v**2

# Compute loss.
loss = loss_fn(y, gt)

print(f"Loss: {loss}")

# Now compute gradients:
d_loss_dx = grad(outputs=loss, inputs=x, retain_graph=True)
print(f"dloss/dx:\n {d_loss_dx}")

Loss: 90.25
dloss/dx:
 (tensor([[95.],
        [95.]]),)


In [46]:
lossx = loss * x
print(lossx)
external = torch.ones_like(x)
d_lossx_dx = grad(outputs=lossx, inputs=x, grad_outputs=external, retain_graph=True)
print(f"dlossx/dx:\n {d_lossx_dx}")

tensor([[270.7500],
        [270.7500]], grad_fn=<MulBackward0>)
dlossx/dx:
 (tensor([[660.2500],
        [660.2500]]),)


array([[[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]]])

hello


In [55]:
import pytorch_lightning as L
from torch.utils.data import DataLoader, Dataset


class SimpleDataset(Dataset):
    def __init__(self):
        X = np.arange(10000)
        y = X * 2
        X = [[_] for _ in X]
        y = [[_] for _ in y]
        self.X = torch.Tensor(X)
        self.y = torch.Tensor(y)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return {"X": self.X[idx], "y": self.y[idx]}


class MyModel(L.LightningModule):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(1, 1)
        self.criterion = nn.MSELoss()

    def forward(self, inputs_id, labels=None):
        outputs = self.fc(inputs_id)
        loss = 0
        if labels is not None:
            loss = self.criterion(outputs, labels)
        return loss, outputs

    def train_dataloader(self):
        dataset = SimpleDataset()
        return DataLoader(dataset, batch_size=1000)

    def training_step(self, batch, batch_idx):
        input_ids = batch["X"]
        labels = batch["y"]
        loss, outputs = self(input_ids, labels)
        self.log("train_loss", loss, prog_bar=True, on_step=True, on_epoch=True)
        return {"loss": loss}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters())
        return optimizer


if __name__ == "__main__":
    model = MyModel()
    trainer = L.Trainer(max_epochs=100000)
    trainer.fit(model)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type    | Params
--------------------------------------
0 | fc        | Linear  | 2     
1 | criterion | MSELoss | 0     
--------------------------------------
2         Trainable params
0         Non-trainable params
2         Total params
0.000     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

c:\Users\zheyu\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\trainer\call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

In [58]:
X = torch.Tensor([[1.0], [51.0], [89.0]]).to("cuda")
_, y = model(X)
print(y)

tensor([[  4.7657],
        [104.7448],
        [180.7290]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [59]:
for param in model.parameters():
    print(param.data)
# weight and then bias

tensor([[1.9996]], device='cuda:0')
tensor([2.7661], device='cuda:0')


In [62]:
print(model.fc.weight)

Parameter containing:
tensor([[1.9996]], device='cuda:0', requires_grad=True)


In [81]:
x = torch.Tensor([5]).to("cuda")
x.requires_grad = True

F = x * model(x)[1]
print(x)
print(model(x)[1])
print(F)

tensor([5.], device='cuda:0', requires_grad=True)
tensor([12.7640], device='cuda:0', grad_fn=<ViewBackward0>)
tensor([63.8200], device='cuda:0', grad_fn=<MulBackward0>)


In [82]:
df_dx = grad(outputs=F, inputs=x, retain_graph=True)
print(df_dx)

(tensor([22.7619], device='cuda:0'),)


In [16]:
input = np.array([[1, 2, 3], [2, 4, 6], [3, 6, 9], [4, 8, 12]])

print(np.size(input))
print(np.shape(input))

12
(4, 3)
