In [348]:
import numpy as np
import torch

In [349]:
# Prefer the GPU when torch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
display(device)

device(type='cuda')

In [350]:
# Reproducible synthetic regression data: a line with slope 3 and intercept 4,
# observed with additive standard-normal noise.
np.random.seed(45)
num_samples = 40

# Draw the inputs first and the noise second so the seeded random stream is
# consumed in a fixed order.
x1 = np.random.uniform(low=-1, high=1, size=num_samples)
eps = np.random.randn(num_samples)

# Noise-free signal and the noisy observations built from it.
f_x = 4 + 3 * x1
y = eps + f_x

In [351]:
# Two trainable parameters, both starting at zero, tracked by autograd.
theta = torch.zeros(2, requires_grad=True)

In [352]:
# Design matrix with a leading column of ones, so theta[0] multiplies the
# constant term and theta[1] multiplies the input.
x_tensor = torch.tensor(x1, dtype=torch.float32).reshape(-1, 1)
X = torch.cat((torch.ones(x_tensor.shape), x_tensor), 1)
# as_tensor accepts both the NumPy array produced by the data cell and the
# tensor left behind by a previous run of this cell, so re-running the cell
# does not trigger torch.tensor's copy-construct warning.
y = torch.as_tensor(y, dtype=torch.float32).reshape(-1, 1)

# backward() accumulates into theta.grad; drop any gradient left over from an
# earlier run so re-executing this cell does not sum gradients.
theta.grad = None

# Mean squared error over all samples and its gradient w.r.t. theta.
y_pred = (X @ theta).reshape(-1, 1)
loss = torch.mean((y_pred - y) ** 2)
loss.backward()

# Keep a copy rather than an alias: theta.grad is modified in place by later
# zero_() / backward() calls, which would silently overwrite this value.
grad = theta.grad.clone()

display(grad)

tensor([-7.4471, -1.0253])

In [353]:
def compute_sgd(x_i, y_i):
    """Return the gradient of the squared error of one sample w.r.t. ``theta``.

    Reads the notebook-level ``theta`` tensor and leaves the newly computed
    gradient in ``theta.grad``.

    Args:
        x_i: Feature row for one sample; combined with ``theta`` as ``x_i @ theta``.
        y_i: Target value for the same sample.

    Returns:
        A copy of ``theta.grad`` taken right after the backward pass, so later
        calls cannot change the returned tensor.
    """
    # Reset by rebinding instead of zero_(): this also works when no backward
    # pass has run yet (theta.grad is None) and it leaves untouched any tensor
    # that still aliases a previously computed gradient.
    theta.grad = None

    y_pred_i = x_i @ theta
    loss_i = torch.mean((y_pred_i - y_i) ** 2)
    loss_i.backward()

    return theta.grad.clone()

# Gradient of the single-sample loss for every row, collected in sample order.
stochastic_grads = []

for i in range(len(y)):
    # Hand compute_sgd standalone copies of the i-th feature row and target.
    x_i, y_i = X[i].detach().clone(), y[i].detach().clone()
    stochastic_grads.append(compute_sgd(x_i, y_i))

# Average the per-sample gradients component-wise.
mean_stochastic_grad = torch.stack(stochastic_grads).mean(dim=0)

display(mean_stochastic_grad)

tensor([-7.4471, -1.0253])