In [1]:
import torch
import numpy
# Three basic constructors: a 0-d scalar tensor, a 1-d tensor built from a
# Python list, and a 3x2 tensor filled with zeros.
a = torch.tensor(3)
b = torch.tensor([1, 2, 3])
c = torch.zeros(3, 2)

In [2]:
# Display the 3x2 zero tensor created in the first cell.
c

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [4]:
# The shape may also be given as one sequence instead of separate integers;
# the result matches torch.zeros(3, 2).
d = torch.zeros([3, 2])
d

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [5]:
# 3x3 tensor of random samples drawn uniformly from [0, 1).
torch.rand(3,3)

tensor([[0.0942, 0.8990, 0.5628],
        [0.5245, 0.9958, 0.4340],
        [0.3388, 0.5685, 0.0998]])

In [6]:
# Matrix product of a (3, 5) and a (5, 4) standard-normal matrix; the
# result has shape (3, 4).
x = torch.randn(3, 5)
y = torch.randn(5, 4)
z = torch.matmul(x, y)
z

tensor([[ 4.6238,  2.3873, -0.6328, -0.3363],
        [ 4.6013,  4.3367, -0.3081,  1.9393],
        [-0.0841, -1.3968,  1.0518,  0.8191]])

In [7]:
# Convert the tensor z to a NumPy array (the output below keeps dtype float32).
z.numpy()

array([[ 4.623833  ,  2.3873324 , -0.63276184, -0.33627203],
       [ 4.601251  ,  4.3367076 , -0.30809346,  1.9392987 ],
       [-0.08412287, -1.3968433 ,  1.051817  ,  0.81908137]],
      dtype=float32)

In [9]:
# Autograd
# x is a scalar leaf tensor that records the operations applied to it.
x = torch.tensor(1.0, requires_grad=True)


def u(x):
    """Return the square of x."""
    return x * x


def g(u):
    """Return the negation of u."""
    return -u


# g(u(x)) = -x^2, so the derivative is -2x, i.e. -2 at x = 1.
# grad() returns a tuple with one gradient per requested input.
dgdx = torch.autograd.grad(outputs=g(u(x)), inputs=x)
dgdx

(tensor(-2.),)

In [20]:
# Curve fitting
# w holds the three coefficients of the quadratic fitted below. Create the
# leaf tensor directly with requires_grad=True: wrapping an existing tensor
# in torch.tensor(...) makes a redundant copy and emits a UserWarning.
w = torch.randn(3, 1, requires_grad=True)
# Adam optimises w in place with a learning rate of 0.1.
opt = torch.optim.Adam([w], lr=0.1)

def model(x):
    """Evaluate the quadratic w[0]*x^2 + w[1]*x + w[2] at every entry of x.

    Reads the module-level coefficient tensor ``w`` (shape (3, 1)). The
    features are stacked along dim 1, so a 1-D ``x`` of length N gives an
    (N, 3) feature matrix and a 1-D result of length N.
    """
    features = torch.stack([x * x, x, torch.ones_like(x)], dim=1)
    # (N, 3) @ (3, 1) -> (N, 1); drop the trailing singleton dimension.
    return (features @ w).squeeze(1)

def compute_loss(y, yhat):
    """Return the mean squared error between the estimate and the truth.

    Args:
        y: tensor of true values.
        yhat: tensor of estimates for ``y``.

    Returns:
        The mean of the squared differences, as computed by
        ``torch.nn.functional.mse_loss``.
    """
    return torch.nn.functional.mse_loss(yhat, y)

def generate_data():
    """Sample a fresh training batch from the true function y = 5*x^2 + 3.

    Returns:
        A pair ``(x, y)`` of 1-D tensors with 100 entries each, where ``x``
        is torch.rand output rescaled from [0, 1) to [-10, 10).
    """
    inputs = torch.rand(100) * 20 - 10
    targets = 5 * inputs * inputs + 3
    return inputs, targets

def train_step():
    """Run one optimisation step on a freshly generated batch.

    Uses the module-level ``opt``, ``generate_data``, ``model`` and
    ``compute_loss``; updates the optimised parameters in place and returns
    nothing.
    """
    # Clear gradients left over from the previous step before accumulating
    # new ones in backward().
    opt.zero_grad()
    x, y = generate_data()
    loss = compute_loss(y, model(x))
    loss.backward()
    opt.step()

# Run 1000 optimisation steps; each call to train_step() draws a new batch.
for _ in range(1000):
    train_step()

# Print the fitted coefficients. The data follow y = 5*x^2 + 3, so the
# values should approach [5, 0, 3].
print(w.detach().numpy())



  


[[ 4.9960060e+00]
 [-5.6581717e-04]
 [ 3.2330198e+00]]


Organizing models with torch.nn.Module

In [22]:
import torch

class Net(torch.nn.Module):
  """Scalar linear model ``yhat = a * x + b`` with learnable ``a`` and ``b``."""

  def __init__(self):
    super().__init__()
    # Wrapping the tensors in Parameter registers them with the module, so
    # they are returned by parameters(). Each starts as one torch.rand draw.
    self.a = torch.nn.Parameter(torch.rand(1))
    self.b = torch.nn.Parameter(torch.rand(1))

  def forward(self, x):
    """Apply the model to every entry of x (a and b broadcast over x)."""
    return self.a * x + self.b


# Smoke test: run a freshly initialised Net on the inputs 0..99, then list
# its (still untrained) parameters.
x = torch.arange(100, dtype=torch.float32)

net = Net()
y = net(x)

for param in net.parameters():
    print(param)
    
# Training data: the line y = 5x + 3 sampled at x = 0.00, 0.01, ..., 0.99,
# with noise drawn from torch.rand (uniform on [0, 1)) scaled by 0.3.
x = torch.arange(100, dtype=torch.float32) / 100
y = 5 * x + 3 + torch.rand(100) * 0.3

# As in the curve-fitting example, choose a loss function and an optimizer,
# then repeatedly step the parameters of the model.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

for _ in range(10000):
  net.zero_grad()  # clear the gradients accumulated by the previous step
  yhat = net(x)
  loss = criterion(yhat, y)
  loss.backward()
  optimizer.step()

# a should be close to 5. b ends slightly above 3 (about 3.15) because the
# added noise has mean 0.15 rather than zero.
print(net.a, net.b)

Parameter containing:
tensor([0.9651], requires_grad=True)
Parameter containing:
tensor([0.4609], requires_grad=True)
Parameter containing:
tensor([4.9945], requires_grad=True) Parameter containing:
tensor([3.1604], requires_grad=True)


In [None]:
class Net(torch.nn.Module):
  """Linear model on scalars, backed by a single ``torch.nn.Linear(1, 1)``."""

  def __init__(self):
    super().__init__()
    self.linear = torch.nn.Linear(1, 1)

  def forward(self, x):
    """Return one prediction per entry of x (presumably a 1-D batch of scalars)."""
    # Give every scalar a feature axis of size 1 at dim 1 for the Linear
    # layer, then remove that axis from the result.
    column = x.unsqueeze(1)
    return self.linear(column).squeeze(1)

# Note that we used squeeze and unsqueeze since torch.nn.Linear
# operates on a batch of vectors as opposed to scalars.

In [None]:
# Small feed-forward stack built with Sequential: 64 -> 32 -> ReLU -> 10.
# NOTE: this rebinds the name `model`, which the curve-fitting cell earlier
# in this notebook uses for a function.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=64, out_features=32),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=32, out_features=10),
)

In [None]:
# Numerical stability: avoiding floating-point overflow in softmax
import torch

def softmax(logits):
    """Numerically stable softmax taken over all elements of ``logits``.

    The maximum logit is subtracted before exponentiating, so every exponent
    is <= 0 and ``torch.exp`` cannot overflow. The shift cancels in the
    ratio, so the result is mathematically unchanged.

    Args:
        logits: tensor of unnormalised scores.

    Returns:
        A tensor of the same shape whose entries sum to 1.
    """
    # PyTorch has no `reduce_max` (that is the TensorFlow name); torch.max
    # called without a dim argument returns the maximum over all elements.
    exp = torch.exp(logits - torch.max(logits))
    return exp / torch.sum(exp)


# A logit of 1000 would overflow a naive exp(); the shifted version
# prints [1. 0.].
print(softmax(torch.tensor([1000., 0.])).numpy())