In [1]:
import torch

In [2]:
# Problem dimensions and training length for the toy regression network.
seed = 0 # Seed for torch's global RNG so the random data below is identical on every run
n = 64 # Training set size
d_in = 1000 # Dimension of data in
h = 100 # Dimension of hidden layer
d_out = 10 # Dimension of data out
epochs = 501 # Number of epochs used to train network

# Seed before any random draw: without it every "Restart & Run All" produces
# different inputs/targets and therefore a different loss trace.
torch.manual_seed(seed)

# Random inputs and targets; neither needs gradients because only the model
# parameters are trained.
x = torch.randn(n, d_in, dtype = torch.float, requires_grad = False) # Input tensor
y = torch.randn(n, d_out, dtype = torch.float, requires_grad = False) # Output tensor

In [6]:
%%time

# Declare the overall structure of the neural network as a sequence of layers
## Each Linear layer holds a weight tensor and a bias tensor
### The bias shifts each output unit by a learned constant
model = torch.nn.Sequential(
            torch.nn.Linear(d_in, h),
            torch.nn.ReLU(),
            torch.nn.Linear(h, d_out),
        )

# Declare the loss function
## MSELoss with "reduction = 'sum'" returns the sum of the squared errors rather than the mean
loss_fn = torch.nn.MSELoss(reduction = 'sum')

# Step size for plain gradient descent (author's note: a higher learning rate
# is usable here thanks to the bias term and better-initialized weights)
learning_rate = 1e-4
for epoch in range(epochs):
    # Forward pass: calling the model on the inputs produces the predictions
    y_predict = model(x)

    # Summed squared error between the predictions and the targets
    loss = loss_fn(y_predict, y)
    if epoch % 50 == 0:
        print("Iteration: %4d - Loss: %0.2e" % (epoch, loss.item()))

    # Reset the gradients of every model parameter; backward() accumulates
    # into .grad, so stale gradients would otherwise leak into this step
    model.zero_grad()

    # Backpropagation: populate .grad on every parameter
    loss.backward()

    # Gradient-descent update. It runs under no_grad so the update itself is
    # not recorded by autograd. The parameter is updated in place directly
    # rather than through `.data`, which would bypass autograd's safety checks.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

Iteration:    0 - Loss: 6.29e+02
Iteration:   50 - Loss: 3.11e+01
Iteration:  100 - Loss: 2.58e+00
Iteration:  150 - Loss: 3.15e-01
Iteration:  200 - Loss: 4.43e-02
Iteration:  250 - Loss: 6.72e-03
Iteration:  300 - Loss: 1.08e-03
Iteration:  350 - Loss: 1.83e-04
Iteration:  400 - Loss: 3.21e-05
Iteration:  450 - Loss: 5.84e-06
Iteration:  500 - Loss: 1.09e-06
Wall time: 586 ms


In [7]:
# Show the model's repr: the ordered list of layers in the Sequential container
model

Sequential(
  (0): Linear(in_features=1000, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)

In [8]:
from torchsummary import summary

# torchsummary creates its dummy input on the device named by its `device`
# argument (default "cuda"). Rather than moving the shared `model` to the GPU
# in place -- which would leave it on a different device than x and y and
# break any later `model(x)` call -- tell summary() where the model already is.
# NOTE: torchsummary only accepts "cuda" or "cpu" for this argument.
model_device = next(model.parameters()).device.type
summary(model, input_size = (1, d_in), device = model_device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 100]         100,100
              ReLU-2               [-1, 1, 100]               0
            Linear-3                [-1, 1, 10]           1,010
Total params: 101,110
Trainable params: 101,110
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.39
Estimated Total Size (MB): 0.39
----------------------------------------------------------------
