In [5]:
# Data Generation
import numpy as np

# Fix the global seed so the synthetic dataset is identical on every run
np.random.seed(42)
# Features are uniform on [0, 1); targets follow y = 1 + 2x plus Gaussian noise (std 0.1)
x = np.random.rand(100, 1)
y = 1 + 2 * x + 0.1 * np.random.randn(100, 1)

# Build a random ordering of the 100 row indices
idx = np.arange(100)
np.random.shuffle(idx)

# The first 80 shuffled indices select the training rows,
# the remaining 20 select the validation rows
train_idx, val_idx = np.split(idx, [80])

# Pull features and targets for each subset out of the full arrays
x_train, x_val = x[train_idx], x[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

In [6]:
# Random starting point for the intercept "a" and the slope "b";
# re-seeding makes the initial values reproducible
np.random.seed(42)
a = np.random.randn(1)
b = np.random.randn(1)

# Show the parameters before any training
print(a, b)

# Hyper-parameters: gradient-descent step size and number of passes over the training set
lr = 1e-1
n_epochs = 1000

for epoch in range(n_epochs):
    # Forward pass: the linear model's prediction for every training point
    yhat = a + b * x_train

    # Residuals between targets and predictions
    error = y_train - yhat
    # Mean squared error, the usual loss for a regression problem
    loss = np.mean(error ** 2)

    # Partial derivatives of the MSE with respect to "a" and "b"
    a_grad = -2 * np.mean(error)
    b_grad = -2 * np.mean(x_train * error)

    # Step each parameter against its gradient, scaled by the learning rate
    a = a - lr * a_grad
    b = b - lr * b_grad

# Show the parameters after training
print(a, b)

# Sanity check: an ordinary least-squares fit on the same training data
# should report an intercept and slope matching the gradient-descent result
from sklearn.linear_model import LinearRegression
linr = LinearRegression().fit(x_train, y_train)
print(linr.intercept_, linr.coef_[0])

[0.49671415] [-0.1382643]
[1.02354094] [1.96896411]
[1.02354075] [1.96896447]


In [7]:
import torch
import torch.optim as optim
import torch.nn as nn
from torchviz import make_dot

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# The training data lives in Numpy arrays: wrap each one as a tensor,
# cast it to float and move it to the chosen device
x_train_tensor, y_train_tensor = (
    torch.from_numpy(array).float().to(device) for array in (x_train, y_train)
)

# Compare the two ways of inspecting an object: type() only names the Python class,
# while .type() is more informative because it also reveals WHERE the tensor lives (device)
print(type(x_train), type(x_train_tensor), x_train_tensor.type())

<class 'numpy.ndarray'> <class 'torch.Tensor'> torch.FloatTensor


tensor([[0.7713],
        [0.0636],
        [0.8631],
        [0.0254],
        [0.7320],
        [0.0740],
        [0.1987],
        [0.3110],
        [0.4722],
        [0.9696],
        [0.1220],
        [0.7751],
        [0.8022],
        [0.7296],
        [0.0977],
        [0.1849],
        [0.1560],
        [0.0206],
        [0.9869],
        [0.6233],
        [0.7081],
        [0.5979],
        [0.9219],
        [0.6376],
        [0.2809],
        [0.2588],
        [0.1196],
        [0.7290],
        [0.9489],
        [0.6075],
        [0.5613],
        [0.4938],
        [0.1818],
        [0.2713],
        [0.9699],
        [0.2123],
        [0.1834],
        [0.8662],
        [0.3745],
        [0.2912],
        [0.8084],
        [0.0581],
        [0.8324],
        [0.5427],
        [0.7722],
        [0.8872],
        [0.0885],
        [0.0452],
        [0.5924],
        [0.6842],
        [0.7132],
        [0.0344],
        [0.6011],
        [0.8155],
        [0.4402],
        [0