In [58]:
import numpy as np
import torch 
from d2l import torch as d2l
import math
import matplotlib.pyplot as plt
from tqdm import tqdm

In [3]:
# Operands for the matrix-multiplication timing cells below:
# A receives the product, B and C are uniformly random inputs.
A = torch.zeros(256, 256)
B = torch.rand(256, 256)
C = torch.rand(256, 256)

In [9]:
%%time
# Element-wise product: every entry A[i, j] is computed by its own
# dot product of row i of B with column j of C (256 * 256 separate calls).
for i in range(256):
    for j in range(256):
        A[i, j] = B[i].dot(C[:, j])

CPU times: user 788 ms, sys: 26.8 ms, total: 815 ms
Wall time: 815 ms


In [12]:
%%time
# Column-wise product: one matrix-vector multiply per column of C.
# The loop variable must be the same index used in the body; iterating
# over an unrelated name would keep rewriting a single stale column.
for j in range(256):
    A[:, j] = torch.mv(B, C[:, j])

CPU times: user 54.4 ms, sys: 3.28 ms, total: 57.7 ms
Wall time: 13.7 ms


In [39]:
%%time
# Whole product in a single matrix-matrix multiply.
A = B.mm(C)

CPU times: user 4.43 ms, sys: 565 µs, total: 4.99 ms
Wall time: 1.08 ms


In [28]:
%%time
# Blocked product: multiply B by 64 columns of C at a time.
# j is the first column of the current block, so it has to be the loop
# variable; otherwise only one (stale) block of A is ever written.
for j in range(0, 256, 64):
    A[:, j:j + 64] = torch.mm(B, C[:, j:j + 64])

CPU times: user 6.99 ms, sys: 1.06 ms, total: 8.06 ms
Wall time: 1.69 ms


In [40]:
# Register the dataset under the key 'airfoil' so that
# d2l.download('airfoil') can resolve it. The entry is a pair of
# (download URL, hex digest string); the digest is presumably the file's
# SHA-1 used for integrity checking -- TODO confirm against d2l.download.
#@save
d2l.DATA_HUB['airfoil'] = (d2l.DATA_URL + 'airfoil_self_noise.dat',
                           '76e5be1548fd8222e5074cf0faae75edff8cf93f')

#@save
def get_data_ch11(batch_size=10, n=1500):
    """Load the 'airfoil' dataset and wrap its first ``n`` rows in an iterator.

    The tab-delimited file is read as float32 and every column is
    standardized (mean subtracted, divided by the standard deviation) using
    statistics computed over all rows, not only the first ``n``. The last
    column is used as the label, all preceding columns as features.

    Args:
        batch_size: Minibatch size handed to ``d2l.load_array``.
        n: Number of leading rows to keep.

    Returns:
        A ``(data_iter, feature_dim)`` pair, where ``feature_dim`` is the
        number of feature columns (total columns minus the label column).
    """
    raw = np.genfromtxt(d2l.download('airfoil'), dtype=np.float32,
                        delimiter='\t')
    # Column-wise standardization on the NumPy side, then hand over to torch.
    standardized = torch.from_numpy((raw - raw.mean(axis=0)) / raw.std(axis=0))
    features = standardized[:n, :-1]
    labels = standardized[:n, -1]
    loader = d2l.load_array((features, labels), batch_size, is_train=True)
    feature_dim = standardized.shape[1] - 1
    return loader, feature_dim

In [41]:
def sgd(params, states, hyperparams):
    """Apply one plain SGD step in place and reset the gradients.

    Args:
        params: Iterable of tensors to update; each is modified in place
            as ``p -= lr * p.grad``.
        states: Unused here; accepted so every optimizer step function can
            share the same ``(params, states, hyperparams)`` signature.
        hyperparams: Mapping that must contain the learning rate under 'lr'.

    Parameters whose ``.grad`` is ``None`` (they took no part in the last
    backward pass) are left untouched instead of raising.
    """
    lr = hyperparams['lr']
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for p in params:
            if p.grad is None:
                continue
            p.sub_(lr * p.grad)
            # Gradients accumulate across backward() calls, so clear them.
            p.grad.zero_()

In [71]:
def train(trainer_fn, states, hyperparams, data_iter, feature_dim, num_epochs=2):
    """Fit a linear-regression model with a pluggable optimizer step.

    Args:
        trainer_fn: Callable invoked as ``trainer_fn([w, b], states,
            hyperparams)`` after every backward pass to update the
            parameters.
        states: Optimizer state, passed through to ``trainer_fn`` unchanged.
        hyperparams: Optimizer hyperparameters, passed through to
            ``trainer_fn`` unchanged.
        data_iter: Iterable of ``(X, y)`` minibatches that supports ``len()``.
        feature_dim: Number of input features (rows of the weight vector).
        num_epochs: Number of passes over ``data_iter``.

    After every optimizer step the loss over the whole of ``data_iter`` is
    evaluated; the value printed per epoch is the mean of those evaluations.
    The average wall time per epoch is printed at the end. Returns ``None``.
    """
    # Small random weights and a zero bias, both tracked by autograd.
    w = torch.normal(mean=0.0, std=0.01, size=(feature_dim, 1), requires_grad=True)
    b = torch.zeros((1), requires_grad=True)

    net, loss = lambda X: d2l.linreg(X, w, b), d2l.squared_loss

    timer = d2l.Timer()

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        timer.start()
        for X, y in tqdm(data_iter):
            l = loss(net(X), y).mean()

            l.backward()
            trainer_fn([w, b], states, hyperparams)

            # Evaluation is never back-propagated through, so skip graph
            # construction for this full pass over the data.
            with torch.no_grad():
                epoch_loss += d2l.evaluate_loss(net, data_iter, loss)
        # Stop the clock before printing so console I/O is not timed.
        timer.stop()
        print(f'Epoch {epoch}, Loss: {epoch_loss/len(data_iter):.2f}')
    print(f'Epoch time average {timer.avg():.2f} s/epoch')

In [67]:
def train_sgd(lr, batch_size, num_epochs=2):
    """Train on the data from ``get_data_ch11`` using the ``sgd`` step.

    Args:
        lr: Learning rate, forwarded to ``sgd`` as ``hyperparams['lr']``.
        batch_size: Minibatch size used to build the data iterator.
        num_epochs: Number of epochs to train for.
    """
    data_iter, feature_dim = get_data_ch11(batch_size)
    # Plain SGD keeps no optimizer state, hence states=None.
    hyperparams = {"lr": lr}
    train(sgd, None, hyperparams, data_iter, feature_dim, num_epochs)

In [72]:
# Batch size 1500 equals the number of rows kept by get_data_ch11,
# so each epoch is a single update.
train_sgd(lr=1, batch_size=1500, num_epochs=10)

100%|██████████| 1/1 [00:00<00:00, 28.41it/s]
100%|██████████| 1/1 [00:00<00:00, 26.96it/s]
100%|██████████| 1/1 [00:00<00:00, 24.50it/s]
100%|██████████| 1/1 [00:00<00:00, 23.34it/s]
100%|██████████| 1/1 [00:00<00:00, 29.91it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 0, Loss: 0.27
Epoch 1, Loss: 0.25
Epoch 2, Loss: 0.25
Epoch 3, Loss: 0.24
Epoch 4, Loss: 0.24


100%|██████████| 1/1 [00:00<00:00,  5.74it/s]
100%|██████████| 1/1 [00:00<00:00, 26.35it/s]
100%|██████████| 1/1 [00:00<00:00, 32.90it/s]
100%|██████████| 1/1 [00:00<00:00, 25.82it/s]
100%|██████████| 1/1 [00:00<00:00, 34.80it/s]

Epoch 5, Loss: 0.24
Epoch 6, Loss: 0.24
Epoch 7, Loss: 0.24
Epoch 8, Loss: 0.24
Epoch 9, Loss: 0.24
Epoch time average 0.05 s/epoch





In [73]:
# Minibatch SGD: 100 examples per update.
train_sgd(lr=0.4, batch_size=100, num_epochs=2)

100%|██████████| 15/15 [00:00<00:00, 69.17it/s]
100%|██████████| 15/15 [00:00<00:00, 79.57it/s]

Epoch 0, Loss: 0.27
Epoch 1, Loss: 0.25
Epoch time average 0.20 s/epoch





In [None]:
# Really slow: with batch_size=1 every epoch performs one update per example,
# and train() re-evaluates the loss over the whole dataset after each update.
train_sgd(lr=0.4, batch_size=1, num_epochs=2)