<a href="https://colab.research.google.com/github/DavoodSZ1993/Dive_into_Deep_Learning/blob/main/12_5_minibatch_stochastic_gradient_descent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install d2l==1.0.0-alpha1.post0 --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.0/93.0 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.2/121.2 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.9/84.9 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m68.6 MB/s[0m eta [36m0:00:00[0m
[?25h

## 12.5 Minibatch Stochastic Gradient Descent

### 12.5.1 Vectorization and Caches

In [2]:
%matplotlib inline
import time
import numpy as np
import torch
from torch import nn
from d2l import torch as d2l

In [3]:
# Square 256 x 256 operands for the matrix-product experiments:
# A starts as all zeros, B and C are filled with random normal samples.
A = torch.zeros((256, 256))
B = torch.randn((256, 256))
C = torch.randn((256, 256))

In [4]:
class Timer:
  """Record the durations of repeated start()/stop() measurements."""

  def __init__(self):
    # One entry, in seconds, per call to stop().
    self.times = []
    self.start()

  def start(self):
    """Set the reference point that the next stop() measures from."""
    # perf_counter is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() can jump when the system clock changes.
    self.tik = time.perf_counter()

  def stop(self):
    """Append the seconds elapsed since the last start() to `times`.

    Returns the value that was just appended. The reference point is left
    unchanged, so a second stop() measures from the same start().
    """
    self.times.append(time.perf_counter() - self.tik)
    return self.times[-1]

  def avg(self):
    """Return the mean of the recorded times.

    Raises ZeroDivisionError if nothing has been recorded yet.
    """
    return sum(self.times) / len(self.times)

  def sum(self):
    """Return the total of the recorded times."""
    # Inside the method body `sum` resolves to the builtin, not this method:
    # class attributes are not part of a function's name lookup scope.
    return sum(self.times)

  def cumsum(self):
    """Return the running totals of the recorded times as a plain list."""
    return np.array(self.times).cumsum().tolist()

# Shared instance used by the timing cells that follow; it starts running
# immediately because __init__ calls start().
timer = Timer()

In [5]:
# Compute A = BC one element at a time
# Every entry A[i, j] is produced by its own torch.dot call on row i of B and
# column j of C, so the Python-level loop body runs 256 * 256 times.
timer.start()
for i in range(256):
  for j in range(256):
    A[i, j] = torch.dot(B[i, :], C[:, j])
# stop() appends the elapsed seconds to timer.times and returns them; as the
# last expression of the cell the value is displayed as its output.
timer.stop()

1.3486497402191162

In [6]:
# Compute A = BC one column at a time
# Each iteration fills a whole column of A with one matrix-vector product,
# so only 256 Python-level iterations are needed.
timer.start()
for j in range(256):
  A[:, j] = torch.mv(B, C[:, j])
# stop() appends the elapsed seconds to timer.times and returns them; as the
# last expression of the cell the value is displayed as its output.
timer.stop()

0.015790462493896484

In [7]:
# Compute A = BC in one go
timer.start()
A = torch.mm(B, C)
timer.stop()

# Turn every recorded duration into a throughput figure.
# NOTE(review): 0.03 is presumably the operation count of one 256 x 256
# product in billions (2 * 256**3 is about 0.034e9), rounded down -- confirm.
# The indices below assume timer.times holds, in order, the element-wise,
# column-wise and full-product laps.
gigaflops = [0.03 / elapsed for elapsed in timer.times]
print(f'performance in Gigaflops: element {gigaflops[0]: .3f}, '
      f'column {gigaflops[1]: .3f}, full {gigaflops[2]: .3f}')

performance in Gigaflops: element  0.022, column  1.900, full  1.722


### 12.5.2 Minibatches

In [8]:
# Compute A = BC one block of 64 columns at a time
timer.start()
for j in range(0, 256, 64):
  A[:, j:j+64] = torch.mm(B, C[:, j:j+64])
timer.stop()
# Report the lap that was just recorded. Reading the last entry instead of a
# fixed position keeps the figure tied to this cell's own measurement even
# when timer.times has a different length, e.g. after re-running a cell.
print(f'performance in Gigaflops: block {0.03 / timer.times[-1]: .3f}')

performance in Gigaflops: block  7.647


### 12.5.3 Reading the Dataset

In [9]:
# Register the airfoil file under the key 'airfoil' so d2l.download can
# find it. The entry is a (url, hash) pair; the second element is presumably
# the file's SHA-1 checksum -- TODO confirm against d2l.download.
d2l.DATA_HUB['airfoil'] = (
    d2l.DATA_URL + 'airfoil_self_noise.dat',
    '76e5be1548fd8222e5074cf0faae75edff8cf93f',
)

def get_data_ch11(batch_size=10, n=1500):
  """Load the 'airfoil' file and wrap its first `n` rows in a data iterator.

  The tab-separated file is parsed as float32 and every column is
  standardized (column mean subtracted, divided by column standard
  deviation) using statistics of the whole file, not just the first `n` rows.

  Args:
    batch_size: batch size forwarded to d2l.load_array.
    n: number of leading rows to keep.

  Returns:
    A pair (data_iter, num_inputs): the iterator built by d2l.load_array
    from (all columns but the last, the last column), and the column count
    minus one.
  """
  raw = np.genfromtxt(d2l.download('airfoil'),
                      dtype=np.float32, delimiter='\t')
  # Standardize column-wise before converting to a tensor.
  col_mean = raw.mean(axis=0)
  col_std = raw.std(axis=0)
  data = torch.from_numpy((raw - col_mean) / col_std)
  # The last column is split off from the remaining ones.
  inputs = data[:n, :-1]
  targets = data[:n, -1]
  data_iter = d2l.load_array((inputs, targets), batch_size, is_train=True)
  num_inputs = data.shape[1] - 1
  return data_iter, num_inputs

### 12.5.4 Implementation from Scratch