<a href="https://colab.research.google.com/github/Hlompho-Dash/first_try_of_fastai/blob/master/03_minibatch_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

from google.colab import drive
drive.mount('/content/gdrive')

os.chdir('/content/gdrive/MyDrive/first_try_of_fastai')

print("------------------------------------------------------------------")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
------------------------------------------------------------------


In [2]:
#export
os.chdir('/content/gdrive/MyDrive/first_try_of_fastai/exp')
from nb_02 import *
import torch.nn.functional as F
os.chdir('/content/gdrive/MyDrive/first_try_of_fastai')

**Data**

In [3]:
mpl.rcParams["image.cmap"] = "gray"

In [4]:
x_train,y_train,x_valid,y_valid = get_data()

In [5]:
n,m = x_train.shape
c = y_train.max() + 1
nh = 50

In [6]:
class Model(nn.Module):
  def __init__(self, n_in, nh, n_out):
    super().__init__()
    self.layers = [nn.Linear(n_in, nh), nn.ReLU(), nn.Linear(nh, n_out)]
  def __call__(self, x):
    for l in self.layers: x = l(x)
    return x

In [7]:
model = Model(m,nh,10)

In [8]:
pred = model(x_train)

**Cross Entropy Loss**

In [9]:
def log_softmax(x): return (x.exp()/(x.exp().sum(-1,keepdim=True))).log()

In [10]:
sm_pred = log_softmax(pred)
sm_pred

tensor([[-2.4403, -2.2687, -2.4579,  ..., -2.2915, -2.2691, -2.1631],
        [-2.5275, -2.1769, -2.5339,  ..., -2.2442, -2.2007, -2.1862],
        [-2.4590, -2.1608, -2.5786,  ..., -2.2376, -2.3488, -2.1445],
        ...,
        [-2.3291, -2.2014, -2.5414,  ..., -2.2543, -2.3528, -2.2668],
        [-2.3701, -2.2436, -2.4680,  ..., -2.1812, -2.4084, -2.1773],
        [-2.3718, -2.2003, -2.4877,  ..., -2.2174, -2.2975, -2.2244]],
       grad_fn=<LogBackward>)

In [11]:
y_train[:3]

tensor([5, 0, 4])

In [12]:
sm_pred[[0,1,2], [5,0,4]]

tensor([-2.3291, -2.5275, -2.4069], grad_fn=<IndexBackward>)

In [13]:
x_train.shape[0]

50000

In [14]:
def nll(input, target): return -input[range(target.shape[0]), target].mean()

In [15]:
loss = nll(sm_pred, y_train)

In [16]:
loss

tensor(2.3154, grad_fn=<NegBackward>)

In [17]:
def log_softmax(x): return x - x.exp().sum(-1,keepdim=True).log()

In [18]:
test_near(nll(log_softmax(pred), y_train), loss)

In [19]:
def logsumexp(x):
  m = x.max(-1)[0]
  return m + (x-m[:,None]).exp().sum(-1).log()

In [20]:
test_near(logsumexp(pred), pred.logsumexp(-1))

In [21]:
def log_softmax(x): return x - x.logsumexp(-1, keepdim = True)

In [22]:
test_near(nll(log_softmax(pred),y_train), loss)

In [23]:
test_near(F.nll_loss(F.log_softmax(pred,-1), y_train), loss)

In [24]:
test_near(F.cross_entropy(pred, y_train), loss)

BASIC TRAINING LOOP

In [25]:
loss_func = F.cross_entropy

In [26]:
#export

def accuracy(out, yb): return (torch.argmax(out, dim = 1) == yb).float().mean()

In [27]:
bs = 64
xb = x_train[0:bs]
preds = model(xb)
preds[0]

tensor([-0.1215,  0.0501, -0.1391,  0.0534, -0.0880, -0.0103,  0.1386,  0.0273,
         0.0497,  0.1557], grad_fn=<SelectBackward>)

In [28]:
yb = y_train[0:bs]
loss_func(preds, yb)

tensor(2.3150, grad_fn=<NllLossBackward>)

In [29]:
accuracy(preds, yb)

tensor(0.1719)

In [30]:
lr = 0.5
epochs = 1

In [31]:
for epoch in range(epochs):
  for i in range((n-1)//bs + 1):
    start_i = i*bs
    end_i = start_i*bs

    xb = x_train[start_i:end_i]
    yb = y_train[start_i:end_i]

    loss = loss_func(model(xb), yb)

    loss.backward()
    with torch.no_grad():
      for l in model.layers:
        if hasattr(l, "weight"):
          l.weight -= l.weight.grad * lr
          l.bias -= l.bias.grad * lr

          l.weight.grad.zero_()
          l.bias.grad.zero_()

In [32]:
loss_func(model(xb), yb), accuracy(model(xb), yb)

(tensor(0.0064, grad_fn=<NllLossBackward>), tensor(1.))

**UNSING PARAMEMTERS AND OPTIM**

Parameters

In [68]:
class Model(nn.Module):
  def __init__(self, n_in, nh, n_out):
    super().__init__()
    self.l1 = nn.Linear(n_in, nh)
    self.l2 = nn.Linear(nh, n_out)
    self.Relu = nn.ReLU()

  def __call__(self, x): return self.l2( self.Relu( self.l1(x) ) )

In [69]:
model = Model(m, nh, 10)

In [70]:
for name,l in model.named_children(): print(f"{name}: {l}")

l1: Linear(in_features=784, out_features=50, bias=True)
l2: Linear(in_features=50, out_features=10, bias=True)
Relu: ReLU()


In [71]:
model

Model(
  (l1): Linear(in_features=784, out_features=50, bias=True)
  (l2): Linear(in_features=50, out_features=10, bias=True)
  (Relu): ReLU()
)

In [72]:
model.l1

Linear(in_features=784, out_features=50, bias=True)

In [73]:
def fit():
  for epoch in range(epochs):
    for i in range((n-1)//bs + 1):
      start_i = i*bs
      end_i  = start_i+bs

      xb = x_train[start_i:end_i]
      yb = y_train[start_i:end_i]

      loss = loss_func(model(xb), yb)

      loss.backward()
      with torch.no_grad():
        for p in model.parameters(): p -= p.grad * lr
        model.zero_grad()

In [74]:
#model??

In [75]:
fit()
loss_func(model(xb), yb), accuracy(model(xb), yb)

(tensor(0.3764, grad_fn=<NllLossBackward>), tensor(0.8750))