### Prior steps....

In [1]:
from fastai.vision.all import *
from fastbook import *
from fastai.vision.widgets import *
import seaborn as sns


sns.set()

In [2]:
path = untar_data(URLs.MNIST_SAMPLE)

In [3]:
threes = (path/'train'/'3').ls().sorted()
sevens = (path/'train'/'7').ls().sorted()

In [4]:
seven_tensors = [tensor(Image.open(o)) for o in sevens]
three_tensors = [tensor(Image.open(o)) for o in threes]
len(three_tensors), len(seven_tensors)

(6131, 6265)

In [5]:
stacked_sevens = torch.stack(seven_tensors).float()/255
stacked_threes = torch.stack(three_tensors).float()/255
stacked_threes.shape

torch.Size([6131, 28, 28])

In [6]:
valid_3_tens = torch.stack([tensor(Image.open(o))
                            for o in (path/'valid'/'3').ls()])
valid_3_tens = valid_3_tens.float()/255
valid_7_tens = torch.stack([tensor(Image.open(o))
                            for o in (path/'valid'/'7').ls()])
valid_7_tens = valid_7_tens.float()/255
valid_3_tens.shape,valid_7_tens.shape

(torch.Size([1010, 28, 28]), torch.Size([1028, 28, 28]))

In [7]:
train_x = torch.cat([stacked_threes, stacked_sevens]).view(-1, 28 * 28)

# 1 - is three and 0 - is seven
train_y = tensor([1] * len(threes) + [0] * len(sevens)).unsqueeze(1)

train_x.shape, train_y.shape

(torch.Size([12396, 784]), torch.Size([12396, 1]))

In [8]:
dset = list(zip(train_x, train_y))
x, y = dset[0]
x.shape, y

(torch.Size([784]), tensor([1]))

In [9]:
valid_x = torch.cat([valid_3_tens, valid_7_tens]).view(-1, 28 * 28)
valid_y = tensor([1] * len(valid_3_tens) + [0] * len(valid_7_tens)).unsqueeze(1)
valid_dset = list(zip(valid_x, valid_y))

In [10]:
def init_params(size, std=1.0):
    return (torch.randn(size) * std).requires_grad_()

In [11]:
def linear1(xb):
    return xb @ weights + bias

In [12]:
def mnist_loss(predictions, targets):
    return torch.where(targets == 1, 1 - predictions, predictions).mean()

### Let's train our model

we will see something similar on this code

```
for x, y in dl:
    pred = model(x)
    loss = loss_func(pred, y)
    loss.backward()
    parameters -= parameters.grad * lr
```

Reinitialize our parameters

In [13]:
weights = init_params((28*28,1))
bias = init_params(1)

A DataLoader can be created from a Dataset

In [14]:
dl = DataLoader(dset, batch_size=256)
xb, yb = first(dl)
xb.shape, yb.shape

(torch.Size([256, 784]), torch.Size([256, 1]))

In [15]:
valid_dl = DataLoader(valid_dset, batch_size=256)

Let’s create a mini-batch of size 4 for testing

In [16]:
batch = train_x[:4]
batch.shape

torch.Size([4, 784])

In [17]:
preds = linear1(batch)
preds

tensor([[ -6.2330],
        [-10.6388],
        [-20.8865],
        [-17.8700]], grad_fn=<AddBackward0>)

In [18]:
loss = mnist_loss(preds, train_y[:4])
loss

tensor(14.9071, grad_fn=<MeanBackward0>)

Now we can calculate the gradients

In [19]:
loss.backward()
weights.grad.shape, weights.grad.mean(), bias.grad

(torch.Size([784, 1]), tensor(-0.1511), tensor([-1.]))

Let’s put that all in a function

In [20]:
def calc_grad(xb, yb, model):
    preds = model(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()

In [29]:
calc_grad(batch, train_y[:4], linear1)

weights.grad.mean(), bias.grad

(tensor(-1.5112), tensor([-10.]))