In [None]:
# !pip install -Uqq fastai
# !pip install utils
# !pip install -Uqq fastbook
import fastbook
fastbook.setup_book()
from fastai.vision.all import *
from utils import *

# Render single-channel images in greyscale by default.
matplotlib.rc('image', cmap='Greys')
# NOTE: the display base path is intentionally not configured in this cell.
# `path` is first assigned by the `untar_data(...)` cell further down, so
# referencing it here fails with NameError on a fresh kernel. (The attribute
# fastcore consults is also spelled `Path.BASE_PATH`, not `Path.Base_path`.)

# End sidebar
  - For this initial tutorial we are just going to try to create a model that can classify any image as a 3 or a 7. So let's download a sample of MNIST that contains images of just these digits:

In [None]:
path = untar_data(URLs.MNIST_SAMPLE)
# Display paths relative to the dataset root in later cell outputs. This has
# to happen after `path` exists, and fastcore's Path repr looks up the
# upper-case `BASE_PATH` attribute.
Path.BASE_PATH = path

In [None]:
path

In [None]:
# List the contents of the dataset root (run `doc(path.ls)` for help on `ls`).
path.ls()

In [None]:
(path/'train').ls()

In [None]:
# Collect the training files for each digit in sorted order; the trailing
# expression displays the list of threes.
threes = (path/'train'/'3').ls().sorted()
sevens = (path/'train'/'7').ls().sorted()
threes

In [None]:
im3_path = threes[1]
im3 = Image.open(im3_path)
im3

In [None]:
array(im3)[4:10, 4:10]

In [None]:
# tensor: the PyTorch counterpart of a NumPy array
# tensors can also be computed on the GPU
tensor(im3)[4:10, 4:10]

In [None]:
# Put a crop of the image tensor (rows 4-14, columns 4-21) into a DataFrame and
# render it with a small font and a 'Greys' background gradient over the values.
im3_t = tensor(im3)
df = pd.DataFrame(im3_t[4:15, 4:22])
df.style.set_properties(**{'font-size': '6pt'}).background_gradient('Greys')

# First try: pixel similarity


In [None]:
# Open every training image and convert it to a tensor, one Python list per
# digit; the trailing expression shows how many images each list holds.
three_tensors = [tensor(Image.open(o)) for o in threes]
seven_tensors = [tensor(Image.open(o)) for o in sevens]
len(three_tensors), len(seven_tensors)

In [None]:
show_image(three_tensors[1])

In [None]:
three_tensors[1].shape

In [None]:
type(three_tensors)

In [None]:
# Stack each list of image tensors into a single tensor with a new leading
# axis, cast to float and divide by 255
# (assumes 8-bit pixel values, which would give a 0-1 range -- TODO confirm).
stacked_threes = torch.stack(three_tensors).float()/ 255
stacked_sevens = torch.stack(seven_tensors).float()/ 255
stacked_threes.shape, stacked_threes.ndim

In [None]:
mean3 = stacked_threes.mean(0)
show_image(mean3)

In [None]:
a_3 = stacked_threes[1]
show_image(a_3)

In [None]:
mean7 = stacked_sevens.mean(0)
show_image(mean7)

- Take the mean of the absolute value of differences (absolute value is the function that replaces negative values with positive values). This is called the mean absolute difference or L1 norm.

- Take the mean of the square of differences (which makes everything positive) and then take the square root (which undoes the squaring). This is called the root mean squared error (RMSE) or L2 norm.


In [None]:
# Distance of the sample 3 from the mean 3:
#   *_abs -> mean absolute difference
#   *_sqr -> root of the mean squared difference (RMSE)
dist_3_abs = (a_3 - mean3).abs().mean()
dist_3_sqr = ((a_3 - mean3) **2).mean().sqrt()
dist_3_abs, dist_3_sqr

In [None]:
# The same two distances, now between the sample 3 and the mean 7, for
# comparison with the distances to the mean 3.
dist_7_abs = (a_3 - mean7).abs().mean()
dist_7_sqr = ((a_3 - mean7)** 2).mean().sqrt()
dist_7_abs, dist_7_sqr

In [None]:
# PyTorch already provides both of these as loss functions. You'll find them
# inside torch.nn.functional, which the PyTorch team recommends importing as F
# (and which is available by default under that name in fastai).
# The square root is applied to the mse_loss result to obtain the RMSE.
F.l1_loss(a_3.float(), mean7), F.mse_loss(a_3, mean7).sqrt()

# NumPy Arrays and PyTorch Tensors

In [None]:
data = [[1,2,3], [4,5,6]]
arr = array(data)
tns = tensor(data)

In [None]:
arr # numpy

In [None]:
tns # pytorch

# Computing metrics using broadcasting

In [None]:
# Load every validation image of each digit, stack the images into one tensor
# per digit, then cast to float and divide by 255 in the same expression.
valid_3_tens = torch.stack([tensor(Image.open(img)) for img in (path/'valid'/'3').ls()]).float() / 255
valid_7_tens = torch.stack([tensor(Image.open(img)) for img in (path/'valid'/'7').ls()]).float() / 255
valid_3_tens.shape, valid_7_tens.shape

In [None]:
def mnist_distance(a, b):
    """Mean absolute difference between `a` and `b` over the last two axes.

    Only the trailing two axes are averaged, so any leading axes of the
    (broadcast) difference are kept in the result.
    """
    abs_diff = (a - b).abs()
    return abs_diff.mean((-1, -2))
mnist_distance(a_3, mean3)

In [None]:
valid_3_dist = mnist_distance(valid_3_tens, mean3)
valid_3_dist, valid_3_dist.shape

In [None]:
(valid_3_tens-mean3).shape

In [None]:
def is_3(x):
    """Return True where `x` is closer to `mean3` than to `mean7`.

    Closeness is measured with `mnist_distance`; the comparison is strict.
    """
    dist_to_3 = mnist_distance(x, mean3)
    dist_to_7 = mnist_distance(x, mean7)
    return dist_to_3 < dist_to_7

In [None]:
is_3(a_3), is_3(a_3).float()

In [None]:
is_3(valid_3_tens)

In [None]:
# Fraction of validation 3s that is_3 flags as a 3, and fraction of validation
# 7s that it does NOT flag as a 3 (hence the `1 - ...`).
accuracy_3s =      is_3(valid_3_tens).float() .mean()
accuracy_7s = (1 - is_3(valid_7_tens).float()).mean()

# Per-digit accuracies followed by their unweighted average.
accuracy_3s,accuracy_7s,(accuracy_3s+accuracy_7s)/2

# Stochastic Gradient Descent (SGD)


In [None]:
def f(x):
    """Return `x` squared."""
    squared = x ** 2
    return squared

In [None]:

#id gradient_descent
#caption The gradient descent process
#alt Graph showing the steps for Gradient Descent
gv('''
init->predict->loss->gradient->step->stop
step->predict[label=repeat]
''')

In [None]:
xt = tensor(3.).requires_grad_()

In [None]:
yt = f(xt)
yt

In [None]:
yt.backward()

In [None]:
xt.grad

# Stepping with a learning rate
  - Update rule: `w -= gradient(w) * lr`, where `lr` is the learning rate.



## An end-to-end SGD example

In [None]:
time = torch.arange(0, 20).float()
time

In [None]:
# Synthetic measurements: a parabola centred on t = 9.5 plus uniform noise in
# [0, 3). The noise is drawn from a dedicated, seeded generator so that the
# data (and every loss value derived from it) is identical after a kernel
# restart, without altering the global RNG state used elsewhere.
speed = torch.rand(20, generator=torch.Generator().manual_seed(42)) * 3 + 0.75 * (time - 9.5) ** 2 + 1
# Trailing semicolon suppresses the PathCollection repr in the cell output.
plt.scatter(time, speed);

In [None]:
def f(t, params):
    """Evaluate the quadratic a*t**2 + b*t + c, where `params` unpacks to (a, b, c).

    NOTE: this redefines the one-argument `f(x)` from the earlier gradient
    example; once this cell has run, that earlier cell's callers get this
    two-argument version.
    """
    quad, lin, const = params
    return quad * (t ** 2) + (lin * t) + const

In [None]:
def mse(preds, targets):
    """Mean squared error: the mean of the squared differences `preds - targets`."""
    residual = preds - targets
    return (residual ** 2).mean()

In [None]:
params = torch.randn(3).requires_grad_()

In [None]:
# Keep a copy of the freshly initialised parameters; a later cell rebuilds
# `params` from this copy to restart the optimisation.
orig_params = params.clone()

In [None]:
preds = f(time, params)

In [None]:
def show_preds(preds, ax=None):
    """Scatter the notebook-level `speed` against `time`, with `preds` overlaid in red.

    Draws on `ax` when one is given; otherwise a new figure is created and
    its axes are used.
    """
    if ax is None:
        _, ax = plt.subplots()
    ax.scatter(time, speed)
    ax.scatter(time, to_np(preds), color='red')
    # Fixed y-range (presumably so that successive plots share one scale).
    ax.set_ylim(-300, 100)

In [None]:
show_preds(preds)

In [None]:
loss = mse(preds, speed)
loss

In [None]:
loss.backward()
params.grad

In [None]:
params.grad * 1e-5

In [None]:
params

In [None]:
lr = 1e-5
# Take one gradient-descent step. The in-place update runs under no_grad() so
# autograd does not record it; this replaces writing through `.data`, which
# bypasses autograd's bookkeeping for in-place changes.
with torch.no_grad():
    params -= lr * params.grad
# Drop the gradient so the next backward() starts from scratch instead of
# accumulating onto this one.
params.grad = None

In [None]:
preds = f(time,params)
mse(preds, speed)

In [None]:
show_preds(preds)

In [None]:

def apply_step(params, prn=True):
    """Run one gradient-descent step on `params` and return the predictions.

    The predictions come from `f(time, params)` and the loss from
    `mse(preds, speed)`; `time`, `speed` and the learning rate `lr` are read
    from the notebook namespace. `params` is updated in place.

    Args:
        params: tensor with `requires_grad` set, holding the coefficients
            passed to `f`.
        prn: when true, print the loss computed before the update.

    Returns:
        The predictions made with the parameters as they were before the step.
    """
    preds = f(time, params)
    loss = mse(preds, speed)
    loss.backward()
    # Update under no_grad() so autograd does not track the step; this
    # replaces the `.data` write-through, which bypasses autograd's
    # bookkeeping for in-place changes.
    with torch.no_grad():
        params -= lr * params.grad
    # Reset so the next backward() does not accumulate onto this gradient.
    params.grad = None
    if prn: print(loss.item())
    return preds

In [None]:
for i in range(10): apply_step(params)

In [None]:
# Restart from the saved initial parameters. clone() is required: detach()
# returns a tensor that shares storage with `orig_params`, so without the copy
# the in-place updates made by apply_step() would overwrite the saved values
# and re-running this cell would no longer reset anything.
params = orig_params.detach().clone().requires_grad_()

In [None]:
# Four side-by-side panels: each one applies a further gradient-descent step
# (without printing the loss) and plots the predictions returned by that step.
_,axs = plt.subplots(1,4,figsize=(12,3))
for ax in axs: show_preds(apply_step(params, False), ax)
plt.tight_layout()

# MNIST loss function