Setup:

In [1]:
%pip install -Uqq fastai
from fastai.vision.all import *

Note: you may need to restart the kernel to use updated packages.


Import MNIST sample that contains samples of all numbers from 0 to 9:

In [2]:
path = untar_data(URLs.MNIST)
Path.BASE_PATH = path
(path/'testing').ls()

(#10) [Path('testing/9'),Path('testing/0'),Path('testing/7'),Path('testing/6'),Path('testing/1'),Path('testing/8'),Path('testing/4'),Path('testing/3'),Path('testing/2'),Path('testing/5')]

Create dictionary `digits` and `digits_training` of all digits in MNIST sample:

In [3]:
import os
dir = os.listdir(path/'training')
print((path/'testing'/'9').ls().sorted())

[Path('testing/9/1000.png'), Path('testing/9/1005.png'), Path('testing/9/1013.png'), Path('testing/9/104.png'), Path('testing/9/1045.png'), Path('testing/9/1048.png'), Path('testing/9/105.png'), Path('testing/9/1058.png'), Path('testing/9/1063.png'), Path('testing/9/108.png'), Path('testing/9/1081.png'), Path('testing/9/1086.png'), Path('testing/9/1088.png'), Path('testing/9/1090.png'), Path('testing/9/1103.png'), Path('testing/9/1105.png'), Path('testing/9/1107.png'), Path('testing/9/113.png'), Path('testing/9/1130.png'), Path('testing/9/1152.png'), Path('testing/9/1165.png'), Path('testing/9/118.png'), Path('testing/9/1183.png'), Path('testing/9/1192.png'), Path('testing/9/12.png'), Path('testing/9/1217.png'), Path('testing/9/1228.png'), Path('testing/9/1232.png'), Path('testing/9/1247.png'), Path('testing/9/125.png'), Path('testing/9/1255.png'), Path('testing/9/1277.png'), Path('testing/9/1282.png'), Path('testing/9/1304.png'), Path('testing/9/1308.png'), Path('testing/9/1309.png'),

In [4]:
import os

dir = os.listdir(path/'training')
dir_valid = os.listdir(path/'testing')

digits = {}
digits['larger_group'] = []
digits['smaller_group'] = []
for digit in dir:
    if digit in ['9', '4', '7', '1']:
        digits['smaller_group'] += (path/'testing'/digit).ls().sorted()
    else:
        digits['larger_group'] += (path/'testing'/digit).ls().sorted()

    
digits_valid = {}
digits_valid['larger_group'] = []
digits_valid['smaller_group'] = []
for digit in dir_valid:
    if digit in ['9', '4', '7', '1']:
        digits_valid['smaller_group'] += (path/'testing'/digit).ls().sorted()
    else:
        digits_valid['larger_group'] += (path/'testing'/digit).ls().sorted()

**Organize all training data:**

Transform data in `digits` elements into tuples containing stacked tensors and the amount of images:

In [5]:
for name, data in digits.items():
    digit_tensor = [tensor(Image.open(o)) for o in data]
    stacked = torch.stack(digit_tensor).float()/255
    digits[name] = (stacked, len(data))

Create training set `train_x` containing each digit in the set:

In [6]:
train_x = torch.cat([i[0] for i in digits.values()]).view(-1, 28*28)

Create training set `train_y` containing the amount of each digit in the set:

In [7]:
train_y = tensor([0]*digits['smaller_group'][1] + [1]*digits['larger_group'][1]).unsqueeze(1)
train_x.shape,train_y.shape

(torch.Size([10000, 784]), torch.Size([10000, 1]))

Create dataset of `train_x` and `train_y`:

In [8]:
dset = list(zip(train_x,train_y))

Create a `DataLoader` from `Dataset`:

In [9]:
dl = DataLoader(dset, batch_size=1024)
xb,yb = first(dl)
xb.shape,yb.shape

(torch.Size([1024, 784]), torch.Size([1024, 1]))

**Organize all validation data:**

Transform data in `digits_valid` elements into tuples containing stacked tensors and the amount of images:

In [10]:
for digit, data in digits_valid.items():
    digit_tensor = [tensor(Image.open(o)) for o in data]
    stacked_digit = torch.stack(digit_tensor).float()/255
    digits_valid[digit] = (stacked_digit, len(data))

Create training set `valid_x` containing each digit in the set:

In [11]:
valid_x = torch.cat([i[0] for i in digits_valid.values()]).view(-1, 28*28)

Create training set `valid_y` containing the amount of each digit in the set:

In [12]:
valid_y = tensor([0]*digits['smaller_group'][1] + [1]*digits['larger_group'][1]).unsqueeze(1)
valid_x.shape,valid_y.shape

(torch.Size([10000, 784]), torch.Size([10000, 1]))

Create dataset of `valid_x` and `valid_y`:

In [13]:
valid_dset = list(zip(valid_x,valid_y))

Create a `DataLoader` from `Dataset`:

In [14]:
valid_dl = DataLoader(valid_dset, batch_size=1024)

**Create dataloader of both training and validation data:**

In [15]:
dls = DataLoaders(dl, valid_dl)

**Training the model:**

Create model to be trained:

In [16]:
from learning_functions import NEURAL_NET_STRUCTURE, mnist_loss, batch_accuracy

learn = Learner(dls, NEURAL_NET_STRUCTURE, opt_func=SGD, loss_func=mnist_loss, metrics=batch_accuracy)

Train model:

In [17]:
learn.fit(100, 1)
learn.fit(100, 5)
learn.fit(100, 10)
learn.fit(100, 25)

epoch,train_loss,valid_loss,batch_accuracy,time
0,0.610282,0.478883,0.6366,00:00
1,0.587465,0.422071,0.7273,00:00
2,0.517599,0.329384,0.6763,00:00
3,0.439042,0.298049,0.7034,00:00
4,0.388816,0.258522,0.7457,00:00
5,0.356546,0.246724,0.755,00:00
6,0.331504,0.235915,0.7664,00:00
7,0.310916,0.224712,0.7758,00:00
8,0.293397,0.213703,0.7889,00:01
9,0.278399,0.205648,0.7977,00:00


epoch,train_loss,valid_loss,batch_accuracy,time
0,0.164024,0.240326,0.7602,00:00
1,0.174817,0.213364,0.7875,00:00
2,0.175923,0.207142,0.7924,00:00
3,0.176645,0.193322,0.8076,00:00
4,0.174174,0.19202,0.8088,00:00
5,0.177087,0.186803,0.8144,00:00
6,0.175661,0.186252,0.8147,00:00
7,0.175073,0.18242,0.8188,00:00
8,0.174288,0.181404,0.8195,00:00
9,0.173881,0.180408,0.821,00:00


epoch,train_loss,valid_loss,batch_accuracy,time
0,0.095569,0.135862,0.8637,00:00
1,0.097534,0.204945,0.7957,00:00
2,0.101203,0.157108,0.8426,00:00
3,0.100045,0.187883,0.8127,00:00
4,0.106131,0.207708,0.7912,00:00
5,0.111062,0.161905,0.8388,00:00
6,0.109807,0.14988,0.8505,00:00
7,0.107847,0.143392,0.8574,00:00
8,0.107131,0.147099,0.8532,00:00
9,0.106408,0.163432,0.8374,00:00


epoch,train_loss,valid_loss,batch_accuracy,time
0,0.046483,0.096489,0.9045,00:00
1,0.071978,0.229341,0.7711,00:00
2,0.091346,0.291177,0.7082,00:00
3,0.122171,0.312707,0.6872,00:00
4,0.130929,0.221244,0.7783,00:00
5,0.126239,0.222153,0.776,00:00
6,0.123888,0.237283,0.7623,00:00
7,0.12673,0.286601,0.7134,00:00
8,0.139783,0.202964,0.798,00:00
9,0.163947,0.307228,0.6928,00:00


Export first model:

In [24]:
learn.export('initial_model.pkl')

AttributeError: 'list' object has no attribute 'new_empty'