In [1]:
import torch.nn as nn
import numpy as np
import torch
import torch.nn.functional as F
from torch import optim

from torch.utils.data import DataLoader, TensorDataset

import gzip
import pickle

In [2]:
# Raw string literal: the Windows path is full of backslashes, and in a normal
# string each "\A", "\M", "\P", "\d", "\m" is an invalid escape sequence that
# Python warns about. The runtime value of the path is unchanged.
path = r"C:\Apps\Masters_SJSU\Pytorch Learning\data\mnist\mnist.pkl.gz"

# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this cell at an archive from a trusted source.
# The pickle unpacks into three pairs; the first two are kept as the training
# and validation (inputs, labels) pairs and the third is discarded.
with gzip.open(path, "rb") as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")

In [3]:
# Turn the four objects loaded in the previous cell into torch tensors.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(loaded) for loaded in (x_train, y_train, x_valid, y_valid)
)

# x_train is two-dimensional: n rows by c columns.
n, c = x_train.shape

# Quick look at the data: contents, shape, and the range of the labels.
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())

# Pair inputs with labels so that they can be indexed and batched together.
train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) tensor([5, 0, 4,  ..., 8, 4, 8])
torch.Size([50000, 784])
tensor(0) tensor(9)


## Data Loader

In [4]:
# Batch size used for training; validation uses twice this value (see get_data).
bs = 28


def get_data(train_ds, valid_ds, bs):
    """Build the training and validation data loaders.

    Args:
        train_ds: dataset to draw training batches from.
        valid_ds: dataset to draw validation batches from.
        bs: training batch size.

    Returns:
        A ``(train_loader, valid_loader)`` tuple. The training loader shuffles
        its data and yields batches of ``bs`` items; the validation loader is
        not shuffled and yields batches of ``2 * bs`` items.
    """
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    valid_loader = DataLoader(valid_ds, batch_size=bs * 2)
    return train_loader, valid_loader

## Model Class

In [5]:
class Classifier_example1(nn.Module):
    """Minimal classifier: a single linear layer from 784 inputs to 10 outputs.

    No activation is applied; ``forward`` returns the linear layer's output
    unchanged.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=784, out_features=10)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        scores = self.layer1(x)
        return scores

## Loss and Fit Functions

In [6]:
def loss_batch(model, loss_func, xb, yb, opt=None):
    """Compute the loss for one batch and, if an optimizer is given, update the model.

    Args:
        model: callable producing predictions from ``xb``.
        loss_func: callable taking ``(predictions, yb)`` and returning a loss tensor.
        xb: batch of inputs.
        yb: batch of targets.
        opt: optional optimizer. When provided, the loss is back-propagated,
            one optimizer step is taken, and the gradients are cleared.
            When ``None`` the batch is only evaluated.

    Returns:
        ``(loss_value, batch_size)``: the loss as a Python number and
        ``len(xb)``.
    """
    predictions = model(xb)
    batch_loss = loss_func(predictions, yb)

    training = opt is not None
    if training:
        batch_loss.backward()
        opt.step()
        # Clear the gradients so they do not accumulate into the next batch.
        opt.zero_grad()

    return batch_loss.item(), len(xb)

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    """Train ``model`` and print the validation loss after every epoch.

    Each epoch runs one optimizer update per batch of ``train_dl`` (via
    ``loss_batch``), then evaluates every batch of ``valid_dl`` without
    gradient tracking and prints ``epoch`` followed by the validation loss.
    The validation loss is the mean of the per-batch losses weighted by
    batch size.

    Args:
        epochs: number of passes over ``train_dl``.
        model: module to train; switched between ``train()`` and ``eval()`` modes.
        loss_func: callable taking ``(predictions, targets)``.
        opt: optimizer used for the training batches.
        train_dl: iterable of ``(xb, yb)`` training batches.
        valid_dl: iterable of ``(xb, yb)`` validation batches.

    Returns:
        None. Results are reported through ``print`` only.
    """
    for epoch in range(epochs):
        # Training pass: passing the optimizer makes loss_batch update the weights.
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        # Validation pass: no optimizer, so the batches are only scored.
        model.eval()
        with torch.no_grad():
            batch_results = [
                loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl
            ]
        losses, nums = zip(*batch_results)

        # Weight each batch loss by its size so a smaller last batch is not over-counted.
        weighted_total = np.sum(np.multiply(losses, nums))
        val_loss = weighted_total / np.sum(nums)

        print(epoch, val_loss)

In [7]:
# Training configuration: number of epochs and the loss to minimise.
epochs = 50
loss_func = F.cross_entropy

# Fresh model, optimised with plain SGD (no momentum) at a small learning rate.
model = Classifier_example1()
opt = optim.SGD(
    model.parameters(),
    lr=0.001,
)

In [8]:
# Build the loaders from the datasets, then train and report validation loss per epoch.
train_dl, valid_dl = get_data(train_ds, valid_ds, bs=bs)
fit(
    epochs=epochs,
    model=model,
    loss_func=loss_func,
    opt=opt,
    train_dl=train_dl,
    valid_dl=valid_dl,
)

0 1.2250220235347749
1 0.8907780708312988
2 0.7387329825401306
3 0.6519607435703277
4 0.5954432370781898
5 0.5554084719657898
6 0.5252411779642105
7 0.5018328042030334
8 0.48295313597917555
9 0.4674892063260078
10 0.45425702884197233
11 0.4429820591211319
12 0.4332440322756767
13 0.4247190747141838
14 0.41709169782996175
15 0.41044636672735213
16 0.40425404329895975
17 0.3988062624037266
18 0.39367695026397703
19 0.38923819496631623
20 0.38502326833605766
21 0.38112844671607016
22 0.3774125487148762
23 0.3741729298532009
24 0.3709679581046104
25 0.3680733942449093
26 0.36535028819441795
27 0.36287801557183264
28 0.3602853139638901
29 0.3580113174676895
30 0.355868393266201
31 0.353768489831686
32 0.3517802198469639
33 0.34992119819521905
34 0.3481344521522522
35 0.3464404126048088
36 0.34484532029628756
37 0.3433127926617861
38 0.3416943694323301
39 0.3402946888655424
40 0.3389296435326338
41 0.3376388915210962
42 0.33641643644273284
43 0.33510592546761037
44 0.33391935338377954
45 0.3

## Using GPU

In [9]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dev

device(type='cuda')

In [10]:
class WrappedDataLoader:
    """Wrap an iterable of batches and apply a function to each batch as it is yielded.

    Attributes are set in ``__init__``:
        dl: the underlying loader (must support ``len()`` and iteration).
        func: callable invoked as ``func(*batch)`` for every batch.
    """

    def __init__(self, dl, func):
        self.dl, self.func = dl, func

    def __len__(self):
        """Return the length of the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Lazily yield ``func(*batch)`` for each batch of the wrapped loader."""
        for batch in self.dl:
            yield self.func(*batch)

def preprocess(x, y):
    """Move one ``(x, y)`` batch to the module-level device ``dev``.

    Note: an earlier variant of this function also reshaped the inputs with
    ``x.view(-1, 1, 28, 28)`` before moving them; that reshape is not applied here.

    Returns:
        ``(x, y)`` after ``.to(dev)`` has been called on each.
    """
    x_on_dev = x.to(dev)
    y_on_dev = y.to(dev)
    return x_on_dev, y_on_dev


# Rebuild both loaders and wrap each one so every batch passes through preprocess.
train_dl, valid_dl = (
    WrappedDataLoader(loader, preprocess)
    for loader in get_data(train_ds, valid_ds, bs)
)

In [11]:
# Move the model's parameters to the selected device.
model.to(dev)

# New optimizer over the model's parameters: same learning rate as before, now with momentum.
opt = optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [12]:
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

0 0.3208117388278246
1 0.3141814857572317
2 0.3089945286005735
3 0.3041380188524723
4 0.3012416485697031
5 0.2969713618338108
6 0.2951829902797937
7 0.29212125577032566
8 0.29039443618655203
9 0.28834391021430494
10 0.2869814682364464
11 0.2850842940688133
12 0.284028061529994
13 0.282780534312129
14 0.2818161723047495
15 0.28095057300031184
16 0.27959725555479525
17 0.27929387919902804
18 0.278230649086833
19 0.2771048096626997
20 0.2758956632733345
21 0.2752242734208703
22 0.27483449535518883
23 0.27461204688847063
24 0.2739299431934953
25 0.2728469856709242
26 0.27301055262982843
27 0.2720826304793358
28 0.27133890598118304
29 0.2711991373896599
30 0.2706900473147631
31 0.2697613130822778
32 0.269638333798945
33 0.26954908851087095
34 0.26912094976753
35 0.26837810919582844
36 0.2683515486612916
37 0.2675577922061086
38 0.26751021286845206
39 0.26783263971954585
40 0.267152568218112
41 0.267114639146626
42 0.2661290294662118
43 0.2665175373136997
44 0.2656302153155208
45 0.265544389