In [None]:
import torch, torch.nn as nn, numpy as np
from torch import tensor
from itertools import islice

In [2]:
import torch, torch.nn as nn, pandas as pd, numpy as np, matplotlib.pyplot as plt, matplotlib as mpl
import torch.nn.functional as F, random
from torch import tensor, optim
from functools import reduce
from itertools import islice

# Display configuration: default image colormap is grayscale, and printed tensors
# use 3 decimal places with lines up to 100 characters wide.
mpl.rcParams['image.cmap'] = 'gray'
torch.set_printoptions(precision=3, linewidth=100)

In [2]:
# Load the training CSV; the split cell below reads column 0 as the label and the remaining columns as features.
data = pd.read_csv('data/fashion_mnist/train.csv')

In [3]:
# First 50,000 rows form the training split; all remaining rows form the validation split.
# Features are cast to float32 because nn.Linear holds float32 weights and raises a dtype
# mismatch on integer input. Labels keep the dtype they were loaded with, since
# F.cross_entropy takes integer class indices as targets.
# NOTE(review): features are not rescaled here (presumably raw 0-255 pixel values) — confirm whether scaling is wanted.
X_train = tensor(data.iloc[:50000, 1:].values, dtype=torch.float32)
y_train = tensor(data.iloc[:50000, 0].values)
X_valid = tensor(data.iloc[50000:, 1:].values, dtype=torch.float32)
y_valid = tensor(data.iloc[50000:, 0].values)

In [4]:
# X_train = X_train/255. - 0.5; X_valid = X_valid/255. - 0.5

<br>**Dataset and DataLoader**

In [5]:
class Dataset():
    """Pairs a collection of inputs `x` with a collection of targets `y` under one index."""
    def __init__(self, x, y):
        self.x, self.y = x, y

    def __getitem__(self, idx):
        # The index is forwarded untouched to both collections, so any index
        # type they both accept (int, slice, list of ints, ...) works here.
        inputs, targets = self.x[idx], self.y[idx]
        return inputs, targets

    def __len__(self):
        # Size is taken from the targets only.
        return len(self.y)

In [6]:
# Wrap each split's features and labels so they can be indexed together.
dset_train = Dataset(X_train, y_train)
dset_valid = Dataset(X_valid, y_valid)

In [7]:
# Sanity check: number of items in the training and validation datasets.
len(dset_train), len(dset_valid)

(50000, 10000)

In [8]:
class Sampler():
    """Produces the indices 0..len(dset)-1, optionally in random order.

    Only the dataset's length is stored. When `shuffle` is true, a fresh
    permutation is drawn (via the `random` module) each time iteration starts.
    """
    def __init__(self, dset, shuffle=False):
        self.n = len(dset)
        self.shuffle = shuffle

    def __iter__(self):
        order = [*range(self.n)]
        if self.shuffle:
            random.shuffle(order)
        return iter(order)

class BatchSampler():
    """Groups the indices produced by `sampler` into lists of `bs` items.

    The wrapped sampler must expose its total item count as `.n`. When that
    count is not a multiple of `bs`, one final shorter batch holds the remainder.
    """
    def __init__(self, sampler, bs):
        self.sampler = sampler
        self.bs = bs

    def __iter__(self):
        stream = iter(self.sampler)
        # Number of full-size batches, and how many indices are left over after them.
        n_full, leftover = divmod(self.sampler.n, self.bs)
        for _ in range(n_full):
            yield list(islice(stream, self.bs))
        if leftover:
            # Whatever remains in the stream forms the last, shorter batch.
            yield list(stream)

In [9]:
# Training indices are shuffled and grouped into batches of 64; validation indices are
# kept in order and served as a single batch covering the whole validation dataset.
train_samp = BatchSampler(sampler=Sampler(dset_train, True), bs=64)
valid_samp = BatchSampler(sampler=Sampler(dset_valid, False), bs=len(dset_valid))

In [10]:
class DataLoader():
    """Yields one dataset lookup per batch of indices supplied by `sampler`.

    Each item produced by `sampler` is passed as-is to `dset[...]`, so the
    dataset must accept whatever the sampler emits (e.g. a list of indices).
    """
    def __init__(self, dset, sampler):
        self.dset = dset
        self.sampler = sampler

    def __iter__(self):
        for idxs in self.sampler:
            yield self.dset[idxs]

In [38]:
#dl_train = DataLoader(dset_train, train_samp)
#dl_valid = DataLoader(dset_valid, valid_samp)

In [40]:
# Build the loaders with PyTorch's own DataLoader: shuffled training batches of 256,
# and the validation dataset delivered in order as one batch.
dl_train = torch.utils.data.DataLoader(dset_train, batch_size=256, shuffle=True)
dl_valid = torch.utils.data.DataLoader(dset_valid, batch_size=len(dset_valid), shuffle=False)

<br><br> **Model class**

In [None]:
class MyModule:
    """Container that records every public attribute assigned to it as a child module.

    Attributes whose name starts with an underscore are stored normally but not
    recorded. `parameters()` chains the `parameters()` of each recorded child,
    in assignment order.
    """
    def __init__(self):
        self._modules = {}

    def __setattr__(self, k, v):
        is_private = k[0] == '_'
        if not is_private:
            # Record the child before performing the ordinary attribute assignment.
            self._modules[k] = v
        super().__setattr__(k, v)

    def __repr__(self):
        return str(self._modules)

    def parameters(self):
        for child in self._modules.values():
            for p in child.parameters():
                yield p

In [None]:
class SequentialModel(nn.Module):
    """Applies the given layers one after another, feeding each output to the next layer.

    The layers are held in an nn.ModuleList so their parameters are registered
    with this module.
    """
    def __init__(self, layers):
        super().__init__()
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

In [None]:
class Model(nn.Module):
    """Thin wrapper that stores the given layers as a single nn.Sequential stack."""
    def __init__(self, layers):
        super().__init__()
        stack = nn.Sequential(*layers)
        self.layers = stack

    def forward(self, x):
        # The whole forward pass is delegated to the sequential stack.
        out = self.layers(x)
        return out

In [None]:
class Optimizer():
    """Plain gradient descent: each step performs p <- p - lr * p.grad in place.

    params: iterable of tensors to update (e.g. `model.parameters()`).
    lr:     step size applied to every parameter.

    Parameters whose `.grad` is still None (no backward pass has reached them
    yet) are skipped by both `step()` and `zero_grad()`, so calling
    `zero_grad()` before the first `backward()` is safe.
    """
    def __init__(self, params, lr=1e-3):
        # Materialize once: a generator would be exhausted after the first traversal.
        self.params = list(params); self.lr = lr
    def step(self):
        # no_grad keeps the in-place update itself out of the autograd graph.
        with torch.no_grad():
            for p in self.params:
                if p.grad is None: continue
                p -= p.grad * self.lr
    def zero_grad(self):
        with torch.no_grad():
            for p in self.params:
                if p.grad is not None: p.grad.zero_()

In [46]:
# Fully connected classifier: 784 inputs -> 1024 -> 50 -> 10 outputs, with a ReLU after each hidden linear layer.
model = nn.Sequential(nn.Linear(784, 1024), nn.ReLU(), nn.Linear(1024, 50), nn.ReLU(), nn.Linear(50, 10))

In [47]:
# Loss applied to the model outputs and integer labels in the training and validation code below.
loss_func = F.cross_entropy

In [48]:
# Pull a single batch from the training loader for a quick check: features first, labels second.
batch = next(iter(dl_train))
X_b = batch[0]; y_b = batch[1]

In [49]:
# Print how many predictions on that batch match the labels, next to the batch size.
# no_grad: this is inspection only, so no autograd graph is needed.
with torch.no_grad():
    preds = model(X_b)
    print((preds.argmax(dim=1) == y_b).sum(), len(preds))

tensor(31) 256


In [51]:
bs = 200  # NOTE(review): not read by this loop (batch size comes from dl_train); kept in case other cells use it
epochs = 20
opt = optim.SGD(model.parameters(), lr=5e-3)

for epoch in range(epochs):
    # ---- Training pass: one optimizer step per batch ----
    loss_sum, cnt = 0, 0
    for X_b, y_b in dl_train:
        # Get the gradients by calculating the loss
        preds = model(X_b)
        loss = loss_func(preds, y_b)
        loss.backward()
        # Track the running loss; .item() returns a plain Python float
        loss_sum += loss.item(); cnt += 1
        # Update the weights, then clear gradients for the next batch
        opt.step()
        opt.zero_grad()
    train_loss = loss_sum/cnt  # mean of the per-batch training losses

    # ---- Validation pass (every epoch) ----
    loss_sum, cnt = 0, 0; correct_cnt, all_cnt = 0, 0
    with torch.no_grad():
        for X_b, y_b in dl_valid:
            preds = model(X_b)
            loss_sum += loss_func(preds, y_b).item(); cnt += 1
            # Accumulate across batches (+=) so accuracy stays correct when
            # dl_valid yields more than one batch
            correct_cnt += (preds.argmax(dim=1) == y_b).sum().item(); all_cnt += len(y_b)
    valid_loss = loss_sum/cnt
    accuracy = correct_cnt/all_cnt
    # Columns: train loss, validation loss, validation accuracy
    print(f'{train_loss:.4f}\t{valid_loss:.4f}\t{accuracy:.4f}')

0.7128	0.6988	0.7483
0.6882	0.6775	0.7555
0.6669	0.6592	0.7578
0.6499	0.6431	0.7660
0.6343	0.6286	0.7685
0.6202	0.6161	0.7723
0.6079	0.6041	0.7770
0.5967	0.5946	0.7848
0.5865	0.5847	0.7867
0.5767	0.5768	0.7902
0.5681	0.5686	0.7910
0.5594	0.5607	0.7957
0.5516	0.5532	0.7986
0.5449	0.5462	0.8013
0.5376	0.5414	0.8034
0.5310	0.5332	0.8068
0.5254	0.5293	0.8079
0.5193	0.5228	0.8109
0.5142	0.5173	0.8141
0.5092	0.5128	0.8168


In [3]:
def train(epochs, lr, opt, model, loss_func, dl_train, dl_valid, eval_freq=1):
    """Run the training loop and print metrics every `eval_freq` epochs.

    epochs:     number of passes over `dl_train`.
    lr:         not read by this function (the learning rate lives in `opt`);
                kept so existing callers keep working.
    opt:        optimizer exposing `step()` and `zero_grad()`.
    model:      callable mapping a batch of inputs to predictions.
    loss_func:  callable `(preds, targets) -> scalar loss tensor`.
    dl_train:   iterable of `(inputs, targets)` training batches.
    dl_valid:   iterable of `(inputs, targets)` validation batches.
    eval_freq:  evaluate and print when `epoch % eval_freq == 0`.

    Each printed line is tab-separated: mean training batch loss, mean
    validation batch loss, validation accuracy. Returns None.
    """
    for epoch in range(epochs):
        # ---- Training pass: one optimizer step per batch ----
        loss_sum, cnt = 0, 0
        for X_b, y_b in dl_train:
            # Get the gradients by calculating the loss
            preds = model(X_b)
            loss = loss_func(preds, y_b)
            loss.backward()
            # Track the running loss; .item() returns a plain Python float
            loss_sum += loss.item(); cnt += 1
            # Update the weights, then clear gradients for the next batch
            opt.step()
            opt.zero_grad()
        # ---- Validation pass ----
        if epoch%eval_freq == 0:
            train_loss = loss_sum/cnt; loss_sum, cnt = 0, 0; correct_cnt, all_cnt = 0, 0
            with torch.no_grad():
                for X_b, y_b in dl_valid:
                    preds = model(X_b)
                    loss_sum += loss_func(preds, y_b).item(); cnt += 1
                    # Accumulate across batches (+=): assigning here would keep only
                    # the last batch's hits while all_cnt counts every batch
                    correct_cnt += (preds.argmax(dim=1) == y_b).sum().item(); all_cnt += len(y_b)
            valid_loss = loss_sum/cnt
            accuracy = correct_cnt/all_cnt
            print(f'{train_loss:.4f}\t{valid_loss:.4f}\t{accuracy:.4f}')