In [None]:
import gzip, pickle
from torch import tensor

In [None]:
from torch.functional import F

In [None]:
from torch import nn
import torch

In [None]:
def get_data(path='/content/mnist.pkl.gz'):
    """Load the pickled dataset splits and convert them to tensors.

    Args:
        path: Location of the gzip-compressed pickle file. The default is the
            path this notebook was originally written against.

    Returns:
        A lazy ``map`` that yields, in order, ``x_train``, ``y_train``,
        ``x_valid`` and ``y_valid`` as tensors. The third split stored in the
        pickle is read but discarded.
    """
    # Alternative source used previously:
    # path = datasets.download_data(MNIST_URL, ext='.gz')
    with gzip.open(path, 'rb') as f:
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # `path` at files from a trusted source.
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
    return map(tensor, (x_train,y_train,x_valid,y_valid))

In [None]:
# Unpack the four tensors produced by get_data(): training inputs/labels and
# validation inputs/labels.
x_train, y_train, x_valid, y_valid = get_data()
# nh: width of the hidden layer; bs: mini-batch size used by the loops below.
nh, bs = 50, 512
# Number of output classes, taken as (largest training label + 1).
# assumes labels are consecutive integers starting at 0 — TODO confirm
c = y_train.max().item()+1
# Loss function read as a global by the training loops.
loss_func = F.cross_entropy

In [None]:
class DummyModule():
    """Hand-rolled two-layer network that mimics a small part of ``nn.Module``.

    Every attribute whose name does not start with an underscore is recorded
    in ``self._modules`` so that ``parameters()`` can walk the layers.

    Args:
        n_in: Number of input features.
        nh: Number of hidden units.
        n_out: Number of outputs.
    """
    def __init__(self, n_in, nh, n_out):
        # Must be created first: __setattr__ below writes into it.
        self._modules = {}
        self.l1 = nn.Linear(n_in,nh)
        self.l2 = nn.Linear(nh,n_out)

    def __setattr__(self,k,v):
        """Register public attributes in ``_modules`` before storing them."""
        if not k.startswith("_"): self._modules[k] = v
        super().__setattr__(k,v)

    def __repr__(self): return f'{self._modules}'

    def parameters(self):
        """Yield the parameters of every registered layer, in registration order."""
        for l in self._modules.values():
            for p in l.parameters(): yield p

    def __call__(self, x): return self.l2(F.relu(self.l1(x)))

    def zero_grad(self):
        """Reset all accumulated gradients to zero in place.

        Parameters that have no gradient yet (``grad is None``, e.g. before the
        first backward pass) are skipped instead of raising.
        """
        for p in self.parameters():
            if p.grad is not None: p.grad.data.zero_()

In [None]:
def fit():
    """Train the global ``model`` with a hand-written SGD update.

    Reads the globals ``epochs``, ``n``, ``bs``, ``x_train``, ``y_train``,
    ``loss_func``, ``model`` and ``lr``. After each mini-batch the parameters
    are updated in place and the gradients are cleared.
    """
    for _ in range(epochs):
        n_batches = (n-1)//bs + 1
        for batch_no in range(n_batches):
            lo = batch_no*bs
            window = slice(lo, lo+bs)
            xb, yb = x_train[window], y_train[window]
            loss = loss_func(model(xb), yb)

            loss.backward()
            # The update itself must not be tracked by autograd.
            with torch.no_grad():
                for param in model.parameters():
                    param -= param.grad * lr
            model.zero_grad()

In [None]:
# Hyper-parameters read as globals by fit(): number of epochs, number of
# training samples, and the SGD learning rate.
epochs, n, lr = 1, x_train.shape[0], 0.5
# Size the output layer from the data (c = number of classes) instead of a
# hard-coded 10, matching the other models built in this notebook.
model = DummyModule(x_train.shape[1], nh, c)
fit()

---

In [None]:
#  A8
class Optimizer():
    """Minimal plain-SGD optimizer.

    Args:
        params: Iterable of tensors to optimize; it is materialized into a
            list so a generator such as ``model.parameters()`` can be passed.
        lr: Learning rate multiplied with each gradient in ``step``.
    """
    def __init__(self, params, lr): self.params, self.lr = list(params), lr

    def zero_grad(self):
        """Zero every existing gradient in place; parameters without one are skipped."""
        for p in self.params:
            if p.grad is not None: p.grad.data.zero_()

    def step(self):
        """Apply ``p -= p.grad * lr`` to every parameter that has a gradient."""
        # The update itself must not be tracked by autograd.
        with torch.no_grad():
            for p in self.params:
                # No gradient yet (e.g. unused parameter or no backward pass):
                # leave the parameter untouched rather than failing on None.
                if p.grad is None: continue
                p -= p.grad * self.lr

def fit():
    """Train the global ``model``, delegating the update to the global ``opt``.

    Reads the globals ``epochs``, ``n``, ``bs``, ``x_train``, ``y_train``,
    ``loss_func``, ``model`` and ``opt``. For each mini-batch it runs a
    forward pass, back-propagates, then calls ``opt.step()`` followed by
    ``opt.zero_grad()``.
    """
    for _ in range(epochs):
        n_batches = (n-1)//bs + 1
        for batch_no in range(n_batches):
            lo = batch_no*bs
            window = slice(lo, lo+bs)
            xb, yb = x_train[window], y_train[window]
            loss = loss_func(model(xb), yb)

            loss.backward()
            opt.step()
            opt.zero_grad()

In [None]:
# Two-layer network built from stock torch modules: linear -> ReLU -> linear,
# with c output units.
model = nn.Sequential(nn.Linear(x_train.shape[1], nh), nn.ReLU(), nn.Linear(nh, c))
# accuracy-before train
# (fraction of the first bs training samples whose highest-scoring output
# index equals the label)
print((model(x_train[:bs]).max(-1).indices == y_train[:bs]).sum()/ bs)
# Hand-written optimizer with learning rate 0.9; fit() picks up `model` and
# `opt` as globals.
opt = Optimizer(model.parameters(), 0.9)
fit()
# Same accuracy measurement after training.
print((model(x_train[:bs]).max(-1).indices == y_train[:bs]).sum()/ bs)

tensor(0.1250)
tensor(0.9062)


In [None]:
from torch import optim
def get_model(model_func, lr=0.9):
    """Build a sequential network together with an SGD optimizer for it.

    Args:
        model_func: Zero-argument callable returning an iterable of layers.
        lr: Learning rate handed to ``optim.SGD``.

    Returns:
        A ``(model, optimizer)`` tuple.
    """
    layers = model_func()
    net = nn.Sequential(*layers)
    sgd = optim.SGD(net.parameters(), lr=lr)
    return net, sgd

In [None]:
def get_layers():
    """Return the layers of the two-layer network as a tuple.

    The input width comes from the global ``x_train``, the hidden width from
    the global ``nh`` and the output width from the global ``c``.
    """
    n_in = x_train.shape[1]
    hidden = nn.Linear(n_in, nh)
    activation = nn.ReLU()
    head = nn.Linear(nh, c)
    return hidden, activation, head

In [None]:
# Longer run with a much smaller learning rate; fit() reads the globals
# `epochs`, `model` and `opt` set here.
epochs = 100
# Fresh model paired with torch's built-in SGD optimizer.
model, opt = get_model(get_layers, lr = 0.001)
# Accuracy on the first bs training samples before training ...
print((model(x_train[:bs]).max(-1).indices == y_train[:bs]).sum()/ bs)
fit()
# ... and after training.
print((model(x_train[:bs]).max(-1).indices == y_train[:bs]).sum()/ bs)

tensor(0.0586)
tensor(0.8574)


----

10
```python3
class Dataset():
    """Pair an input collection with its targets.

    Indexing (with an integer or a slice) is forwarded to both collections
    and returns an ``(x, y)`` tuple; the length is that of the inputs.
    """
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, i):
        inputs = self.x[i]
        targets = self.y[i]
        return inputs, targets

train_ds = Dataset(x_train, y_train)
# Create the model and its optimizer BEFORE the loop. Re-creating the model
# after training would report metrics for an untrained network and leave
# `opt` pointing at the old parameters.
model, opt = get_model(get_layers, lr=0.001)
for epoch in range(epochs):
    for i in range((n-1)//bs +1):
        # Slicing the Dataset returns the matching inputs and labels together.
        xb, yb = train_ds[i*bs:(i+1)*bs]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()
# Loss and accuracy of the trained model on the last mini-batch.
# NOTE(review): `accuracy` is not defined in the visible part of this
# notebook — confirm it is provided elsewhere.
loss,acc = loss_func(model(xb), yb), accuracy(model(xb), yb)
```


A11
```python3
class DataLoader():
    """Iterate over a dataset in consecutive slices of ``bs`` items.

    Args:
        ds: Any object supporting ``len()`` and slice indexing.
        bs: Number of items per slice; the final slice may be shorter.
    """
    def __init__(self, ds, bs):
        self.ds = ds
        self.bs = bs

    def __iter__(self):
        starts = range(0, len(self.ds), self.bs)
        yield from (self.ds[lo:lo+self.bs] for lo in starts)

loss_func = F.cross_entropy
model, opt = get_model(get_layers, lr = 0.001)
train_dl = DataLoader(train_ds, bs)

for epoch in range(epochs):
    # The loader hands out (inputs, labels) mini-batches directly.
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)
        # Back-propagate so the parameters have gradients; without this call
        # opt.step() has nothing to apply and the model never learns.
        loss.backward()
        opt.step()
        opt.zero_grad()
```

In [45]:
class Sampler():
    """Yield batches of dataset indices, optionally in random order.

    Args:
        ds: Object whose ``len()`` gives the number of samples.
        bs: Number of indices per batch; the last batch may be shorter.
        shuffle: When true, a new random permutation is drawn on every
            iteration; otherwise indices are produced in ascending order.
    """
    def __init__(self, ds, bs, shuffle=False):
        self.n = len(ds)
        self.bs = bs
        self.shuffle = shuffle

    def __iter__(self):
        # The index order is stored on the instance and rebuilt per iteration.
        if self.shuffle:
            self.idxs = torch.randperm(self.n)
        else:
            self.idxs = torch.arange(self.n)
        for lo in range(0, self.n, self.bs):
            yield self.idxs[lo:lo+self.bs]

# Wrap the first 50 training samples in their own Dataset.
small_ds = Dataset(*train_ds[:50])
# Shuffled sampler producing index batches of 10.
# NOTE(review): the name `os` would shadow the stdlib `os` module if that is
# ever imported in this notebook — consider renaming.
os = Sampler(small_ds, 10, True)
[o for o in os]

# A second shuffled sampler over the same data; every iteration draws a new
# torch.randperm order.
os = Sampler(small_ds, 10, True)
[o for o in os]

def collate(batch):
    """Combine a sequence of ``(x, y)`` samples into two stacked tensors.

    Args:
        batch: Iterable of two-element samples whose members are tensors.

    Returns:
        A tuple ``(stacked_xs, stacked_ys)`` built with ``torch.stack``.
    """
    # Transpose the list of pairs into one tuple of inputs and one of targets.
    inputs, targets = zip(*batch)
    stacked_inputs = torch.stack(inputs)
    stacked_targets = torch.stack(targets)
    return stacked_inputs, stacked_targets

class DataLoader():
    """Iterate over a dataset in batches chosen by a sampler.

    Args:
        ds: Dataset indexed one sample at a time.
        sampler: Iterable that yields one collection of indices per batch.
        collate_fn: Callable that merges the list of fetched samples into a
            batch; defaults to ``collate``.
    """
    def __init__(self, ds, sampler, collate_fn = collate):
        self.ds = ds
        self.sampler = sampler
        self.collate_fn = collate_fn

    def __iter__(self):
        for batch_idxs in self.sampler:
            # Fetch the samples individually, then merge them into one batch.
            samples = [self.ds[i] for i in batch_idxs]
            yield self.collate_fn(samples)

In [46]:
# Shuffled sampler over the 50-sample dataset. With bs = 512 (set earlier)
# larger than the dataset, the sampler yields a single batch of all indices.
train_samp = Sampler(small_ds, bs, shuffle=True)
train_dl = DataLoader(small_ds, sampler=train_samp, collate_fn=collate)
# Pull the first batch: a tuple of stacked inputs and stacked labels.
next(iter(train_dl))

# unpacking test list
# zip(*pairs) transposes a list of pairs into one tuple per position; this is
# the same trick collate() uses to separate inputs from labels.
test_list = [('x1', 'y1'), ('x2', 'y2'), ('x3', 'y2')]
list(zip(*test_list))
# Print only the first transposed tuple (the 'x' entries), then stop.
for i in zip(*test_list):
    print(i)
    break

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([4, 2, 8, 9, 9, 5, 4, 9, 6, 3, 5, 5, 1, 5, 7, 4, 0, 8, 3, 1, 2, 1, 3, 0,
         3, 0, 3, 7, 6, 1, 8, 1, 6, 6, 0, 2, 3, 9, 9, 6, 2, 1, 8, 9, 4, 7, 9, 3,
         7, 1]))