# Model training
This use-case is model training.  
Previous part is the pipeline building and is taken without comments.  
For more detailed description of it see Pipeline building example.

In [1]:
import cascade.data as cdd
import cascade.models as cdm
import cascade.utils as cdu

import tqdm
import torch
import torchvision
from torchvision.transforms import functional as F
from torch import nn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
MNIST_ROOT = 'data'
INPUT_SIZE = 784
BATCH_SIZE = 10

In [3]:
class NoiseModifier(cdd.Modifier):
    def __getitem__(self, index):
        img, label = self._dataset[index] # get the data from Wrapper, which is _dataset for this Modifier
        img += torch.rand_like(img) * 0.1 # apply random noise with fixed magnitude
        img = torch.clip(img, 0, 255)
        return img, label


train_ds = torchvision.datasets.MNIST(root=MNIST_ROOT,
                                     train=True, 
                                     transform=F.to_tensor,
                                     download=True)
test_ds = torchvision.datasets.MNIST(root=MNIST_ROOT, 
                                    train=False, 
                                    transform=F.to_tensor)

train_ds = cdd.Wrapper(train_ds, 
    meta_prefix={
        'desc': 'This is MNIST dataset of handwritten images, TRAIN PART'
    })

test_ds = cdd.Wrapper(test_ds,
    meta_prefix={
        'desc': 'This is MNIST dataset of handwritten images, TEST PART'
    })

train_ds = cdd.CyclicSampler(NoiseModifier(train_ds), 200)
test_ds = NoiseModifier(test_ds)

train_dl = torch.utils.data.DataLoader(dataset=train_ds, 
                                       batch_size=BATCH_SIZE,
                                       shuffle=True)
test_dl = torch.utils.data.DataLoader(dataset=test_ds,
                                      batch_size=BATCH_SIZE,
                                      shuffle=False)

In [4]:
train_ds.get_meta()

[{'name': 'cascade.data.cyclic_sampler.CyclicSampler',
  'type': 'dataset',
  'len': 200},
 {'name': '__main__.NoiseModifier', 'type': 'dataset', 'len': 60000},
 {'name': 'cascade.data.dataset.Wrapper',
  'desc': 'This is MNIST dataset of handwritten images, TRAIN PART',
  'type': 'dataset',
  'len': 60000,
  'obj_type': torchvision.datasets.mnist.MNIST}]

In [65]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, *args, **kwargs):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, y):
         out = self.l1(y)
         out = self.relu(out)
         out = self.l2(out)

         return out


class Classifier(cdu.TorchModel):
    criterion = nn.CrossEntropyLoss()
    optim = torch.optim.Adam(self._model.parameters(), lr=lr)

    ds_size = len(train_dl)
    for epoch in range(num_epochs):
        for x, (imgs, labels) in enumerate(train_dl): 
            imgs = imgs.reshape(-1, self._model.input_size)

            out = self._model(imgs)
            loss = criterion(out, labels)

            optim.zero_grad()
            loss.backward()
            optim.step() 

            if x % 10 == 0:
                print (f'Epochs [{epoch}/{num_epochs}], Step[{x}/{ds_size}], Loss: {loss.item():.4f}')
    

In [68]:
NUM_EPOCHS = 2
LR = 1e-3

model = TorchModel(SimpleModel, INPUT_SIZE, 100, 10,
    num_epochs=NUM_EPOCHS,
    lr=LR, 
    bs=BATCH_SIZE)
model.fit(train_dl, NUM_EPOCHS, LR)

TypeError: __init__() takes from 1 to 2 positional arguments but 4 were given

In [None]:
model.evaluate(test_dl)

In [60]:
model.get_meta()

[{'name': '<__main__.SpecificWrapper object at 0x00000184DBE17BB0>',
  'created_at': DateTime(2022, 7, 24, 20, 25, 25, 794859, tzinfo=Timezone('UTC')),
  'metrics': {},
  'params': {'hidden_size': 100, 'num_epochs': 2, 'lr': 0.001, 'bs': 10},
  'type': 'model'}]