In [1]:
import torch
from torch import nn
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split
from torch.nn import functional as F
from torchvision.datasets import MNIST
from torchvision import datasets, transforms
import os

class LightningMNISTClassifier(pl.LightningModule):
    """Fully connected MNIST classifier (784 -> 128 -> 256 -> 10).

    ``forward`` returns log-probabilities (``log_softmax`` over the 10
    classes), and the loss is ``F.nll_loss`` on those log-probabilities.
    The module also owns its data: ``prepare_data`` downloads MNIST into the
    current working directory and the ``*_dataloader`` hooks serve the
    train / validation / test splits.
    """

    def __init__(self):
        super().__init__()

        # Not read anywhere inside this class; kept so external code that
        # inspects or toggles the attribute keeps working.
        self.enable = False

        # MNIST images are (1, 28, 28): (channels, height, width).
        self.layer_1 = torch.nn.Linear(28 * 28, 128)
        self.layer_2 = torch.nn.Linear(128, 256)
        self.layer_3 = torch.nn.Linear(256, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities.

        Args:
            x: tensor whose first dimension is the batch and whose remaining
                dimensions hold 28 * 28 = 784 values per sample, e.g.
                ``(b, 1, 28, 28)``, ``(b, 28, 28)`` or ``(b, 784)``.

        Returns:
            Tensor of shape ``(b, 10)`` with log-probabilities over labels.
        """
        # Flatten everything after the batch dimension:
        # (b, 1, 28, 28) -> (b, 1*28*28). Using the batch size alone (instead
        # of unpacking four dimensions) also accepts already-flattened or
        # channel-less input; reshape tolerates non-contiguous tensors.
        x = x.reshape(x.size(0), -1)

        # layer 1 (b, 1*28*28) -> (b, 128)
        x = torch.relu(self.layer_1(x))

        # layer 2 (b, 128) -> (b, 256)
        x = torch.relu(self.layer_2(x))

        # layer 3 (b, 256) -> (b, 10)
        x = self.layer_3(x)

        # log-probability distribution over labels
        return torch.log_softmax(x, dim=1)

    def cross_entropy_loss(self, logits, labels):
        """Negative log-likelihood loss.

        ``logits`` is expected to hold log-probabilities as produced by
        ``forward`` (log_softmax already applied); ``nll_loss`` on those is
        the cross-entropy.
        """
        return F.nll_loss(logits, labels)

    def training_step(self, train_batch, batch_idx):
        """Compute the training loss for one batch and log it as ``train_loss``."""
        x, y = train_batch
        # Call the module (not .forward directly) so nn.Module hooks run.
        logits = self(x)
        loss = self.cross_entropy_loss(logits, y)

        logs = {'train_loss': loss}
        return {'loss': loss, 'log': logs}

    def test_step(self, test_batch, batch_idx):
        """Compute the loss for one test batch and log it as ``test_loss``."""
        x, y = test_batch
        logits = self(x)
        loss = self.cross_entropy_loss(logits, y)
        logs = {'test_loss': loss}
        return {'test_loss': loss, 'log': logs}

    def validation_step(self, val_batch, batch_idx):
        """Compute the loss for one validation batch."""
        x, y = val_batch
        logits = self(x)
        loss = self.cross_entropy_loss(logits, y)
        return {'val_loss': loss}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses.

        ``outputs`` holds what ``validation_step`` returned for each batch:
        ``[{'val_loss': batch_0_loss}, ..., {'val_loss': batch_n_loss}]``.
        """
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        tensorboard_logs = {'val_loss': avg_loss}
        return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}

    def test_epoch_end(self, outputs):
        """Average the per-batch test losses.

        ``outputs`` holds what ``test_step`` returned for each batch:
        ``[{'test_loss': batch_0_loss, 'log': ...}, ...]``.
        """
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        tensorboard_logs = {'test_loss': avg_loss}
        return {'avg_test_loss': avg_loss, 'log': tensorboard_logs}

    def prepare_data(self):
        """Download MNIST and build the train / validation / test datasets.

        NOTE(review): this hook assigns ``self.mnist_*``. Lightning documents
        ``prepare_data`` as a download-only hook and recommends assigning
        state in ``setup`` for multi-process training - confirm before
        running this on more than one device.
        """
        # transforms for images: to tensor, then normalize with the
        # (mean, std) pair commonly used for MNIST
        transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize((0.1307,), (0.3081,))])

        # datasets are downloaded into the current working directory
        mnist_train = MNIST(os.getcwd(), train=True, download=True, transform=transform)
        mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transform)

        # hold out 5000 of the 60000 training images for validation
        self.mnist_train, self.mnist_val = random_split(mnist_train, [55000, 5000])
        self.mnist_test = mnist_test

    def train_dataloader(self):
        """Training loader (batch size 64, no per-epoch shuffling)."""
        return DataLoader(self.mnist_train, batch_size=64)

    def val_dataloader(self):
        """Validation loader (batch size 64)."""
        return DataLoader(self.mnist_val, batch_size=64)

    def test_dataloader(self):
        """Test loader (batch size 64)."""
        return DataLoader(self.mnist_test, batch_size=64)

    def configure_optimizers(self):
        """Adam over all parameters with learning rate 1e-3."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

# train
#model = LightningMNISTClassifier()
#trainer = pl.Trainer(gpus=1, max_epochs=1)

#trainer.fit(model)

#print(trainer.test(model))

#model.layer_1 = torch.nn.Sequential(model.layer_1, torch.nn.Softmax())
#print(trainer.test(model))

# trainer.test() does not work if we change the model
#for i, test_batch in enumerate(model.test_dataloader()):
#    x, y = test_batch
#    logits = model.forward(x.to(model.device))
#    loss = model.cross_entropy_loss(logits, y.to(model.device))
#    print({'test_loss': loss})

In [12]:
# testing callbacks for new implementation of fault injection

import copy

import torch
import pytorch_lightning as pl

class A(torch.nn.Module):
    """Fault-injection layer: overwrite index 0 along dim 0 with 1000.

    The fault is written into a clone, so the tensor passed in by the caller
    is left untouched; the returned values are the same as writing in place.
    """

    def forward(self, x):
        """Return a copy of ``x`` whose ``x[0]`` slice is set to 1000."""
        faulty = x.clone()
        faulty[0] = 1000
        return faulty

class Callback(pl.Callback):
    """Inject the ``A`` fault layer after ``layer_1`` for the duration of a test run.

    ``on_test_start`` replaces ``pl_module.layer_1`` with
    ``Sequential(layer_1, A())`` and ``on_test_end`` restores the original
    layer. Both hooks check the current state first, so a test run that was
    aborted before ``on_test_end`` does not lead to nested wrapping on the
    next run, and ``on_test_end`` never subscripts an unwrapped layer.
    """

    @staticmethod
    def _is_injected(layer):
        """True if ``layer`` is the ``Sequential(original, A())`` built by this callback."""
        return (
            isinstance(layer, torch.nn.Sequential)
            and len(layer) == 2
            and isinstance(layer[1], A)
        )

    def on_test_start(self, trainer, pl_module):
        """Wrap ``layer_1`` with the fault layer unless it is already wrapped."""
        if not self._is_injected(pl_module.layer_1):
            pl_module.layer_1 = torch.nn.Sequential(pl_module.layer_1, A())

    def on_test_end(self, trainer, pl_module):
        """Restore the original ``layer_1`` if the fault layer is in place."""
        if self._is_injected(pl_module.layer_1):
            pl_module.layer_1 = pl_module.layer_1[0]


# Experiment: compare test loss without and with the fault-injection callback.
model = LightningMNISTClassifier()
# trainer1 carries the fault-injection Callback; trainer2 is a plain trainer.
trainer1 = pl.Trainer(max_epochs=1, callbacks=[Callback()])
trainer2 = pl.Trainer(max_epochs=1)

# Baseline: test loss of the freshly initialised (untrained) model.
print(trainer2.test(model))

# Train for one epoch (max_epochs=1).
trainer2.fit(model)

# Work on a copy so the trained `model` itself is never wrapped by the callback.
# NOTE(review): deepcopy should already carry the trained weights, so the
# load_state_dict call below looks redundant - confirm whether it is intentional.
model_copy = copy.deepcopy(model)
model_copy.load_state_dict(model.state_dict())

# Same trained weights, first without and then with fault injection.
print(trainer2.test(model_copy))
print(trainer1.test(model_copy))

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
Testing:  96%|█████████▌| 150/157 [00:01<00:00, 122.24it/s]--------------------------------------------------------------------------------
TEST RESULTS
{'avg_test_loss': tensor(2.3066), 'test_loss': tensor(2.3066)}
--------------------------------------------------------------------------------
Testing: 100%|██████████| 157/157 [00:01<00:00, 119.36it/s]
  | Name    | Type   | Params
-----------------------------------
0 | layer_1 | Linear | 100 K 
1 | layer_2 | Linear | 33 K  
2 | layer_3 | Linear | 2 K   

{'avg_test_loss': 2.3066279888153076, 'test_loss': 2.3066279888153076}
Epoch 1:  92%|█████████▏| 860/939 [00:10<00:00, 79.05it/s, loss=0.103, v_num=3]
Epoch 1:  92%|█████████▏| 862/939 [00:10<00:00, 79.07it/s, loss=0.103, v_num=3]
Epoch 1:  93%|█████████▎| 873/939 [00:11<00:00, 79.35it/s, loss=0.103, v_num=3]
Epoch 1:  94%|█████████▍|

In [15]:
# Display the copy's parameters. Plain `layer_1.*` keys indicate layer_1 is
# not wrapped in a Sequential at this point.
model_copy.state_dict()

OrderedDict([('layer_1.weight',
              tensor([[-2.9835e-02, -1.0588e-02,  2.3553e-02,  ..., -9.3428e-03,
                       -2.3920e-02, -7.5001e-03],
                      [-2.9323e-03,  3.1080e-02, -2.2315e-02,  ...,  2.2397e-02,
                       -8.4179e-04, -1.8313e-02],
                      [-6.5292e-03,  9.8052e-03,  2.7756e-02,  ..., -2.9047e-02,
                       -9.5493e-03, -5.1596e-03],
                      ...,
                      [ 3.5459e-02,  3.4426e-02,  6.8589e-03,  ..., -8.5028e-03,
                        3.2896e-02,  6.1409e-03],
                      [-7.2285e-03,  9.5989e-03,  4.0405e-03,  ...,  4.4267e-02,
                       -1.0017e-02,  1.1489e-02],
                      [ 5.1586e-03, -2.3910e-05, -2.8683e-02,  ..., -3.1008e-02,
                        2.4692e-02, -1.1857e-02]])),
             ('layer_1.bias',
              tensor([ 0.0248,  0.0043,  0.0127,  0.0349, -0.0297,  0.0080,  0.0109,  0.0394,
                       0.018