In [1]:
import os
import logging
from pathlib import Path
import click

import torch
from model import Classifier, Classifier2
import pytorch_lightning as pl
import sys

In [2]:
# Project locations. The root can be overridden with the CRPT_MNIST_ROOT environment
# variable so the notebook is not tied to a single machine; when the variable is unset the
# original absolute location is used and both paths are unchanged.
PROJECT_ROOT = Path(
    os.environ.get(
        "CRPT_MNIST_ROOT",
        "/Users/kristianernst/Work/Learning/MLOps/DTU/S4/exercise/crpt_mnist",
    )
)
# Kept as plain strings because later cells build file names with `+`.
input_filepath = (PROJECT_ROOT / "data" / "processed").as_posix()
output_filepath = (PROJECT_ROOT / "models").as_posix()

In [12]:
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import sys
from torchvision import transforms

class MNISTDataModule(pl.LightningDataModule):
    """Lightning data module that serves the ``.npz`` shards in ``data_dir`` as tensors.

    The training set is the concatenation of ``train_0.npz`` … ``train_4.npz``; the test set
    comes from ``test.npz``. Every archive must contain an ``images`` and a ``labels`` array.

    Args:
        data_dir: Directory holding the ``.npz`` archives.
        batch_size: Batch size used by both data loaders.
    """

    # Archives that are concatenated (in this order) into the training set.
    TRAIN_FILES = ("train_0.npz", "train_1.npz", "train_2.npz", "train_3.npz", "train_4.npz")
    TEST_FILE = "test.npz"

    def __init__(self, data_dir: str = "/Users/kristianernst/Work/Learning/MLOps/DTU/S4/exercise/crpt_mnist/data/raw", batch_size: int = 64):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        # Filled in by prepare_data() / setup().
        self.train_dataset = None
        self.test_dataset = None

    def _read_arrays(self, filename):
        """Return ``(images, labels)`` from one archive and close the file afterwards."""
        # np.load keeps the .npz file open until it is closed; the context manager releases it.
        with np.load(os.path.join(self.data_dir, filename)) as archive:
            return archive['images'], archive['labels']

    def prepare_data(self):
        """Read all archives from disk and build the train and test ``TensorDataset`` objects."""
        shards = [self._read_arrays(name) for name in self.TRAIN_FILES]
        x_train = np.concatenate([images for images, _ in shards])
        y_train = np.concatenate([labels for _, labels in shards])
        x_test, y_test = self._read_arrays(self.TEST_FILE)

        # NOTE: no normalisation is applied to the images.
        # Images become float32 tensors, labels int32 tensors.
        self.train_dataset = TensorDataset(
            torch.from_numpy(x_train).float(),
            torch.from_numpy(y_train).int(),
        )
        self.test_dataset = TensorDataset(
            torch.from_numpy(x_test).float(),
            torch.from_numpy(y_test).int(),
        )

    def setup(self, stage=None):
        """Make sure the datasets exist on the current process.

        Lightning documents that ``prepare_data`` is not executed on every process in
        distributed runs, whereas ``setup`` is. Loading here only when the datasets are still
        missing keeps single-process behaviour unchanged (no second read from disk).
        """
        if self.train_dataset is None or self.test_dataset is None:
            self.prepare_data()

    def train_dataloader(self):
        """Return a shuffled loader over the training set."""
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def test_dataloader(self):
        """Return a loader over the test set in its stored order."""
        # Evaluation data is not shuffled: the order does not affect the metrics and a fixed
        # order keeps per-sample outputs reproducible.
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)

In [4]:
# Data module built with its default data_dir and batch_size; passed to trainer.fit below.
cmnist = MNISTDataModule()

In [5]:

class LitProgressBar(pl.callbacks.ProgressBar):
    """Progress callback that writes the share of completed training batches to stdout.

    The line ends with a carriage return, so each update overwrites the previous one.
    """

    def __init__(self):
        super().__init__()  # let the base callback initialise its own state
        # Stored under a private name: an attribute called `enable` would shadow the
        # enable() method that the progress-bar interface exposes.
        self._enabled = True

    def enable(self):
        """Turn progress output (back) on."""
        self._enabled = True

    def disable(self):
        """Turn progress output off."""
        self._enabled = False

    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
        """Print the percentage of training batches finished so far in this epoch."""
        super().on_train_batch_end(trainer, pl_module, outputs, batch, batch_idx)  # keep base bookkeeping
        if not self._enabled:
            return
        total = self.total_train_batches
        # Skip when the total is unknown (0 or infinite) instead of dividing by it.
        if not total or total == float("inf"):
            return
        # batch_idx is 0-based, so +1 gives the number of batches already finished and the
        # last batch of an epoch reports 100.0.
        percent = ((batch_idx + 1) / total) * 100
        sys.stdout.write(f'{percent:.01f} percent complete \r')
        sys.stdout.flush()  # flush after writing so the line appears immediately

# Seed the run (Lightning reports "Global seed set to 42").
pl.seed_everything(42)

# NOTE(review): torch.load unpickles the stored objects, which can execute arbitrary code.
# Only load loader files that were produced by this project.
trainloader = torch.load(os.path.join(input_filepath, 'trainloader.pt'))
testloader = torch.load(os.path.join(input_filepath, 'testloader.pt'))
clf = Classifier()

# set callbacks
# Keep only the single best checkpoint, ranked by the logged 'train_loss' metric, and write
# it into the shared models directory instead of repeating the absolute path here.
checkpoint_clb = pl.callbacks.ModelCheckpoint(
    dirpath=output_filepath,
    filename='best-checkpoint',
    save_top_k=1,
    auto_insert_metric_name=True,
    verbose=True,
    monitor='train_loss',
)

Global seed set to 42


In [8]:
bar_clb = LitProgressBar()

# `summary_clb` was used here without being defined in any cell, so the notebook failed with
# a NameError on a fresh kernel. The recorded output shows a ModelSummary callback listing
# nested layers such as `backbone.0`.
# NOTE(review): max_depth=2 is inferred from that table; confirm the intended depth.
summary_clb = pl.callbacks.ModelSummary(max_depth=2)

# Train the Lightning classifier for four epochs on the data module.
trainer = pl.Trainer(callbacks=[checkpoint_clb, summary_clb, bar_clb], max_epochs=4)
trainer.fit(clf, cmnist)

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

   | Name         | Type             | Params
---------------------------------------------------
0  | backbone     | Sequential       | 24.9 K
1  | backbone.0   | Conv2d           | 640   
2  | backbone.1   | LeakyReLU        | 0     
3  | backbone.2   | Conv2d           | 18.5 K
4  | backbone.3   | LeakyReLU        | 0     
5  | backbone.4   | Conv2d           | 4.6 K 
6  | backbone.5   | LeakyReLU        | 0     
7  | backbone.6   | Conv2d           | 1.2 K 
8  | backbone.7   | LeakyReLU        | 0     
9  | classifier   | Sequential       | 804 K 
10 | classifier.0 | Flatten         

99.5 percent complete 

Epoch 0, global step 391: 'train_loss' reached 0.06245 (best 0.06245), saving model to '/Users/kristianernst/Work/Learning/MLOps/DTU/S4/exercise/crpt_mnist/models/best-checkpoint.ckpt' as top 1


99.5 percent complete 

Epoch 1, global step 782: 'train_loss' reached 0.04368 (best 0.04368), saving model to '/Users/kristianernst/Work/Learning/MLOps/DTU/S4/exercise/crpt_mnist/models/best-checkpoint.ckpt' as top 1


99.5 percent complete 

Epoch 2, global step 1173: 'train_loss' reached 0.03718 (best 0.03718), saving model to '/Users/kristianernst/Work/Learning/MLOps/DTU/S4/exercise/crpt_mnist/models/best-checkpoint.ckpt' as top 1


99.5 percent complete 

Epoch 3, global step 1564: 'train_loss' reached 0.02687 (best 0.02687), saving model to '/Users/kristianernst/Work/Learning/MLOps/DTU/S4/exercise/crpt_mnist/models/best-checkpoint.ckpt' as top 1


99.7 percent complete 

`Trainer.fit` stopped: `max_epochs=4` reached.


In [13]:
import logging

In [9]:
# Show the metrics the trainer has recorded (step- and epoch-level train loss and accuracy).
trainer.logged_metrics

{'train_loss_step': tensor(0.0204),
 'train_acc_step': tensor(1.),
 'train_loss_epoch': tensor(0.0269),
 'train_acc_epoch': tensor(0.9907)}

In [11]:
# NOTE(review): `_logger_connector` is a private attribute (leading underscore). The recorded
# output is identical to `trainer.logged_metrics`, so the public attribute is the safer choice.
trainer._logger_connector.logged_metrics

{'train_loss_step': tensor(0.0204),
 'train_acc_step': tensor(1.),
 'train_loss_epoch': tensor(0.0269),
 'train_acc_epoch': tensor(0.9907)}

In [15]:
# Log the final epoch-level training loss.
# INFO records are discarded under the default WARNING threshold, so make sure a handler
# exists and that this logger lets INFO through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# 'train_loss_epoch' is the epoch-level key shown by trainer.logged_metrics.
final_train_loss = trainer.logged_metrics.get("train_loss_epoch")
if final_train_loss is None:
    logger.warning("Training loss: not available (no 'train_loss_epoch' metric was logged)")
else:
    logger.info("Training loss: %.4f", float(final_train_loss))


AttributeError: 'ExperimentWriter' object has no attribute 'log_table'

In [None]:
# training:
# NOTE(review): this rebinds `clf`; the Classifier instance fitted by the trainer above is
# replaced by a fresh Classifier2 instance.
clf = Classifier2()



In [None]:
# train loop
# Plain PyTorch training of Classifier2 with a per-epoch evaluation on the test loader.
# The weights with the best test accuracy so far are written to <output_filepath>/model.pt.
num_epochs = 10
device = "cpu"
model = Classifier2().to(device)
criterion = torch.nn.CrossEntropyLoss()
# The optimizer must own the parameters of the network that is trained and saved below.
# It was previously built from `clf`, so `model` was never updated.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Each log holds [epoch, value] pairs with plain Python floats.
train_loss_log = []
train_accuracy_log = []
test_loss_log = []
test_accuracy_log = []

# Best mean test accuracy seen so far.
test_acc = 0


def _add_channel_dim(images):
    """Insert a channel axis at dim 1 when the batch has only three dimensions."""
    return images.unsqueeze(1) if images.dim() == 3 else images


def _batch_accuracy(scores, labels):
    """Return the fraction of rows whose highest-scoring class equals the label."""
    predicted = scores.argmax(dim=1)
    return (predicted == labels.view(-1)).float().mean().item()


for epoch in range(num_epochs):
    running_loss = 0.0
    running_accuracy = 0.0
    test_loss = 0.0
    test_accuracy = 0.0

    # ---- training pass ----
    model.train()
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        images = _add_channel_dim(images)
        optimizer.zero_grad()

        # The model returns a pair; the second element is what the loss is computed on.
        _, ps = model(images)
        loss = criterion(ps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_accuracy += _batch_accuracy(ps, labels)

    # Averages over batches, as before.
    epoch_train_loss = running_loss / len(trainloader)
    epoch_train_accuracy = running_accuracy / len(trainloader)
    print(f"Training loss: {epoch_train_loss}")
    print(f"Training accuracy: {epoch_train_accuracy}")
    train_loss_log.append([epoch, epoch_train_loss])
    train_accuracy_log.append([epoch, epoch_train_accuracy])

    # ---- evaluation pass ----
    model.eval()
    with torch.inference_mode():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            # Same input preparation as in training so both passes feed the model alike.
            images = _add_channel_dim(images)
            _, ps = model(images)
            test_loss += criterion(ps, labels).item()
            test_accuracy += _batch_accuracy(ps, labels)

    epoch_test_loss = test_loss / len(testloader)
    epoch_test_accuracy = test_accuracy / len(testloader)
    print(f"Test loss: {epoch_test_loss}")
    print(f"Test accuracy: {epoch_test_accuracy}")
    test_loss_log.append([epoch, epoch_test_loss])
    test_accuracy_log.append([epoch, epoch_test_accuracy])

    # Keep only the weights of the best-performing epoch.
    if epoch_test_accuracy > test_acc:
        torch.save(model.state_dict(), output_filepath + "/model.pt")
        print("Model saved")
        test_acc = epoch_test_accuracy