In [65]:
import torch
import torch.nn as nn

from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import lightning.pytorch as pl
import torchmetrics

In [66]:
batch = 128
input_len = 28
# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'mps')
device

device(type='cuda')

In [67]:
mean_grey = .1307
std_grey = .3081
transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize(mean_grey,std_grey)])

In [84]:
class MnistDataLoader(pl.LightningDataModule):
    def __init__(self,root,batch_size,num_workers):
        super(MnistDataLoader,self).__init__()
        self.root = root
        self.batch_size = batch_size
        self.num_workers = num_workers
        
        
    def prepare_data(self):
        datasets.MNIST(root = self.root,train=True,download=True)
        datasets.MNIST(root = self.root,train=False,download=True)

    def setup(self, stage):
        train_dataset = datasets.MNIST(root = self.root,train=True,download=False,transform= transform)
        self.test_dataset =  datasets.MNIST(root = self.root,train=False,download=False,transform= transform)
        # Define the proportions for the split
        train_proportion = 0.8  # 80% for training
        val_proportion = 0.2  # 20% for validation

        # Calculate the sizes of training and validation sets based on the proportions
        train_size = int(train_proportion * len(train_dataset))
        val_size = len(train_dataset) - train_size
        
        # Use random_split to split the dataset
        self.train_dataset, self.val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])
        

    def train_dataloader(self):
        return torch.utils.data.DataLoader(dataset=self.train_dataset,batch_size=self.batch_size,shuffle=True,num_workers=self.num_workers,pin_memory=True)
    
    def val_dataloader(self):
        return torch.utils.data.DataLoader(dataset=self.val_dataset,batch_size=self.batch_size,shuffle=False,num_workers=self.num_workers,pin_memory=True)
    
    def test_dataloader(self):
        return torch.utils.data.DataLoader(dataset=self.test_dataset,batch_size=self.batch_size,shuffle=False,num_workers=self.num_workers,pin_memory=True)
   

In [85]:
root = '/home/pranav-pc/projects/applied_theories/pytorch examples/datasets/'
batch_size = 128
num_workers = 4
ds = MnistDataLoader(root, batch_size, num_workers)

In [128]:
class MNIST(pl.LightningModule):

    def __init__(self):
        super().__init__()

        self.layers = nn.Sequential(nn.Conv2d(in_channels=1,out_channels=8,kernel_size=3,stride=1,padding=1),
                 nn.BatchNorm2d(8),
                 nn.ReLU(),
                 nn.MaxPool2d(kernel_size=2),
                 nn.Conv2d(in_channels=8,out_channels=32,kernel_size=5,stride=1,padding=2),
                    nn.BatchNorm2d(32),
                 nn.ReLU(),
                  nn.MaxPool2d(kernel_size=2),
                  nn.Flatten(),
                  nn.Linear(in_features=32*7*7, out_features=600),
                  nn.Dropout(0.3),
                  nn.LeakyReLU(),
                  nn.Linear(600,10)
                 )
        self.accuracy = torchmetrics.Accuracy(task="multiclass",num_classes=10)
        self.f1_score = torchmetrics.F1Score(task="multiclass",num_classes=10)

        self.lr = 1e-3

    def forward(self,x):
        return self.layers(x)

    def _common_step(self,batch,batch_index):
        x, y = batch
        # x = torch.unsqueeze(x,1)
        
        y_hat = self(x)
        loss = nn.functional.cross_entropy(y_hat,y)
        return loss , y_hat


    def training_step(self,batch, batch_idx):
        x,y = batch
        loss , y_hat = self._common_step(batch,batch_idx)
        accuracy, f1_score = self.accuracy(y_hat,y), self.f1_score(y_hat,y)
        self.log_dict({'train_loss':loss,
                      'train_accuracy':accuracy,
                      'train_f1score':f1_score},prog_bar=True,on_step=False,on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x,y = batch
        loss, y_hat = self._common_step(batch,batch_idx)
        
        accuracy,f1_score = self.accuracy(y_hat,y), self.f1_score(y_hat,y)
        self.log_dict({'val_loss':loss,
                      'val_accuracy':accuracy,
                      'val_f1score':f1_score},prog_bar=True,on_step=False,on_epoch=True)
        return loss

    def test_step(self, batch, batch_idx):
        x,y = batch
        loss, y_hat = self._common_step(batch,batch_idx)
        
        accuracy,f1_score = self.accuracy(y_hat,y), self.f1_score(y_hat,y)
        self.log_dict({'test_loss':loss,
                      'test_accuracy':accuracy,
                      'test_f1score':f1_score},prog_bar=True,on_step=False,on_epoch=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(),lr=self.lr)
        

In [129]:
batch = 128
num_epoch = 50

model = MNIST()

In [130]:
logger = pl.loggers.TensorBoardLogger(save_dir='./log/', name='mnist_cnn', version=0.1)
trainer = pl.Trainer(logger=logger,max_epochs=num_epoch,enable_model_summary=True, enable_progress_bar=True,callbacks=[pl.callbacks.EarlyStopping('val_loss',patience=10,verbose=True)])

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [131]:
trainer.fit(model,ds)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type               | Params | Mode 
--------------------------------------------------------
0 | layers   | Sequential         | 954 K  | train
1 | accuracy | MulticlassAccuracy | 0      | train
2 | f1_score | MulticlassF1Score  | 0      | train
--------------------------------------------------------
954 K     Trainable params
0         Non-trainable params
954 K     Total params
3.816     Total estimated model params size (MB)


Sanity Checking: |                                        | 0/? [00:00<?, ?it/s]

Training: |                                               | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.067


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 0.050


Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 0.040


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.035


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.032


Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.032. Signaling Trainer to stop.


In [132]:
trainer.validate(model, ds)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: |                                             | 0/? [00:00<?, ?it/s]

[{'val_loss': 0.013247158378362656,
  'val_accuracy': 0.996666669845581,
  'val_f1score': 0.996666669845581}]

In [133]:
trainer.test(model, ds)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.03954754024744034,
  'test_accuracy': 0.9908000230789185,
  'test_f1score': 0.9908000230789185}]