In [1]:
import torch
import torchvision
from torchvision import datasets
import torch.nn as nn
import lightning.pytorch as pl
import os
from torch import optim
import torch.nn.functional as F
from lightning.pytorch import Trainer

In [2]:
# --- Model dimensions -------------------------------------------------------
input_size = 28 * 28    # 784 features per flattened 28x28 image
hidden_size = 100       # width of the single hidden layer
num_classes = 10        # number of output classes

# --- Training schedule ------------------------------------------------------
num_epochs = 20         # passed to the Trainer as max_epochs
batch_size = 100        # samples per batch for both dataloaders
learning_rate = 1e-3    # step size for the Adam optimizer

In [3]:
# define the LightningModule
class LitNeuralNet(pl.LightningModule):
    """Fully connected network with one hidden layer, trained on MNIST.

    The module bundles the network (Linear -> ReLU -> Linear), the
    training/validation steps, the optimizer and the MNIST dataloaders.

    The dataloaders and the optimizer read the notebook-level settings
    ``batch_size`` and ``learning_rate``, so the configuration cell must
    have been executed before the model is fitted.

    Args:
        input_size: Number of input features of the first linear layer.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)
        # Per-batch validation losses; averaged and cleared at the end of
        # every validation epoch.
        self.validation_step_outputs = []

    def forward(self, x):
        """Return the raw class logits for the flattened inputs ``x``."""
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        return out

    def _shared_step(self, batch):
        """Run the forward pass and loss shared by training and validation.

        Args:
            batch: An ``(images, labels)`` pair.

        Returns:
            Tuple ``(outputs, labels, loss)`` where ``outputs`` are the
            logits and ``loss`` is the cross-entropy against ``labels``.
        """
        images, labels = batch
        # Flatten everything except the batch dimension so the feature count
        # follows the data instead of a hard-coded 28*28.
        images = images.reshape(images.shape[0], -1)
        outputs = self(images)
        loss = F.cross_entropy(outputs, labels)
        return outputs, labels, loss

    def training_step(self, batch, batch_idx):
        """Compute the training loss and log the batch loss and accuracy.

        Returns:
            Dict with the key ``'loss'`` holding the cross-entropy loss.
        """
        outputs, labels, loss = self._shared_step(batch)
        # Fraction of correct predictions in the batch. Kept as a tensor
        # (no ``.item()``) so logging does not force a conversion to a
        # Python number on every step.
        accuracy = (torch.argmax(outputs, dim=1) == labels).float().mean()
        self.log("accuracy", accuracy, prog_bar=True)
        self.log("loss", loss, prog_bar=True)
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss for one batch and remember it.

        Returns:
            Dict with the key ``'val_loss'`` holding the batch loss.
        """
        _, _, loss = self._shared_step(batch)
        # Collected here and reduced in on_validation_epoch_end.
        self.validation_step_outputs.append(loss)
        return {'val_loss': loss}

    def on_validation_epoch_end(self):
        """Log the mean of the collected validation losses and reset them."""
        avg_loss = torch.stack(self.validation_step_outputs).mean()
        self.log("validation_epoch_average", avg_loss)
        self.validation_step_outputs.clear()  # free memory
        return {'val_loss': avg_loss}

    def configure_optimizers(self):
        """Return an Adam optimizer over this module's own parameters.

        Uses the notebook-level ``learning_rate``. The class used to define
        this method twice; the first version referenced the global ``model``
        and was shadowed by the second, which ignored ``learning_rate``.
        """
        return optim.Adam(self.parameters(), lr=learning_rate)

    def _mnist(self, train):
        """Build the MNIST dataset under ``./data`` as tensors.

        Args:
            train: ``True`` for the training split, ``False`` for the test
                split.
        """
        return torchvision.datasets.MNIST(
            root='./data',
            train=train,
            transform=torchvision.transforms.ToTensor(),
            download=True,
        )

    def train_dataloader(self):
        """Return a shuffled loader over the MNIST training split."""
        return torch.utils.data.DataLoader(
            dataset=self._mnist(train=True),
            batch_size=batch_size,
            shuffle=True,
            num_workers=8,
            persistent_workers=True,
        )

    def val_dataloader(self):
        """Return an unshuffled loader over the MNIST test split."""
        return torch.utils.data.DataLoader(
            dataset=self._mnist(train=False),
            batch_size=batch_size,
            shuffle=False,
            num_workers=1,
            persistent_workers=True,
        )
        
        
if __name__ == '__main__':
    # Fix the random seeds before the model is built so that weight
    # initialisation and batch shuffling are repeatable across
    # "Restart Kernel -> Run All" executions.
    seed = 42
    pl.seed_everything(seed, workers=True)

    model = LitNeuralNet(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)
    trainer = Trainer(max_epochs=num_epochs, fast_dev_run=False)
    # The model supplies its own train/val dataloaders, so none are passed.
    trainer.fit(model)



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
c:\Users\hosei\anaconda3\lib\site-packages\lightning\pytorch\trainer\connectors\logger_connector\logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type   | Params
--------------------------------
0 | l1   | Linear | 78.5 K
1 | relu | ReLU   | 0     
2 | l2   | Linear | 1.0 K 
--------------------------------
79.5 K    Trainable params
0         Non-trainable params
79.5 K    Total params
0.318     Total estimat

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\hosei\anaconda3\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
