In [34]:
import os
import torch
from torch import optim, nn, utils, Tensor
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from torchmetrics.functional import accuracy
from torch.utils.data import random_split

In [73]:

# define any number of nn.Modules (or use your current ones)
from pytorch_lightning.utilities.types import TRAIN_DATALOADERS


encoder = nn.Sequential(nn.Linear(28 * 28, 64), nn.ReLU(), nn.Linear(64, 64))
decoder = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Linear(64,10))


# define the LightningModule
class LitAutoEncoder(pl.LightningModule):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.save_hyperparameters()
    
    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        # it is independent of forward
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = nn.functional.cross_entropy(x_hat,y)
        # Logging to TensorBoard (if installed) by default
        self.log("train_loss", loss)
        return loss
    def validation_step(self,batch,batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss =nn.functional.cross_entropy(x_hat,y)
        acc=accuracy(x_hat,y,task='multiclass',num_classes=10)
        log={'val_loss':loss,'val_acc':acc}
        # Logging to TensorBoard (if installed) by default
        self.log_dict(log)
    def predict_step(self,batch,batch_idx,dataloader_idx=0):
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss =nn.functional.cross_entropy(x_hat,y)
        acc=accuracy(x_hat,y,task='multiclass',num_classes=10)
        log={'test_loss':loss,'test_acc':acc}
        return log
    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
    def train_dataloader(self):
        train_set=MNIST(os.getcwd(), download=True,train=True, transform=ToTensor())
        return DataLoader(train_set,batch_size=128,num_workers=2)
    def val_dataloader(self):
        train_set=MNIST(os.getcwd(), download=True,train=True, transform=ToTensor())
        _,val_set=random_split(train_set,[55000,5000])
        return DataLoader(val_set,batch_size=128,num_workers=2)
    
# init the autoencoder
autoencoder = LitAutoEncoder(encoder, decoder)

/root/miniconda3/envs/dsi_exp/lib/python3.11/site-packages/pytorch_lightning/utilities/parsing.py:199: Attribute 'encoder' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['encoder'])`.
/root/miniconda3/envs/dsi_exp/lib/python3.11/site-packages/pytorch_lightning/utilities/parsing.py:199: Attribute 'decoder' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['decoder'])`.


In [48]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(dirpath=f"./checkpoint", 
                                                    monitor="val_loss", mode="min", save_top_k=1)


In [49]:
# train the model (hint: here are some helpful Trainer arguments for rapid idea iteration)
trainer = pl.Trainer(max_epochs=5,callbacks=[checkpoint_callback])
trainer.fit(autoencoder)

Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/root/miniconda3/envs/dsi_exp/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /home/tmp/code/pc/study_vlog/lightning/checkpoint exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 54.4 K
1 | decoder | Sequential | 4.8 K 
---------------------------------------
59.2 K    Trainable params
0         Non-trainable params
59.2 K    Total params
0.237     Total estimated model params size (MB)

Epoch 4: 100%|██████████| 469/469 [00:02<00:00, 157.45it/s, v_num=8]       

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 469/469 [00:02<00:00, 157.03it/s, v_num=8]


In [74]:
model=LitAutoEncoder.load_from_checkpoint("./checkpoint/epoch=4-step=2345.ckpt",encoder=encoder,decoder=decoder)
trainer.validate(model)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Validation: |          | 40/? [00:00<00:00, 148.34it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         val_acc            0.9855999946594238
        val_loss            0.0422264039516449
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 0.0422264039516449, 'val_acc': 0.9855999946594238}]

In [67]:
model=LitAutoEncoder.load_from_checkpoint("./lightning_logs/version_0/checkpoints/epoch=0-step=100.ckpt",encoder=encoder,decoder=decoder)
trainer.validate(model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Validation DataLoader 0: 100%|██████████| 40/40 [00:00<00:00, 198.78it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         val_acc            0.8452000021934509
        val_loss            0.5094771385192871
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 0.5094771385192871, 'val_acc': 0.8452000021934509}]

In [68]:
test_set=MNIST(os.getcwd(),train=False,transform=ToTensor())
test_load=DataLoader(test_set)

In [107]:
i=next(iter(test_load))
i[1].shape

torch.Size([1])

In [103]:
trainer.predict(model,zip(i[0],i[1].unsqueeze(dim=0)))

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Predicting DataLoader 0: |          | 1/? [00:00<00:00, 119.31it/s]


[{'test_loss': tensor(0.0002), 'test_acc': tensor(1.)}]

In [109]:
embeddings=model.encoder(i[0].view(1,-1))
decoder=model.decoder(embeddings)
decoder


tensor([[-17.0678,  -5.6373,  -3.2509,   4.1435, -15.9761,  -8.6385, -32.6251,
          12.6797,  -8.1633,  -2.4982]], grad_fn=<AddmmBackward0>)

In [110]:
torch.argmax(decoder)

tensor(7)

In [111]:
i[1]

tensor([7])

: 