In [1]:
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms
from torchvision.datasets import MNIST, FashionMNIST
from torch.utils.data import DataLoader
import lightning as L

In [2]:
class Encoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Sequential(nn.Linear(28 * 28, 64), nn.ReLU(), nn.Linear(64, 3))

    def forward(self, x):
        return self.l1(x)


class Decoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 28 * 28))

    def forward(self, x):
        return self.l1(x)

In [2]:
class LitAutoEncoder(L.LightningModule):
    def __init__(self):
        super().__init__()
        self.save_hyperparameters()
        self.example_input_array = torch.Tensor(64, 1, 28, 28)
        self.encoder = nn.Sequential(nn.Linear(28 * 28, 64), nn.ReLU(), nn.Linear(64, 3))
        self.decoder = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 28 * 28))

    def forward(self, x):
        x = x.view(x.size(0), -1)
        return self.encoder(x)
    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        return z
        
    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        self.log("train_loss", loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        # this is the validation loop
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        val_loss = F.mse_loss(x_hat, x)
        self.log("val_loss", val_loss, prog_bar=True)
        # return val_loss
        
    def test_step(self, batch, batch_idx):
        # this is the validation loop
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        test_loss = F.mse_loss(x_hat, x)
        self.log("test", test_loss, prog_bar=True)
        # return val_loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

In [15]:

dataset = FashionMNIST(os.getcwd(), download=True, transform=transforms.ToTensor())
train_loader = DataLoader(dataset, batch_size=64, num_workers=19,persistent_workers=True,pin_memory=True,
                          shuffle=True)
val_loader = DataLoader(dataset, len(dataset), num_workers=19, persistent_workers=True,pin_memory=True,
                          shuffle=False)

In [20]:
from lightning.pytorch.callbacks import DeviceStatsMonitor
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.tuner import Tuner

logger = TensorBoardLogger("tb_logs", name="my_model")
# model
autoencoder = LitAutoEncoder()
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
early_stop_callback = EarlyStopping(monitor="train_loss", 
                                    min_delta=0.00, 
                                    patience=0, 
                                    verbose=True, 
                                    mode="min")

# train model
trainer = L.Trainer(max_epochs=2, default_root_dir=os.path.join(os.getcwd(), 'lightning_ckpts'),
                    callbacks=[], logger=logger, precision="16-mixed", profiler="simple")
model=autoencoder

trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | In sizes  | Out sizes
---------------------------------------------------------------
0 | encoder | Sequential | 50.4 K | [64, 784] | [64, 3]  
1 | decoder | Sequential | 51.2 K | ?         | ?        
---------------------------------------------------------------
101 K     Trainable params
0         Non-trainable params
101 K     Total params
0.407     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=2` reached.
FIT Profiler Report

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Action                                                                                                                                                         	|  Mean duration (s)	|  Num calls      	|  Total time (s) 	|  Percentage %   	|
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Total                                                                                                                                                 

In [13]:
trainer.test(model=autoencoder, dataloaders=val_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test': 0.026734165847301483}]

In [11]:
%reload_ext tensorboard
%tensorboard --logdir=./lightning_ckpts/lightning_logs/version_8/

Reusing TensorBoard on port 6010 (pid 28848), started 0:02:05 ago. (Use '!kill 28848' to kill it.)

In [151]:
model = LitAutoEncoder.load_from_checkpoint(os.path.join(os.getcwd(), 
                                        'lightning_ckpts', 'lightning_logs',
                                        'version_4', 'checkpoints', 
                                        'epoch=1-step=1876.ckpt'))
# model.eval()
# model(torch.rand(1, 28, 28, device=model.device))
trainer = L.Trainer()
predictions = trainer.predict(model, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

In [155]:
def __init__(self, in_channels: int, out_channels: int, hidden_size: int, patch_size: int, num_layers: int):
        super().__init__()
        self.positional_embeds = nn.Embedding(100000, hidden_size)
        self.register_buffer("positional_ids", torch.arange(100000).unsqueeze(0))
        self.patch_conv = nn.Conv2d(in_channels, hidden_size, patch_size, stride=patch_size)
        self.patch_deconv = nn.ConvTranspose2d(hidden_size, out_channels, patch_size, stride=patch_size)
        self.model = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                hidden_size,
                hidden_size // 64,
                hidden_size * 8 // 3,
                activation=F.silu,
                batch_first=True,
                norm_first=True,
            ),
            num_layers,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        hidden = self.patch_conv(x)
        n_seq = hidden.shape[2] * hidden.shape[3]
        pos_embeds = self.positional_embeds(self.positional_ids[:, :n_seq]).expand(hidden.shape[0], -1, -1)
        output = self.model(hidden.flatten(2).transpose(1, 2) + pos_embeds).transpose(-1, -2).view_as(hidden)
        output = self.patch_deconv(output)
        return output

torch.Size([60000, 3])

In [34]:
in_channels=8
out_channels=32
hidden_size=32
patch_size=(2,4)
num_layers=1
a = torch.rand((64, 8, 32, 64))

patch_conv = nn.Conv2d(in_channels, hidden_size, patch_size, stride=patch_size)
a=patch_conv(a)
# torch.Size([64, 32, 16, 16])
print(a.shape)
n_seq = a.shape[2] * a.shape[3]
positional_embeds = nn.Embedding(100000, hidden_size)
# register_buffer("positional_ids", torch.arange(100000).unsqueeze(0))
positional_ids = torch.arange(100000).unsqueeze(0).detach()
pos_embeds = positional_embeds(positional_ids[:, :n_seq]).expand(a.shape[0], -1, -1)
print(pos_embeds.shape)
print(a.flatten(2).shape)
print(a.flatten(2).transpose(1, 2).shape)

torch.Size([64, 32, 16, 16])
torch.Size([64, 256, 32])
torch.Size([64, 32, 256])
torch.Size([64, 256, 32])
