**Basic Dataloader**

In [5]:
import pytorch_lightning as pl
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

class basicLoader(pl.LightningDataModule):
    """LightningDataModule serving the FashionMNIST train and test splits.

    Args:
        batch_size: Number of samples per batch for both loaders.
        num_workers: Worker count forwarded to every ``DataLoader``.
    """

    def __init__(self, batch_size=32, num_workers=4):
        super().__init__()
        self.batch_size = batch_size
        self.num_workers = num_workers

    def setup(self, stage=None):
        """Create both FashionMNIST splits under ``root``, downloading if needed.

        ``stage`` is accepted for Lightning compatibility and is not used.
        """
        to_tensor = transforms.ToTensor()
        self.dataset_train = datasets.FashionMNIST(
            root='root', train=True, download=True, transform=to_tensor
        )
        self.dataset_test = datasets.FashionMNIST(
            root='root', train=False, download=True, transform=to_tensor
        )

    # The two loader factories must live inside the class: defined at module
    # level they are plain functions and the DataModule does not expose them.
    def train_dataloader(self):
        """Return a shuffled loader over the training split (last partial batch dropped)."""
        return DataLoader(
            dataset=self.dataset_train,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        """Return an ordered loader over the complete test split."""
        return DataLoader(
            dataset=self.dataset_test,
            batch_size=self.batch_size,
            shuffle=False,
            # Keep the final partial batch so every test sample is evaluated.
            drop_last=False,
            num_workers=self.num_workers,
        )


**Custom Loader**

In [None]:
import pytorch_lightning as pl
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split, Subset
import torch

class customLoader(pl.LightningDataModule):
    """LightningDataModule that splits one ``ImageFolder`` into train/val subsets.

    The same directory is wrapped twice, once per transform pipeline, so the
    training subset gets augmentation while the validation subset does not.

    Args:
        batch_size: Number of samples per batch for both loaders.
        num_workers: Worker count forwarded to every ``DataLoader``.
        root_dir: Directory laid out as ``ImageFolder`` expects
            (one sub-directory per class).
        val_fraction: Share of the samples assigned to the validation subset.
        seed: Seed of the generator that drives the random split.
    """

    def __init__(self, batch_size=32, num_workers=4, root_dir="animals/animals/animals/",
                 val_fraction=0.2, seed=42):
        super().__init__()
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.root_dir = root_dir
        self.val_fraction = val_fraction
        self.seed = seed

    def setup(self, stage=None):
        """Build the transforms and the train/val subsets.

        ``stage`` is accepted for Lightning compatibility and is not used.
        """
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.train_transform = transforms.Compose([
            transforms.Resize(300),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.RandomVerticalFlip(0.5),
            normalize,
        ])
        self.test_transform = transforms.Compose([
            transforms.Resize(300),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])

        train_folder = datasets.ImageFolder(root=self.root_dir, transform=self.train_transform)
        test_folder = datasets.ImageFolder(root=self.root_dir, transform=self.test_transform)

        total_len = len(train_folder)
        val_len = int(total_len * self.val_fraction)
        train_len = total_len - val_len

        # Only the shuffled index lists are needed, so split a plain range
        # instead of scanning the image directory a third time.
        train_part, val_part = random_split(
            range(total_len), [train_len, val_len],
            generator=torch.Generator().manual_seed(self.seed)
        )

        # Both folders list the same files in the same order, so indices
        # drawn once are valid for either of them.
        self.train_dataset = Subset(train_folder, train_part.indices)
        self.val_dataset = Subset(test_folder, val_part.indices)

    def train_dataloader(self):
        """Return a shuffled loader over the training subset (last partial batch dropped)."""
        return DataLoader(
            dataset=self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return an ordered loader over the complete validation subset."""
        return DataLoader(
            dataset=self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            # Keep the final partial batch so validation metrics cover every sample.
            drop_last=False,
            num_workers=self.num_workers,
        )

**Basic Net**

In [None]:
from torch import optim, nn
import pytorch_lightning as pl
import torch.nn.functional as F
import torchmetrics

class basicEncoder(pl.LightningModule):
    """Four-stage strided CNN followed by a three-layer MLP classifier.

    The size of the first linear layer is derived for 224x224 inputs: each of
    the four stride-2 convolutions halves the spatial resolution.

    Args:
        num_classes: Number of output logits / target classes.
    """

    def __init__(self, num_classes=90):
        super().__init__()
        self.num_classes = num_classes

        widths = [64, 128, 256, 512]
        conv_kwargs = dict(kernel_size=3, stride=2, padding=1)

        # Conv -> BatchNorm -> ReLU, repeated once per entry in ``widths``.
        stages = []
        channels_in = 3
        for channels_out in widths:
            stages.append(nn.Conv2d(in_channels=channels_in, out_channels=channels_out, **conv_kwargs))
            stages.append(nn.BatchNorm2d(channels_out))
            stages.append(nn.ReLU())
            channels_in = channels_out
        self.encoder = nn.Sequential(*stages)

        side = 224 // (2 ** len(widths))
        flat_features = widths[-1] * side ** 2

        self.fc1 = nn.Linear(flat_features, 512)
        self.fc2 = nn.Linear(512, 256)
        self.classifier = nn.Linear(256, self.num_classes)

        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()

        self.loss_fn = nn.CrossEntropyLoss()

        metric_kwargs = dict(task='multiclass', num_classes=self.num_classes)
        self.training_acc = torchmetrics.Accuracy(**metric_kwargs)
        self.val_acc = torchmetrics.Accuracy(**metric_kwargs)

        self.training_f1 = torchmetrics.F1Score(**metric_kwargs)
        self.val_f1 = torchmetrics.F1Score(**metric_kwargs)

    def configure_optimizers(self):
        """Return AdamW over all parameters with a learning rate of 1e-4."""
        return optim.AdamW(self.parameters(), lr=1e-4)

    def forward(self, x):
        """Map an image batch to class logits."""
        features = self.flatten(self.encoder(x))
        hidden = self.relu(self.fc1(features))
        hidden = self.relu(self.fc2(hidden))
        return self.classifier(hidden)

    def _loss_and_probs(self, batch):
        """Return ``(loss, softmax probabilities, targets)`` for one batch."""
        inputs, targets = batch
        logits = self(inputs.float())
        return self.loss_fn(logits, targets), F.softmax(logits, dim=1), targets

    def training_step(self, training_batch, batch_idx):
        """Compute the training loss and update/log the training metrics."""
        loss, probs, targets = self._loss_and_probs(training_batch)

        self.log('training_loss', loss, on_epoch=True, on_step=True)
        self.training_acc.update(probs, targets)
        self.log('training_acc', self.training_acc, on_epoch=True, on_step=False)
        self.training_f1.update(probs, targets)
        self.log('training_f1', self.training_f1, on_epoch=True, on_step=False)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Compute the validation loss and update/log the validation metrics."""
        loss, probs, targets = self._loss_and_probs(val_batch)

        self.log('val_loss', loss, on_epoch=True, on_step=True)
        self.val_acc.update(probs, targets)
        self.log('val_acc', self.val_acc, on_epoch=True, on_step=False)
        self.val_f1.update(probs, targets)
        self.log('val_f1', self.val_f1, on_epoch=True, on_step=False)
        return loss


In [None]:
from torch import optim, nn
import pytorch_lightning as pl

class AutoEncoder(pl.LightningModule):
    """Convolutional autoencoder with a fully connected bottleneck.

    The hard-coded 512 x 14 x 14 feature size corresponds to 224x224 inputs
    passed through four stride-2 convolutions.

    Args:
        latent_dim: Width of the bottleneck vector.
    """

    def __init__(self, latent_dim=128):
        super().__init__()
        conv_args = (3, 2, 1)        # kernel_size, stride, padding
        deconv_args = (3, 2, 1, 1)   # kernel_size, stride, padding, output_padding

        def down(c_in, c_out):
            # Strided conv block that halves the spatial resolution.
            return [nn.Conv2d(c_in, c_out, *conv_args), nn.BatchNorm2d(c_out), nn.ReLU(inplace=True)]

        def up(c_in, c_out):
            # Transposed conv block that doubles the spatial resolution.
            return [nn.ConvTranspose2d(c_in, c_out, *deconv_args), nn.BatchNorm2d(c_out), nn.ReLU(inplace=True)]

        self.encoder = nn.Sequential(
            *down(3, 64),
            *down(64, 128),
            *down(128, 256),
            *down(256, 512),
        )
        flat_features = 512 * 14 * 14

        self.enc_latent = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, latent_dim),
        )

        self.dec_latent = nn.Sequential(
            nn.Linear(latent_dim, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, flat_features),
            nn.ReLU(inplace=True),
        )

        self.decoder = nn.Sequential(
            *up(512, 256),
            *up(256, 128),
            *up(128, 64),
            nn.ConvTranspose2d(64, 3, *deconv_args),
            nn.Sigmoid(),
        )

        self.loss_fn = nn.MSELoss()

    def forward(self, x):
        """Encode ``x`` to the latent vector and decode it back to an image batch."""
        latent = self.enc_latent(self.encoder(x))
        feature_map = self.dec_latent(latent).view(-1, 512, 14, 14)
        return self.decoder(feature_map)

    def configure_optimizers(self):
        """Return AdamW over all parameters with a learning rate of 1e-4."""
        return optim.AdamW(self.parameters(), lr=1e-4)

    def _reconstruction_loss(self, batch):
        """Return the MSE between the inputs of ``batch`` and their reconstruction."""
        images, _ = batch
        return self.loss_fn(self(images), images)

    def training_step(self, batch, batch_idx):
        """Compute and log the reconstruction loss for a training batch."""
        loss = self._reconstruction_loss(batch)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the reconstruction loss for a validation batch."""
        loss = self._reconstruction_loss(batch)
        self.log('val_loss', loss, prog_bar=True)
        return loss


**Trening**

In [None]:
from pytorch_lightning.loggers import TensorBoardLogger
# Instantiate the classifier with its default number of classes.
model = basicEncoder()
# Log to TensorBoard under the "lightning_logs" directory, run name "model".
logger = TensorBoardLogger("lightning_logs", name="model")
# Train for at most 15 epochs and emit logged values on every step.
trainer = pl.Trainer(logger = logger, max_epochs = 15, log_every_n_steps =1)
# NOTE(review): `dm` is not defined in this cell - presumably a data module
# (e.g. a customLoader instance) created in an earlier cell; confirm.
trainer.fit(model, dm)

In [None]:
# Load the TensorBoard notebook extension and open it on the training log directory.
%load_ext tensorboard
%tensorboard --logdir lightning_logs

**Visualize reconstructions**

In [None]:
import torch
import matplotlib.pyplot as plt

def visualize_reconstructions(model, dataloader, device='cpu', num_images=8):
    """Plot originals (top row) against model reconstructions (bottom row).

    Only the first batch yielded by ``dataloader`` is used. The model is put
    into eval mode and moved to ``device`` as a side effect.

    Args:
        model: Module mapping an image batch to a same-shaped image batch.
        dataloader: Iterable of ``(images, labels)`` batches; images are
            indexed as (batch, channel, height, width).
        device: Device on which the forward pass is executed.
        num_images: Maximum number of image pairs to draw.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.eval()
    model.to(device)

    # Fail with a clear message instead of an UnboundLocalError further down.
    first_batch = next(iter(dataloader), None)
    if first_batch is None:
        raise ValueError("dataloader yielded no batches; nothing to visualize")

    x, _ = first_batch
    with torch.no_grad():
        x_hat = model(x.to(device)).cpu()
    x = x.cpu()

    n = min(num_images, x.size(0))
    if n <= 0:
        # Nothing requested or an empty batch: avoid creating a zero-width figure.
        return

    plt.figure(figsize=(n * 2, 4))

    for i in range(n):
        # Original image. Clamp explicitly to the [0, 1] range imshow expects
        # for float RGB data (e.g. when the inputs were normalized).
        ax = plt.subplot(2, n, i + 1)
        img = x[i].permute(1, 2, 0).clamp(0, 1).numpy()
        plt.imshow(img)
        ax.axis('off')
        if i == 0:
            ax.set_title('Original')

        # Reconstruction
        ax = plt.subplot(2, n, n + i + 1)
        recon = x_hat[i].permute(1, 2, 0).clamp(0, 1).numpy()
        plt.imshow(recon)
        ax.axis('off')
        if i == 0:
            ax.set_title('Reconstruction')

    plt.tight_layout()
    plt.show()

# Build the data module and create its datasets manually (no Trainer involved here).
dataset = customLoader()
dataset.setup()
# NOTE(review): `model` is whatever the notebook bound last; the training cell
# binds a basicEncoder, whose outputs are class logits rather than images -
# confirm that a trained AutoEncoder is bound before running this cell.
# Use the GPU when one is available instead of failing on CPU-only machines.
visualize_reconstructions(
    model,
    dataset.val_dataloader(),
    device='cuda' if torch.cuda.is_available() else 'cpu',
    num_images=8,
)

**Model pretrenowany**

In [None]:
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
import torchmetrics
import torchvision.models as models
from torch import optim, nn

# Pretrained EfficientNet-B4; SneakerModel below reads this module-level name
# when it is instantiated.
# NOTE(review): newer torchvision releases favour the `weights=` argument over
# `pretrained=` - confirm against the installed version.
model = models.efficientnet_b4(pretrained=True)

class SneakerModel(pl.LightningModule):
    """Trainable two-layer head on top of a frozen pretrained feature extractor.

    Args:
        num_classes: Number of output logits / target classes.
        backbone: Network whose children, except the last one, are reused as
            the feature extractor. Defaults to the module-level ``model``
            as it is bound at construction time.
    """

    def __init__(self, num_classes=50, backbone=None):
        super().__init__()
        # Resolve the source network once so later rebinding of the global
        # name `model` cannot affect an existing instance.
        source = model if backbone is None else backbone
        layers = list(source.children())
        self.backbone = nn.Sequential(*layers[:-1])

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)
        self.dropout = nn.Dropout(p=0.4)

        # The dropped last child is the original classification head; its first
        # Linear layer gives the feature width. A concrete nn.Linear means the
        # optimizer is never built over uninitialized lazy parameters.
        feature_dim = next(
            (m.in_features for m in layers[-1].modules() if isinstance(m, nn.Linear)),
            None,
        )
        if feature_dim is None:
            # Unknown head layout: fall back to shape inference on first forward.
            self.fc1 = nn.LazyLinear(500)
        else:
            self.fc1 = nn.Linear(feature_dim, 500)
        self.fc2 = nn.Linear(500, num_classes)

        self.loss_fn = nn.CrossEntropyLoss()
        self.relu = nn.ReLU()

        self.train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes)
        self.val_acc = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes)
        self.train_macro_f1 = torchmetrics.F1Score(num_classes=num_classes, task="multiclass", average='macro')
        self.val_macro_f1 = torchmetrics.F1Score(num_classes=num_classes, task="multiclass", average='macro')

    def forward(self, x):
        """Return class logits; the backbone runs in eval mode without gradients."""
        # Re-applied on every call because a surrounding .train() call would
        # switch the backbone back to training mode.
        self.backbone.eval()
        with torch.no_grad():
            x = self.backbone(x)
        x = self.avgpool(x).flatten(1)
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

    def configure_optimizers(self):
        """Return AdamW with a learning rate of 1e-4.

        Backbone parameters are included but never receive gradients because
        ``forward`` runs the backbone under ``torch.no_grad()``.
        """
        return optim.AdamW(self.parameters(), lr=1e-4)

    def training_step(self, train_batch, batch_idx):
        """Compute the training loss and update/log accuracy and macro F1."""
        inputs, labels = train_batch
        y_hat = self(inputs.float())
        loss = self.loss_fn(y_hat, labels)
        y_hat = F.softmax(y_hat, dim=1)

        self.log('train_loss', loss, on_step=True, on_epoch=True)
        self.train_acc(y_hat, labels)
        self.log('train_acc', self.train_acc, on_epoch=True, on_step=False)
        self.train_macro_f1(y_hat, labels)
        self.log('train_macro_f1', self.train_macro_f1, on_epoch=True, on_step=False)

        return loss

    def validation_step(self, val_batch, val_idx):
        """Compute the validation loss and update/log accuracy and macro F1."""
        inputs, labels = val_batch
        y_hat = self(inputs.float())
        loss = self.loss_fn(y_hat, labels)
        y_hat = F.softmax(y_hat, dim=1)

        self.log('val_loss', loss, on_step=True, on_epoch=True)
        self.val_acc(y_hat, labels)
        self.log('val_acc', self.val_acc, on_epoch=True, on_step=False)
        self.val_macro_f1(y_hat, labels)
        self.log('val_macro_f1', self.val_macro_f1, on_epoch=True, on_step=False)

        # Returned for parity with training_step.
        return loss

    

**Wizualizacja sieci**

In [None]:
import graphviz
from torchview import draw_graph

# Render graphviz output as PNG inside the notebook.
graphviz.set_jupyter_format('png')
# Trace `model` with torchview on the 'meta' device, expanding nested modules.
# NOTE(review): BATCH_SIZE, IMG_CH and IMG_SIZE are not defined in this cell -
# presumably notebook-level constants set elsewhere; confirm.
model_graph = draw_graph(
    model,
    input_size=(BATCH_SIZE, IMG_CH, IMG_SIZE, IMG_SIZE),
    device='meta',
    expand_nested=True
)
# Enlarge the rendering, then display it as the cell output.
model_graph.resize_graph(scale=1.5)
model_graph.visual_graph

**Transformer z Macierzą pomyłek**

In [None]:
import pytorch_lightning as pl
from torch import nn
from torch import optim
import torchmetrics
from transformers import SwinModel
import matplotlib.pyplot as plt
import seaborn as sns


class SneakerModel(pl.LightningModule):
    """Classifier head on a frozen Swin-Tiny backbone with confusion-matrix logging.

    Accuracy and F1 are logged as metric objects, so Lightning computes and
    resets them at epoch end. Only the validation confusion matrix is handled
    manually, in ``on_validation_epoch_end``.

    Args:
        num_classes: Number of output logits / target classes.
        lr: Learning rate for AdamW.
    """

    def __init__(self, num_classes=50, lr=1e-4):
        super().__init__()
        self.num_classes = num_classes
        self.lr = lr

        self.backbone = SwinModel.from_pretrained(
            "microsoft/swin-tiny-patch4-window7-224",
            add_pooling_layer=True
        )
        hidden_size = self.backbone.config.hidden_size

        # Freeze the pretrained weights; only the head below is trained.
        for p in self.backbone.parameters():
            p.requires_grad = False

        self.fc1 = nn.Linear(hidden_size, 500)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(500, self.num_classes)

        self.loss_fn = nn.CrossEntropyLoss()

        metric_kwargs = dict(task="multiclass", num_classes=self.num_classes)
        self.train_acc = torchmetrics.Accuracy(**metric_kwargs)
        self.val_acc = torchmetrics.Accuracy(**metric_kwargs)
        self.test_acc = torchmetrics.Accuracy(**metric_kwargs)

        self.train_f1 = torchmetrics.F1Score(**metric_kwargs)
        self.val_f1 = torchmetrics.F1Score(**metric_kwargs)
        self.test_f1 = torchmetrics.F1Score(**metric_kwargs)

        # NOTE: only val_cm is read and reset by this class; train_cm and
        # test_cm accumulate until the caller computes or resets them.
        self.train_cm = torchmetrics.ConfusionMatrix(**metric_kwargs)
        self.val_cm = torchmetrics.ConfusionMatrix(**metric_kwargs)
        self.test_cm = torchmetrics.ConfusionMatrix(**metric_kwargs)

    def configure_optimizers(self):
        """Return AdamW with learning rate ``self.lr``."""
        return optim.AdamW(self.parameters(), lr=self.lr)

    def forward(self, x):
        """Return class logits for a batch of pixel values."""
        # The backbone is frozen, so keep it in eval mode to disable its
        # stochastic layers; a surrounding .train() call would re-enable them.
        self.backbone.eval()
        outputs = self.backbone(pixel_values=x)
        # Swin has no class token, so position 0 of last_hidden_state is only
        # the first patch. Use the pooled output requested via add_pooling_layer.
        x = outputs.pooler_output
        x = self.relu(self.fc1(x))
        logits = self.fc2(x)
        return logits

    def _shared_step(self, batch, stage):
        """Run one batch and log loss, accuracy and F1 under the ``stage`` prefix.

        ``stage`` is one of ``"train"``, ``"val"`` or ``"test"`` and selects
        the metric attributes ``{stage}_acc``, ``{stage}_f1``, ``{stage}_cm``.
        """
        x, y = batch
        logits = self(x.float())
        loss = self.loss_fn(logits, y)
        probs = nn.functional.softmax(logits, dim=1)

        # Validation accuracy/F1 are the values shown in the progress bar.
        show = stage == "val"
        acc = getattr(self, f"{stage}_acc")
        f1 = getattr(self, f"{stage}_f1")

        self.log(f'{stage}_loss', loss, on_step=False, on_epoch=True)
        acc.update(probs, y)
        self.log(f'{stage}_acc', acc, on_step=False, on_epoch=True, prog_bar=show)
        f1.update(probs, y)
        self.log(f'{stage}_f1', f1, on_step=False, on_epoch=True, prog_bar=show)
        getattr(self, f"{stage}_cm").update(probs, y)

        return loss

    def training_step(self, batch, batch_idx):
        """Compute the training loss and update the training metrics."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss and update the validation metrics."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Compute the test loss and update the test metrics."""
        return self._shared_step(batch, "test")

    def on_validation_epoch_end(self):
        """Log the validation confusion matrix as a figure and reset it.

        val_acc / val_f1 are intentionally left alone here: they are logged as
        metric objects in ``validation_step``, and computing or resetting them
        by hand as well would interfere with Lightning's own epoch-end handling.
        """
        experiment = getattr(self.logger, "experiment", None)
        # Skip plotting when there is no logger or it cannot store figures.
        if hasattr(experiment, "add_figure"):
            cm = self.val_cm.compute().cpu().numpy()
            fig, ax = plt.subplots(figsize=(8, 6))
            class_names = [str(i) for i in range(self.num_classes)]
            sns.heatmap(
                cm, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=class_names,
                yticklabels=class_names,
                ax=ax
            )
            ax.set_ylabel("True")
            ax.set_xlabel("Predicted")
            ax.set_title(f"Val Confusion Matrix (epoch {self.current_epoch})")

            experiment.add_figure("val_confusion_matrix", fig, self.current_epoch)
            plt.close(fig)

        # The confusion matrix is never passed to self.log, so reset it by hand.
        self.val_cm.reset()


**U-Net with skip connections**

In [None]:
import pytorch_lightning as pl
from torch import nn
from torch import optim
import torch.nn.functional as F
import torch

class unet(pl.LightningModule):
    """U-Net: four down-sampling stages, a bottleneck and four up-sampling stages.

    Each decoder stage concatenates its up-sampled features with the encoder
    features of the same resolution (skip connection). The four 2x2 poolings
    are mirrored by four stride-2 transposed convolutions, so the input height
    and width need to be multiples of 16 for the concatenations to line up.

    Args:
        num_class: Number of channels produced by the final 1x1 convolution.
    """

    def __init__(self, num_class=90):
        super().__init__()

        # Encoder: two 3x3 convolutions per stage, then 2x2 max pooling.
        self.e11 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.e12 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.e21 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.e22 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.e31 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.e32 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.e41 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.e42 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Bottleneck.
        self.e51 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
        self.e52 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1)

        # Decoder: the first convolution of each stage takes twice the channels
        # of the up-convolution output because of the concatenated skip features.
        self.upconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.d11 = nn.Conv2d(1024, 512, kernel_size=3, padding=1)
        self.d12 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.upconv2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.d21 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
        self.d22 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.upconv3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.d31 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.d32 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.d41 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.d42 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        self.outconv = nn.Conv2d(64, num_class, kernel_size=1)

        self.loss_fn = nn.MSELoss()

    def configure_optimizers(self):
        """Return AdamW over all parameters with a learning rate of 1e-4."""
        return optim.AdamW(self.parameters(), lr=1e-4)

    def forward(self, x):
        """Return the ``num_class``-channel output map for an image batch."""
        # Encoder
        xe11 = F.relu(self.e11(x))
        xe12 = F.relu(self.e12(xe11))
        xp1 = self.pool1(xe12)

        xe21 = F.relu(self.e21(xp1))
        xe22 = F.relu(self.e22(xe21))
        xp2 = self.pool2(xe22)

        xe31 = F.relu(self.e31(xp2))
        xe32 = F.relu(self.e32(xe31))
        xp3 = self.pool3(xe32)

        xe41 = F.relu(self.e41(xp3))
        xe42 = F.relu(self.e42(xe41))
        xp4 = self.pool4(xe42)

        xe51 = F.relu(self.e51(xp4))
        xe52 = F.relu(self.e52(xe51))

        # Decoder: up-sample, concatenate the matching encoder features along
        # the channel axis, then apply two convolutions.
        xu1 = self.upconv1(xe52)
        xu11 = torch.cat([xu1, xe42], dim=1)
        xd11 = F.relu(self.d11(xu11))
        xd12 = F.relu(self.d12(xd11))

        xu2 = self.upconv2(xd12)
        xu22 = torch.cat([xu2, xe32], dim=1)
        xd21 = F.relu(self.d21(xu22))
        xd22 = F.relu(self.d22(xd21))

        xu3 = self.upconv3(xd22)
        xu33 = torch.cat([xu3, xe22], dim=1)
        xd31 = F.relu(self.d31(xu33))
        xd32 = F.relu(self.d32(xd31))

        xu4 = self.upconv4(xd32)
        xu44 = torch.cat([xu4, xe12], dim=1)
        xd41 = F.relu(self.d41(xu44))
        xd42 = F.relu(self.d42(xd41))

        # Output layer
        out = self.outconv(xd42)

        return out

    def training_step(self, train_batch, batch_idx):
        """Return the MSE between the network output and the batch labels.

        NOTE(review): nn.MSELoss presumably needs float labels shaped like the
        output map - confirm against the dataset that feeds this model.
        """
        inputs, labels = train_batch
        y_hat = self(inputs.float())
        loss = self.loss_fn(y_hat, labels)
        # The softmax previously computed here was never used, so it is gone.
        self.log('train_loss', loss)
        return loss

In [None]:
import graphviz
from torchview import draw_graph
# Rebinds the notebook-level name `model` to a freshly initialised U-Net.
model = unet()

# Render graphviz output as PNG inside the notebook.
graphviz.set_jupyter_format('png')
# Trace the U-Net on CPU with a single 3-channel 224x224 input.
model_graph = draw_graph(
    model,
    input_size=(1, 3, 224, 224),
    device='cpu'
)
# Enlarge the rendering, then display it as the cell output.
model_graph.resize_graph(scale=1.5)
model_graph.visual_graph