In [None]:
#!pip install https://github.com/google-research/perceptual-quality/archive/master.zip

Collecting https://github.com/google-research/perceptual-quality/archive/master.zip
  Downloading https://github.com/google-research/perceptual-quality/archive/master.zip
     - 0 bytes ? 0:00:00
     \ 45.3 kB 466.5 kB/s 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting tensorflow_probability>=0.9 (from perceptual-quality==0.1.dev0)
  Downloading tensorflow_probability-0.25.0-py2.py3-none-any.whl.metadata (13 kB)
Collecting cloudpickle>=1.3 (from tensorflow_probability>=0.9->perceptual-quality==0.1.dev0)
  Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting dm-tree (from tensorflow_probability>=0.9->perceptual-quality==0.1.dev0)
  Downloading dm_tree-0.1.9-cp311-cp311-win_amd64.whl.metadata (2.5 kB)
Downloading tensorflow_probability-0.25.0-py2.py3-none-any.whl (7.0 MB)
   ---------------------------------------- 0.0/7.0 MB ? eta -:--:--
   --- ------------------------------------ 0.5/7.0 MB


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
from LoadingDefault import LoadData

from torch import nn
import torch.optim as optim
from torchinfo import summary

import torch
import torch.nn.functional as F

from perceptual_quality.nlpd import nlpd
import numpy as np



In [2]:
dataloader = LoadData(limit=100, batch_size=8)



In [4]:
class EntropyLimitedAutoencoder(nn.Module):
    def __init__(self):
        super().__init__()

        self.centers = torch.Tensor([-1, 1])
        self.sigma = 10
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=128, kernel_size=3, stride=2, padding=1),  # Reduce tamaño a 128x128
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1),  # Reduce tamaño a 128x128
            nn.BatchNorm2d(128),
            nn.Tanh()
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.Conv2d(in_channels=128, out_channels=1, kernel_size=3, stride=1, padding=1),
            nn.Sigmoid()
        )


    def encode(self, x):
        y = self.encoder(x)
        return y

    
    def quantise(self, y):
        if self.centers is None:
            return y
        y_flat = y.reshape(y.size(0), y.size(1), y.size(2)*y.size(3), 1)
        dist = torch.abs((y_flat - self.centers))**2
        if self.train:
            phi = F.softmax(-self.sigma * dist, dim=-1)
        else:
            phi = F.softmax(-1e7 * dist, dim=-1)
            symbols_hard = torch.argmax(phi, axis=-1)
            phi = F.one_hot(symbols_hard, num_classes=self.centers.size(0))
        inner_product = phi * self.centers
        y_hat = torch.sum(inner_product, axis=-1)
        y_hat = y_hat.reshape(y.shape)
        return y_hat
    

    def decode(self, y):
        x = self.decoder(y)
        return x
    

    def forward(self, x):
        encoded = self.encode(x)
        limit_entropy = self.quantise(encoded)
        decoded = self.decode(limit_entropy)
        return decoded

In [38]:
import tensorflow as tf

def format(x):
    return tf.convert_to_tensor(np.transpose((x)*255, (1, 2, 0)))

class NLPDLoss(nn.Module):
    def __init__(self, num_levels=3):
        """
        Implementación de la pérdida Normalized Laplacian Pyramid Distance (NLPD)
        como criterio de entrenamiento para un autoencoder.

        num_levels: Número de niveles en la pirámide Laplaciana.
        """
        super(NLPDLoss, self).__init__()
        self.num_levels = num_levels

    def forward(self, original, reconstructed):
        """
        Calcula la pérdida NLPD entre la imagen original y la reconstruida.

        original: Tensor de PyTorch con la imagen original (B, C, H, W).
        reconstructed: Tensor de PyTorch con la imagen reconstruida (B, C, H, W).

        Retorna: Valor escalar de la pérdida NLPD.
        """
        batch_size = original.shape[0]
        total_loss = 0.0

        for i in range(batch_size):
            # Convertir a numpy
            original_np = original[i].detach().cpu().numpy()
            reconstructed_np = reconstructed[i].detach().cpu().numpy()

            total_loss += nlpd(format(original[i].detach().numpy()),
                               format(reconstructed[i].detach().numpy()),
                               num_levels = self.num_levels).numpy()[0]
            print(total_loss)
        return torch.tensor(total_loss / batch_size, requires_grad=True)

In [39]:
ae = EntropyLimitedAutoencoder()
criterion = NLPDLoss()
optimizer = optim.AdamW(ae.parameters(), lr=1e-3, weight_decay=1e-4)
#summary(ae)

In [40]:
i = 0
for batch in dataloader:
    if i == 0:
        aux1 = batch[0][0].numpy()
    if i == 1:
        aux2 = batch[0][0].numpy()
    i += 1
    if i>1:
        break
    print(type(batch))
    print(len(batch))
    print(type(batch[0]))
    print(len(batch[0]))
    print(batch[0].shape)
    print(batch[0][0].shape)
    print(batch[0][0][0][3][8])

nlpd(format(aux1), format(aux2)).numpy()[0]

<class 'list'>
1
<class 'torch.Tensor'>
8
torch.Size([8, 1, 256, 256])
torch.Size([1, 256, 256])
tensor(0.1262)


0.7087023

In [None]:
num_epochs = 3

for epoch in range(num_epochs):
    total_loss = 0
    for batch in dataloader:  # dataloader ya tiene los batches de 64x1x256x256
        batch = batch[0] # Extraer tensor

        optimizer.zero_grad()  # Reiniciar gradientes

        outputs = ae(batch)  # Forward pass
        loss = criterion(outputs, batch)  # Comparar con entrada

        loss.backward()  # Backpropagation
        optimizer.step()  # Actualizar pesos

        total_loss += loss.item()
        print(f"Pérdida: {loss.item():.6f}")
    avg_loss = total_loss / len(dataloader)
    print(f"Época [{epoch+1}/{num_epochs}], Pérdida: {avg_loss:.6f}")

0.5652077794075012
1.1320967078208923
1.687343180179596
2.249794065952301
2.8036998510360718
3.3370769023895264
3.924818515777588
4.464272201061249
Pérdida: 0.558034
0.5572550296783447
1.1090576648712158
1.656432867050171
2.2312901616096497
2.779008984565735
3.3373003602027893
3.9048714637756348
4.4746628403663635
Pérdida: 0.559333
0.5320616364479065
1.1147554516792297
1.6653363108634949
2.218120038509369
2.7477316856384277
3.3165122270584106
3.8900914788246155
4.437107741832733
Pérdida: 0.554638
0.558226466178894
1.110119342803955
1.6811766624450684
2.247038245201111
2.8041699528694153
3.3550907969474792
3.917350113391876
4.437267899513245
Pérdida: 0.554658
0.5821675658226013
1.1175047755241394
1.6754329204559326
2.239479124546051
2.815777599811554
3.3571736812591553
3.9202005863189697
4.476107895374298
Pérdida: 0.559513
0.550211489200592
1.1038737297058105
1.6697062849998474
2.2622791528701782
2.843780755996704
3.4045568704605103
3.9800620675086975
4.5343862771987915
Pérdida: 0.56679