# I used a convolutional autoencoder and collected its bottleneck representation to create RGB images.

This code implements a convolutional autoencoder (CAE) designed to compress and reconstruct grayscale images of size 1024x1024. Here's a step-by-step breakdown:

Data Preprocessing:

CustomPadCrop: Ensures images are padded (if smaller) or center-cropped (if larger) to the target size (1024x1024).

ConditionalTransform: Resizes images whose width is below the target width to 1024x1024 (the aspect ratio is not preserved); all other images are passed to CustomPadCrop, so that every input ends up with the same dimensions.

ImageDataset: Loads images from specified directories (malignant/benign tumor images), filters valid files, and applies transformations.

Model Architecture:

Encoder: Downsamples images via convolutional layers (Conv2d) and max pooling:

Input: 1x1024x1024 → 16x512x512 → 3x256x256 (bottleneck).

Decoder: Upsamples the bottleneck back to the original size using two transposed convolutions (ConvTranspose2d), followed by a 1x1 convolution and a sigmoid:

Bottleneck: 3x256x256 → 16x512x512 → 16x1024x1024 → 1x1024x1024 (sigmoid output).

Training Setup:

Loss Function: Uses Mean Squared Error (MSE) to measure reconstruction error.

Optimizer: Adam optimizer with learning rate 1e-3 and weight decay 1e-5 for regularization.

Data Pipeline: Batches images via DataLoader (batch size=32) for efficient training.

The autoencoder learns to compress input images into a compact latent representation (3x256x256) and reconstruct them.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from PIL import Image
import os
from torch.utils.data import Dataset
import glob

class CustomPadCrop:
    """Force a PIL image to ``target_size`` by center-cropping and zero-padding.

    Each axis is handled independently: an axis longer than the target is
    center-cropped, an axis shorter than the target is padded with black (0)
    pixels on both sides. An image that already has the target size is
    returned unchanged.

    Args:
        target_size: ``(width, height)`` of the output, in PIL's size order.
    """

    def __init__(self, target_size):
        self.target_size = target_size

    def __call__(self, img):
        """Return ``img`` cropped and/or padded to ``self.target_size``.

        Args:
            img: A PIL image (anything exposing ``size`` as ``(width, height)``).

        Returns:
            The input itself when it already matches the target size, the
            center-cropped image when only cropping was needed, or a new
            mode-``"L"`` image with the (possibly cropped) input centered on a
            black canvas when padding was needed.
        """
        target_w, target_h = self.target_size
        width, height = img.size

        # Crop before padding so that an image which is too large on one axis
        # and too small on the other is centered on BOTH axes, and so the
        # padding canvas never has to be larger than the target.
        crop_w = min(width, target_w)
        crop_h = min(height, target_h)
        if (crop_w, crop_h) != (width, height):
            # torchvision takes (height, width), the reverse of PIL's size.
            img = transforms.functional.center_crop(img, (crop_h, crop_w))

        if (crop_w, crop_h) == (target_w, target_h):
            return img

        # At least one axis is still short: center the image on a black canvas.
        padded_img = Image.new("L", (target_w, target_h), color=0)
        offset = ((target_w - crop_w) // 2, (target_h - crop_h) // 2)
        padded_img.paste(img, offset)
        return padded_img

class ConditionalTransform:
    """Pick between a plain resize and pad/crop based on the image width.

    Images narrower than the target width are resized straight to
    ``target_size`` (which may distort the aspect ratio); every other image
    is handed to ``CustomPadCrop``.
    """

    def __init__(self, target_size):
        self.target_size = target_size
        self.resize_transform = transforms.Resize(self.target_size)
        self.custom_transform = CustomPadCrop(self.target_size)

    def __call__(self, img):
        """Apply the transform selected by ``img``'s width and return the result."""
        current_width, _ = img.size
        too_narrow = current_width < self.target_size[0]
        # Narrow images are stretched to size; the rest are padded/cropped.
        chosen = self.resize_transform if too_narrow else self.custom_transform
        return chosen(img)


class ImageDataset(Dataset):
    """Dataset of grayscale images found recursively under several directories.

    The file list is built once, at construction time, by
    ``collect_file_paths``.

    Args:
        root_dirs: Iterable of directory paths to scan recursively.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, root_dirs, transform=None):
        self.root_dirs = root_dirs
        self.transform = transform
        self.file_paths = self.collect_file_paths()

    def __len__(self):
        """Return the number of image files that passed the scan."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as mode ``'L'`` and apply ``self.transform``.

        Returns:
            The transformed image, or ``None`` if loading or transforming
            failed (the error is printed). NOTE: a ``None`` sample cannot be
            batched by the DataLoader's default collate function, so callers
            that expect failures need a collate function that filters them.
        """
        image_path = self.file_paths[idx]
        try:
            # The context manager closes the underlying file as soon as the
            # pixel data has been converted into an independent image.
            with Image.open(image_path) as raw:
                image = raw.convert('L')
            if self.transform:
                image = self.transform(image)
            return image
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            return None

    def collect_file_paths(self):
        """Return the paths of all non-empty, openable image files.

        Every directory in ``self.root_dirs`` is searched recursively for the
        supported extensions. Files that are empty are skipped silently;
        files that cannot be inspected or opened are reported and skipped.
        Matches for each pattern are sorted so the index-to-file mapping is
        reproducible between runs.
        """
        file_paths = []
        image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.bmp', '*.tiff', '*.gif', '*.webp']
        for root_dir in self.root_dirs:
            for pattern in image_extensions:
                matches = glob.glob(os.path.join(root_dir, '**', pattern), recursive=True)
                for file in sorted(matches):
                    try:
                        # getsize is inside the try so an unreadable entry
                        # (e.g. a broken symlink) does not abort the scan.
                        if os.path.getsize(file) <= 0:
                            continue
                        # Open only to check readability, and close the
                        # handle right away instead of leaking one per file.
                        with Image.open(file):
                            pass
                    except Exception as e:
                        print(f"Cannot open {file}: {e}")
                    else:
                        file_paths.append(file)
        return file_paths

# Define transformations: bring every image to target_size, then convert the
# PIL image to a tensor.
target_size = (1024, 1024)
transform = transforms.Compose([
    ConditionalTransform(target_size),
    transforms.ToTensor(),
])

# Define directories containing the images (placeholders; point these at the
# real data folders).
root_dirs = [
    'path/to/my/image1',
    'path/to/my/images2',
]

# Create a custom dataset using the directories and transformation
custom_dataset = ImageDataset(root_dirs, transform=transform)

# Create a DataLoader to load the data in shuffled batches during training
data_loader = torch.utils.data.DataLoader(dataset=custom_dataset, batch_size=32, shuffle=True)

class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),       # -> [N, 16, 1024, 1024]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                   # -> [N, 16, 512, 512]
            nn.Conv2d(16, 3, 3, padding=1),       # -> [N, 3, 512, 512]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                   # -> [N, 3, 256, 256]  (Bottleneck size)
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(3, 16, kernel_size=2, stride=2, padding=0, output_padding=0),  # -> [N, 16, 512, 512]
            nn.ReLU(),
            nn.ConvTranspose2d(16, 16, kernel_size=2, stride=2, padding=0, output_padding=0), # -> [N, 16, 1024, 1024]
            nn.ReLU(),
            nn.Conv2d(16, 1, kernel_size=1, stride=1, padding=0),  # -> [N, 1, 1024, 1024]
            nn.Sigmoid()  # Output between 0 and 1 for grayscale images
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Build the autoencoder
model = Autoencoder()

# Sanity check: the model must expose at least one parameter tensor
params = [*model.parameters()]
print(len(params))  # Should be greater than 0

# Reconstruction loss (MSE) and Adam optimizer with a small weight decay
criterion = nn.MSELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-5,
)



# Here we implement the autoencoder's training loop.

In [None]:
import torch
from tqdm import tqdm
from PIL import Image

# Disable the DecompressionBombError by setting the MAX_IMAGE_PIXELS to None.
# NOTE(review): this removes Pillow's guard against extremely large images;
# only appropriate when the image files are trusted.
Image.MAX_IMAGE_PIXELS = None

# Device setup (if not already done)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)  # Ensure model is on the same device

# Training loop with tqdm
num_epochs = 3  # Adjust this as necessary

for epoch in range(num_epochs):
    running_loss = 0.0

    # Use tqdm to wrap the data loader for a progress bar
    with tqdm(data_loader, unit="batch") as tepoch:
        tepoch.set_description(f"Epoch [{epoch+1}/{num_epochs}]")

        for data in tepoch:
            # Move images to device (GPU/CPU)
            data = data.to(device)  # Ensure data is on the same device as the model

            # Forward pass: input -> autoencoder -> output.
            # The input is also the target: the loss is the reconstruction error.
            outputs = model(data)
            loss = criterion(outputs, data)

            # Backward pass and optimization
            optimizer.zero_grad()  # Clear previous gradients
            loss.backward()        # Backpropagation
            optimizer.step()       # Update weights

            # Read the scalar once (each .item() call forces a device sync),
            # then accumulate it and show it on the progress bar.
            batch_loss = loss.item()
            running_loss += batch_loss
            tepoch.set_postfix(loss=batch_loss)

    # Average loss for the epoch (mean of the per-batch losses)
    avg_loss = running_loss / len(data_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}")

print("Training Complete")
# Save the model
save_path = 'path/to/save'
# Create the destination directory first so a missing folder does not throw
# away the trained weights at the very last step.
save_dir = os.path.dirname(save_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
torch.save(model.state_dict(), save_path)
print(f"Model saved to {save_path}")
