# Auto-Encoders

## 1 - Convolutional AE

In [5]:
import PIL.ImageOps
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

Looking in indexes: https://download.pytorch.org/whl/cu124
Collecting torch
  Downloading https://download.pytorch.org/whl/cu124/torch-2.4.1%2Bcu124-cp312-cp312-win_amd64.whl (2506.2 MB)
     ---------------------------------------- 0.0/2.5 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.5 GB 31.2 MB/s eta 0:01:21
     ---------------------------------------- 0.0/2.5 GB 34.4 MB/s eta 0:01:13
     ---------------------------------------- 0.0/2.5 GB 33.7 MB/s eta 0:01:15
     ---------------------------------------- 0.0/2.5 GB 34.3 MB/s eta 0:01:13
     ---------------------------------------- 0.0/2.5 GB 37.4 MB/s eta 0:01:07
     ---------------------------------------- 0.0/2.5 GB 40.9 MB/s eta 0:01:01
     ---------------------------------------- 0.0/2.5 GB 40.9 MB/s eta 0:01:01
     ---------------------------------------- 0.0/2.5 GB 43.7 MB/s eta 0:00:57
     ---------------------------------------- 0.0/2.5 GB 40.9 MB/s eta 0:01:01
     --------------------------


[notice] A new release of pip is available: 23.2.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


## 2 - U-Net

In [29]:
import os
import PIL
import torch
import torchvision

from torch import nn
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [3]:
# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device.")

Using cuda device.


In [38]:
class UNet(nn.Module):
    """U-Net style encoder/decoder built from pairs of 3x3 convolutions.

    Four conv-block + max-pool stages (``f1``..``f4`` / ``p1``..``p4``) halve the
    resolution while the channel count doubles, ``f5`` is the bottleneck, and
    four transposed-convolution + conv-block stages (``p5``..``p8`` /
    ``f6``..``f9``) upsample again. Each decoder block receives the encoder
    feature map of the same level concatenated along the channel axis.
    ``p9`` is a 1x1 convolution that maps to the output channels; no activation
    is applied after it.

    Args:
        in_channels: Number of channels of the input image.
        out_channels: Number of channels of the returned map.
        base_channels: Width of the first conv block; deeper levels use
            2x, 4x, 8x and 16x this value.

    The defaults (1, 1, 64) build the 64-128-256-512-1024 network.
    """

    def __init__(self, in_channels=1, out_channels=1, base_channels=64):
        super().__init__()

        c1 = base_channels
        c2, c3, c4, c5 = 2 * c1, 4 * c1, 8 * c1, 16 * c1

        # Encoder: conv block, then 2x2 max-pool to halve the resolution.
        self.f1 = self.create_conv_block(in_channels, c1)
        self.p1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.f2 = self.create_conv_block(c1, c2)
        self.p2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.f3 = self.create_conv_block(c2, c3)
        self.p3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.f4 = self.create_conv_block(c3, c4)
        self.p4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Bottleneck, followed by the first upsampling step.
        # Kernel 3 / stride 2 produces 2*n + 1 pixels (kernel 2 / stride 2
        # produces 2*n); forward() reconciles any difference with the skip map.
        self.f5 = self.create_conv_block(c4, c5)
        self.p5 = nn.ConvTranspose2d(c5, c4, kernel_size=3, stride=2)

        # Decoder: each conv block takes skip + upsampled channels as input.
        self.f6 = self.create_conv_block(c5, c4)
        self.p6 = nn.ConvTranspose2d(c4, c3, kernel_size=3, stride=2)

        self.f7 = self.create_conv_block(c4, c3)
        self.p7 = nn.ConvTranspose2d(c3, c2, kernel_size=2, stride=2)

        self.f8 = self.create_conv_block(c3, c2)
        self.p8 = nn.ConvTranspose2d(c2, c1, kernel_size=2, stride=2)

        self.f9 = self.create_conv_block(c2, c1)
        self.p9 = nn.Conv2d(c1, out_channels, kernel_size=1)

    def forward(self, x):
        """Run the network on a single image or a batch of images.

        Args:
            x: Tensor shaped ``(C, H, W)`` or ``(N, C, H, W)`` with
                ``C == in_channels``. H and W must be at least 16 so that the
                four stride-2 poolings leave a non-empty feature map.

        Returns:
            Tensor with the same rank and spatial size as ``x`` and
            ``out_channels`` channels, located on the module's device.
        """
        # Follow the module's own parameters rather than a notebook-level
        # global, so the model also works when it lives on another device.
        x = x.to(self.p9.weight.device)

        f1 = self.f1(x)
        p1 = self.p1(f1)

        f2 = self.f2(p1)
        p2 = self.p2(f2)

        f3 = self.f3(p2)
        p3 = self.p3(f3)

        f4 = self.f4(p3)
        p4 = self.p4(f4)

        f5 = self.f5(p4)
        p5 = self.p5(f5)

        f6 = self.f6(self._concat_skip(f4, p5))
        p6 = self.p6(f6)

        f7 = self.f7(self._concat_skip(f3, p6))
        p7 = self.p7(f7)

        f8 = self.f8(self._concat_skip(f2, p7))
        p8 = self.p8(f8)

        f9 = self.f9(self._concat_skip(f1, p8))
        p9 = self.p9(f9)

        return p9

    @staticmethod
    def _concat_skip(skip, upsampled):
        """Concatenate an encoder map and an upsampled map along channels.

        The upsampled map is zero-padded (positive difference) or cropped
        (negative difference) to the spatial size of ``skip`` first; when the
        sizes already agree it is used unchanged.
        """
        diff_h = skip.shape[-2] - upsampled.shape[-2]
        diff_w = skip.shape[-1] - upsampled.shape[-1]
        if diff_h != 0 or diff_w != 0:
            # Pad order is (left, right, top, bottom); negative amounts crop.
            upsampled = nn.functional.pad(
                upsampled,
                (diff_w // 2, diff_w - diff_w // 2,
                 diff_h // 2, diff_h - diff_h // 2),
            )
        # dim=-3 is the channel axis for both (C, H, W) and (N, C, H, W);
        # dim=0 would stack along the batch axis for batched input.
        return torch.cat([skip, upsampled], dim=-3)

    def create_conv_block(self, num_channels_in, num_channels_out):
        """Build two 3x3 convolutions (padding 1), each followed by a ReLU.

        Args:
            num_channels_in: Input channels of the first convolution.
            num_channels_out: Output channels of both convolutions.

        Returns:
            An ``nn.Sequential`` that keeps the spatial size of its input.
        """
        block = nn.Sequential(
            nn.Conv2d(num_channels_in, num_channels_out, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(num_channels_out, num_channels_out, kernel_size=3, padding=1),
            nn.ReLU()
        )

        return block

In [39]:
# Build the network and place it on the selected device.
unet = UNet().to(device)

# Open the treemap picture (machine-specific path) and reduce it to grayscale.
image = PIL.ImageOps.grayscale(
    Image.open('C:\\Users\\britt\\IdeaProjects\\Ludii-XAI\\XAI-module\\outputs\\treemaps\\mcts.png')
)

# Turn the PIL image into a tensor and run one forward pass on it.
input_img = transforms.ToTensor()(image)
output = unet(input_img)

In [40]:
# A bare last expression lets the notebook display the tensor as the cell result.
output

tensor([[[-0.0619, -0.0619, -0.0602,  ..., -0.0617, -0.0608, -0.0624],
         [-0.0590, -0.0617, -0.0626,  ..., -0.0622, -0.0612, -0.0615],
         [-0.0593, -0.0635, -0.0652,  ..., -0.0635, -0.0631, -0.0614],
         ...,
         [-0.0620, -0.0675, -0.0703,  ..., -0.0655, -0.0639, -0.0629],
         [-0.0602, -0.0637, -0.0653,  ..., -0.0642, -0.0631, -0.0612],
         [-0.0583, -0.0604, -0.0627,  ..., -0.0589, -0.0587, -0.0592]]],
       device='cuda:0', grad_fn=<SqueezeBackward1>)
