In [11]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os


In [12]:
# --- Configuration: everything a reader might want to tune lives here ---

# Seed PyTorch's RNG up front so weight initialisation and DataLoader
# shuffling are repeatable under Restart Kernel -> Run All.
SEED = 42
torch.manual_seed(SEED)

# Every training image is resized to this resolution before being fed to the network.
IMG_HEIGHT, IMG_WIDTH = 128, 128
BATCH_SIZE = 8
EPOCHS = 10
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DATA_DIR = "train/real"   # same images for input & target


In [13]:
class AutoencoderDataset(Dataset):
    """Image-folder dataset that yields ``(image, image)`` pairs for an autoencoder.

    Each item is an RGB image resized to a fixed resolution and converted to a
    float tensor by ``transforms.ToTensor``; the same tensor is returned as
    both input and reconstruction target.

    Args:
        folder: Directory that directly contains the image files.
        image_size: Optional ``(height, width)`` to resize to. Defaults to the
            notebook-level ``(IMG_HEIGHT, IMG_WIDTH)``.
    """

    # Only files with one of these (case-insensitive) suffixes are used, so
    # stray entries such as ``.DS_Store``, ``Thumbs.db`` or checkpoint
    # directories do not crash ``Image.open`` in the middle of an epoch.
    IMAGE_EXTENSIONS = (
        ".jpg", ".jpeg", ".jfif", ".png", ".bmp", ".gif",
        ".tif", ".tiff", ".webp", ".ppm", ".pgm",
    )

    def __init__(self, folder, image_size=None):
        self.folder = folder
        self.images = self._list_images(folder)

        if image_size is None:
            image_size = (IMG_HEIGHT, IMG_WIDTH)

        self.transform = transforms.Compose([
            transforms.Resize(tuple(image_size)),
            transforms.ToTensor()
        ])

    @classmethod
    def _list_images(cls, folder):
        """Return the sorted names of the regular image files directly inside ``folder``."""
        # os.listdir order is arbitrary; sorting makes index -> file stable across runs.
        return sorted(
            name
            for name in os.listdir(folder)
            if name.lower().endswith(cls.IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(folder, name))
        )

    def __len__(self):
        """Number of usable image files found in the folder."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load image ``idx`` and return it twice: as input and as target."""
        path = os.path.join(self.folder, self.images[idx])
        # The context manager closes the underlying file as soon as the pixels
        # have been converted, instead of waiting for garbage collection.
        with Image.open(path) as img:
            x = self.transform(img.convert("RGB"))
        return x, x


In [14]:
class Autoencoder(nn.Module):
    """Small convolutional autoencoder for 3-channel images.

    The encoder applies three ``Conv2d -> ReLU -> MaxPool2d(2)`` stages, so an
    ``H x W`` input is reduced to ``H/8 x W/8`` with 256 channels. The decoder
    mirrors it with three ``Upsample(x2) -> ConvTranspose2d`` stages and ends
    in a sigmoid, so outputs lie in ``[0, 1]``.

    Note: each pooling stage floors odd sizes, so the output only matches the
    input size exactly when H and W are multiples of 8.
    """

    def __init__(self):
        super().__init__()

        # Encoder: channels 3 -> 64 -> 128 -> 256, spatial size halved per stage.
        down = []
        for c_in, c_out in ((3, 64), (64, 128), (128, 256)):
            down += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.encoder = nn.Sequential(*down)

        # Decoder: channels 256 -> 128 -> 64 -> 3, spatial size doubled per stage.
        # The last stage uses a sigmoid instead of ReLU to bound the output.
        up = []
        for c_in, c_out, activation in (
            (256, 128, nn.ReLU),
            (128, 64, nn.ReLU),
            (64, 3, nn.Sigmoid),
        ):
            up += [
                nn.Upsample(scale_factor=2),
                nn.ConvTranspose2d(c_in, c_out, 3, padding=1),
                activation(),
            ]
        self.decoder = nn.Sequential(*up)

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it back to image space."""
        return self.decoder(self.encoder(x))


In [15]:
# Wrap the image folder in a dataset and serve it in shuffled mini-batches.
dataset = AutoencoderDataset(folder=DATA_DIR)
loader = DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)


In [16]:
# Network on the selected device, pixel-wise MSE reconstruction loss, Adam optimiser.
model = Autoencoder()
model = model.to(DEVICE)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [17]:
# Train the autoencoder to reproduce its own input and report the mean loss per epoch.
for epoch in range(EPOCHS):
    # reconstruct_image() switches the shared model to eval mode; make sure it
    # is back in training mode whenever this cell is (re-)run.
    model.train()

    total_loss = 0.0   # sum of per-sample losses over the epoch
    seen = 0           # number of samples processed so far
    for x, y in loader:
        x, y = x.to(DEVICE), y.to(DEVICE)

        out = model(x)
        loss = criterion(out, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion averages over the batch; weight by batch size so a short
        # final batch does not count as much as a full one.
        batch_size = x.size(0)
        total_loss += loss.item() * batch_size
        seen += batch_size

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    print(f"Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/max(seen, 1):.4f}")


Epoch 1/10 | Loss: 0.0561
Epoch 2/10 | Loss: 0.0465
Epoch 3/10 | Loss: 0.0356
Epoch 4/10 | Loss: 0.0225
Epoch 5/10 | Loss: 0.1045
Epoch 6/10 | Loss: 0.0134
Epoch 7/10 | Loss: 0.0372
Epoch 8/10 | Loss: 0.0344
Epoch 9/10 | Loss: 0.0255
Epoch 10/10 | Loss: 0.0212


In [18]:
# Persist only the learned parameters (state dict), not the whole module object.
# NOTE(review): the filename says "512" while IMG_HEIGHT/IMG_WIDTH are 128 — confirm the intended name.
weights = model.state_dict()
torch.save(weights, "autoencoder_512.pth")
print("Autoencoder saved")


Autoencoder saved


In [25]:
@torch.no_grad()
def reconstruct_image(img_path, out_path, size=None):
    """Pass one image through the trained autoencoder and save the reconstruction.

    Args:
        img_path: Path of the image to reconstruct.
        out_path: Where to write the reconstructed image (saved via PIL).
        size: Optional ``(height, width)`` the image is resized to before
            inference. Defaults to ``(IMG_HEIGHT, IMG_WIDTH)`` so inference
            sees images at the same resolution the network was trained on.
            Pass e.g. ``(512, 512)`` to override. Both values should be
            multiples of 8, otherwise the output is slightly smaller than
            the resized input.

    Side effects:
        Puts the notebook-level ``model`` into eval mode and writes ``out_path``.
    """
    if size is None:
        size = (IMG_HEIGHT, IMG_WIDTH)

    # Same preprocessing pipeline as AutoencoderDataset: resize, then scale to [0, 1].
    preprocess = transforms.Compose([
        transforms.Resize(tuple(size)),
        transforms.ToTensor()
    ])

    model.eval()
    # Close the source file as soon as the pixels are loaded.
    with Image.open(img_path) as img:
        x = preprocess(img.convert("RGB")).unsqueeze(0).to(DEVICE)

    recon = model(x)
    # Drop only the batch dimension added above; a bare squeeze() could also
    # remove a size-1 spatial dimension.
    recon_img = transforms.ToPILImage()(recon.squeeze(0).cpu())
    recon_img.save(out_path)


In [26]:
# Reconstruct a sample image with the trained model and write the result to disk.
reconstruct_image(img_path="face.6.jpg", out_path="face3.png")
