In [1]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image


In [2]:
class CustomImageDataset(Dataset):
    """Unlabelled image dataset built from the image files found directly in one folder.

    Every item is a single image, decoded to RGB and passed through
    ``self.transform``. No label is returned.
    """

    def __init__(self, root_dir, transform=None, extensions=('.jpg',)):
        """Index the image files contained in ``root_dir``.

        Args:
            root_dir: Directory that is listed (non-recursively) for images.
            transform: Callable applied to each RGB ``PIL.Image``. When left
                as ``None`` the default pipeline is used: resize to 256x256,
                random horizontal flip, then ``ToTensor``.
            extensions: File-name suffixes to accept. They are compared
                case-insensitively, so ``photo.JPG`` is picked up as well.

        Raises:
            OSError: Propagated from ``os.listdir`` if ``root_dir`` cannot be
                listed.
        """
        suffixes = tuple(ext.lower() for ext in extensions)
        # os.listdir() gives no ordering guarantee; sorting pins the
        # index -> file mapping so that runs are comparable.
        self.image_paths = [
            os.path.join(root_dir, name)
            for name in sorted(os.listdir(root_dir))
            if name.lower().endswith(suffixes)
        ]

        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((256, 256)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx``, convert it to RGB and return it transformed."""
        # convert() hands back a separate image object, so the source file
        # can be closed as soon as the with-block ends.
        with Image.open(self.image_paths[idx]) as img:
            rgb = img.convert('RGB')
        return self.transform(rgb)


In [3]:
# Folder holding the training images. The last component is joined with
# os.path.join rather than written after a literal backslash: '\d' is not a
# valid string escape and Python warns about it (it is slated to become an
# error). On Windows the joined path is the same string as before.
DATA_DIR = os.path.join('F:/ML PROJECT/data/natural_images', 'dog')
BATCH_SIZE = 8

dataset = CustomImageDataset(DATA_DIR)
# shuffle=True draws a new sample order every time the loader is iterated.
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)


In [4]:
class Encoder(nn.Module):
    """Convolutional encoder: three stride-2 3x3 convolutions, each followed by ReLU.

    Channel widths go 3 -> 32 -> 64 -> 128.
    """

    def __init__(self):
        super().__init__()
        widths = (3, 32, 64, 128)
        stack = []
        # Pair every width with its successor to get each conv's in/out channels.
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            stack.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=2, padding=1))
            stack.append(nn.ReLU())
        # The attribute name and layer order fix the state_dict keys
        # ('encoder.0.weight', 'encoder.2.weight', ...).
        self.encoder = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the convolutional stack and return the feature map."""
        features = self.encoder(x)
        return features

class Decoder(nn.Module):
    """Transposed-convolution decoder: three stride-2 3x3 stages, 128 -> 64 -> 32 -> 3.

    The first two stages are followed by ReLU, the last one by Sigmoid.
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels, activation class) for each stage.
        stages = (
            (128, 64, nn.ReLU),
            (64, 32, nn.ReLU),
            (32, 3, nn.Sigmoid),
        )
        stack = []
        for in_ch, out_ch, activation in stages:
            stack.append(
                nn.ConvTranspose2d(
                    in_ch, out_ch, kernel_size=3, stride=2, padding=1, output_padding=1
                )
            )
            stack.append(activation())
        # The attribute name and layer order fix the state_dict keys
        # ('decoder.0.weight', 'decoder.2.weight', ...).
        self.decoder = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the transposed-convolution stack and return the result."""
        reconstruction = self.decoder(x)
        return reconstruction


In [5]:
# --- Training configuration ---
SEED = 42
NUM_EPOCHS = 10
LEARNING_RATE = 0.001

# Seed torch's global generator before the models are created so that weight
# initialisation and the other torch-driven randomness in this cell start
# from a fixed state. NOTE(review): GPU kernels can still introduce small
# run-to-run differences.
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
encoder = Encoder().to(device)
decoder = Decoder().to(device)
# A single optimizer updates both networks, so they are trained jointly.
optimizer = torch.optim.Adam(
    list(encoder.parameters()) + list(decoder.parameters()),
    lr=LEARNING_RATE,
)
criterion = nn.MSELoss()

# Make the training mode explicit instead of relying on the module default.
encoder.train()
decoder.train()

for epoch in range(NUM_EPOCHS):
    total_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        encoded = encoder(batch)
        decoded = decoder(encoded)
        # Reconstruction objective: the input batch is its own target.
        loss = criterion(decoded, batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Reported value is the mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1, Loss: 0.0246
Epoch 2, Loss: 0.0061
Epoch 3, Loss: 0.0039
Epoch 4, Loss: 0.0032
Epoch 5, Loss: 0.0028
Epoch 6, Loss: 0.0027
Epoch 7, Loss: 0.0025
Epoch 8, Loss: 0.0025
Epoch 9, Loss: 0.0022
Epoch 10, Loss: 0.0020


In [6]:
# Persist only the learned parameters (state_dict) of each network, written
# to the current working directory.
torch.save(obj=encoder.state_dict(), f='encoder_custom.pth')
torch.save(obj=decoder.state_dict(), f='decoder_custom.pth')
