# 01_data_prep.ipynb

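Prepare and visualize your dataset: load the images from `../data/processed`, resize them to 32×32, preview a few samples, then add Gaussian noise and save the noisy copies.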

In [None]:
import os
from PIL import Image
import torch
from torchvision import transforms
import matplotlib.pyplot as plt

# Directory scanned for input images, and the square edge length (in pixels)
# every image is resized to before being stacked.
img_dir = '../data/processed'
img_size = 32

# PIL image -> float tensor.
transform = transforms.Compose([
    transforms.ToTensor()
])

# Load every .jpg/.png in img_dir as an RGB tensor of size img_size x img_size.
# - sorted(): os.listdir() returns entries in arbitrary order; sorting keeps
#   the index -> file mapping reproducible across runs and machines.
# - lower(): also accept upper-case extensions such as '.JPG'.
img_list = []
for fname in sorted(os.listdir(img_dir)):
    if not fname.lower().endswith(('.jpg', '.png')):
        continue
    # The context manager closes the underlying file handle; convert()
    # returns an independent in-memory image, so it stays usable afterwards.
    with Image.open(os.path.join(img_dir, fname)) as raw:
        img = raw.convert('RGB').resize((img_size, img_size))
    img = transform(img)
    img_list.append(img)

# Fail with a readable message instead of torch.stack's error on an empty list.
if not img_list:
    raise RuntimeError(f'No .jpg/.png images found in {img_dir!r}')
imgs = torch.stack(img_list)
print('Loaded', imgs.shape[0], 'images')

# Visualize a few images
# Show at most 5, but never more than were loaded, so a small dataset does
# not raise IndexError.
n_show = min(5, len(imgs))
if n_show:
    # squeeze=False keeps `axs` a 2-D array even when only one image is shown.
    fig, axs = plt.subplots(1, n_show, figsize=(3 * n_show, 3), squeeze=False)
    for ax, sample in zip(axs[0], imgs):
        # Move the first axis last, the layout imshow expects for colour images.
        ax.imshow(sample.permute(1, 2, 0))
        ax.axis('off')
    plt.show()

## Add Gaussian noise and save noisy images

In [None]:
import numpy as np
# Add zero-mean Gaussian noise scaled by 0.2 to every element of `imgs`,
# then clamp the result to the range [0, 1].
gaussian = torch.randn_like(imgs)
noisy_imgs = (imgs + 0.2 * gaussian).clamp(0., 1.)

# Visualize
# Show at most 5, but never more than are available, so a small dataset does
# not raise IndexError.
n_show = min(5, len(noisy_imgs))
if n_show:
    # squeeze=False keeps `axs` a 2-D array even when only one image is shown.
    fig, axs = plt.subplots(1, n_show, figsize=(3 * n_show, 3), squeeze=False)
    for ax, sample in zip(axs[0], noisy_imgs):
        # Move the first axis last, the layout imshow expects for colour images.
        ax.imshow(sample.permute(1, 2, 0))
        ax.axis('off')
    plt.show()

# Save for later use
# Each noisy tensor is written as noisy_<index>.png, where <index> is its
# position in `noisy_imgs`.
# NOTE(review): the noisy inputs are written under results/denoised — confirm
# this is the intended directory.
os.makedirs('../results/denoised', exist_ok=True)
to_pil = transforms.ToPILImage()  # build the converter once, not per image
for i, noisy in enumerate(noisy_imgs):
    img = to_pil(noisy)
    img.save(f'../results/denoised/noisy_{i}.png')