# Imports

In [10]:
import zipfile

import torch
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Load in Data

In [None]:
# Data extraction: unpack each person's archive into its own folder.
for archive_name, target_dir in (("Alex.zip", "alex_data"), ("Kelly.zip", "kelly_data")):
    with zipfile.ZipFile(archive_name) as archive:
        archive.extractall(target_dir)


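After extracting each archive, I created a new folder in the environment called "data" and moved both the alex_data and kelly_data folders into it. Note that this is a manual step: in a fresh environment it has to be repeated before the next cell can find the images under "data".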

In [None]:
# Settings a reader is most likely to tune, kept together in one place.
DATA_DIR = "data"        # root folder; ImageFolder treats each sub-folder as one class
IMAGE_SIZE = (224, 224)  # (height, width) that every image is resized to
BATCH_SIZE = 32          # number of images per batch
SEED = 42                # fixes the shuffle order so a fresh run draws the same batches

# Transforms the images into tensors which is the data structure for PyTorch
transform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert("RGB")),  # ensures each image has RGB components
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
])

dataset = datasets.ImageFolder(DATA_DIR, transform=transform)

# A dedicated, seeded generator makes shuffle=True reproducible across kernel
# restarts without touching PyTorch's global random state.
shuffle_generator = torch.Generator().manual_seed(SEED)
loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=shuffle_generator,
)

I resized each of the images to 224 x 224, but we can adjust this if needed. I read that all of the images should be the same size (so that they can be stacked into a single batch tensor), but I'm not sure how much information we are losing by downsizing the images.

The batch size is the number of samples in one batch, which we can change if we need to. During training, the network will process 32 images at a time, update the weights, then move on to the next 32 images.

# Inspect image properties

In [16]:
# Grab the first (image, label) pair from the dataset.
img_tensor, label = dataset[0]

# Report the tensor shape, then its smallest and largest pixel value on one line.
pixel_min = img_tensor.min()
pixel_max = img_tensor.max()
print(img_tensor.shape)
print(pixel_min, pixel_max)


torch.Size([3, 224, 224])
tensor(0.0353) tensor(0.8431)


3 is the number of channels. In this case it is (R, G, B)

224 x 224 is the height x width of the image

The second line shows the min and max pixel values of this image; they fall within [0, 1] because ToTensor() scales pixel values to that range.

In [None]:
# Pull a single batch from the loader to check what the network will receive.
images, labels = next(iter(loader))
print(images.shape)  # whole batch: (batch, channels, height, width)

# Look at the first image of that batch on its own.
img = images[0]
print(img.shape)  # one image: (channels, height, width)

# Read the three channel values of one pixel; the coordinates are arbitrary
# and only need to lie inside the resized image.
row, col = 100, 150
r, g, b = img[:, row, col]
print("R:", r.item(), "G:", g.item(), "B:", b.item())

torch.Size([32, 3, 224, 224])
torch.Size([3, 224, 224])
R: 0.30980393290519714 G: 0.2666666805744171 B: 0.2862745225429535


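First line: the shape of one batch. 32 is the number of images in the batch, 3 is the RGB channels, 224 x 224 is the size of each image

Second line: the shape of a single image taken from that batch. 3 is the RGB channels, 224 x 224 is the size of the image

Third line: The RGB values for a specific pixel (row 100, column 150) in image 0 of the batch. The values are between 0 and 1 because of ToTensor()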