# 1. Setting up

In [1]:
import torch

# Prefer the GPU when PyTorch can see a CUDA device; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


In [2]:
# Shell escape: print the GPU model, driver/CUDA versions and current memory usage.
!nvidia-smi

Fri Feb 14 20:10:56 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4050 ...    Off |   00000000:01:00.0 Off |                  N/A |
| N/A   32C    P3              8W /   55W |      12MiB /   6141MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

# 2. Data preparation

## Checking images

In [3]:
import os

# Dataset root; the train/ and test/ splits each contain one sub-folder per class.
data_path = "data/"
# data_path already ends with "/", so plain concatenation with "/test" produced
# "data//test". os.path.join inserts a separator only when one is needed.
test_path = os.path.join(data_path, "test")
train_path = os.path.join(data_path, "train")

In [4]:
def walk_through_dir(dir):
    """Print how many sub-directories and files each folder under `dir` contains.

    Args:
        dir: Root directory to traverse with os.walk (the root itself is reported too).

    Every file is reported as an "image"; file types are not inspected.
    """
    for current_dir, subdirs, files in os.walk(dir):
        n_dirs = len(subdirs)
        n_files = len(files)
        print(f"There are {n_dirs} directories and {n_files} images in {current_dir}")
# Report directory and file counts for every folder under the dataset root.
walk_through_dir(data_path)

There are 2 directories and 0 images in data/
There are 4 directories and 0 images in data/train
There are 0 directories and 274 images in data/train/Asian
There are 0 directories and 115 images in data/train/Indian
There are 0 directories and 268 images in data/train/Negroids
There are 0 directories and 117 images in data/train/Caucasian
There are 4 directories and 0 images in data/test
There are 0 directories and 69 images in data/test/Asian
There are 0 directories and 29 images in data/test/Indian
There are 0 directories and 68 images in data/test/Negroids
There are 0 directories and 30 images in data/test/Caucasian


### Check the sizes of the images

In [5]:
from PIL import Image
import os
from collections import Counter

In [13]:
# Tally how many training images exist at each (width, height), to see whether
# the dataset is uniformly sized before choosing a resize transform.
size_counts = Counter()
for subfolder in os.listdir(train_path):
    subfolder_path = os.path.join(train_path, subfolder)
    if not os.path.isdir(subfolder_path):
        # Stray files next to the class folders (e.g. .DS_Store) are not image
        # classes; listing them would raise NotADirectoryError.
        continue
    for filename in os.listdir(subfolder_path):
        file_path = os.path.join(subfolder_path, filename)
        if not os.path.isfile(file_path):
            # Nested directories cannot be opened as images.
            continue
        # The context manager closes the file handle once the size is read.
        with Image.open(file_path) as img:
            size_counts[img.size] += 1  # img.size is (width, height)

for size, count in size_counts.items():
    print(f'Size {size}: {count} images')

Size (224, 224): 774 images


## Creating transforms and loading data

In [14]:
from torchvision import transforms

# Preprocessing applied to every image: resize to 128x128, then convert to a tensor.
transform_steps = [
    transforms.Resize(size=(128, 128)),
    transforms.ToTensor(),
]
img_transform = transforms.Compose(transform_steps)


In [15]:
from torchvision import datasets

# Build one ImageFolder dataset per split; both share the same preprocessing pipeline.
train_data, test_data = (
    datasets.ImageFolder(root=split_path, transform=img_transform)
    for split_path in (train_path, test_path)
)

In [None]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32
# os.cpu_count() returns None when the core count cannot be determined, and
# DataLoader needs a non-negative integer; fall back to 0, which loads batches
# in the main process.
num_workers = os.cpu_count() or 0

# Shuffle only the training split; the test split keeps a fixed order.
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    num_workers=num_workers,
    shuffle=True,
)
test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=BATCH_SIZE,
    num_workers=num_workers,
    shuffle=False,
)
