# PyTorch: CUDA vs CPU

Step 1: Import the libraries and check whether CUDA is available

In [1]:
import sys

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time

In [3]:
# Prefer the GPU when PyTorch reports one as usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


Step 2: Prepare the dataset and define the architecture

In [4]:
# Hyperparameters
# -- training schedule
num_epochs = 3         # full passes over the training loader per timed run
batch_size = 64        # samples per mini-batch handed to the DataLoader
learning_rate = 2e-4   # Adam step size shared by both optimizers

# -- network dimensions
latent_size = 64       # length of the noise vector fed to the generator
hidden_size = 256      # width of the hidden Linear layers in both networks
image_size = 28 * 28   # length of one flattened image (784)

# Load the MNIST dataset
# Each image is converted to a tensor and then normalised with mean 0.5 and
# std 0.5 (one value because there is a single channel) -- presumably so real
# images share the [-1, 1] range of the generator's Tanh output.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Training split only; the files are downloaded into ./data/ when missing.
train_dataset = torchvision.datasets.MNIST(
    root="./data/",
    train=True,
    transform=transform,
    download=True,
)

# Reshuffled every epoch, batch_size samples per batch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

In [5]:
class Generator(nn.Module):
    """Fully connected generator: noise vector in, flattened image out.

    Args:
        latent_size: number of features in the input noise vector.
        hidden_size: width of the two hidden layers.
        image_size: number of features in the generated (flattened) image.
    """

    def __init__(self, latent_size, hidden_size, image_size):
        super().__init__()
        # Layers are created in forward order so parameter initialisation
        # consumes the RNG in the same sequence as the stack is applied.
        layers = [
            nn.Linear(latent_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, image_size),
            nn.Tanh(),  # squashes outputs into [-1, 1]
        ]
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        generated = self.main(x)
        return generated

class Discriminator(nn.Module):
    """Fully connected discriminator: flattened image in, one probability out.

    Args:
        image_size: number of features in the (flattened) input image.
        hidden_size: width of the two hidden layers.
    """

    def __init__(self, image_size, hidden_size):
        super().__init__()
        # Layers are created in forward order so parameter initialisation
        # consumes the RNG in the same sequence as the stack is applied.
        layers = [
            nn.Linear(image_size, hidden_size),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_size, hidden_size),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_size, 1),
            nn.Sigmoid(),  # probability that the input is a real image
        ]
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        probability = self.main(x)
        return probability

Step 3: Function to train the model

In [6]:
def train_model(device, num_epochs=3):
    """Train a freshly initialised Generator/Discriminator pair on ``train_loader``.

    Relies on the notebook-level names ``latent_size``, ``hidden_size``,
    ``image_size``, ``learning_rate`` and ``train_loader``.

    Args:
        device: device the models and every batch are placed on.
        num_epochs: number of full passes over ``train_loader``.

    Returns:
        None. Progress is written to stdout as a single line that is
        overwritten after every batch.
    """
    generator = Generator(latent_size, hidden_size, image_size).to(device)
    discriminator = Discriminator(image_size, hidden_size).to(device)
    criterion = nn.BCELoss()
    optimizer_g = optim.Adam(generator.parameters(), lr=learning_rate)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=learning_rate)
    num_batches = len(train_loader)

    for epoch in range(num_epochs):
        for i, (real_images, _) in enumerate(train_loader):
            # Take the size from the batch itself (not the global batch_size
            # hyperparameter) so labels and noise always match the batch.
            n_samples = real_images.size(0)
            real_images = real_images.view(n_samples, -1).to(device)

            # Labels for real and fake images, allocated directly on the
            # target device to avoid a host-to-device copy on every batch.
            real_labels = torch.ones(n_samples, 1, device=device)
            fake_labels = torch.zeros(n_samples, 1, device=device)

            # ---- Train the discriminator ----
            # Real images
            loss_real = criterion(discriminator(real_images), real_labels)

            # Fake images: detach so this backward pass stops at the
            # discriminator and does not compute generator gradients that
            # would be thrown away anyway.
            z = torch.randn(n_samples, latent_size, device=device)
            fake_images = generator(z)
            loss_fake = criterion(discriminator(fake_images.detach()), fake_labels)

            # Total discriminator loss
            loss_d = loss_real + loss_fake
            optimizer_d.zero_grad()
            loss_d.backward()
            optimizer_d.step()

            # ---- Train the generator ----
            # The generator is rewarded when the discriminator labels its
            # samples as real, hence real_labels as the target.
            z = torch.randn(n_samples, latent_size, device=device)
            fake_images = generator(z)
            outputs = discriminator(fake_images)
            loss_g = criterion(outputs, real_labels)
            optimizer_g.zero_grad()
            loss_g.backward()
            optimizer_g.step()

            # Log progress ('\r' rewrites the same console line each batch)
            sys.stdout.write(f'\rEpoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{num_batches}], '
                             f'Loss_D: {loss_d.item():.4f}, Loss_G: {loss_g.item():.4f}')
            sys.stdout.flush()

Step 4: Timing function


In [7]:
def time_training(device):
    """Return the wall-clock seconds one ``train_model`` run takes on ``device``.

    The run uses the notebook-level ``num_epochs``.

    Args:
        device: device to train on (a ``torch.device`` or device string).

    Returns:
        Elapsed time in seconds as a float.
    """
    on_cuda = torch.device(device).type == "cuda"

    # CUDA kernels are launched asynchronously, so wait for the device on both
    # sides of the timed region. The first synchronize also makes PyTorch's
    # lazy CUDA initialisation happen before the clock starts.
    if on_cuda:
        torch.cuda.synchronize(device)
    # perf_counter is a monotonic, high-resolution clock meant for intervals.
    start_time = time.perf_counter()

    train_model(device, num_epochs=num_epochs)

    if on_cuda:
        torch.cuda.synchronize(device)
    return time.perf_counter() - start_time

## Comparison of `CPU` vs `CUDA`

Train on CPU

In [8]:
# Baseline measurement: the same training routine, pinned to the CPU.
print("Training on CPU")
cpu_time = time_training(device=torch.device("cpu"))
print(f"\nCPU training time: {cpu_time:.4f} seconds")

Training on CPU
Epoch [3/3], Batch [938/938], Loss_D: 0.3493, Loss_G: 2.7791
CPU training time: 57.9113 seconds


Train on GPU (if available)

In [9]:
# Repeat the measurement on the GPU; gpu_time is only assigned when CUDA is usable.
if not torch.cuda.is_available():
    print("\nGPU is not available.")
else:
    print("Training on GPU")
    gpu_time = time_training(device=torch.device("cuda"))
    print(f"\nGPU training time: {gpu_time:.4f} seconds")

Training on GPU
Epoch [3/3], Batch [938/938], Loss_D: 0.3088, Loss_G: 2.3058
GPU training time: 48.3173 seconds


Difference: relative speed-up of the GPU run over the CPU run

In [10]:
# gpu_time is only assigned when the GPU run actually happened, so guard the
# comparison instead of raising NameError on a CPU-only machine.
if torch.cuda.is_available():
    # Relative speed-up: how much more CPU time than GPU time the run took.
    speedup_pct = (cpu_time / gpu_time - 1) * 100
    print(f"GPU is {speedup_pct:.2f}% faster than CPU.")
else:
    print("GPU is not available; no CPU vs GPU comparison to report.")

GPU is 19.86% faster than CPU.
