# GPU PyTorch Demo

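This notebook checks that PyTorch can see and use the GPU in your development container: it reports the PyTorch/CUDA environment, compares CPU and GPU matrix-multiplication times, runs a small neural network on the selected device, and prints GPU memory usage.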

In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import time

# Report the PyTorch build and whether CUDA is usable from this kernel.
cuda_ok = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")

# Device name and CUDA version are only printed when CUDA is available.
if cuda_ok:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    print(f"CUDA version: {torch.version.cuda}")

## Performance Comparison: CPU vs GPU

In [None]:
# Define matrix size for performance test
size = 2048


def time_matmul(a, b, sync=None):
    """Time one ``torch.matmul(a, b)`` call after an untimed warm-up run.

    Args:
        a: Left operand tensor.
        b: Right operand tensor.
        sync: Optional zero-argument callable invoked after the warm-up and
            again after the timed call, before the clock is read. Pass
            ``torch.cuda.synchronize`` for CUDA tensors so the measurement
            waits for the queued GPU work to finish.

    Returns:
        A ``(result, seconds)`` tuple: the product from the timed call and
        the elapsed time measured with ``time.perf_counter``.
    """
    # Warm-up run: keeps one-time setup cost out of the measured interval,
    # and is applied to CPU and GPU alike so the comparison is like-for-like.
    torch.matmul(a, b)
    if sync is not None:
        sync()

    # perf_counter is the monotonic, high-resolution clock intended for
    # measuring short durations.
    start = time.perf_counter()
    result = torch.matmul(a, b)
    if sync is not None:
        sync()
    return result, time.perf_counter() - start


# CPU computation
x_cpu = torch.randn(size, size)
y_cpu = torch.randn(size, size)
z_cpu, cpu_time = time_matmul(x_cpu, y_cpu)

print(f"CPU computation time: {cpu_time:.4f} seconds")

# GPU computation (if available)
if torch.cuda.is_available():
    device = torch.device('cuda')
    x_gpu = torch.randn(size, size, device=device)
    y_gpu = torch.randn(size, size, device=device)

    # Synchronize around the timed call so the elapsed time covers the
    # actual GPU execution rather than just the launch.
    z_gpu, gpu_time = time_matmul(x_gpu, y_gpu, sync=torch.cuda.synchronize)

    print(f"GPU computation time: {gpu_time:.4f} seconds")
    if gpu_time > 0:
        print(f"Speedup: {cpu_time/gpu_time:.2f}x")
    else:
        # Avoid ZeroDivisionError if the interval rounds down to zero.
        print("Speedup: n/a (GPU time below timer resolution)")
else:
    print("GPU not available for comparison")

## Simple Neural Network on GPU

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
backend = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(backend)
print(f"Using device: {device}")

# Create a simple neural network
class SimpleNet(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        Args:
            input_size: Number of input features for the first linear layer.
            hidden_size: Width of the hidden layer.
            output_size: Number of output features of the second linear layer.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply fc1, then ReLU, then fc2 to ``x`` and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Initialize the network and move it to the selected device (GPU or CPU)
input_size, hidden_size, output_size = 784, 256, 10
model = SimpleNet(input_size, hidden_size, output_size).to(device)
print(f"Model is on: {next(model.parameters()).device}")

# Generate some dummy data directly on the target device, instead of
# allocating it on the host and copying it over with .to(device).
batch_size = 64
input_data = torch.randn(batch_size, input_size, device=device)
# Random integer labels in [0, output_size); the forward pass below does not
# consume them.
target = torch.randint(0, output_size, (batch_size,), device=device)

print(f"Input data shape: {input_data.shape}")
print(f"Input data device: {input_data.device}")

# Forward pass
output = model(input_data)
print(f"Output shape: {output.shape}")
print(f"Output device: {output.device}")

## Memory Usage

In [None]:
def show_gpu_memory():
    """Print the values of torch.cuda.memory_allocated() and
    torch.cuda.memory_reserved(), each divided by 1e9 and labelled as GB."""
    allocated_gb = torch.cuda.memory_allocated() / 1e9
    reserved_gb = torch.cuda.memory_reserved() / 1e9
    print(f"GPU Memory allocated: {allocated_gb:.2f} GB")
    print(f"GPU Memory cached: {reserved_gb:.2f} GB")


if not torch.cuda.is_available():
    print("GPU memory information not available (CPU only)")
else:
    # Snapshot before clearing the cache.
    show_gpu_memory()

    # Clear cache, then report again for comparison.
    torch.cuda.empty_cache()
    print("\nAfter clearing cache:")
    show_gpu_memory()