In [5]:
!nvidia-smi

Tue Oct 24 02:58:04 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 537.58                 Driver Version: 537.58       CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3070      WDDM  | 00000000:10:00.0  On |                  N/A |
|  0%   36C    P8              11W / 240W |    329MiB /  8192MiB |      9%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [6]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:41:10_Pacific_Daylight_Time_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


In [7]:
import torch
x = torch.rand(5, 3)
print(x)


tensor([[0.7765, 0.5723, 0.8604],
        [0.7060, 0.5255, 0.0353],
        [0.1027, 0.7389, 0.0668],
        [0.0931, 0.9616, 0.2180],
        [0.6402, 0.7939, 0.7504]])


In [8]:
import torch
torch.cuda.is_available()

True

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import time

# Definir la arquitectura de la red neuronal
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Configurar la red neuronal y los hiperparámetros
model = SimpleNN()

# Descargar el conjunto de datos MNIST
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = MNIST(root='./data', train=True, transform=transform, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

# Definir la función de pérdida y el optimizador
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Medir el tiempo de ejecución en CPU
device = torch.device("cpu")
model.to(device)
start_time = time.time()
for epoch in range(5):
    for data, labels in train_loader:
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{5}, Loss: {loss.item()}')
end_time = time.time()
cpu_time = end_time - start_time
print(f'Tiempo de ejecución en CPU: {cpu_time} segundos')

# Guardar el modelo entrenado en CPU
torch.save(model.state_dict(), 'cpu_model.pth')


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


1.0%

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz


4.0%

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz


100.0%

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz
Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






Epoch 1/5, Loss: 0.05355485901236534
Epoch 2/5, Loss: 0.18006432056427002
Epoch 3/5, Loss: 0.11960174143314362
Epoch 4/5, Loss: 0.01966056041419506
Epoch 5/5, Loss: 0.11833243072032928
Tiempo de ejecución en CPU: 66.61269354820251 segundos


In [10]:
# Medir el tiempo de ejecución en GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Mover también el optimizador a GPU si es necesario

start_time = time.time()
for epoch in range(5):
    for data, labels in train_loader:
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{5}, Loss: {loss.item()}')
end_time = time.time()
gpu_time = end_time - start_time
print(f'Tiempo de ejecución en GPU: {gpu_time} segundos')


Epoch 1/5, Loss: 0.34630927443504333
Epoch 2/5, Loss: 0.013981365598738194
Epoch 3/5, Loss: 0.07661918550729752
Epoch 4/5, Loss: 0.0031458360608667135
Epoch 5/5, Loss: 0.16479092836380005
Tiempo de ejecución en GPU: 66.74514603614807 segundos


In [15]:
import time
import numpy as np
import torch

# Tamaño de la matriz grande
matrix_size = (10000, 10000)

# Matriz aleatoria en CPU (NumPy)
start_time_cpu = time.time()
matrix_cpu = np.random.rand(*matrix_size)
result_cpu = np.sin(matrix_cpu)  # Realiza alguna operación en la matriz
end_time_cpu = time.time()
cpu_time = end_time_cpu - start_time_cpu
print(f'Tiempo en CPU: {cpu_time} segundos')

# Matriz aleatoria en GPU (PyTorch)
if torch.cuda.is_available():
    device = torch.device("cuda")
    matrix_gpu = torch.rand(*matrix_size).to(device)
    start_time_gpu = time.time()
    result_gpu = torch.sin(matrix_gpu)  # Realiza alguna operación en la matriz
    torch.cuda.synchronize()  # Asegura que las operaciones en GPU se completen antes de medir el tiempo
    end_time_gpu = time.time()
    
    gpu_time = end_time_gpu - start_time_gpu
    print(f'Tiempo en GPU: {gpu_time} segundos')
    print(cpu_time/gpu_time)


Tiempo en CPU: 1.3939461708068848 segundos
Tiempo en GPU: 0.08122634887695312 segundos
17.16125604658808


In [11]:
import tensorflow as tf

# Obtener la lista de dispositivos físicos (por ejemplo, GPU)
tf.config.experimental.list_physical_devices()


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]