Practice parallel processing on GPU/TPU accelerators using Kaggle/GitHub. Goal: switch automatically between GPU and TPU from within the code itself.

In [1]:
import tensorflow as tf
# Show which TensorFlow build this kernel imported.
print("TensorFlow version:", tf.__version__)


TensorFlow version: 2.19.0


In [None]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms


def get_device():
    """Choose the torch device to run on.

    Returns:
        A ``(torch.device, label)`` pair: the CUDA device with label
        ``"GPU"`` when ``torch.cuda.is_available()`` is true, otherwise the
        CPU device with label ``"CPU"``.
    """
    backend, label = ("cuda", "GPU") if torch.cuda.is_available() else ("cpu", "CPU")
    return torch.device(backend), label

# Resolve the compute device once; `device` is used for tensor placement and
# `device_name` is the human-readable label shown in the reports.
device, device_name = get_device()
print("Using:", device_name)


# Each MNIST sample is converted to a tensor as it is loaded.
transform = transforms.ToTensor()

# Shuffled mini-batches of 128 samples from the MNIST training split, which is
# downloaded into the current directory when not already there.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(root='.', train=True, download=True, transform=transform),
    batch_size=128,
    shuffle=True,
)


class SimpleNN(nn.Module):
    """Small fully connected classifier: flatten -> 784x256 -> ReLU -> 256x10.

    The layers live in ``self.fc`` (an ``nn.Sequential``); ``forward`` returns
    the raw output of the final linear layer with no activation applied.
    """

    def __init__(self):
        super().__init__()
        stack = [
            nn.Flatten(),
            nn.Linear(28 * 28, 256),
            nn.ReLU(),
            nn.Linear(256, 10),
        ]
        self.fc = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the 10-way scores."""
        scores = self.fc(x)
        return scores

# Loss function for the classifier's raw scores.
criterion = nn.CrossEntropyLoss()

# Build the network, then move its parameters onto the selected device.
model = SimpleNN()
model = model.to(device)

# Adam over the (already placed) model parameters.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


def train(epochs=3):
    """Train the global ``model`` on ``train_loader`` and time the run.

    Uses the notebook-level ``model``, ``train_loader``, ``criterion``,
    ``optimizer`` and ``device`` objects.

    Args:
        epochs: Number of full passes over ``train_loader``. Defaults to 3,
            which is what the original hard-coded loop ran.

    Returns:
        Elapsed training time in seconds, as a float.
    """
    # CUDA work is queued asynchronously, so the host clock must only be read
    # once the GPU has drained its queue; otherwise the benchmark under-reports.
    on_cuda = getattr(device, "type", None) == "cuda"

    model.train()
    if on_cuda:
        # Don't bill work queued by earlier cells to this measurement.
        torch.cuda.synchronize()
    # perf_counter is monotonic, unlike time.time(), so the interval cannot be
    # skewed by system clock adjustments.
    start = time.perf_counter()

    for _ in range(epochs):
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

    if on_cuda:
        # Wait for the last queued kernels before stopping the clock.
        torch.cuda.synchronize()
    return time.perf_counter() - start


# Run the training loop once and keep the elapsed seconds that train() returns.
training_time = train()

# Report the label of the device that was selected and the measured duration.
print(f"\n Device: {device_name}")
print(f" Training Time: {training_time:.2f} seconds")


Using: GPU

 Device: GPU
 Training Time: 22.73 seconds


In [1]:
import tensorflow as tf

# Probe for a TPU using the "local" resolver target: connect, initialise the
# TPU system, and build a TPUStrategy. Any failure along the way is reported
# instead of aborting the cell.
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu="local")
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    print("Running on TPU")
    print("Replicas:", strategy.num_replicas_in_sync)
except Exception as err:
    # Status line first, then the exception text on its own line.
    print("TPU not connected", err, sep="\n")


ModuleNotFoundError: No module named 'tensorflow'

In [14]:
import tensorflow as tf

# Auto-detect a TPU and build a distribution strategy for it; a ValueError
# during setup is treated as "no TPU" and the default strategy is used instead.
#
# The label is stored in `device_name`, not `device`: `device` is the
# torch.device that the PyTorch training loop passes to `.to(...)`, and
# rebinding it to a string here would break that loop on a later re-run.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # auto-detect
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    device_name = "TPU"

except ValueError:
    strategy = tf.distribute.get_strategy()
    device_name = "CPU/GPU"

print("Running on:", device_name)
print("Replicas:", strategy.num_replicas_in_sync)


Running on: CPU/GPU
Replicas: 1


In [11]:
import tensorflow as tf

# List the GPUs TensorFlow can see (an empty list means none).
print("GPUs:", tf.config.list_physical_devices('GPU'))

# Constructing the resolver is used as the TPU presence check.
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    print("TPU detected")
except Exception:
    # `except Exception` rather than a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate and the cell can be interrupted.
    print("No TPU detected")


GPUs: []
No TPU detected


In [None]:
import tensorflow as tf
import time

# Prefer a TPU strategy when a TPU can be resolved and initialised; otherwise
# fall back to TensorFlow's default strategy and label the run "CPU/GPU".
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    device_name = "TPU"
except Exception:
    # Any ordinary setup error means "no usable TPU". A bare `except:` would
    # also swallow KeyboardInterrupt/SystemExit, making the cell impossible to
    # interrupt while the TPU connection is being attempted.
    strategy = tf.distribute.get_strategy()
    device_name = "CPU/GPU"

print("Using:", device_name)

# DATA
# Only the training split is kept; the second split returned by load_data()
# is discarded.
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
# Divide by 255 (presumably maps 8-bit pixel intensities into [0, 1]).
x_train = x_train / 255.0

# MODEL
# Create and compile the network inside the strategy scope so its variables
# belong to whichever strategy was selected above.
with strategy.scope():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.Dense(10, activation='softmax'))

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

# Time the full fit() call. perf_counter() is a monotonic clock intended for
# measuring intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()

model.fit(x_train, y_train, epochs=3, batch_size=128)

end = time.perf_counter()

# Report the selected device label and the elapsed seconds.
print(f"\n Device: {device_name}")
print(f" Training Time: {end - start:.2f} seconds")


Using: CPU/GPU
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/3
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8558 - loss: 0.5285
Epoch 2/3
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9599 - loss: 0.1422
Epoch 3/3
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9723 - loss: 0.0965

 Device: CPU/GPU
 Training Time: 4.86 seconds
