In [2]:
# Import required libraries
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow_datasets as tfds

# Force TensorFlow to use CPU (uncomment the next line to run on CPU only)
# tf.config.set_visible_devices([], 'GPU')  # Hide GPU

# Check device in use
print("Devices in use:")
print(tf.config.list_physical_devices())

# Load the AG News dataset
dataset, info = tfds.load('ag_news_subset', with_info=True, as_supervised=True)
train_data, test_data = dataset['train'], dataset['test']

# Preprocessing parameters
vocab_size = 20000  # Vocabulary size
max_length = 250    # Max sentence length
embedding_dim = 128 # Embedding dimensions
batch_size = 128    # Batch size for training

# Tokenizer setup
tokenizer = tfds.deprecated.text.Tokenizer()
vocab_set = set()

# Build vocabulary from the training data
for text, _ in tfds.as_numpy(train_data):
    vocab_set.update(tokenizer.tokenize(text.decode('utf-8')))

# Create a TokenTextEncoder using the vocabulary
encoder = tfds.deprecated.text.TokenTextEncoder(vocab_set)

# Encode function
def encode_text(text, label):
    encoded_text = encoder.encode(text.numpy())
    encoded_text = pad_sequences([encoded_text], maxlen=max_length, padding='post', truncating='post')[0]
    return encoded_text, label

def tf_encode_text(text, label):
    return tf.py_function(func=encode_text, inp=[text, label], Tout=(tf.int32, tf.int64))

# Apply preprocessing to train and test datasets
train_data = train_data.map(lambda x, y: tf_encode_text(x, y))
test_data = test_data.map(lambda x, y: tf_encode_text(x, y))

# Shuffle, batch, and prefetch
train_data = train_data.shuffle(10000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_data = test_data.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Build the model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(32)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(4, activation='softmax')  # Output layer for 4-class classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print model summary
model.summary()

# Train the model
history = model.fit(train_data,
                    epochs=5,
                    validation_data=test_data,
                    verbose=1)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_data)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Devices in use:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


I0000 00:00:1736066766.401703    5474 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9363 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 SUPER, pci bus id: 0000:01:00.0, compute capability: 8.9
2025-01-05 14:16:06.547829: I tensorflow/core/kernels/data/tf_record_dataset_op.cc:376] The default buffer size is 262144, which is overridden by the user specified `buffer_size` of 8388608
2025-01-05 14:16:13.643034: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 1/5


ValueError: as_list() is not defined on an unknown TensorShape.

In [1]:
import tensorflow as tf
from tensorflow.python.platform import build_info as tf_build_info
print(tf_build_info.build_info['cudnn_version'])  # Displays cuDNN version


2025-01-05 14:15:59.738695: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-05 14:15:59.901065: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736066759.978226    5474 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736066760.003066    5474 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-05 14:16:00.166621: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

9


In [3]:
import tensorflow as tf

# Check if TensorFlow can detect a GPU
if tf.test.is_gpu_available():
    print("GPU is available!")
    print(f"GPU Name: {tf.config.list_physical_devices('GPU')}")
    
    # Perform a small computation on the GPU
    with tf.device('/GPU:0'):
        a = tf.random.uniform([10000, 10000])
        b = tf.random.uniform([10000, 10000])
        c = tf.matmul(a, b)  # Matrix multiplication on GPU
    print("Computation successful!")
    print(f"Result shape: {c.shape}")
else:
    print("No GPU detected or TensorFlow is not using the GPU.")


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


GPU is available!
GPU Name: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Computation successful!
Result shape: (10000, 10000)


I0000 00:00:1736066821.042786    5474 gpu_device.cc:2022] Created device /device:GPU:0 with 9363 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 SUPER, pci bus id: 0000:01:00.0, compute capability: 8.9


In [4]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
import time

# Check if GPU is available
if tf.test.is_gpu_available():
    print("GPU is available!")
    print(f"GPU Name: {tf.config.list_physical_devices('GPU')}")
else:
    print("No GPU detected or TensorFlow is not using the GPU.")
    exit()

# Load CIFAR-10 dataset (small dataset but useful for testing)
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Normalize to [0, 1]
y_train, y_test = to_categorical(y_train), to_categorical(y_test)  # One-hot encode labels

# Define a simple CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model and measure time
start_time = time.time()
history = model.fit(x_train, y_train, epochs=3, batch_size=64, validation_data=(x_test, y_test))
end_time = time.time()

# Print summary
print("\nTraining completed!")
print(f"Total Training Time: {end_time - start_time:.2f} seconds")
print("Model Performance:")
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


GPU is available!
GPU Name: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


I0000 00:00:1736066865.459690    5474 gpu_device.cc:2022] Created device /device:GPU:0 with 9363 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 SUPER, pci bus id: 0000:01:00.0, compute capability: 8.9
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/3


I0000 00:00:1736066868.165844    9024 service.cc:148] XLA service 0x7b4430015f70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1736066868.165946    9024 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 SUPER, Compute Capability 8.9
2025-01-05 14:17:48.186357: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1736066868.272166    9024 cuda_dnn.cc:529] Loaded cuDNN version 90600


[1m121/782[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 1ms/step - accuracy: 0.2000 - loss: 2.1361

I0000 00:00:1736066869.285575    9024 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.3406 - loss: 1.7908 - val_accuracy: 0.5020 - val_loss: 1.3509
Epoch 2/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5487 - loss: 1.2644 - val_accuracy: 0.5828 - val_loss: 1.1807
Epoch 3/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.6198 - loss: 1.0749 - val_accuracy: 0.6304 - val_loss: 1.0497

Training completed!
Total Training Time: 9.93 seconds
Model Performance:
313/313 - 1s - 3ms/step - accuracy: 0.6304 - loss: 1.0497
Test Loss: 1.0496565103530884
Test Accuracy: 0.6304000020027161
