# Tensorflow with GPU

This notebook provides an introduction to computing on a [GPU](https://cloud.google.com/gpu) in Colab. In this notebook you will connect to a GPU, and then run some basic TensorFlow operations on both the CPU and a GPU, observing the speedup provided by using the GPU.


## Enabling and testing the GPU

First, you'll need to enable GPUs for the notebook:

- Navigate to Edit→Notebook Settings
- select GPU from the Hardware Accelerator drop-down

Next, we'll confirm that we can connect to the GPU with tensorflow:

In [1]:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


## Observe TensorFlow speedup on GPU relative to CPU

This example constructs a typical convolutional neural network layer over a
random image and manually places the resulting ops on either the CPU or the GPU
to compare execution speed.

In [2]:
import tensorflow as tf
import timeit

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print(
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
  raise SystemError('GPU device not found')

def cpu():
  with tf.device('/cpu:0'):
    random_image_cpu = tf.random.normal((100, 100, 100, 3))
    net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)
    return tf.math.reduce_sum(net_cpu)

def gpu():
  with tf.device('/device:GPU:0'):
    random_image_gpu = tf.random.normal((100, 100, 100, 3))
    net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
    return tf.math.reduce_sum(net_gpu)

# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
cpu()
gpu()

# Run the op several times.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')
print('CPU (s):')
cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
print(cpu_time)
print('GPU (s):')
gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
print(gpu_time)
print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))

Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). Sum of ten runs.
CPU (s):
8.293390190000025
GPU (s):
0.10579952200004072
GPU speedup over CPU: 78x


In [6]:
import numpy
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.constraints import max_norm
from keras.optimizers import SGD
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras import backend as K
#K.set_image_dim_ordering("th")
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

In [7]:
# load data
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
# normalize inputs from 0-255 to 0.0-1.0
X_train = X_train.astype( "float32" )
X_test = X_test.astype( "float32" )
X_train = X_train / 255.0
X_test = X_test / 255.0
# one hot encode outputs
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
num_classes = y_test.shape[1]

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [18]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.constraints import max_norm

model = Sequential()
model.add(Conv2D(32,(3, 3), input_shape=(32, 32, 3), padding= "same" ,activation= "relu" , kernel_constraint=max_norm(3)))
model.add(Dropout(0.2))
model.add(Conv2D(32,(3, 3), activation= "relu" , padding= "same" ,kernel_constraint=max_norm(3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(512, activation= "relu" , kernel_constraint=max_norm(3)))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation= "softmax" ))

In [19]:
from keras.optimizers import SGD
from keras.losses import categorical_crossentropy
import tensorflow as tf

epochs = 25
initial_learning_rate = 0.01
decay_rate = 0.1
decay_steps = epochs
# Define a learning rate schedule using ExponentialDecay
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=decay_steps,
    decay_rate=decay_rate,
    staircase=True)

# Initialize the SGD optimizer with the learning rate schedule
sgd = SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=False)

# Compile the model
model.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=["accuracy"])
print(model.summary())

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_26 (Conv2D)          (None, 32, 32, 32)        896       
                                                                 
 dropout_3 (Dropout)         (None, 32, 32, 32)        0         
                                                                 
 conv2d_27 (Conv2D)          (None, 32, 32, 32)        9248      
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 16, 16, 32)        0         
 g2D)                                                            
                                                                 
 flatten_2 (Flatten)         (None, 8192)              0         
                                                                 
 dense_2 (Dense)             (None, 512)               4194816   
                                                      

In [20]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, batch_size=32, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Epoch 1/25
1563/1563 - 13s - loss: 2.2300 - accuracy: 0.1655 - val_loss: 2.2413 - val_accuracy: 0.1878 - 13s/epoch - 8ms/step
Epoch 2/25
1563/1563 - 8s - loss: 2.2284 - accuracy: 0.1660 - val_loss: 2.2414 - val_accuracy: 0.1878 - 8s/epoch - 5ms/step
Epoch 3/25
1563/1563 - 8s - loss: 2.2287 - accuracy: 0.1663 - val_loss: 2.2416 - val_accuracy: 0.1879 - 8s/epoch - 5ms/step
Epoch 4/25
1563/1563 - 8s - loss: 2.2281 - accuracy: 0.1707 - val_loss: 2.2418 - val_accuracy: 0.1884 - 8s/epoch - 5ms/step
Epoch 5/25
1563/1563 - 8s - loss: 2.2292 - accuracy: 0.1679 - val_loss: 2.2420 - val_accuracy: 0.1887 - 8s/epoch - 5ms/step
Epoch 6/25
1563/1563 - 8s - loss: 2.2290 - accuracy: 0.1712 - val_loss: 2.2422 - val_accuracy: 0.1891 - 8s/epoch - 5ms/step
Epoch 7/25
1563/1563 - 9s - loss: 2.2296 - accuracy: 0.1674 - val_loss: 2.2424 - val_accuracy: 0.1889 - 9s/epoch - 6ms/step
Epoch 8/25
1563/1563 - 9s - loss: 2.2289 - accuracy: 0.1688 - val_loss: 2.2426 - val_accuracy: 0.1897 - 9s/epoch - 6ms/step
Epoch 

In [22]:
# Large CNN model for the CIFAR-10 Dataset
import numpy
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense , Dropout , Flatten ,MaxPooling2D ,Conv2D
from keras.constraints import max_norm

from keras.optimizers import SGD
from keras.utils import to_categorical
from keras import backend as K
#K.set_image_dim_ordering( "th" )

In [24]:
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
# normalize inputs from 0-255 to 0.0-1.0
X_train = X_train.astype( "float32" )
X_test = X_test.astype( "float32" )
X_train = X_train / 255.0
X_test = X_test / 255.0

# one hot encode outputs
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
num_classes = y_test.shape[1]

In [29]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(32, 32, 3), activation= "relu" ,padding= "same" ))
model.add(Dropout(0.2))
model.add(Conv2D(32, (3, 3), activation= "relu" , padding= "same" ))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64,(3, 3), activation= "relu" , padding= "same" ))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3, 3), activation= "relu" , padding= "same" ))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation= "relu" , padding= "same" ))
model.add(Dropout(0.2))
model.add(Conv2D(128, (3, 3), activation= "relu" , padding= "same" ))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(1024, activation= "relu" , kernel_constraint=max_norm(3)))
model.add(Dropout(0.2))
model.add(Dense(512, activation= "relu" , kernel_constraint=max_norm(3)))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation= "softmax" ))

In [31]:
from keras.optimizers import SGD
from keras.losses import categorical_crossentropy
import tensorflow as tf

# Compile model
epochs = 25
initial_learning_rate = 0.01

decay_rate = 0.1

# Define the learning rate schedule using ExponentialDecay
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=epochs,
    decay_rate=decay_rate,
    staircase=True)

# Initialize the SGD optimizer with the learning rate schedule
sgd = SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=False)

# Compile the model
model.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=["accuracy"])

In [32]:
print(model.summary())

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_42 (Conv2D)          (None, 32, 32, 32)        896       
                                                                 
 dropout_12 (Dropout)        (None, 32, 32, 32)        0         
                                                                 
 conv2d_43 (Conv2D)          (None, 32, 32, 32)        9248      
                                                                 
 max_pooling2d_10 (MaxPooli  (None, 16, 16, 32)        0         
 ng2D)                                                           
                                                                 
 conv2d_44 (Conv2D)          (None, 16, 16, 64)        18496     
                                                                 
 dropout_13 (Dropout)        (None, 16, 16, 64)        0         
                                                      

In [33]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs,batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Accuracy: 9.99%


In [None]:
import numpy as np
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.utils import to_categorical

# Load data
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Normalize inputs
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# One-hot encode outputs
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Define model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3)))
model.add(BatchNormalization())
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

# Compile model
optimizer = Adam(learning_rate=0.0005)  # Adjust learning rate if needed
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Print model summary
print(model.summary())

# Train model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=25, batch_size=64)

# Evaluate model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Test Accuracy: %.2f%%" % (scores[1] * 100))
