In [1]:
import torch

# Print the installed PyTorch version for reference.
print(torch.__version__)

1.13.0


### Which one is better: PyTorch or TensorFlow?

## PyTorch Implementation

In [2]:
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split
from torch import nn
import time

In [3]:
# Seed PyTorch's default random number generator with a fixed value so that
# repeated runs start from the same random state.
torch.manual_seed(0)

<torch._C.Generator at 0x282867ad050>

In [4]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [5]:
# Hyperparameters for the PyTorch experiment.
batch_size = 32          # samples per mini-batch in every DataLoader
learning_rate = 1e-3     # step size passed to the Adam optimizer
validation_split = 0.1   # fraction of the training set held out for validation
n_epochs = 10            # number of full passes over the training data

In [6]:
# CIFAR-10 is downloaded into ./data on first use. Both splits share the same
# options: ToTensor() converts every PIL image into a tensor.
cifar10_options = dict(root='data', download=True, transform=ToTensor())
training_set = datasets.CIFAR10(train=True, **cifar10_options)
test_set = datasets.CIFAR10(train=False, **cifar10_options)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data\cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data\cifar-10-python.tar.gz to data
Files already downloaded and verified


## Note that the ToTensor() transformation from PIL images to tensors automatically rescales the pixel values from the range [0, 255] to [0, 1].

In [7]:
# Hold out `validation_split` of the training data for validation.
# The training size is derived as the remainder so that the two sizes always
# add up to the dataset length; truncating both products independently with
# int() can drop a sample, in which case random_split raises a ValueError.
# NOTE: this cell rebinds `training_set` to the training subset, so re-running
# it on its own would split the already-reduced subset a second time.
n_available = len(training_set)
validation_size = int(validation_split * n_available)
training_size = n_available - validation_size
training_set, validation_set = random_split(training_set, [training_size, validation_size])

### We define our data loaders for the training, validation and test sets. A DataLoader is an iterable over a data set that takes care of splitting it into mini-batches and, when `shuffle=True` (as for the training loader here), reshuffling it at every epoch of training to reduce overfitting.

In [8]:
# Mini-batch iterators for the three splits. Only the training loader shuffles
# its samples; the validation and test loaders keep a fixed order.
train_loader = DataLoader(dataset=training_set, batch_size=batch_size, shuffle=True)
validation_loader, test_loader = (
    DataLoader(dataset=split, batch_size=batch_size, shuffle=False)
    for split in (validation_set, test_set)
)

### Now we build our neural network model. In this experiment, I’m going to define a CNN with two convolutional layers. The first layer has 32 filters of size 3 × 3, and the second has 64 filters of size 3 × 3. Each convolutional layer is followed by a ReLU activation, a max pooling layer and a dropout layer with a dropout rate of 25%. After the two convolutional blocks come two fully-connected layers: one with 512 neurons (followed by ReLU and a dropout rate of 50%) and the final output layer with 10 neurons (corresponding to the 10 CIFAR-10 classes).

In [9]:
model = nn.Sequential(
    # Block 1: 3 input channels -> 32 feature maps (3x3 kernels, no padding),
    # then 2x2 max pooling and 25% dropout.
    nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(p=0.25),

    # Block 2: 32 -> 64 feature maps, same kernel size, pooling and dropout.
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(p=0.25),

    # Classifier head: flatten the 64 feature maps of size 6x6 into a vector,
    # pass it through a hidden layer of 512 units with 50% dropout ...
    nn.Flatten(),
    nn.Linear(in_features=64 * 6 * 6, out_features=512),
    nn.ReLU(),
    nn.Dropout(p=0.5),

    # ... and produce one raw score per class (10 classes).
    nn.Linear(in_features=512, out_features=10),
)

In [10]:
model.to(device) # Move the model to the selected device (the GPU if CUDA is available, otherwise the CPU)

Sequential(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Dropout(p=0.25, inplace=False)
  (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (5): ReLU()
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Dropout(p=0.25, inplace=False)
  (8): Flatten(start_dim=1, end_dim=-1)
  (9): Linear(in_features=2304, out_features=512, bias=True)
  (10): ReLU()
  (11): Dropout(p=0.5, inplace=False)
  (12): Linear(in_features=512, out_features=10, bias=True)
)

In [11]:
def train_loop(model, data_loader, loss_fn, optimizer):
    """Run one pass over `data_loader`, updating the model after every mini-batch.

    Each mini-batch is moved to the notebook-level `device`, pushed through
    the model, scored with `loss_fn`, and used for one optimizer step. After
    every 100th mini-batch the loss of that batch is printed together with a
    running sample count.

    Args:
        model: Callable network whose parameters `optimizer` updates.
        data_loader: Iterable of `(inputs, targets)` mini-batches; its
            `.dataset` length is used as the total in the progress line.
        loss_fn: Callable taking `(predictions, targets)` and returning a
            scalar tensor that supports `.backward()`.
        optimizer: Optimizer providing `zero_grad()` and `step()`.
    """
    n_samples = len(data_loader.dataset)

    for step, (inputs, targets) in enumerate(data_loader, start=1):
        # The batch must live on the same device as the model.
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and loss for this mini-batch.
        batch_loss = loss_fn(model(inputs), targets)

        # Clear stale gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Progress is only reported on every 100th mini-batch.
        if step % 100 != 0:
            continue
        seen = step * len(inputs)
        print(f'loss: {batch_loss.item():>7f}  [{seen:>5d}/{n_samples:>5d}]')

In [12]:
def evaluate_model(model, data_loader):
    """Return the classification accuracy of `model` over `data_loader`, in percent.

    The model is put into evaluation mode for the duration of the call, so
    layers such as dropout behave deterministically even if the caller
    forgot to call `model.eval()`. The previous training/evaluation mode of
    every sub-module is restored before returning. Gradients are not tracked.

    Accuracy is computed over the samples actually yielded by the loader
    rather than over `len(data_loader.dataset)`, so it stays correct for
    loaders that do not visit every sample of their dataset.

    Each batch is moved to the notebook-level `device` before the forward pass.

    Args:
        model: An `nn.Module` returning per-class scores of shape
            `(batch, n_classes)`.
        data_loader: Iterable of `(inputs, integer_class_labels)` mini-batches.

    Returns:
        float: `100 * correct / evaluated`.

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    # Remember every sub-module's mode so a partially frozen model (some
    # sub-modules deliberately left in eval mode) is restored exactly.
    previous_modes = [(module, module.training) for module in model.modules()]
    model.eval()

    correct = 0
    evaluated = 0
    try:
        with torch.no_grad():
            for X, y in data_loader:
                X, y = X.to(device), y.to(device)
                y_pred = model(X).argmax(1)  # index of the highest class score
                correct += (y_pred == y).sum().item()
                evaluated += y.size(0)
    finally:
        for module, was_training in previous_modes:
            module.training = was_training

    return 100 * correct / evaluated

In [13]:
def train_model(model, loss_fn, optimizer):
    """Train `model` for `n_epochs` epochs, then report its final accuracy.

    Every epoch runs one pass of `train_loop` over `train_loader` with the
    model in training mode, then switches the model to evaluation mode and
    measures accuracy on `validation_loader`. The time printed per epoch
    covers both the training pass and the validation pass. After the last
    epoch the total elapsed time is printed, followed by the accuracy on
    `train_loader` and on `test_loader`.

    Relies on the notebook-level names `n_epochs`, `train_loader`,
    `validation_loader` and `test_loader`.

    Args:
        model: The network to train.
        loss_fn: Loss callable forwarded to `train_loop`.
        optimizer: Optimizer forwarded to `train_loop`.
    """
    run_started_at = time.time()

    for epoch_number in range(1, n_epochs + 1):
        print(f'Epoch {epoch_number}\n-------------------------------')

        epoch_started_at = time.time()

        # Dropout has to be active while the weights are being fitted ...
        model.train()
        train_loop(model, train_loader, loss_fn, optimizer)

        # ... and switched off while measuring validation accuracy.
        model.eval()
        val_accuracy = evaluate_model(model, validation_loader)

        epoch_seconds = time.time() - epoch_started_at
        print(f'Epoch {epoch_number} completed in {epoch_seconds:.3f}s, '
              f'val_accuracy: {val_accuracy:.3f}%\n')

    total_seconds = time.time() - run_started_at
    print(f'Training completed in {total_seconds:.3f}s')

    # Final report: the training split first, then the test split.
    model.eval()
    for split_label, loader in (('training', train_loader), ('test', test_loader)):
        accuracy = evaluate_model(model, loader)
        print(f'Accuracy on {split_label} set: {accuracy:.3f}%')

In [14]:
# Cross-entropy loss, applied to the raw scores of the model's final Linear
# layer (the model has no softmax layer).
loss_fn = nn.CrossEntropyLoss()
# Adam over all model parameters, using the learning rate set in the hyperparameter cell.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train for n_epochs epochs, then print the training and test accuracy.
train_model(model, loss_fn, optimizer)

Epoch 1
-------------------------------
loss: 1.970100  [ 3200/45000]
loss: 1.650105  [ 6400/45000]
loss: 1.930462  [ 9600/45000]
loss: 1.731111  [12800/45000]
loss: 1.554985  [16000/45000]
loss: 1.849732  [19200/45000]
loss: 1.569666  [22400/45000]
loss: 1.886702  [25600/45000]
loss: 1.291531  [28800/45000]
loss: 1.833465  [32000/45000]
loss: 1.564703  [35200/45000]
loss: 1.482798  [38400/45000]
loss: 1.628996  [41600/45000]
loss: 1.355355  [44800/45000]
Epoch 1 completed in 14.163s, val_accuracy: 52.880%

Epoch 2
-------------------------------
loss: 1.384692  [ 3200/45000]
loss: 1.235805  [ 6400/45000]
loss: 1.273409  [ 9600/45000]
loss: 1.245774  [12800/45000]
loss: 1.297815  [16000/45000]
loss: 1.287188  [19200/45000]
loss: 1.724888  [22400/45000]
loss: 1.475049  [25600/45000]
loss: 1.221546  [28800/45000]
loss: 1.244520  [32000/45000]
loss: 1.501837  [35200/45000]
loss: 1.464387  [38400/45000]
loss: 1.288455  [41600/45000]
loss: 1.869904  [44800/45000]
Epoch 2 completed in 8.152s

## TensorFlow Implementation

In [19]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time

In [20]:
tf.random.set_seed(0) # Fix TensorFlow's global random seed so that repeated runs start from the same random state

In [21]:
# Hyperparameters for the TensorFlow experiment (same values as in the PyTorch section).
batch_size = 32          # samples per mini-batch passed to model.fit
learning_rate = 1e-3     # step size passed to the Adam optimizer
validation_split = 0.1   # fraction of the training data held out for validation
n_epochs = 10            # number of full passes over the training data

In [22]:
# Load CIFAR-10, already divided into training and test sets (downloaded on first use).
(X_train, y_train), (X_test, y_test) = keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


## We scale the data to be in the range [0, 1] instead of [0, 255] (in PyTorch this was already taken care of for us by the ToTensor() transform):

In [23]:
# Rescale pixel values from [0, 255] to [0, 1]. Casting to float32 first keeps
# the arrays in single precision; dividing the integer arrays directly would
# create float64 copies twice that size.
# NOTE: this cell rebinds X_train / X_test, so run it once per load_data() call;
# re-running it would divide the already-scaled values by 255 again.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

In [24]:
model = keras.models.Sequential([
    # Block 1: 32 filters of size 3x3 on 32x32 RGB inputs, then 2x2 max
    # pooling and 25% dropout.
    layers.Conv2D(filters=32, kernel_size=3, activation='relu',
                  kernel_initializer='he_uniform', input_shape=(32, 32, 3)),
    layers.MaxPooling2D(pool_size=2),
    layers.Dropout(rate=0.25),

    # Block 2: 64 filters of size 3x3, same pooling and dropout.
    layers.Conv2D(filters=64, kernel_size=3, activation='relu',
                  kernel_initializer='he_uniform'),
    layers.MaxPooling2D(pool_size=2),
    layers.Dropout(rate=0.25),

    # Classifier head: a hidden layer of 512 units with 50% dropout, then a
    # softmax over the 10 classes. Unlike the PyTorch model, which ends in a
    # plain Linear layer, this model outputs class probabilities.
    layers.Flatten(),
    layers.Dense(units=512, activation='relu', kernel_initializer='he_uniform'),
    layers.Dropout(rate=0.5),
    layers.Dense(units=10, activation='softmax', kernel_initializer='he_uniform'),
])

In [25]:
# Print every layer's output shape and parameter count to check the architecture.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 15, 15, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 15, 15, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 13, 13, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 6, 6, 64)         0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 6, 6, 64)          0

In [26]:
# Adam with the learning rate from the hyperparameter cell.
# NOTE: this rebinds `optimizer`, replacing the PyTorch optimizer of the same name.
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

# The "sparse" cross-entropy is used because the labels are passed as integer
# class ids (y_train is never one-hot encoded in this notebook); it is applied
# to the probabilities produced by the model's softmax output layer.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [27]:
# Time the whole Keras training run (wall-clock time around model.fit).
train_start_time = time.time()
# validation_split holds out that fraction of (X_train, y_train) for validation.
# NOTE(review): per the Keras documentation the held-out samples are the last
# ones in the arrays, taken before shuffling, whereas the PyTorch section uses
# a random split. Confirm this difference is acceptable for the comparison.
model.fit(X_train, y_train, batch_size=batch_size, 
          epochs=n_epochs, validation_split=validation_split)
train_elapsed_time = time.time() - train_start_time
print(f'Training completed in {train_elapsed_time:.3f}s')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training completed in 63.456s


In [29]:
# model.evaluate returns [loss, accuracy] here because the model was compiled
# with metrics=['accuracy'], so index 1 is the accuracy as a fraction.
# NOTE: X_train still contains the samples that model.fit held out for
# validation, so this "training" accuracy covers all of X_train, not only the
# samples the model was fitted on.
train_results = model.evaluate(X_train, y_train, verbose=0)
print(f'Accuracy on training set: {train_results[1] * 100:.3f}%')

test_results = model.evaluate(X_test, y_test, verbose=0)
print(f'Accuracy on test set: {test_results[1] * 100:.3f}%')

Accuracy on training set: 83.494%
Accuracy on test set: 71.840%


## Conclusions
From this little experiment we can conclude that:

In this experiment, training the model with TensorFlow was much faster, and the resulting accuracy was also higher.