## Multilayer Softmax CIFAR-10: Using GPU

In [1]:
import torch
import torch.nn as nn
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import transforms

import matplotlib.pyplot as plt

In [2]:
torch.cuda.is_available()

True

## CIFAR-10: Softmax (10-classes)

In [3]:
'''
Step 1: Load the entire CIFAR-10 dataset
'''

train_CIFAR = datasets.CIFAR10(root='./cifar_10data/',
                               train=True, 
                               transform=transforms.ToTensor(),
                               download=True)

test_CIFAR = datasets.CIFAR10(root='./cifar_10data/',
                              train=False, 
                              transform=transforms.ToTensor())

'''
Step 2: Since there are 10 classes, the output should be 10
'''
class softmax(nn.Module) :
    '''
    Initialize model
        input_dim : dimension of given input data
    '''
    def __init__(self, input_dim=3*32*32) :
        super().__init__()
        self.linear = nn.Linear(input_dim, 10, bias=True)

    ''' forward given input x '''
    def forward(self, x) :
        return self.linear(x.float().view(-1, 3*32*32))

'''
Step 3: Create the model, specify loss function and optimizer
'''
torch.cuda.empty_cache()
device = "cuda"  # device = "cpu" : another option
model = softmax().to(device)                         # Define a Neural Network Model

loss_function = torch.nn.CrossEntropyLoss()     # Specify loss function
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)   # specify SGD with learning rate

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar_10data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

Extracting ./cifar_10data/cifar-10-python.tar.gz to ./cifar_10data/


In [4]:
'''
Step 4: Train model with SGD
'''
train_loader = DataLoader(dataset=train_CIFAR, batch_size=1024, shuffle=True)

epoch_num = 10

import time
start = time.time()
for epoch in range(epoch_num) :
    total_loss = 0
    for images, labels in train_loader :
        images, labels = images.to(device), labels.to(device)
        # Clear previously computed gradient
        optimizer.zero_grad()

        # then compute gradient with forward and backward passes
        train_loss = loss_function(model(images), labels)
        total_loss += train_loss.item()
        train_loss.backward()

        # perform SGD step (parameter update)
        optimizer.step()
    print(f'Epoch: {epoch+1}/{epoch_num}, loss = {total_loss}')
end = time.time()
print("Time ellapsed in training is: {}".format(end - start))


Epoch: 1/10, loss = 106.49607563018799
Epoch: 2/10, loss = 100.22456419467926
Epoch: 3/10, loss = 97.42050302028656
Epoch: 4/10, loss = 95.63818538188934
Epoch: 5/10, loss = 94.36169767379761
Epoch: 6/10, loss = 93.48389792442322
Epoch: 7/10, loss = 92.73776042461395
Epoch: 8/10, loss = 92.10744881629944
Epoch: 9/10, loss = 91.62287318706512
Epoch: 10/10, loss = 91.19469714164734
Time ellapsed in training is: 100.38860249519348


In [5]:
'''
Step 5: (same step)
'''
test_loss, correct = 0, 0

# Test data
test_loader = DataLoader(dataset=test_CIFAR, batch_size=1, shuffle=False)
# no need to shuffle test data

# Evaluate accuracy using test data
for ind, (image, label) in enumerate(test_loader) :
    image, label = image.to(device), label.to(device)
    # Forward pass
    output = model(image)

    # Calculate cumulative loss
    test_loss += loss_function(output, label).item()

    # Get index of maximum log-probability
    pred = output.max(1, keepdim=True)[1]

    # Trace correct predictions
    correct += pred.eq(label.view_as(pred)).sum().item()

            
# Print out the results
print('[Test set] Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss /len(test_loader), correct, len(test_loader),
        100. * correct / len(test_loader)))

[Test set] Average loss: 1.8616, Accuracy: 3579/10000 (35.79%)



## CIFAR-10: MLP4 with Softmax (10-classes)
### Learning rate 0.5

In [6]:
# model
class MLP4(nn.Module) :
    '''
    Initialize model
        input_dim : dimension of given input data
    '''
    # CIFAR-10 data is 32*32 images with 3 RGB channels
    def __init__(self, input_dim=3*32*32) :
        super().__init__()
        self.linear = nn.Linear(input_dim, input_dim//2, bias=True)
        self.linear2 = nn.Linear(input_dim//2, input_dim//4, bias=True)
        self.linear3 = nn.Linear(input_dim//4, input_dim//8, bias=True)
        self.linear4 = nn.Linear(input_dim//8, 10, bias=True)
        
    ''' forward given input x '''
    def forward(self, x) :
        x = x.float().view(-1, 3*32*32)
        x = nn.functional.relu(self.linear(x))
        x = nn.functional.relu(self.linear2(x))
        x = nn.functional.relu(self.linear3(x))
        x = self.linear4(x)
        return x

model = MLP4().to(device)                       # Define a Neural Network Model

loss_function = torch.nn.CrossEntropyLoss()     # Specify loss function
optimizer = torch.optim.SGD(model.parameters(), lr=5e-1)   # specify SGD with learning rate => 너무 커서 epoch 이 진행함에도 training 시간이 줄어들지 않음. 아래 실험에서는 더 작게 해봄.

# train
train_loader = DataLoader(dataset=train_CIFAR, batch_size=4096, shuffle=True)
epoch_num = 10

import time
start = time.time()
for epoch in range(epoch_num) :
    total_loss = 0
    for images, labels in train_loader :
        images, labels = images.to(device), labels.to(device)
        # Clear previously computed gradient
        optimizer.zero_grad()

        # then compute gradient with forward and backward passes
        train_loss = loss_function(model(images), labels)
        total_loss += train_loss.item()
        train_loss.backward()

        # perform SGD step (parameter update)
        optimizer.step()
    print(f'Epoch: {epoch+1}/{epoch_num}, loss = {total_loss}')
end = time.time()
print("Time ellapsed in training is: {}".format(end - start))


# test
test_loss, correct = 0, 0

# Test data
test_loader = DataLoader(dataset=test_CIFAR, batch_size=1, shuffle=False)
# no need to shuffle test data

# Evaluate accuracy using test data
for ind, (image, label) in enumerate(test_loader) :
    image, label = image.to(device), label.to(device)

    # Forward pass
    output = model(image)

    # Calculate cumulative loss
    test_loss += loss_function(output, label).item()

    # Get index of maximum log-probability
    pred = output.max(1, keepdim=True)[1]

    # Trace correct predictions
    correct += pred.eq(label.view_as(pred)).sum().item()

            
# Print out the results
print('[Test set] Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss /len(test_loader), correct, len(test_loader),
        100. * correct / len(test_loader)))

Epoch: 1/10, loss = 29.670220136642456
Epoch: 2/10, loss = 29.734856128692627
Epoch: 3/10, loss = 29.620304584503174
Epoch: 4/10, loss = 29.548768758773804
Epoch: 5/10, loss = 29.70564842224121
Epoch: 6/10, loss = 29.583429098129272
Epoch: 7/10, loss = 29.39264225959778
Epoch: 8/10, loss = 29.818995475769043
Epoch: 9/10, loss = 29.434800624847412
Epoch: 10/10, loss = 29.60382890701294
Time ellapsed in training is: 122.34277534484863
[Test set] Average loss: 2.2841, Accuracy: 1110/10000 (11.10%)



### Learning rate 0.1

In [7]:
# model
class MLP4(nn.Module) :
    '''
    Initialize model
        input_dim : dimension of given input data
    '''
    # CIFAR-10 data is 32*32 images with 3 RGB channels
    def __init__(self, input_dim=3*32*32) :
        super().__init__()
        self.linear = nn.Linear(input_dim, input_dim//2, bias=True)
        self.linear2 = nn.Linear(input_dim//2, input_dim//4, bias=True)
        self.linear3 = nn.Linear(input_dim//4, input_dim//8, bias=True)
        self.linear4 = nn.Linear(input_dim//8, 10, bias=True)
        
    ''' forward given input x '''
    def forward(self, x) :
        x = x.float().view(-1, 3*32*32)
        x = nn.functional.relu(self.linear(x))
        x = nn.functional.relu(self.linear2(x))
        x = nn.functional.relu(self.linear3(x))
        x = self.linear4(x)
        return x

model = MLP4().to(device)                       # Define a Neural Network Model

loss_function = torch.nn.CrossEntropyLoss()     # Specify loss function
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)   # specify SGD with learning rate => 위에서보다 작게하니 epoch 이 진행함에 따라 training 시간이 줄어듬

# train
train_loader = DataLoader(dataset=train_CIFAR, batch_size=4096, shuffle=True)
epoch_num = 10

import time
start = time.time()
for epoch in range(epoch_num) :
    total_loss = 0
    for images, labels in train_loader :
        images, labels = images.to(device), labels.to(device)
        # Clear previously computed gradient
        optimizer.zero_grad()

        # then compute gradient with forward and backward passes
        train_loss = loss_function(model(images), labels)
        total_loss += train_loss.item()
        train_loss.backward()

        # perform SGD step (parameter update)
        optimizer.step()
    print(f'Epoch: {epoch+1}/{epoch_num}, loss = {total_loss}')
end = time.time()
print("Time ellapsed in training is: {}".format(end - start))


# test
test_loss, correct = 0, 0

# Test data
test_loader = DataLoader(dataset=test_CIFAR, batch_size=1, shuffle=False)
# no need to shuffle test data

# Evaluate accuracy using test data
for ind, (image, label) in enumerate(test_loader) :
    image, label = image.to(device), label.to(device)

    # Forward pass
    output = model(image)

    # Calculate cumulative loss
    test_loss += loss_function(output, label).item()

    # Get index of maximum log-probability
    pred = output.max(1, keepdim=True)[1]

    # Trace correct predictions
    correct += pred.eq(label.view_as(pred)).sum().item()

            
# Print out the results
print('[Test set] Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss /len(test_loader), correct, len(test_loader),
        100. * correct / len(test_loader)))

Epoch: 1/10, loss = 29.8480441570282
Epoch: 2/10, loss = 29.650339126586914
Epoch: 3/10, loss = 29.327353954315186
Epoch: 4/10, loss = 28.708716869354248
Epoch: 5/10, loss = 28.395109176635742
Epoch: 6/10, loss = 28.09563899040222
Epoch: 7/10, loss = 27.7009699344635
Epoch: 8/10, loss = 27.34601354598999
Epoch: 9/10, loss = 27.038835763931274
Epoch: 10/10, loss = 26.686126232147217
Time ellapsed in training is: 112.82796788215637
[Test set] Average loss: 2.0241, Accuracy: 2703/10000 (27.03%)



### Learning rate 0.1, epoch 100

In [18]:
# model
class MLP4(nn.Module) :
    '''
    Initialize model
        input_dim : dimension of given input data
    '''
    # CIFAR-10 data is 32*32 images with 3 RGB channels
    def __init__(self, input_dim=3*32*32) :
        super().__init__()
        self.linear = nn.Linear(input_dim, input_dim//2, bias=True)
        self.linear2 = nn.Linear(input_dim//2, input_dim//4, bias=True)
        self.linear3 = nn.Linear(input_dim//4, input_dim//8, bias=True)
        self.linear4 = nn.Linear(input_dim//8, 10, bias=True)
        
    ''' forward given input x '''
    def forward(self, x) :
        x = x.float().view(-1, 3*32*32)
        x = nn.functional.relu(self.linear(x))
        x = nn.functional.relu(self.linear2(x))
        x = nn.functional.relu(self.linear3(x))
        x = self.linear4(x)
        return x

model = MLP4().to(device)                       # Define a Neural Network Model

loss_function = torch.nn.CrossEntropyLoss()     # Specify loss function
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)   # specify SGD with learning rate

# train
train_loader = DataLoader(dataset=train_CIFAR, batch_size=4096, shuffle=True)
epoch_num = 100

import time
start = time.time()
for epoch in range(epoch_num) :
    total_loss = 0
    for images, labels in train_loader :
        images, labels = images.to(device), labels.to(device)
        # Clear previously computed gradient
        optimizer.zero_grad()

        # then compute gradient with forward and backward passes
        train_loss = loss_function(model(images), labels)
        total_loss += train_loss.item()
        train_loss.backward()

        # perform SGD step (parameter update)
        optimizer.step()
    print(f'Epoch: {epoch+1}/{epoch_num}, loss = {total_loss}')
end = time.time()
print("Time ellapsed in training is: {}".format(end - start))


# test
test_loss, correct = 0, 0

# Test data
test_loader = DataLoader(dataset=test_CIFAR, batch_size=1, shuffle=False)
# no need to shuffle test data

# Evaluate accuracy using test data
for ind, (image, label) in enumerate(test_loader) :
    image, label = image.to(device), label.to(device)

    # Forward pass
    output = model(image)

    # Calculate cumulative loss
    test_loss += loss_function(output, label).item()

    # Get index of maximum log-probability
    pred = output.max(1, keepdim=True)[1]

    # Trace correct predictions
    correct += pred.eq(label.view_as(pred)).sum().item()

            
# Print out the results
print('[Test set] Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss /len(test_loader), correct, len(test_loader),
        100. * correct / len(test_loader)))

Epoch: 1/100, loss = 29.846237182617188
Epoch: 2/100, loss = 29.602301836013794
Epoch: 3/100, loss = 29.220120429992676
Epoch: 4/100, loss = 28.521347284317017
Epoch: 5/100, loss = 28.29578709602356
Epoch: 6/100, loss = 27.986599922180176
Epoch: 7/100, loss = 27.570213556289673
Epoch: 8/100, loss = 27.07690143585205
Epoch: 9/100, loss = 27.02597403526306
Epoch: 10/100, loss = 26.447596073150635
Epoch: 11/100, loss = 26.506394743919373
Epoch: 12/100, loss = 26.160455226898193
Epoch: 13/100, loss = 26.246181845664978
Epoch: 14/100, loss = 25.834636092185974
Epoch: 15/100, loss = 25.49835455417633
Epoch: 16/100, loss = 25.770874977111816
Epoch: 17/100, loss = 25.171059727668762
Epoch: 18/100, loss = 25.27652657032013
Epoch: 19/100, loss = 25.005683660507202
Epoch: 20/100, loss = 24.892633199691772
Epoch: 21/100, loss = 24.651652216911316
Epoch: 22/100, loss = 24.733332633972168
Epoch: 23/100, loss = 24.325074791908264
Epoch: 24/100, loss = 24.432835817337036
Epoch: 25/100, loss = 24.20593

### Learning rate 0.05, epoch=100

In [19]:
# model
class MLP4(nn.Module) :
    '''
    Initialize model
        input_dim : dimension of given input data
    '''
    # CIFAR-10 data is 32*32 images with 3 RGB channels
    def __init__(self, input_dim=3*32*32) :
        super().__init__()
        self.linear = nn.Linear(input_dim, input_dim//2, bias=True)
        self.linear2 = nn.Linear(input_dim//2, input_dim//4, bias=True)
        self.linear3 = nn.Linear(input_dim//4, input_dim//8, bias=True)
        self.linear4 = nn.Linear(input_dim//8, 10, bias=True)
        
    ''' forward given input x '''
    def forward(self, x) :
        x = x.float().view(-1, 3*32*32)
        x = nn.functional.relu(self.linear(x))
        x = nn.functional.relu(self.linear2(x))
        x = nn.functional.relu(self.linear3(x))
        x = self.linear4(x)
        return x

model = MLP4().to(device)                       # Define a Neural Network Model

loss_function = torch.nn.CrossEntropyLoss()     # Specify loss function
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)   # specify SGD with learning rate

# train
train_loader = DataLoader(dataset=train_CIFAR, batch_size=4096, shuffle=True)
epoch_num = 100

import time
start = time.time()
for epoch in range(epoch_num) :
    total_loss = 0
    for images, labels in train_loader :
        images, labels = images.to(device), labels.to(device)
        # Clear previously computed gradient
        optimizer.zero_grad()

        # then compute gradient with forward and backward passes
        train_loss = loss_function(model(images), labels)
        total_loss += train_loss.item()
        train_loss.backward()

        # perform SGD step (parameter update)
        optimizer.step()
    print(f'Epoch: {epoch+1}/{epoch_num}, loss = {total_loss}')
end = time.time()
print("Time ellapsed in training is: {}".format(end - start))


# test
test_loss, correct = 0, 0

# Test data
test_loader = DataLoader(dataset=test_CIFAR, batch_size=1, shuffle=False)
# no need to shuffle test data

# Evaluate accuracy using test data
for ind, (image, label) in enumerate(test_loader) :
    image, label = image.to(device), label.to(device)

    # Forward pass
    output = model(image)

    # Calculate cumulative loss
    test_loss += loss_function(output, label).item()

    # Get index of maximum log-probability
    pred = output.max(1, keepdim=True)[1]

    # Trace correct predictions
    correct += pred.eq(label.view_as(pred)).sum().item()

            
# Print out the results
print('[Test set] Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss /len(test_loader), correct, len(test_loader),
        100. * correct / len(test_loader)))

Epoch: 1/100, loss = 29.845097064971924
Epoch: 2/100, loss = 29.615514516830444
Epoch: 3/100, loss = 29.21963596343994
Epoch: 4/100, loss = 28.49018669128418
Epoch: 5/100, loss = 28.312822580337524
Epoch: 6/100, loss = 27.964833974838257
Epoch: 7/100, loss = 27.52726936340332
Epoch: 8/100, loss = 27.464649438858032
Epoch: 9/100, loss = 26.85899782180786
Epoch: 10/100, loss = 26.40575611591339
Epoch: 11/100, loss = 26.50705087184906
Epoch: 12/100, loss = 26.27977466583252
Epoch: 13/100, loss = 25.91855764389038
Epoch: 14/100, loss = 25.854105234146118
Epoch: 15/100, loss = 25.657342553138733
Epoch: 16/100, loss = 25.469631552696228
Epoch: 17/100, loss = 25.39148509502411
Epoch: 18/100, loss = 25.00677800178528
Epoch: 19/100, loss = 25.121166348457336
Epoch: 20/100, loss = 24.731921434402466
Epoch: 21/100, loss = 24.566386580467224
Epoch: 22/100, loss = 24.65404236316681
Epoch: 23/100, loss = 24.439467072486877
Epoch: 24/100, loss = 24.27803635597229
Epoch: 25/100, loss = 24.177013397216