In [None]:
# Keras


'''Trains a simple convnet on the MNIST dataset.
Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
'''

from __future__ import print_function
import keras
from keras import backend as K
from keras.datasets import mnist
from keras.layers import BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Dense, Dropout, Flatten
from keras.models import Sequential

# Training hyperparameters for the Keras run
epochs = 6
batch_size = 100
num_classes = 10

# Height and width of every input image
img_rows, img_cols = 28, 28

# Load the predefined train/test split of MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# The position of the single channel axis depends on the backend's
# image data format, so pick the per-sample shape first ...
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# ... then give both splits that explicit channel axis.
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Cast to float32 and scale the pixel values by 1/255
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the integer class labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Keras CNN: two blocks of (pad 2 -> 5x5 conv + ReLU -> batch norm -> 2x2 max-pool)
# followed by a single softmax classifier layer.
#
# Spatial size per sample: 28 -> (pad) 32 -> (conv 5x5) 28 -> (pool) 14
#                             -> (pad) 18 -> (conv 5x5) 14 -> (pool) 7,
# so Flatten() produces 7 * 7 * 32 features.

# BatchNormalization has to normalise over the channel axis, and the position
# of that axis follows the backend's image data format.
bn_axis = 1 if K.image_data_format() == 'channels_first' else -1

model = Sequential()

# Block 1. input_shape is declared on the FIRST layer of the Sequential model;
# on a later layer it would describe the wrong (unpadded) tensor.
model.add(ZeroPadding2D(padding=(2, 2), input_shape=input_shape))  # Padding of 2
# strides is an argument of Conv2D itself, not of model.add().
model.add(Conv2D(16, kernel_size=(5, 5),
                 strides=(1, 1),
                 activation='relu'))
model.add(BatchNormalization(axis=bn_axis))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Block 2.
model.add(ZeroPadding2D(padding=(2, 2)))  # Padding of 2
model.add(Conv2D(32, kernel_size=(5, 5),
                 strides=(1, 1),
                 activation='relu'))
model.add(BatchNormalization(axis=bn_axis))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Classifier. The softmax is required because categorical_crossentropy
# (with its default from_logits=False) expects class probabilities, not logits.
model.add(Flatten())
model.add(Dense(num_classes, activation='softmax'))

# Training configuration: Adam optimizer with a learning rate of 1e-4,
# categorical cross-entropy loss, and accuracy reported as a metric.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

# Fit on the training split; the test split is passed as validation data,
# so it is evaluated after every epoch.
history = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

# Final evaluation on the test split: score[0] is the loss, score[1] the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# # Convolutional neural network (two convolutional layers)
# class ConvNet(nn.Module):
#     def __init__(self, num_classes=10):
#         super(ConvNet, self).__init__()
#         self.layer1 = nn.Sequential(
#             nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
#             nn.BatchNorm2d(16),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2, stride=2)
#             )
#         self.layer2 = nn.Sequential(
#             nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
#             nn.BatchNorm2d(32),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2, stride=2))
#         self.fc = nn.Linear(7*7*32, num_classes)
        
#     def forward(self, x):
#         out = self.layer1(x)
#         out = self.layer2(out)
#         out = out.reshape(out.size(0), -1)
#         out = self.fc(out)
#         return out

# model = ConvNet(num_classes).to(device)

In [None]:
# Hyperparameters
num_classes = 10       # size of the network's output layer
batch_size = 128       # forwarded to log_progress during training
num_epochs = 12        # number of passes of the training loop
learning_rate = 1.0    # passed as lr= to the optimizer; value not yet validated

In [None]:
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    """PyTorch port of the reference Keras MNIST convnet.

    The Keras model being mirrored is::

        Conv2D(32, (3, 3), relu) -> Conv2D(64, (3, 3), relu)
        -> MaxPooling2D((2, 2)) -> Dropout(0.25) -> Flatten()
        -> Dense(128, relu) -> Dropout(0.5) -> Dense(num_classes, softmax)

    The trailing softmax is deliberately left out: ``forward`` returns raw
    logits, which is what ``nn.CrossEntropyLoss`` expects.

    Both convolutions use no padding (Keras' default for ``Conv2D``), so a
    ``(N, 1, 28, 28)`` input shrinks to 26x26, then 24x24, and to 12x12 after
    pooling. ``fc1`` is sized for exactly that input resolution.

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        # Conv2D(32, 3x3) + ReLU: (N, 1, 28, 28) -> (N, 32, 26, 26)
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        # Conv2D(64, 3x3) + ReLU: (N, 32, 26, 26) -> (N, 64, 24, 24).
        # in_channels must equal layer1's out_channels (32).
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        # MaxPooling2D(2x2): (N, 64, 24, 24) -> (N, 64, 12, 12)
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.drop_out25 = nn.Dropout(p=0.25)
        self.flat = nn.Flatten()
        # Dense(128) + ReLU on the flattened 12 * 12 * 64 feature vector
        self.fc1 = nn.Sequential(
            nn.Linear(12 * 12 * 64, 128),
            nn.ReLU(),
        )
        self.drop_out5 = nn.Dropout(p=0.5)
        # Dense(num_classes); no softmax, see class docstring
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a batch of ``(N, 1, 28, 28)`` images to ``(N, num_classes)`` logits."""
        out = self.layer1(x)        # Conv2D, relu
        out = self.layer2(out)      # Conv2D, relu
        out = self.maxpool(out)     # MaxPooling2D 2x2
        out = self.drop_out25(out)  # Dropout 0.25
        out = self.flat(out)        # Flatten
        out = self.fc1(out)         # Dense, relu
        out = self.drop_out5(out)   # Dropout 0.5
        out = self.fc2(out)         # Dense (logits)
        return out

# Build the network, then move its parameters onto the selected device
model = ConvNet(num_classes=num_classes)
model = model.to(device)

In [None]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adadelta(model.parameters(), lr=learning_rate) # Changed to Adadelta with lr=1.0

# Keep track of average losses, training accuracy and validation accuracy for each epoch
train_loss_history = np.zeros(num_epochs)
train_acc_history  = np.zeros(num_epochs)
val_loss_history   = np.zeros(num_epochs)
val_acc_history    = np.zeros(num_epochs)

start_time = datetime.now()

for epoch in range(num_epochs):

    # ============================= Training ===============================
    # Place network in training mode
    model.train()

    # running_loss: sum of per-batch mean losses (what log_progress receives).
    # loss_sum:     sum of per-image losses, used for the true epoch average.
    running_loss   = 0.0
    loss_sum       = 0.0
    num_correct    = 0
    total_images   = 0

    for batch_num, (inputs, labels) in enumerate(train_loader):
        # [inputs] and [labels] is one batch of images and their classes
        inputs = inputs.to(device)
        labels = labels.to(device).reshape(-1)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update statistics for this batch (plain Python numbers, not tensors)
        curr_loss = loss.item()
        _, preds = torch.max(outputs, 1)
        curr_images = labels.numel()
        curr_correct = torch.sum(preds == labels).item()

        # Update running statistics for this epoch.
        # criterion averages over the batch, so weight by batch size to
        # recover the summed loss (the last batch may be smaller).
        running_loss += curr_loss
        loss_sum += curr_loss * curr_images
        num_correct += curr_correct
        total_images += curr_images

        # Log Progress every 200 batches
        if (batch_num + 1) % 200 == 0:
            log_progress(running_loss, batch_num, batch_size, epoch, num_epochs, train_loader)

    # Update statistics for epoch (average over every image seen, not just the last batch)
    train_loss_history[epoch] = loss_sum / float(max(total_images, 1))
    train_acc_history[epoch]  = float(num_correct) / float(max(total_images, 1))
    print("Train Avg. Loss: [{}] Acc: {} on {} images".format(
        round(train_loss_history[epoch],4), round(train_acc_history[epoch],4), total_images) )

    # ============================ Validation ==============================
    print("Validating...")
    # Place network in testing mode (dropout etc. switch to inference behaviour)
    model.eval()

    running_loss   = 0.0
    loss_sum       = 0.0
    num_correct    = 0
    total_images   = 0

    # model.eval() alone does not stop autograd from recording the graph;
    # no_grad() does, which saves memory and time during validation.
    with torch.no_grad():
        for batch_num, (inputs, labels) in enumerate(test_loader):
            inputs = inputs.to(device)
            # reshape(-1) rather than squeeze(): squeeze() would turn a
            # batch of one label into a 0-d tensor.
            labels = labels.to(device).reshape(-1)

            # Propagate batch through network
            outputs  = model(inputs)
            loss     = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)

            # Update statistics for this batch
            curr_loss     = loss.item()
            curr_images   = labels.numel()
            running_loss  += curr_loss
            loss_sum      += curr_loss * curr_images
            num_correct   += torch.sum(preds == labels).item()
            total_images  += curr_images

            # Log Progress every 200 batches
            if (batch_num + 1) % 200 == 0:
                log_progress(running_loss, batch_num, batch_size, epoch, num_epochs, test_loader)

    # Update statistics for validation data
    val_loss_history[epoch] = loss_sum / float(max(total_images, 1))
    val_acc_history[epoch]  = float(num_correct) / float(max(total_images, 1))
    print("Val Avg. Loss: [{}] Acc: {} on {} images\n".format(
        round(val_loss_history[epoch],4), round(val_acc_history[epoch],4), total_images))

print("Time Elapsed: {} seconds".format((datetime.now() - start_time).total_seconds()))