In [10]:
#import module
import torch
from torch.autograd import Variable
import numpy as np
import torch.nn as nn
from torchvision import datasets,transforms
import os
import random
import torch.nn.functional as F
import torchvision
import matplotlib.pyplot as plt
import itertools

In [11]:
#ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [12]:
def get_device():
    ''' Get device (if GPU is available, use GPU) '''
    return 'cuda' if torch.cuda.is_available() else 'cpu'

In [42]:
#check the device is cpu or gpu
device = get_device()

In [57]:
#define defalut parameters and you can also change them as you wish
epoch_num = 5
loss_func = torch.nn.CrossEntropyLoss()
param_grid = {
    
    'learning_rate': [0.001,0.01],
    'batch_size': [256],
}
paras = list(itertools.product(param_grid['learning_rate'], param_grid['batch_size']))


In [44]:
def set_seed(seed):
    torch.manual_seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

In [45]:
#run this cell to load MNIST Dataset 
def load_MNIST_dataset():
    train_dataset = datasets.MNIST(root='./mnist_data/',
                                  train=True,
                                  transform = transforms.Compose([
                      transforms.ToTensor(),
                      transforms.Normalize((0.1037,), (0.3081,))
                  ]), download = True)
    test_dataset = datasets.MNIST(root='./mnist_data/',
                                  train=False,
                                  transform = transforms.Compose([
                      transforms.ToTensor(),
                      transforms.Normalize((0.1037,), (0.3081,))
                  ]))
    return train_dataset,test_dataset

In [46]:
#run this cell to load cifar10 Dataset
def load_cifar10_dataset():
    train_dataset = datasets.CIFAR10(root='./cifar10_data/',
                                  train=True,
                                  transform = transforms.Compose([
                      transforms.ToTensor(),
                      transforms.Normalize((0.1037,), (0.3081,)),
                      transforms.Resize(224)
                  ]), download = True)
    test_dataset = datasets.CIFAR10(root='./cifar10_data/',
                                  train=False,
                                  transform = transforms.Compose([
                      transforms.ToTensor(),
                      transforms.Normalize((0.1037,), (0.3081,)),
                      transforms.Resize(224)
                  ]))
    return train_dataset,test_dataset


In [48]:
# Data Loader (Input Pipeline)
def train_loader(train_dataset,batch_size):
    
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size= batch_size ,
                                           shuffle=True)
    return train_loader


def test_loader(test_dataset):
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size= 100,
                                           shuffle=True)
    return test_loader


In [49]:
#visualize the dataset
def visualize_dataset(photo_nums,train_dataset):
    fig = plt.figure()
    for i in range(photo_nums):
      plt.subplot(photo_nums,1,photo_nums+1)
      plt.tight_layout()
      image = (train_dataset.data[i])
      plt.imshow(image, interpolation='none')
      
      plt.xticks([])
      plt.yticks([])
    #plt.title("Original Image")
    #plt.rcParams change the pixels of the image generated
    plt.rcParams['savefig.dpi'] = 300 
    plt.rcParams['figure.dpi'] = 300
    plt.show()

In [50]:
# define Networks, there are three networks defined in this code and you can define your own network if needed
class Net(nn.Module):
  
     def __init__(self):
        super().__init__()
        #1*1*28*28
        self.conv1 = nn.Conv2d(1, 10, 5) 
        self.conv2 = nn.Conv2d(10, 20, 3) 
        self.fc1 = nn.Linear(20 * 10 * 10, 500)
        self.fc2 = nn.Linear(500, 10)
        
     def forward(self, x): 
        in_size = x.size(0)
        out= self.conv1(x) # 1* 10 * 24 *24
        out = F.relu(out)
        out = F.max_pool2d(out, 2, 2) # 1* 10 * 12 * 12
        out = self.conv2(out) # 1* 20 * 10 * 10
        out = F.relu(out)
        out = out.view(in_size, -1) # 1 * 2000
        out = self.fc1(out) # 1 * 500
        out = F.relu(out)
        out = self.fc2(out) # 1 * 10
        out = F.log_softmax(out, dim = 1)
        return out


In [51]:
# define Networks
class Net_cifar(nn.Module):
   def __init__(self):
       super().__init__()    
       self.conv1 = nn.Conv2d(3, 6, 3)  
       self.pool = nn.MaxPool2d(2, 2)  #6*15*15
       self.conv2 = nn.Conv2d(6, 16, 3) #16*13*13
       self.fc1 = nn.Linear(16* 6 * 6, 500)  
       self.fc2 = nn.Linear(500, 200)      
       self.fc3 = nn.Linear(200, 10)     

   def forward(self, x):       
       x = self.pool(F.relu(self.conv1(x)))  
       x = self.pool(F.relu(self.conv2(x)))   
       x = x.view(-1, 16* 6* 6)     
       x = F.relu(self.fc1(x))       
       x = F.relu(self.fc2(x))       
       x = self.fc3(x)                   
       return x                   


In [52]:
# define Networks
class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()
        #self.conv = nn.Conv2d(1, 3, kernel_size=1)
        self.resnet = torchvision.models.resnet18(pretrained=False)
 
    def forward(self, x):
        #x= self.conv(x)
        x= self.resnet(x)
        return x


In [53]:
def train(train_set, model, batch_size, learning_rate,loss_func):

    model.train()
    all_loss = 0
    i = 0
  
    for data,target in train_set:
        
        data,target = Variable(data).to(device), Variable(target).to(device)
                
        opt.zero_grad()
        output = model(data)
        
        loss = loss_func(output, target)
        all_loss += loss
        #backpropagation
        loss.backward()
        #update parmeters
        opt.step()
        i = i+len(data)

    print(i)
    ave_loss = all_loss/i
    print('Epoch {}, training loss is {}'.format(epoch,ave_loss))
    return ave_loss


In [54]:

def test(test_set,model,loss_func):
        
        model.eval()
        
        test_loss = 0
        correct = 0
        i=0
        for data,target in test_set:

            data, target = Variable(data,volatile=True).to(device) ,Variable(target).to(device)
            output = model(data)
            loss = loss_func(output, target)
            test_loss += loss.item() 
            pred = torch.max(output.data,1)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
            i = i+len(data)
                    
        acc = correct/i
        print(i,correct)

        
        ave_test_loss = test_loss/i
        print('test loss is {}, accuracy is {}'.format(ave_test_loss,acc))

        return acc,ave_test_loss


In [74]:
# choose dataset and model
train_dataset, test_dataset = load_MNIST_dataset()
model = Net()

In [75]:
import time
#set_seed(24)

all_time = []
hyper_train_loss = []
hyper_test_loss = []
hyper_test_score = []
all_test_score = 0
for learning_rate in param_grid['learning_rate']:
    for batch_size in param_grid['batch_size']:

        model = model.to(get_device())
        opt = torch.optim.Adam(model.parameters(),lr = learning_rate)
        train_set = train_loader(train_dataset,batch_size)
        test_set = test_loader(test_dataset)

        print('lr is {}, batch size is {}'.format(learning_rate,batch_size))
        start = time.time()
        i = 0
        
        for epoch in range(1, epoch_num):
            train_loss = train(train_set,model,batch_size, learning_rate,loss_func)
            test_score, test_loss  = test(test_set,model,loss_func)

            hyper_train_loss.append(train_loss)
            hyper_test_loss.append(test_loss)
            hyper_test_score.append(test_score)
            
            all_test_score += test_score
            i += 1
            
        ave_all_test_score = all_test_score/i
            
        end = time.time()
        t_each = end-start
        print('run time for lr {},batch size {} is {}'.format(learning_rate,batch_size,t_each))
        all_time.append(t_each)


lr is 0.001, batch size is 256
60000
Epoch 1, training loss is 0.0009132426930591464
10000 tensor(9766, device='cuda:0')
test loss is 0.0007698341075330973, accuracy is 0.9765999913215637
60000
Epoch 2, training loss is 0.0002644273918122053
10000 tensor(9863, device='cuda:0')
test loss is 0.00041218348164111376, accuracy is 0.986299991607666
60000
Epoch 3, training loss is 0.00017757841851562262
10000 tensor(9888, device='cuda:0')
test loss is 0.00033281968769151716, accuracy is 0.9887999892234802
60000
Epoch 4, training loss is 0.00012990704271942377
10000 tensor(9871, device='cuda:0')
test loss is 0.0004205395473050885, accuracy is 0.9870999455451965
run time for lr 0.001,batch size 256 is 62.66845941543579
lr is 0.01, batch size is 256
60000
Epoch 1, training loss is 0.001369639066979289
10000 tensor(9792, device='cuda:0')
test loss is 0.0006198435838334263, accuracy is 0.9791999459266663
60000
Epoch 2, training loss is 0.0002086363674607128
10000 tensor(9838, device='cuda:0')
test

In [76]:
plot_train_loss = []
plot_test_loss = hyper_test_loss
plot_test_score = []
for num in hyper_train_loss:
    plot_train_loss.append(num.item())
for num in hyper_test_score:
    plot_test_score.append(num.item())
all_color = ['darkblue','blue','lightblue','darkred','red','tomato','darkgreen','limegreen','lightgreen']

In [None]:
#plot training loss
fig = plt.figure()
for i in range(len(paras)):
    plt.plot(range(1, epoch_num), plot_train_loss[i*(epoch_num-1):((i*epoch_num)+epoch_num-1-i)], color=all_color[i],alpha=0.8,label=paras[i])
plt.xlabel('epoch')
plt.ylabel('loss')
plt.xticks(range(1,epoch_num))
plt.legend(loc=1,fontsize=6)
plt.title('Training loss')
plt.rcParams['savefig.dpi'] = 300 
plt.rcParams['figure.dpi'] = 300
plt.show()

In [None]:
#plot testing loss
fig = plt.figure()
for i in range(len(paras)):
    plt.plot(range(1, epoch_num), plot_test_loss[i*(epoch_num-1):((i*epoch_num)+epoch_num-1-i)], color=all_color[i],alpha=0.8,label=paras[i])
plt.xlabel('epoch')  
plt.ylabel('loss')
plt.xticks(range(1,epoch_num))
plt.legend(loc=1,fontsize=6)
plt.title('Testing loss')
plt.rcParams['savefig.dpi'] = 300 
plt.rcParams['figure.dpi'] = 300
plt.show()

In [None]:
#plot test score
fig = plt.figure()

for i in range(len(paras)):
    plt.plot(range(1, epoch_num), plot_test_score[i*(epoch_num-1):((i*epoch_num)+epoch_num-1-i)], color=all_color[i],alpha=0.8,label=paras[i])
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.xticks(range(1,epoch_num))
plt.legend(loc=4,fontsize=6)
plt.title('Testing score')
#plt.rcParams['savefig.dpi'] = 300 
#plt.rcParams['figure.dpi'] = 300
plt.show()