In [1]:
import torch
import torchvision
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
import time

In [3]:
learning_rate = 1e-3
batch_size = 64
epoches = 50

if torch.cuda.is_available():
#     print(torch.cuda.device_count())
    print("CUDA inside")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

CUDA inside


In [4]:
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(), # The order should be right
    transforms.Normalize([0.485, 0.456, 0.406],
                        [0.229, 0.224, 0.225])
                     ])
train = ImageFolder('/home/shadow_u/DL_with_Pytorch/Chapter3/dogs-vs-cats/train', simple_transform)
valid = ImageFolder('/home/shadow_u/DL_with_Pytorch/Chapter3/dogs-vs-cats/valid', simple_transform)

In [5]:
trainLoader = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=4)
validLoader = DataLoader(valid, batch_size=batch_size, shuffle=False, num_workers=4)

In [6]:
class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()
        
        self.conv = nn.Sequential(
        nn.Conv2d(3, 96, 11, stride=4), # 1
        nn.ReLU(inplace=True),
        nn.MaxPool2d(3, 2),
        nn.Conv2d(96, 256, 5, padding=2), # 2
        nn.ReLU(inplace=True),
        nn.MaxPool2d(3, 2),
        nn.Conv2d(256, 384, 3, padding=1), # 3
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 384, 3, padding=1), # 4
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, 3, padding=1), # 5
        nn.ReLU(inplace=True),
        nn.MaxPool2d(3, 2)
        )
        
        
        self.fc = nn.Sequential(
        nn.Linear(256*5*5, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, 2)
        )

    def forward(self, x):
        x = self.conv(x)
#         print(x.shape)
        x = x.view(x.size(0), 256*5*5)
        x = self.fc(x)
        return x

In [7]:
alexnet = AlexNet()
if torch.cuda.is_available():
    alexnet.cuda()
alexnet.load_state_dict(torch.load('params.pkl'))

In [8]:
criterian = nn.CrossEntropyLoss()
optimizer = optim.SGD(alexnet.parameters(), lr=learning_rate, momentum=0.9)

In [14]:
try:
    since = time.time()
    best_acc = 0.0
    best_params = alexnet.state_dict()
    
    for epoch in range(epoches):
        sum_loss_t = 0.0
        sum_acc_t = 0.0
        sum_loss_v = 0.0
        sum_acc_v = 0.0
        since_epoch = time.time()
            
        for (img, label) in trainLoader:
            img = img.to(device)
            label = label.to(device)

            optimizer.zero_grad()
            output = alexnet(img)
            loss = criterian(output, label)
            # backward
            loss.backward()
            optimizer.step()

            sum_loss_t += loss.item()
            _, predict = torch.max(output, 1)
            correct_num = (predict==label).sum()
            sum_acc_t += correct_num.item()

        for (img, label) in validLoader:
            img = img.to(device)
            label = label.to(device)

            output = alexnet(img)
            loss = criterian(output, label)

            sum_loss_v += loss.item()
            _, predict = torch.max(output, 1)
            correct_num = (predict==label).sum()
            sum_acc_v += correct_num.item()
            if sum_acc_v > best_acc:
                best_params = alexnet.state_dict()

        sum_loss_t /= len(train)
        sum_acc_t /= len(train)
        sum_loss_v /= len(valid)
        sum_acc_v /= len(valid)
        time_elapsed = time.time() - since_epoch
        print("[{:d}/{:d}] Training Loss: {:.5f}, Acc: {:.2%}, Time: {:.0f}m {:.0f}s".format(epoch+1, epoches, sum_loss_t, sum_acc_t, time_elapsed//60, time_elapsed%60))
        print("[{:d}/{:d}] Validate Loss: {:.5f}, Acc: {:.2%}\n".format(epoch+1, epoches, sum_loss_v, sum_acc_v))
        
        if (epoch+1) % 10 == 0:
            torch.save(best_params, 'params.pkl')
            torch.cuda.empty_cache()
            print("Saved the model and emptied cache")
        
except Exception as e:
    torch.save(best_params, 'params.pkl')
    torch.cuda.empty_cache()
    raise e
else:
    torch.cuda.empty_cache()
    alltime_elapsed = time.time()-since
    print("Train complete in {:.0f}m {:.0f}s".format(alltime_elapsed//60, alltime_elapsed%60))

[1/50] Training Loss: 0.00838, Acc: 73.52%, Time: 1m 52s
[1/50] Validate Loss: 0.00828, Acc: 75.50%

[2/50] Training Loss: 0.00757, Acc: 77.25%, Time: 1m 52s
[2/50] Validate Loss: 0.00745, Acc: 78.30%

[3/50] Training Loss: 0.00690, Acc: 79.73%, Time: 1m 52s
[3/50] Validate Loss: 0.00775, Acc: 77.80%

[4/50] Training Loss: 0.00637, Acc: 81.55%, Time: 1m 52s
[4/50] Validate Loss: 0.00689, Acc: 80.90%

[5/50] Training Loss: 0.00573, Acc: 83.92%, Time: 1m 53s
[5/50] Validate Loss: 0.00628, Acc: 82.35%

[6/50] Training Loss: 0.00540, Acc: 85.05%, Time: 1m 52s
[6/50] Validate Loss: 0.00591, Acc: 83.60%

[7/50] Training Loss: 0.00485, Acc: 86.85%, Time: 1m 52s
[7/50] Validate Loss: 0.00668, Acc: 81.00%

[8/50] Training Loss: 0.00454, Acc: 87.88%, Time: 1m 53s
[8/50] Validate Loss: 0.00535, Acc: 86.25%

[9/50] Training Loss: 0.00417, Acc: 88.91%, Time: 1m 52s
[9/50] Validate Loss: 0.00482, Acc: 87.50%

[10/50] Training Loss: 0.00380, Acc: 89.88%, Time: 1m 52s
[10/50] Validate Loss: 0.00491, A