In [1]:
from torchvision.models import resnet50
from torch.autograd import Variable
import torch
import time
import importlib
import preprocessing

In [2]:
def get_model():
    # cuda release cached memory
    torch.cuda.empty_cache()
    model_ft = resnet50(pretrained=True)
    input_features = model_ft.fc.in_features
    model_ft.fc = torch.nn.Linear(input_features, 2)
    if torch.cuda.is_available():
        model_ft = model_ft.cuda()
    return model_ft

def training(model_ft, datasets, epochs=25, learning_rate=0.001):
    begin = time.time()
    
    datasets_size = {'train': len(datasets['train'].dataset), 'valid' : len(datasets['valid'].dataset)}
        
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model_ft.parameters(), lr=learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    
    for epoch in range(epochs):
        epoch_time = time.time()
        print(f'----------------- Epoch {epoch} -----------------')
        for mode in ['train', 'valid']:
            if mode == 'train':
                model_ft.train(True)
            else:
                model_ft.train(False)
            
            total_corrects = 0
            total_loss = 0
                        
            # mini batch
            for data in datasets[mode]:
                inputs, labels = data
                if torch.cuda.is_available():
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                if mode == 'train':
                    optimizer.zero_grad()

                outputs = model_ft(inputs)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs.data, 1)
                
                if mode == 'train':
                    loss.backward()
                    optimizer.step()
                    
                total_loss += loss.item()
                total_corrects += torch.sum(preds == labels).item()
            
            print(f'{mode} Error : {total_loss / datasets_size[mode]}')
            print(f'{mode} Correct : {total_corrects / datasets_size[mode]}')
            print(f'Time : {epoch_time / (60)}M')
            
        # decay learning rate according to epoch
        lr_scheduler.step()
    
    

In [3]:
train_data = preprocessing.dataset_load('./dogs-vs-cats-redux-kernels-edition/train/train', 32, True, 3)
valid_data = preprocessing.dataset_load('./dogs-vs-cats-redux-kernels-edition/train/valid', 32, True, 3)
datasets = {'train' : train_data, 'valid' : valid_data}

model_ft = get_model()

In [None]:
training(model_ft, datasets)
print('current memory allocated: {}'.format(torch.cuda.memory_allocated() / 1024 ** 2))
print('max memory allocated: {}'.format(torch.cuda.max_memory_allocated() / 1024 ** 2))
print('cached memory: {}'.format(torch.cuda.memory_cached() / 1024 ** 2))

----------------- Epoch 0 -----------------
train Error : 0.007070814488165909
train Correct : 0.9026666666666666
Time : 26362137.050365787M
----------------- Epoch 0 -----------------
valid Error : 0.005345998051762581
valid Correct : 0.932
Time : 26362137.182647176M
----------------- Epoch 1 -----------------
train Error : 0.004672418549077378
train Correct : 0.9386222222222222
Time : 26362139.80254138M
----------------- Epoch 1 -----------------
valid Error : 0.0043663413725793365
valid Correct : 0.9436
Time : 26362139.92593534M
----------------- Epoch 2 -----------------
train Error : 0.003959796860317389
train Correct : 0.9495111111111111
Time : 26362142.539418347M
----------------- Epoch 2 -----------------
valid Error : 0.005420207419246435
valid Correct : 0.9356
Time : 26362142.66357204M


True