In [29]:
from torch.utils.data import DataLoader
from torchvision import utils
import torchvision.models as models

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import SubsetRandomSampler
import torch

from aptos_dataset import aptos_dataset
from preprocessing import preprocessing
from model import classifier

import time
import copy
import numpy as np

data_dir = '/media/sangwook/MGTEC/blindness_detection_data/2019/train_images/'
label_file = '/media/sangwook/MGTEC/blindness_detection_data/2019/train_2019.csv'

dataset = aptos_dataset(d_path=data_dir, label_file=label_file)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

validation_split = 0.2
shuffle_dataset = True
random_seed = 102
batch_size = 4

# Creating data indices for training and validation splits:
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, 
                                           sampler=train_sampler)
validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                sampler=valid_sampler)

dataloaders = dict(train = train_loader, val =validation_loader)
dataset_sizes = dict(train = len(train_sampler), val = len(valid_sampler))

In [32]:
dataset_sizes

phase = 'train'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for i, batch in enumerate(dataloaders[phase]):
    inputs = batch['image'].to(device)
    labels = batch['label'].to(device)
    
    break

In [33]:
inputs.size()

torch.Size([4, 3, 512, 512])

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # 각 에폭(epoch)은 학습 단계와 검증 단계를 갖습니다.
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train()  # 모델을 학습 모드로 설정
            else:
                model.eval()   # 모델을 평가 모드로 설정

            running_loss = 0.0
            running_corrects = 0

            # 데이터를 반복
            for i, batch in enumerate(dataloaders[phase]):
                inputs = batch['image'].to(device).float()
                labels = batch['label'].to(device)

                # 매개변수 경사도를 0으로 설정
                optimizer.zero_grad()

                # 순전파
                # 학습 시에만 연산 기록을 추적
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # 학습 단계인 경우 역전파 + 최적화
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # 통계
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # 모델을 깊은 복사(deep copy)함
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # 가장 나은 모델 가중치를 불러옴
    model.load_state_dict(best_model_wts)
    return model

In [10]:
i_batch

{'image': tensor([[[[128, 128, 128,  ..., 128, 128, 128],
           [128, 128, 128,  ..., 128, 128, 128],
           [128, 128, 128,  ..., 128, 128, 136],
           ...,
           [124, 124, 124,  ..., 132, 132, 132],
           [124, 124, 124,  ..., 132, 132, 132],
           [128, 124, 124,  ..., 128, 132, 132]],
 
          [[128, 128, 128,  ..., 128, 128, 128],
           [128, 128, 128,  ..., 128, 128, 128],
           [128, 128, 128,  ..., 128, 128, 132],
           ...,
           [124, 124, 124,  ..., 132, 132, 132],
           [124, 124, 124,  ..., 132, 132, 132],
           [128, 124, 124,  ..., 128, 132, 132]],
 
          [[128, 128, 128,  ..., 128, 128, 128],
           [128, 128, 128,  ..., 128, 128, 128],
           [128, 128, 128,  ..., 128, 128, 132],
           ...,
           [124, 124, 124,  ..., 132, 132, 132],
           [124, 124, 124,  ..., 132, 132, 132],
           [128, 124, 124,  ..., 128, 132, 132]]],
 
 
         [[[128, 128, 128,  ..., 128, 128, 128],


In [12]:
len(train_loader)

733

In [14]:
len(validation_loader)

183

In [11]:
for i, i_batch in enumerate(train_loader):
    
    print(i, i_batch['image'].size())

0 torch.Size([4, 3, 512, 512])
1 torch.Size([4, 3, 512, 512])
2 torch.Size([4, 3, 512, 512])
3 torch.Size([4, 3, 512, 512])
4 torch.Size([4, 3, 512, 512])
5 torch.Size([4, 3, 512, 512])


KeyboardInterrupt: 

In [3]:
torch.utils.data.random_split(dataloader.dataset, 10)

TypeError: 'int' object is not iterable

In [20]:
y_output.float(), y_true.long()

(tensor([[-0.2431, -0.1761,  0.9961, -0.3093,  0.7021],
         [ 0.5917,  0.5105,  1.2794, -0.6101,  0.3022],
         [-0.0390, -0.9371,  1.1241,  0.2495,  1.1459],
         [-0.7444,  0.0964,  0.4105, -0.2357,  0.2959]], device='cuda:0',
        grad_fn=<MmBackward>), tensor([0, 2, 2, 4], device='cuda:0'))