# Download dataset

Reference: PyTorch custom dataset examples > https://github.com/utkuozbulak/pytorch-custom-dataset-examples#future-updates

In [None]:
# Install a pinned gdown release, then use it to fetch a file from Google Drive.
!pip install gdown==3.6.0
# Google Drive id of the file to download
# (presumably the 'wiki_db.mat' file loaded further down -- TODO confirm).
my_file_id = '14ExLzhn9S4pYD1L0uHXj3j3uhNVp54po'
!gdown https://drive.google.com/uc?id={my_file_id}

# Build torch dataloader with custom dataset

In [None]:
import time
import copy

from scipy.io import loadmat
import numpy as np

import torch
from torch import nn
from torch import optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from collections import OrderedDict

In [None]:
# load db
def load_data(mat_path):
    """Read the image array and the age labels from a MATLAB ``.mat`` file.

    Args:
        mat_path: Path of the ``.mat`` file; it must contain the variables
            ``"image"`` and ``"age"``.

    Returns:
        A ``(images, ages)`` tuple: the ``"image"`` variable as loaded, and
        the first row of the ``"age"`` variable.
    """
    contents = loadmat(mat_path)
    images = contents["image"]
    # Only row 0 of "age" is used (1-D data comes back from loadmat as 2-D).
    ages = contents["age"][0]
    return images, ages

# Load the image array and the age labels from the local 'wiki_db.mat' file.
image, age = load_data('wiki_db.mat')

In [None]:
# to create torch custom dataset
class MyDataset(Dataset):
    """In-memory dataset pairing image tensors with integer targets.

    Args:
        data: 4-D NumPy array of images. Axis 3 is moved to position 1
            (presumably channels-last ``(N, H, W, C)`` input becoming
            channels-first ``(N, C, H, W)`` -- confirm against the data).
        target: NumPy array of labels, one per image; stored as a long
            (int64) tensor.
        transform: Optional callable applied to each image tensor when it
            is fetched. ``None`` (the default) returns the image unchanged.
    """

    def __init__(self, data, target, transform=None):
        # Reorder axes (0, 3, 1, 2) before converting to a float tensor.
        data = data.transpose((0, 3, 1, 2))
        self.data = torch.from_numpy(data).float()
        self.target = torch.from_numpy(target).long()
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(image, target)`` pair at ``index``."""
        x = self.data[index]
        y = self.target[index]

        # Return the transformed image; previously the transform result was
        # stored in an unused variable and the raw tensor was returned.
        if self.transform is not None:
            x = self.transform(x)

        return x, y

    def __len__(self):
        """Return the number of images held by the dataset."""
        return len(self.data)

In [None]:
# Creating data indices for training and validation splits:
# Settings for batching and for the train/validation split.
batch_size = 32
validation_split = .2
shuffle_dataset = True
random_seed = 42
dataset_size = len(age)

# The first `split` positions of the (optionally shuffled) index list become
# the validation subset; everything after them is used for training.
split = int(np.floor(validation_split * dataset_size))
indices = list(range(dataset_size))
if shuffle_dataset:
    # Seed NumPy's global RNG so the shuffle is repeatable.
    np.random.seed(random_seed)
    np.random.shuffle(indices)
indices = {'train': indices[split:], 'valid': indices[:split]}

# One random-subset sampler per phase.
sampler = {
    'train': SubsetRandomSampler(indices['train']),
    'valid': SubsetRandomSampler(indices['valid']),
}

# A single dataset instance backs both loaders; the samplers pick the subset.
dataset = MyDataset(image, age)
dataloaders = {
    name: DataLoader(dataset=dataset, batch_size=batch_size, sampler=subset)
    for name, subset in sampler.items()
}


In [None]:
# Build model
#e.g.
# Load a pretrained DenseNet-121 and freeze all of its existing parameters,
# so that only the replacement classifier below is trained.
model = models.densenet121(pretrained=True)
for param in model.parameters():
    param.requires_grad_(False)

# Replacement head: 1024 -> 512 -> 101 outputs
# (101 is presumably one class per age value -- confirm against the labels).
head_layers = OrderedDict()
head_layers['fc1'] = nn.Linear(1024, 512)
head_layers['Relu'] = nn.ReLU()
head_layers['dropout'] = nn.Dropout(0.3)
head_layers['fc2'] = nn.Linear(512, 101)
classifier = nn.Sequential(head_layers)

model.classifier = classifier

# Classification loss; the optimizer is given the new head's parameters only.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.classifier.parameters(),
    lr=0.0001,
    amsgrad=True,
)

# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
# Training
#e.g.
num_epochs = 15
# Use the GPU when one is available; otherwise fall back to the CPU so the
# cell still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
train_losses, val_losses = [], []

model.to(device)
since = time.time()

# Snapshot of the weights that achieved the best validation accuracy so far.
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

for epoch in range(1, num_epochs + 1):
    print('Epoch {}/{}'.format(epoch, num_epochs))
    print('-' * 10)

    # Each epoch has a training and validation phase
    for phase in ['train', 'valid']:
        # NOTE: no optimizer.step() here. Stepping before any backward pass
        # of the phase applies an update that is not driven by fresh
        # gradients. If an LR scheduler is enabled, call scheduler.step()
        # after the training phase's optimizer updates instead.
        if phase == 'train':
            model.train()  # Set model to training mode
        else:
            model.eval()  # Set model to evaluate mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data.
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # statistics: weight the batch loss by the batch size so that
            # dividing by the subset size below gives a per-sample mean.
            running_loss += loss.item() * inputs.size(0)
            # Accumulate a plain Python int so the accuracy below is a float.
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(indices[phase])
        epoch_acc = running_corrects / len(indices[phase])
        print('{} Loss: {:.4f} Acc: {:.4f}'.format(
            phase, epoch_loss, epoch_acc))

        if phase == 'valid':
            val_losses.append(epoch_loss)
            # deep copy the model whenever validation accuracy improves
            if epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        else:
            train_losses.append(epoch_loss)

    print()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
    time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:.4f}'.format(best_acc))

# load best model weights
model.load_state_dict(best_model_wts)

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt


In [None]:
# Draw the recorded training and validation losses on the same axes.
ax = plt.gca()
ax.plot(train_losses, label='Training loss')
ax.plot(val_losses, label='Validation loss')
ax.legend(frameon=False)
plt.show()