# Direct Model
## Setup

In [53]:
from load_database import load_bolete_data
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

data = load_bolete_data()
print()
for k in data.keys():
    print(k, np.shape(data[k]))

ItemsViewHDF5(<HDF5 file "bolete.h5" (mode r)>)
bolete-characteristics <HDF5 dataset "bolete-characteristics": shape (38, 1873), type "|u1">
bolete-edibility <HDF5 dataset "bolete-edibility": shape (5, 1873), type "|u1">
bolete-images <HDF5 dataset "bolete-images": shape (3, 512, 512, 1873), type "|u1">
bolete-labels <HDF5 dataset "bolete-labels": shape (1873,), type "<u8">

bolete-characteristics (38, 1873)
bolete-edibility (5, 1873)
bolete-images (3, 512, 512, 1873)
bolete-labels (1873,)


## Partition Data
Divide data in train and held-out test partitions

In [63]:
sss_tt = StratifiedShuffleSplit(n_splits=1, test_size=0.3, train_size=0.7, random_state=0)
X, y = data["bolete-images"].T, data["bolete-labels"].T
for train_idx, test_idx in sss_tt.split(X, y):
    train_x = X[train_idx]
    train_y_sp = data["bolete-labels"].T[train_idx]
    train_y_ed = data["bolete-edibility"].T[train_idx]
    train_y_ch = data["bolete-characteristics"].T[train_idx]

    test_x = X[test_idx]
    test_y_sp = data["bolete-labels"].T[test_idx]
    test_y_ed = data["bolete-edibility"].T[test_idx]
    test_y_ch = data["bolete-characteristics"].T[test_idx]


In [73]:
M = np.size(np.unique(data["bolete-labels"]))
N, H, W, C = np.shape(data["bolete-images"].T)
print("Number of species:", M)
print("N = {}, H = {}, W = {}, C = {}".format(N, H, W, C))

Number of species: 178
N = 1873, H = 512, W = 512, C = 3


## Setup PyTorch

In [80]:
import torchvision.models as models
import torch
from torch import nn, optim

USE_GPU = False
dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

using device: cuda


## Define training and accuracy procedure

In [90]:
def check_accuracy(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
        return acc

def train(loader, model, optimizer, loader_train, loader_val, epochs=1):
    """
    Train a model using the PyTorch Module API.
    
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Nothing, but prints model accuracies during training.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    history = []
    for e in range(epochs):
        for t, (x,y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = x.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                history.append([loss.item(), check_accuracy(loader_val, model)])
                print()
    return history

## Define models

In [88]:
def simple_model():
    # copy final model from ass igment 2
    class Flatten(nn.Module):
        def forward(self, x):
            return flatten(x)

    channel_1 = 32
    channel_2 = 24
    channel_3 = 16
    hidden_dim = 150
    torch.manual_seed(0)
    learning_rate = 3e-3 # 1e-2

    model = nn.Sequential(
        nn.Conv2d(C, channel_1, kernel_size=5, padding=2),
        # nn.GroupNorm(4,channel_1),
        nn.BatchNorm2d(channel_1),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Conv2d(channel_1, channel_2, kernel_size=3, padding=1),
        # nn.GroupNorm(4,channel_2),
        nn.BatchNorm2d(channel_2),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Conv2d(channel_2, channel_3, kernel_size=3, padding=1),
        # nn.GroupNorm(4,channel_3),
        nn.BatchNorm2d(channel_3),
        nn.ReLU(),
        nn.Dropout(0.1),
        Flatten(),
        nn.Linear(channel_3 * H * W, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, M)
    )
    optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                        momentum=0.8, nesterov=True)
    return model, optimizer


## Cross Validate on training data

In [91]:
folds = 1
sss = StratifiedShuffleSplit(n_splits=folds, random_state=0, test_size=0.2, train_size=0.8)
for train_index, val_index in sss.split(train_x, train_y_sp):
    k_train_x, k_val_x = train_x[train_index], train_x[val_index]
    k_train_y, k_val_y = train_y_sp[train_index], train_y_sp[val_index]
    print(np.shape(k_train_x), np.shape(k_train_y), np.shape(k_val_x), np.shape(k_val_y))
    model, optimizer = simple_model()

    loader_train = DataLoader(cifar10_train, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))
    loader_val = DataLoader(cifar10_train, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))
    train(model, optimizer, loader_train, loader_val, epochs=1)

(1048, 512, 512, 3) (1048,) (263, 512, 512, 3) (263,)


TypeError: train() missing 2 required positional arguments: 'X' and 'Y'