# Direct Model
## Setup

In [1]:
from data_utils import *
from train import * 

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
import torch.nn.functional as F
import torchvision.transforms as T
import torchvision.models as models

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

USE_GPU = True

# Run on CUDA only when it is both requested and actually available;
# otherwise everything stays on the CPU.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

# Floating-point type handed to the training / cross-validation helpers.
dtype = torch.float32
# Let cuDNN pick convolution algorithms by benchmarking them.
torch.backends.cudnn.benchmark = True

print('use cuda = ', torch.cuda.is_available())
print('using device:', device)

use cuda =  True
using device: cuda


## Load Dataset

In [2]:
# Load the bolete dataset through the project helper.
data = load_bolete_data()
print()
# One line per entry: its key followed by the shape NumPy reports for it.
for k in data.keys():
    entry_shape = np.shape(data[k])
    print(k, entry_shape)

ItemsViewHDF5(<HDF5 file "bolete.h5" (mode r)>)

bolete-characteristics (38, 1868)
bolete-edibility (5, 1868)
bolete-images (3, 512, 512, 1868)
bolete-labels (1868,)


## Split into train and test

In [3]:
X_train, X_test, Y_train, Y_test, y_train, y_test = get_train_and_test(data, "bolete-labels")

# Image dimensions and number of distinct training labels; the model builders
# defined below read C, H, W and M from the notebook namespace.
N, H, W, C = X_train.shape
M = np.size(np.unique(Y_train))

# Store class indices as explicit 64-bit integers (the dtype torch.long maps to).
# `np.long` was deprecated in NumPy 1.20 and removed in 1.24; in NumPy 2.x it is
# the platform C `long`, which is not guaranteed to be 64-bit.
Y_train = Y_train.astype(np.int64)
Y_test = Y_test.astype(np.int64)

## Define models

In [13]:
def simple_model(in_channels=None, height=None, width=None, num_classes=None):
    """Build a small three-stage convolutional classifier.

    The architecture is copied from the final model of assignment 2: three
    Conv -> BatchNorm -> ReLU -> Dropout stages, followed by a two-layer
    fully connected head. Every convolution pads its input so the spatial
    size is unchanged, which is why the first linear layer expects
    ``16 * height * width`` input features.

    Args:
        in_channels: Number of input image channels. Defaults to the
            notebook-level ``C``.
        height: Input image height. Defaults to the notebook-level ``H``.
        width: Input image width. Defaults to the notebook-level ``W``.
        num_classes: Number of output scores per image. Defaults to the
            notebook-level ``M``.

    Returns:
        An ``nn.Sequential`` mapping a ``(batch, in_channels, height, width)``
        tensor to ``(batch, num_classes)`` scores.
    """
    # Fall back to the dataset dimensions defined in the notebook when the
    # caller does not override them (keeps `simple_model()` working as before).
    in_channels = C if in_channels is None else in_channels
    height = H if height is None else height
    width = W if width is None else width
    num_classes = M if num_classes is None else num_classes

    channel_1 = 32
    channel_2 = 24
    channel_3 = 16
    hidden_dim = 150

    # BatchNorm follows each convolution; nn.GroupNorm(4, channels) was left
    # commented out in the original as the alternative normalisation.
    model = nn.Sequential(
        nn.Conv2d(in_channels, channel_1, kernel_size=5, padding=2),
        nn.BatchNorm2d(channel_1),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Conv2d(channel_1, channel_2, kernel_size=3, padding=1),
        nn.BatchNorm2d(channel_2),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Conv2d(channel_2, channel_3, kernel_size=3, padding=1),
        nn.BatchNorm2d(channel_3),
        nn.ReLU(),
        nn.Dropout(0.1),
        # Built-in flatten to [N, -1]; unlike a class defined inside this
        # function, it keeps the whole model picklable (e.g. torch.save(model)).
        nn.Flatten(),
        nn.Linear(channel_3 * height * width, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, num_classes)
    )

    return model

def freeze(model):
    """Disable gradient tracking on every parameter of ``model``.

    The module is modified in place and the same object is returned so the
    call can be chained.
    """
    for weight in model.parameters():
        weight.requires_grad_(False)
    return model

def modified_alexnet():
    """Return torchvision's pretrained AlexNet with a new M-way output layer.

    All pretrained parameters are frozen via ``freeze``; the replacement
    layer is created afterwards, so it is the only part left trainable.
    The final architecture is printed before being returned.
    """
    net = freeze(models.alexnet(pretrained=True))

    # TODO (carried over from the original): model.classifier[0] = ???
    new_head = nn.Linear(in_features=4096, out_features=M, bias=True)
    net.classifier[6] = new_head
    print(net)
    return net

def modified_googlenet():
    """Return torchvision's pretrained GoogLeNet with a new M-way ``fc`` layer.

    All pretrained parameters are frozen via ``freeze``; the replacement
    ``fc`` layer keeps the original input width and is created afterwards,
    so it is the only part left trainable. The final architecture is printed
    before being returned.
    """
    net = freeze(models.googlenet(pretrained=True))

    # Reuse the existing head's input width so only the output size changes.
    net.fc = nn.Linear(in_features=net.fc.in_features, out_features=M, bias=True)
    print(net)
    return net

def modified_resnext():
    """Return torchvision's pretrained ResNeXt-101 (32x8d) with a new M-way ``fc`` layer.

    All pretrained parameters are frozen via ``freeze``; the replacement
    ``fc`` layer keeps the original input width and is created afterwards,
    so it is the only part left trainable.
    """
    backbone = freeze(models.resnext101_32x8d(pretrained=True))

    # Reuse the existing head's input width so only the output size changes.
    backbone.fc = nn.Linear(backbone.fc.in_features, M)
    return backbone

## Define the loss function

In [10]:
# Cross-entropy over raw class scores.
loss_fn = nn.CrossEntropyLoss()

def pred_fn(scores):
    """Return the predicted class index for each row of ``scores``.

    Args:
        scores: Tensor of per-class scores with classes along dimension 1.

    Returns:
        Tensor holding, for each row, the index of the largest score.
    """
    # Softmax is monotonic along dim 1, so taking argmax of the raw scores
    # gives the same prediction without building a Softmax module per call.
    return torch.argmax(scores, dim=1)

## Cross Validate on training data

In [None]:
# Seed before the model is built: the replacement classification layer is
# randomly initialised at construction time, so seeding only afterwards
# leaves its starting weights different on every run.
torch.manual_seed(0)

# Pick exactly one architecture to cross-validate.
# model = simple_model()
# model = modified_alexnet()
# model = modified_googlenet()
model = modified_resnext()

optimizer = optim.RMSprop(
    model.parameters(),
    lr=1e-3,
    momentum=0.4,
    alpha=0.99
    )

transform = T.Compose([T.ToTensor()])

batch_size = 8
num_epochs = 90

# Re-seed right before training, at the point where the original seeded,
# so cross_val starts from the same seed as before.
torch.manual_seed(0)
history = cross_val(
    X_train=X_train,
    Y_train=Y_train,
    y_train=y_train,
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    pred_fn=pred_fn,
    batch_size=batch_size,
    num_epochs=num_epochs,
    show_every=1,
    folds=1,
    test_size=0.3,
    device=device,
    dtype=dtype,
    transform=transform,
)

CV model on:  cuda
CV Fold:  1
Training model on:  cuda
Iter: 0
train Loss: 11.6596 Acc: 0.0031
Iter: 1
val Loss: 7.1986 Acc: 0.0107
Iter: 2
train Loss: 6.0528 Acc: 0.0291
Iter: 3
val Loss: 6.1356 Acc: 0.0071
Iter: 4
train Loss: 4.8655 Acc: 0.0704
Iter: 5
val Loss: 6.3362 Acc: 0.0285
Iter: 6
train Loss: 4.2467 Acc: 0.0873
Iter: 7
val Loss: 5.4914 Acc: 0.0783
Iter: 8
train Loss: 3.4461 Acc: 0.1960
Iter: 9
val Loss: 5.3299 Acc: 0.0498
Iter: 10
train Loss: 2.8630 Acc: 0.2695
Iter: 11
val Loss: 5.0432 Acc: 0.0925
Iter: 12
train Loss: 2.3735 Acc: 0.4028
Iter: 13
val Loss: 4.8742 Acc: 0.0996
Iter: 14
train Loss: 1.9589 Acc: 0.5038
Iter: 15
val Loss: 5.4673 Acc: 0.0854
Iter: 16
train Loss: 1.6392 Acc: 0.5590
Iter: 17
val Loss: 5.1771 Acc: 0.0890
Iter: 18
train Loss: 1.3396 Acc: 0.6784
Iter: 19
val Loss: 4.8323 Acc: 0.1352
Iter: 20
train Loss: 1.1336 Acc: 0.7305
Iter: 21
val Loss: 4.9152 Acc: 0.1388
Iter: 22
train Loss: 0.9882 Acc: 0.7596
Iter: 23
val Loss: 5.0166 Acc: 0.1530
Iter: 24
train Lo

## Plot Training History

In [None]:
fig, axes = plt.subplots(1, 2, figsize=[15,5], sharex=False, sharey=False)
axes[0].set_ylabel("Loss")
axes[1].set_ylabel("Accuracy")

# Left panel shows history["train"], right panel shows history["val"];
# each element of the selected history entry is drawn as its own line.
# NOTE(review): the y-labels say Loss / Accuracy while the panels are split
# by phase (train / val) - confirm against the structure cross_val returns.
for ax, phase in zip(axes, ["train", "val"]):
    ax.set_title(phase)
    ax.set_xlabel("Epoch")
    curves = history[phase]
    for idx in range(len(curves)):
        ax.plot(curves[idx])

## Train on all the data

In [None]:
# Single loader over the whole training split; only a "train" phase is run.
dataloader = get_loader(X_train, Y_train, batch_size, transform)
dataloaders = dict(train=dataloader)

# NOTE(review): `model` and `optimizer` are the same objects that were passed
# to cross_val, so this presumably continues from that state rather than
# starting from a fresh model - confirm this is intended.
h = train_model(
    model, optimizer, dataloaders, loss_fn, pred_fn, num_epochs,
    show_every=1, device=device, dtype=dtype, phases=["train"],
)

## Evaluate 

In [None]:
# X, Y, y = X_test, Y_test, y_test
# Placeholder for the real test data: evaluate on a subset drawn from the training split.
X, Y, y = get_val(X_train, Y_train, y_train)

# Positional arguments: images, outputs, labels, model, number of classes, prediction function.
# NOTE(review): transform=None here, whereas training used the `transform`
# pipeline - confirm evaluate() applies an equivalent conversion by default.
scores, y_pred, y_true, y_labels = evaluate(
    X, Y, y, model, M, pred_fn,
    device=device, transform=None,
)

### Save Evaluation Data

In [None]:
# Hand the evaluation outputs to the project helper under the tag "direct"
# (presumably persisted for later analysis - see save_raw_eval_data for the format).
save_raw_eval_data(scores, y_pred, y_true, y_labels, "direct")

### Check status of GPU Memory for Debugging Purposes

In [None]:
import torch
import gc

def iter_live_tensors():
    """Yield ``(type, size)`` for each tensor-like object the garbage collector tracks.

    An object qualifies when it is a tensor itself or exposes a ``data``
    attribute that is a tensor. Objects whose inspection raises are skipped.
    """
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
                yield type(obj), obj.size()
        except Exception:
            # Best-effort scan: arbitrary objects may raise on attribute access.
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still stop the cell.
            continue

for obj_type, obj_size in iter_live_tensors():
    print(obj_type, obj_size)