# BiteMe | Train

This notebook covers the most important part of the project: the modelling. It tests training methodologies and is where the final algorithm is chosen. Validation also happens here, before final testing, which is conducted in the test notebook. This stage is highly iterative, so all model artefacts, logs and configurations are recorded and saved to disk automatically. This initial setup, which will eventually become the MLOps for the final product, will be really useful and helps keep track of what is successful and what isn't.

Models to try:
 - resnet50v2
 - resnet101v2
 - resnet152v2
 - vgg19
 - densenet169
 - densenet121
 - densenet201
 - inceptionv3
 - inception_resnetv2
 - resnext50
 - resnext101
 - xception
 - efficientnet_b0
 - efficientnet_b1
 - efficientnet_b2
 - efficientnet_b3
 - efficientnet_b4
 - efficientnet_b5

Initial model work uses simple, typical image recognition models (CNN architectures) to see how effective they can be for this problem. **TODO:** the follow-on sentence beginning "Although I don't" was left unfinished; complete it or remove it.


In [None]:
# Basic imports
import pandas as pd
import numpy as np
import os
import sys

# Data visualisation
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn

# Modelling imports
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# Image processing
import cv2
import albumentations as A
import imgaug as ia
import imgaug.augmenters as iaa

import torch

# Local imports
sys.path.append("..")
from helpers import read_images, augs, get_augs
from constants import *

plt.rcParams["figure.figsize"] = (14, 8)

# Seed every random number generator this notebook relies on so that
# augmentation and model training are repeatable between runs.
np.random.seed(SEED)
ia.seed(SEED)
# torch keeps its own generator: without this call weight initialisation
# (and anything else drawing from torch's RNG) changes on every run.
torch.manual_seed(SEED)

In [None]:
# Define directories
base_dir_path = "../"

# Build every path up front, relative to the project root
data_dir_path = os.path.join(base_dir_path, "data")
data_preprocessed_dir_path = os.path.join(data_dir_path, "preprocessed")
data_preprocessed_train_dir_path = os.path.join(data_dir_path, "preprocessed/train")
metadata_preprocessed_path = os.path.join(data_preprocessed_dir_path, "metadata.csv")

# Directory listings for the raw, preprocessed and preprocessed/train folders
data_dir, data_preprocessed_dir, data_preprocessed_train_dir = (
    os.listdir(dir_path)
    for dir_path in (
        data_dir_path,
        data_preprocessed_dir_path,
        data_preprocessed_train_dir_path,
    )
)

# Load the preprocessed metadata and keep only the rows of the train split
metadata = pd.read_csv(metadata_preprocessed_path).loc[
    lambda frame: frame["split"] == "train"
]

metadata.head()

In [None]:
# Read in train images
# Images are loaded from the preprocessed train directory with the image
# dimensions taken from constants; nothing is written back to disk
# (write_images=False, no output directory).
X_train = read_images(
    data_dir_path=data_preprocessed_train_dir_path, 
    rows=ROWS, 
    cols=COLS, 
    channels=CHANNELS, 
    write_images=False, 
    output_data_dir_path=None,
    verbose=VERBOSE
)

# Get labels
# NOTE(review): labels come from the metadata row order, images from
# read_images; this assumes both are in the same order and of the same
# length - TODO confirm against helpers.read_images.
y_train = np.array(metadata["label"])

## Set Parameters

In [None]:
# Choose augmentations to use in preprocessing
# For full list see helpers.py
augs_to_select = [
    "Fliplr",
    "Flipud",
    "Cutout"
]
# Subset augs based on those selected.
# The full catalogue is re-read from helpers on every run: subsetting the
# notebook-level `augs` in place would make this cell fail with a KeyError
# when re-run with an augmentation that an earlier run had dropped.
from helpers import augs as ALL_AUGS
augs = {aug_name: ALL_AUGS[aug_name] for aug_name in augs_to_select}

# Modelling constants - add this to constants.py when needed
MODEL_NAME = "resnet50v2"
EPOCHS = 6

# Values derived once and reused in the configuration below
n_gpus = torch.cuda.device_count()
classes, class_counts = np.unique(y_train, return_counts=True)

# Create dictionary of configurations used in modelling
# this will be updated as modelling progresses if necessary, for logging
conf = {
    "device": "cuda" if n_gpus > 0 else "cpu",
    # get_device_name raises when no CUDA device is present, so mirror the
    # cpu fallback used for "device"
    "device_name": torch.cuda.get_device_name(0) if n_gpus > 0 else "cpu",
    "n_workers": n_gpus,
    "rows": ROWS,
    "cols": COLS,
    "channels": CHANNELS,
    "seed": SEED,
    # number of distinct labels (the labels themselves are under "classes")
    "num_classes": len(classes),
    "classes": classes,
    "class_counts": class_counts,
    "test_size": TEST_SIZE,
    "num_train_sample": y_train.shape[0],
    "val_size": 0.15,
    "num_augs": len(augs),
    "augs": augs,
    "model_name": MODEL_NAME,
    "batch_size": 16,
    "epochs": EPOCHS,
    "lr": 1e-5,
    "optimizer": "AdamW",
    "n_splits": N_SPLITS
}

In [None]:
import torch
# Show the name of the first CUDA device; report "cpu" instead of raising
# when the machine has no usable GPU.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"

In [None]:
# Diagnostic: display the CUDA version reported by the installed torch build
torch.version.cuda

In [None]:
# Diagnostic: display how many CUDA devices torch can see
torch.cuda.device_count()

In [None]:
# Diagnostic: display whether torch can use CUDA on this machine
torch.cuda.is_available()

In [None]:
# Split cross validation idx
# Subset images and labels for cross validation
# Create image augmentations and additional labels
# Read in pretrained weights
# Any additional layers
# Create model instance
# Create error metric
# Run training
# Make val predictions
# Val error metric
# Create directory for instance
# Save model
# Save log and config 
# Append train/val errors to csv

In [None]:
# Preview the stratified cross-validation folds.
# The number of folds comes from N_SPLITS so that it matches the value
# logged in conf["n_splits"] instead of a separate hard-coded 3.
skf = StratifiedKFold(n_splits=N_SPLITS)
# split() yields positional indices (0..n-1), not metadata.index labels;
# metadata was filtered to the train split, so select rows with .iloc.
for train_index, test_index in skf.split(metadata.index, metadata["label"]):
    print(train_index)
    print("-"*40)

In [1]:
# import the necessary packages
from collections import OrderedDict
import torch.nn as nn
from torch.optim import SGD
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs
import torch



In [14]:
%%time
def get_training_model(inFeatures=4, hiddenDim=8, nbClasses=3):
    """Build a shallow MLP: Linear -> ReLU -> Linear.

    Args:
        inFeatures: size of each input feature vector.
        hiddenDim: width of the single hidden layer.
        nbClasses: number of output units.

    Returns:
        An ``nn.Sequential`` whose layers are named ``hidden_layer_1``,
        ``activation_1`` and ``output_layer``.
    """
    # assemble the named layers in forward order
    layers = OrderedDict()
    layers["hidden_layer_1"] = nn.Linear(inFeatures, hiddenDim)
    layers["activation_1"] = nn.ReLU()
    layers["output_layer"] = nn.Linear(hiddenDim, nbClasses)
    return nn.Sequential(layers)

def next_batch(inputs, targets, batchSize):
    """Yield consecutive ``(inputs, targets)`` slices of at most ``batchSize`` rows.

    Slices are taken along the first axis in order; the final batch is
    shorter when the row count is not a multiple of ``batchSize``.
    """
    for lo in range(0, inputs.shape[0], batchSize):
        # one slice object keeps data and labels aligned
        window = slice(lo, lo + batchSize)
        yield (inputs[window], targets[window])
        
# specify our batch size, number of epochs, and learning rate
# NOTE(review): EPOCHS is also assigned in the configuration cell earlier in
# this notebook; running this cell rebinds it to 10 for every cell executed
# afterwards.
BATCH_SIZE = 64
EPOCHS = 10
LR = 1e-2
# number of synthetic samples generated for the benchmark (10 million)
N_SAMPLES = 10000000
# determine the device we will be using for training
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("[INFO] training using {}...".format(DEVICE))

# generate a 3-class classification problem with N_SAMPLES data points,
# where each data point is a 4D feature vector
print("[INFO] preparing data...")
(X, y) = make_blobs(n_samples=N_SAMPLES, n_features=4, centers=3,
    cluster_std=2.5, random_state=95)
# create training and testing splits, and convert them to PyTorch
# tensors
(trainX, testX, trainY, testY) = train_test_split(X, y,
    test_size=0.15, random_state=95)
trainX = torch.from_numpy(trainX).float()
testX = torch.from_numpy(testX).float()
# labels are class indices: store them as int64 once, which is what
# CrossEntropyLoss expects, instead of as floats re-cast on every batch
trainY = torch.from_numpy(trainY).long()
testY = torch.from_numpy(testY).long()

# initialize our model and display its architecture
mlp = get_training_model().to(DEVICE)
print(mlp)
# initialize optimizer and loss function
opt = SGD(mlp.parameters(), lr=LR)
lossFunc = nn.CrossEntropyLoss()

# template used to summarize training progress after every epoch
trainTemplate = "epoch: {} train loss: {:.3f} train accuracy: {:.3f}"
# loop through the epochs
for epoch in range(0, EPOCHS):
    # initialize tracker variables and set our model to trainable
    print("[INFO] epoch: {}...".format(epoch + 1))
    trainLoss = 0
    trainAcc = 0
    samples = 0
    mlp.train()
    # loop over the batches of training data
    for (batchX, batchY) in next_batch(trainX, trainY, BATCH_SIZE):
        # send data to the current device, run it through our
        # model, and calculate loss
        (batchX, batchY) = (batchX.to(DEVICE), batchY.to(DEVICE))
        predictions = mlp(batchX)
        loss = lossFunc(predictions, batchY)
        # zero the gradients accumulated from the previous steps,
        # perform backpropagation, and update model parameters
        opt.zero_grad()
        loss.backward()
        opt.step()
        # update training loss, accuracy, and the number of samples
        # visited (loss is a batch mean, so weight it by the batch size)
        trainLoss += loss.item() * batchY.size(0)
        trainAcc += (predictions.max(1)[1] == batchY).sum().item()
        samples += batchY.size(0)
    # display model progress for the epoch that just finished
    print(trainTemplate.format(epoch + 1, (trainLoss / samples),
        (trainAcc / samples)))

[INFO] training using cuda...
[INFO] preparing data...
Sequential(
  (hidden_layer_1): Linear(in_features=4, out_features=8, bias=True)
  (activation_1): ReLU()
  (output_layer): Linear(in_features=8, out_features=3, bias=True)
)
[INFO] epoch: 1...
epoch: 1 train loss: 0.024 train accuracy: 0.991
[INFO] epoch: 2...
epoch: 2 train loss: 0.021 train accuracy: 0.992
[INFO] epoch: 3...
epoch: 3 train loss: 0.021 train accuracy: 0.993
[INFO] epoch: 4...
epoch: 4 train loss: 0.021 train accuracy: 0.993
[INFO] epoch: 5...
epoch: 5 train loss: 0.021 train accuracy: 0.993
[INFO] epoch: 6...
epoch: 6 train loss: 0.020 train accuracy: 0.993
[INFO] epoch: 7...
epoch: 7 train loss: 0.020 train accuracy: 0.993
[INFO] epoch: 8...
epoch: 8 train loss: 0.020 train accuracy: 0.993
[INFO] epoch: 9...
epoch: 9 train loss: 0.020 train accuracy: 0.993
[INFO] epoch: 10...
epoch: 10 train loss: 0.020 train accuracy: 0.993
CPU times: user 11min 5s, sys: 8.12 s, total: 11min 13s
Wall time: 11min 15s


In [15]:
%%time
def get_training_model(inFeatures=4, hiddenDim=8, nbClasses=3):
    """Return a small sequential network with a single hidden layer.

    The network maps ``inFeatures`` inputs to ``hiddenDim`` hidden units,
    applies a ReLU, then maps to ``nbClasses`` outputs. Layers are named
    ``hidden_layer_1``, ``activation_1`` and ``output_layer``.
    """
    network = nn.Sequential()
    # register the layers one by one, in forward order
    network.add_module("hidden_layer_1", nn.Linear(inFeatures, hiddenDim))
    network.add_module("activation_1", nn.ReLU())
    network.add_module("output_layer", nn.Linear(hiddenDim, nbClasses))
    return network

def next_batch(inputs, targets, batchSize):
    """Generate aligned ``(inputs, targets)`` mini-batches in dataset order.

    Each batch holds up to ``batchSize`` rows taken along the first axis;
    the last batch contains whatever remains.
    """
    batch_starts = range(0, inputs.shape[0], batchSize)
    yield from (
        (inputs[first:first + batchSize], targets[first:first + batchSize])
        for first in batch_starts
    )
        
# specify our batch size, number of epochs, and learning rate
# NOTE(review): EPOCHS is also assigned in the configuration cell earlier in
# this notebook; running this cell rebinds it to 10 for every cell executed
# afterwards.
BATCH_SIZE = 64
EPOCHS = 10
LR = 1e-2
# number of synthetic samples generated for the benchmark (10 million)
N_SAMPLES = 10000000
# force CPU training for this run
DEVICE = "cpu"
print("[INFO] training using {}...".format(DEVICE))

# generate a 3-class classification problem with N_SAMPLES data points,
# where each data point is a 4D feature vector
print("[INFO] preparing data...")
(X, y) = make_blobs(n_samples=N_SAMPLES, n_features=4, centers=3,
    cluster_std=2.5, random_state=95)
# create training and testing splits, and convert them to PyTorch
# tensors
(trainX, testX, trainY, testY) = train_test_split(X, y,
    test_size=0.15, random_state=95)
trainX = torch.from_numpy(trainX).float()
testX = torch.from_numpy(testX).float()
# labels are class indices: store them as int64 once, which is what
# CrossEntropyLoss expects, instead of as floats re-cast on every batch
trainY = torch.from_numpy(trainY).long()
testY = torch.from_numpy(testY).long()

# initialize our model and display its architecture
mlp = get_training_model().to(DEVICE)
print(mlp)
# initialize optimizer and loss function
opt = SGD(mlp.parameters(), lr=LR)
lossFunc = nn.CrossEntropyLoss()

# template used to summarize training progress after every epoch
trainTemplate = "epoch: {} train loss: {:.3f} train accuracy: {:.3f}"
# loop through the epochs
for epoch in range(0, EPOCHS):
    # initialize tracker variables and set our model to trainable
    print("[INFO] epoch: {}...".format(epoch + 1))
    trainLoss = 0
    trainAcc = 0
    samples = 0
    mlp.train()
    # loop over the batches of training data
    for (batchX, batchY) in next_batch(trainX, trainY, BATCH_SIZE):
        # send data to the current device, run it through our
        # model, and calculate loss
        (batchX, batchY) = (batchX.to(DEVICE), batchY.to(DEVICE))
        predictions = mlp(batchX)
        loss = lossFunc(predictions, batchY)
        # zero the gradients accumulated from the previous steps,
        # perform backpropagation, and update model parameters
        opt.zero_grad()
        loss.backward()
        opt.step()
        # update training loss, accuracy, and the number of samples
        # visited (loss is a batch mean, so weight it by the batch size)
        trainLoss += loss.item() * batchY.size(0)
        trainAcc += (predictions.max(1)[1] == batchY).sum().item()
        samples += batchY.size(0)
    # display model progress for the epoch that just finished
    print(trainTemplate.format(epoch + 1, (trainLoss / samples),
        (trainAcc / samples)))

[INFO] training using cpu...
[INFO] preparing data...
Sequential(
  (hidden_layer_1): Linear(in_features=4, out_features=8, bias=True)
  (activation_1): ReLU()
  (output_layer): Linear(in_features=8, out_features=3, bias=True)
)
[INFO] epoch: 1...
epoch: 1 train loss: 0.024 train accuracy: 0.991
[INFO] epoch: 2...
epoch: 2 train loss: 0.021 train accuracy: 0.992
[INFO] epoch: 3...
epoch: 3 train loss: 0.021 train accuracy: 0.993
[INFO] epoch: 4...
epoch: 4 train loss: 0.021 train accuracy: 0.993
[INFO] epoch: 5...
epoch: 5 train loss: 0.021 train accuracy: 0.993
[INFO] epoch: 6...
epoch: 6 train loss: 0.021 train accuracy: 0.993
[INFO] epoch: 7...
epoch: 7 train loss: 0.020 train accuracy: 0.993
[INFO] epoch: 8...
epoch: 8 train loss: 0.020 train accuracy: 0.993
[INFO] epoch: 9...
epoch: 9 train loss: 0.020 train accuracy: 0.993
[INFO] epoch: 10...
epoch: 10 train loss: 0.020 train accuracy: 0.993
CPU times: user 6h 53min 55s, sys: 11 s, total: 6h 54min 6s
Wall time: 6min 33s
