# Retraining of top performing FFNN

## Imports

In [62]:
# General imports 
import sys
import os 
sys.path.insert(1, os.path.join(os.pardir, 'src'))
from itertools import product

# Data imports
import cv2
import torch
import mlflow
import numpy as np
from mlflow.tracking.client import MlflowClient
from torchvision import datasets, transforms

# Homebrew imports 
import model
from utils import one_hot_encode_index
from optimizers import Adam
from activations import Softmax, ReLU
from layers import Dropout, LinearLayer
from loss import CategoricalCrossEntropyLoss

# pytorch imports 
from torch import nn, cuda, optim, no_grad
import torch.nn.functional as F
from torchvision import transforms

## TESTING 
import importlib
importlib.reload(model)
##

<module 'model' from '..\\src\\model.py'>

## Finding best runs

In [59]:
# querying results to see best 2 performing homebrew models
query = "params.data_split = '90/10' and params.type = 'FFNN' and params.framework = 'homebrew'"
hb_runs = MlflowClient().search_runs(
                                experiment_ids="8",
                                filter_string=query,
                                max_results=1,
                                order_by=["metrics.validation_accuracy DESC"]
                            )

query = "params.data_split = '90/10' and params.type = 'FFNN' and params.framework = 'pytorch'"
pt_runs =  MlflowClient().search_runs(
                                experiment_ids="8",
                                filter_string=query,
                                max_results=1,
                                order_by=["metrics.validation_accuracy DESC"]
                            )

## Setup data loaders

In [60]:
train_transforms = transforms.Compose([transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(32),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.Grayscale(num_output_channels=1),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.5],[0.5])
                                      ])

test_transforms = transforms.Compose([transforms.Resize(33),
                                      transforms.CenterCrop(32),
                                      transforms.Grayscale(num_output_channels=1),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.5],[0.5])
                                    ])

# setting up data loaders
data_dir = os.path.join(os.pardir, 'data', 'Plant_leave_diseases_32')

train_data = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=train_transforms)
test_data = datasets.ImageFolder(os.path.join(data_dir, 'validation'), transform=test_transforms)

## Training 'Homebrew' models

In [61]:
# Getting Configs 
par = hb_runs[0].data.params
config = {'data_split': par['data_split'],
        'decay': np.float64(par['decay']),
        'dropout': np.float64(par['dropout']),
        'framework': par['framework'],
        'learning_rate': np.float64(par['learning_rate']),
        'max_epochs': int(par['max_epochs']),
        'resolution': int(par['resolution']),
        'type': par['type']}

mlflow.set_experiment("Plant Leaf Disease")

train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
validation_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=True)

# initialize model 
mdl = model.Model(Adam(learning_rate=config['learning_rate'], decay=config['decay']),
                    CategoricalCrossEntropyLoss())

# Config early stop 
mdl.add_early_stop(25)

# save config 
mdl.set_save_config(model_name='FFNN_top_homebrew', save_path=os.path.join('models'))

# Defining architecture 
mdl.set_sequence([
                    LinearLayer(32*32, 1024),
                    ReLU(),
                    Dropout(config['dropout']),
                    LinearLayer(1024, 512), 
                    ReLU(),
                    Dropout(config['dropout']),
                    LinearLayer(512, 39),
                    Softmax()
                ])


with mlflow.start_run():
        mlflow.log_params(config)
        mdl.train_with_loader(train_loader, epochs=config['max_epochs'], validation_loader=validation_loader, cls_count=39, flatten_input=True)

=== Epoch: 1 ===
Step: 0/865, accuracy0.047, loss4.201, learning rate 0.0010000 
Step: 100/865, accuracy0.078, loss3.362, learning rate 0.0009901 
Step: 200/865, accuracy0.234, loss3.145, learning rate 0.0009804 
Step: 300/865, accuracy0.141, loss3.415, learning rate 0.0009709 
Step: 400/865, accuracy0.203, loss3.305, learning rate 0.0009615 
Step: 500/865, accuracy0.109, loss3.499, learning rate 0.0009524 
Step: 600/865, accuracy0.062, loss3.463, learning rate 0.0009434 
Step: 700/865, accuracy0.109, loss3.182, learning rate 0.0009346 
Step: 800/865, accuracy0.156, loss3.321, learning rate 0.0009259 
Step: 864/865, accuracy0.065, loss3.521, learning rate 0.0009205 
Epoch: 1/200, accuracy0.149, loss3.305, learning rate 0.001
Estimated reamining runtime: 7:47:15.439634
--Validation--
Validation : Accuracy: 0.230, Loss: 2.935
=== Epoch: 2 ===
Step: 0/865, accuracy0.094, loss3.410, learning rate 0.0009204 
Step: 100/865, accuracy0.203, loss3.033, learning rate 0.0009120 
Step: 200/865, ac

### Training pytorch Mode

In [63]:
#### Net and training function 
class PlantDiseaseNet(nn.Module):
    def __init__(self, input_size=1024, l1=1024, l2=512, output_size=39, dropout_p=0.5):
        super(PlantDiseaseNet, self).__init__()
        self.fc1 = nn.Linear(input_size, l1)
        self.fc2 = nn.Linear(l1, l2)
        self.fc3 = nn.Linear(l2, output_size)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.log_softmax(self.fc3(x), dim=1)
        return x

In [64]:
def train(model, train_loader, validation_loader, config, n_epochs=10, stopping_treshold=None):

    if torch.cuda.is_available():
        print('CUDA is available!  Training on GPU ...')
        model.cuda()


    # Loss and optimizer setup 
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])

    # Setting minimum validation loss to inf
    validation_loss_minimum = np.Inf 
    train_loss_history = []
    validation_loss_history = []

    for epoch in range(1, n_epochs +1):

        training_loss = 0.0
        validation_loss = 0.0

        # Training loop
        training_accuracies = []
        for X, y in train_loader:
            
            # Moving data to gpu if using 
            if torch.cuda.is_available():
                X, y = X.cuda(), y.cuda()
            
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(X)
            # calculate the batch loss
            loss = criterion(output, y)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # update training loss
            training_loss += loss.item()*X.size(0)

            # calculating accuracy
            ps = torch.exp(output)
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == y.view(*top_class.shape)
            training_accuracies.append(torch.mean(equals.type(torch.FloatTensor)).item())

        # Validation Loop
        with torch.no_grad():
            accuracies = []
            for X, y in validation_loader:

                # Moving data to gpu if using 
                if torch.cuda.is_available():
                    X, y = X.cuda(), y.cuda()
                # forward pass: compute predicted outputs by passing inputs to the model
                output = model(X)
                # calculate the batch loss
                loss = criterion(output, y)
                # update validation loss
                validation_loss += loss.item()*X.size(0)

                # calculating accuracy
                ps = torch.exp(output)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == y.view(*top_class.shape)
                accuracies.append(torch.mean(equals.type(torch.FloatTensor)).item())
                
        # Mean loss 
        mean_training_loss = training_loss/len(train_loader.sampler)
        mean_validation_loss = validation_loss/len(validation_loader.sampler)
        mean_train_accuracy = sum(training_accuracies)/len(training_accuracies)
        mean_accuracy = sum(accuracies)/len(accuracies)
        train_loss_history.append(mean_training_loss)
        validation_loss_history.append(mean_validation_loss)

        # Printing epoch stats
        print(f'Epoch: {epoch}/{n_epochs}, ' +\
              f'Training Loss: {mean_training_loss:.3f}, '+\
              f'Train accuracy {mean_train_accuracy:.3f} ' +\
              f'Validation Loss: {mean_validation_loss:.3f}, '+\
              f'Validation accuracy {mean_accuracy:.3f}')

        # logging with mlflow 
        if mlflow.active_run():
            mlflow.log_metric('loss', mean_training_loss, step=epoch)
            mlflow.log_metric('accuracy', mean_train_accuracy, step=epoch)
            mlflow.log_metric('validation_accuracy', mean_accuracy, step=epoch)
            mlflow.log_metric('validation_loss', mean_validation_loss, step=epoch)

        # Testing for early stopping
        # Testing for early stopping
        if stopping_treshold:
            if mean_validation_loss < validation_loss_minimum:
                validation_loss_minimum = mean_validation_loss
                print('New minimum validation loss (saving model)')
                save_pth = os.path.join('models',f'{config["name"]}.pt')
                torch.save(model.state_dict(), save_pth)
            elif len([v for v in validation_loss_history[-stopping_treshold:] if v > validation_loss_minimum]) >= stopping_treshold:
                print(f"Stopping early at epoch: {epoch}/{n_epochs}")
                break

### Training Pytorch models

In [65]:
# getting configs
# # Getting Configs 
par = pt_runs[0].data.params
config = {'data_split': par['data_split'],
        'decay': np.float64(par['decay']),
        'dropout': np.float64(par['dropout']),
        'framework': par['framework'],
        'learning_rate': np.float64(par['learning_rate']),
        'max_epochs': int(par['max_epochs']),
        'resolution': int(par['resolution']),
        'type': par['type'],
        'name': 'top_pytorch'} 

In [66]:
# Set up data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
validation_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=True)

# Initializing the model
mdl = PlantDiseaseNet(input_size=config['resolution']**2, dropout_p=config['dropout'])
print("Starting training on network: \n", mdl)

mlflow.set_experiment("Plant Leaf Disease")
with mlflow.start_run():
    mlflow.log_params(config)
    tlh, vlh = train(mdl, train_loader, validation_loader, config, n_epochs=config['max_epochs'], stopping_treshold=50)

Starting training on network: 
 PlantDiseaseNet(
  (fc1): Linear(in_features=1024, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=39, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)
CUDA is available!  Training on GPU ...
Epoch: 1/200, Training Loss: 3.110, Train accuracy 0.184 Validation Loss: 2.768, Validation accuracy 0.264
New minimum validation loss (saving model)
Epoch: 2/200, Training Loss: 2.916, Train accuracy 0.221 Validation Loss: 2.621, Validation accuracy 0.301
New minimum validation loss (saving model)
Epoch: 3/200, Training Loss: 2.835, Train accuracy 0.240 Validation Loss: 2.465, Validation accuracy 0.324
New minimum validation loss (saving model)
Epoch: 4/200, Training Loss: 2.767, Train accuracy 0.250 Validation Loss: 2.389, Validation accuracy 0.337
New minimum validation loss (saving model)
Epoch: 5/200, Training Loss: 2.745, Train accuracy 0.256 Validation Loss: 2.38

TypeError: cannot unpack non-iterable NoneType object