<a href="https://colab.research.google.com/github/Tessellate-Imaging/monk_v1/blob/master/study_roadmaps/4_image_classification_zoo/Weed%20Species%20Classification%20-%20Hyperparameter%20Tuning%20using%20Monk.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Table of contents


## Install Monk


## Using pretrained model for classifying weed species types


## Training a classifier from scratch

# Install Monk

  - git clone https://github.com/Tessellate-Imaging/monk_v1.git

  - cd monk_v1/installation/Linux && pip install -r requirements_cu9.txt
        (Select the requirements file as per OS and CUDA version)

In [None]:
# Fetch the Monk v1 sources into ./monk_v1; later cells install requirements from
# monk_v1/installation and import the library from monk_v1/monk.
! git clone https://github.com/Tessellate-Imaging/monk_v1.git

In [None]:
# Install Monk's Python dependencies from the cloned repository.
# Exactly one of the three install commands below should be active for the current platform.

# If using Colab install using the commands below
! cd monk_v1/installation/Misc && pip install -r requirements_colab.txt

# If using Kaggle uncomment the following command
#! cd monk_v1/installation/Misc && pip install -r requirements_kaggle.txt

# Select the requirements file as per OS and CUDA version when using a local system or cloud
#! cd monk_v1/installation/Linux && pip install -r requirements_cu9.txt

# Use trained classifier for demo

In [None]:
# Import monk

import os
import sys

# Make the cloned Monk package importable (path is relative to the notebook's
# working directory). The membership check keeps the cell idempotent: re-running
# it does not append a duplicate entry to sys.path.
MONK_PATH = "monk_v1/monk/"
if MONK_PATH not in sys.path:
    sys.path.append(MONK_PATH)

In [None]:
# Download trained weights

In [None]:
# Download the trained-weights archive from Google Drive and save it as cls_weed_trained.zip.
# The inner wget requests the download page (storing cookies in /tmp/cookies.txt) and sed pulls
# the `confirm` token out of the response; the outer wget repeats the request with that token
# and the stored cookies. The cookie file is removed once the download succeeds.
# NOTE(review): the inner request uses --no-check-certificate (no TLS verification) — confirm this is still required.
! wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=119FB2X_NrAPBBGuC6GwbgT3hQo2Whwy2' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=119FB2X_NrAPBBGuC6GwbgT3hQo2Whwy2" -O cls_weed_trained.zip && rm -rf /tmp/cookies.txt

In [None]:
# Unpack the downloaded archive quietly (-qq suppresses unzip's per-file output)
! unzip -qq cls_weed_trained.zip 

In [None]:
# Show the contents of the demo project directory (presumably unpacked from cls_weed_trained.zip).
# A bare `ls` is not Python; it only works through IPython's automagic.
ls workspace/Project-weed_species

In [None]:
# Gluon project
from gluon_prototype import prototype

In [None]:
# Load project in inference mode

# Open experiment "Gluon-Resnet18_v1" of project "Project-weed_species" with eval_infer=True.
# NOTE(review): presumably this restores the trained weights shipped in the unpacked workspace — confirm.
gtf = prototype(verbose=1);
gtf.Prototype("Project-weed_species", "Gluon-Resnet18_v1", eval_infer=True);

# Other trained models - uncomment one of these to use it instead of Resnet18
#gtf.Prototype("Project-weed_species", "Gluon-Resnet34_v1", eval_infer=True);
#gtf.Prototype("Project-weed_species", "Gluon-Resnet50_v1", eval_infer=True);

In [None]:
#Infer

In [None]:
from IPython.display import Image

# Run the loaded classifier on the first demo image and keep its result in `predictions`
img_name = "workspace/test/test1.jpg"
predictions = gtf.Infer(img_name=img_name);

# Last expression of the cell: render the image that was just classified
Image(filename=img_name)

In [None]:
from IPython.display import Image

# Run the loaded classifier on the second demo image and keep its result in `predictions`
img_name = "workspace/test/test2.jpg"
predictions = gtf.Infer(img_name=img_name);

# Last expression of the cell: render the image that was just classified
Image(filename=img_name)

# Training custom classifier from scratch

# Table of Contents


## [1. Load data](#1)


## [2. Try different base models on partial dataset](#2)


## [3. Analyse what batch size does the trick](#3)


## [4. Analyse which learning rate suits best](#4)


## [5. Try out different optimizers on partial dataset](#5)


## [6. Train](#6)


## [7. Run inference on trained classifier](#7)

## Dataset - Weed species classification
    - https://github.com/AlexOlsen/DeepWeeds

In [None]:
# Download the weed-species dataset archive from Google Drive and save it as weed_data.zip.
# The inner wget requests the download page (storing cookies in /tmp/cookies.txt) and sed pulls
# the `confirm` token out of the response; the outer wget repeats the request with that token
# and the stored cookies. The cookie file is removed once the download succeeds.
# NOTE(review): the inner request uses --no-check-certificate (no TLS verification) — confirm this is still required.
! wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1FjwRgyJ426y9tsTkvT_dx3vUgumRp3lN' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1FjwRgyJ426y9tsTkvT_dx3vUgumRp3lN" -O weed_data.zip && rm -rf /tmp/cookies.txt

In [None]:
# Unpack the dataset archive quietly (-qq suppresses unzip's per-file output)
! unzip -qq weed_data.zip

In [None]:
import pandas as pd

# Load the training labels into a DataFrame; the "Species" column of this frame is cleaned next
df = pd.read_csv("weed_data/train_labels.csv")

In [None]:
# Replace spaces in the species labels with underscores.
# The whole column is assigned at once instead of writing `df["Species"][i]` row by row:
# that form is chained indexing, which raises SettingWithCopyWarning and leaves `df`
# unchanged when pandas Copy-on-Write is active. `regex=False` makes " " a literal match.
df["Species"] = df["Species"].str.replace(" ", "_", regex=False)

In [None]:
# Write the cleaned labels to a separate CSV (without the index column);
# train_labels.csv itself is not overwritten.
df.to_csv("weed_data/train_labels_updated.csv", index=False)

## Imports

In [None]:
# Monk
import os
import sys

# Make the cloned Monk package importable (path is relative to the notebook's
# working directory). The membership check keeps the cell idempotent: re-running
# it does not append a duplicate entry to sys.path.
MONK_PATH = "monk_v1/monk/"
if MONK_PATH not in sys.path:
    sys.path.append(MONK_PATH)

In [None]:
#Using mxnet-gluon backend 
from gluon_prototype import prototype

<a id='1'></a>
# Load Data

## Creating and managing experiments
    - Provide project name
    - Provide experiment name
    - For a specific data create a single project
    - Inside each project multiple experiments can be created
    - Every experiment can have different hyper-parameters attached to it

In [None]:
# Create experiment "weed_species_classification" inside project "project"
# (first argument is the project name, second the experiment name).
gtf = prototype(verbose=1);
gtf.Prototype("project", "weed_species_classification");

### This creates files and directories as per the following structure
    
    
    workspace
        |
        |--------project
                    |
                    |
                    |-----weed_species_classification
                                |
                                |-----experiment-state.json
                                |
                                |-----output
                                        |
                                        |------logs (All training logs and graphs saved here)
                                        |
                                        |------models (all trained models saved here)

## Set dataset and select the model

In [None]:
# Point the experiment at the image folder and the cleaned label CSV, and start with
# resnet18_v1, base network not frozen, 10 epochs. Later cells override the model,
# batch size, learning rate and optimizer after each analysis step.
gtf.Default(dataset_path="weed_data/images",
            path_to_csv="weed_data/train_labels_updated.csv",
            model_name="resnet18_v1", 
            freeze_base_network=False,
            num_epochs=10);

<a id='2'></a>
# Try different base models on partial dataset

In [None]:
# Show the model names this backend offers (judging by the method name; output not captured here)
gtf.List_Models();

In [None]:
# Compare candidate base models by running a short experiment for each one
# on a small percentage of the dataset.

# Analysis Project Name
analysis_name = "analyse_models";

# Models to analyse
# First element in the list - Model name
# Second element in the list - Boolean value to freeze base network or not
# Third element in the list - Boolean value to use pretrained model as the starting point or not
models = [["resnet18_v1", True, True], ["resnet152_v2", False, True], 
          ["densenet121", True, True], ["densenet201", False, True],
          ["mobilenetv3_large", False, True], ["resnext101_64x4d", False, True]];  

# Num epochs for each experiment to run
epochs=10;

# Percentage of original dataset to take in for experimentation
percent_data=5;

# "keep_all" - Keep all the sub experiments created
# "keep_none" - Delete all sub experiments created
analysis = gtf.Analyse_Models(analysis_name, models, percent_data, num_epochs=epochs, state="keep_none"); 

In [None]:
# Let's select densenet201
# Set the appropriate model as per the analysis results
gtf.update_model_name("densenet201");
gtf.update_freeze_base_network(False);
gtf.update_use_pretrained(True);

# Reload is called after every group of update_* calls in this notebook
# (presumably it applies the updated settings to the experiment — TODO confirm)
gtf.Reload();

<a id='3'></a>
# Try different batch sizes on partial dataset

In [None]:
# Compare batch sizes by running a short experiment for each one
# on a small percentage of the dataset.

# Analysis Project Name
analysis_name = "analyse_batch_sizes";

# Batch sizes to explore - set the max as per the gpu limit
# Batch size 64 takes about 14000 MB of GPU RAM
batch_sizes = [4, 16, 32, 64];

# Num epochs for each experiment to run
epochs = 10;

# Percentage of original dataset to take in for experimentation
percent_data = 10;

# "keep_all" - Keep all the sub experiments created
# "keep_none" - Delete all sub experiments created
analysis = gtf.Analyse_Batch_Sizes(analysis_name, batch_sizes, percent_data, 
                                   num_epochs=epochs, state="keep_none"); 


In [None]:
# Set the appropriate batch size as per the analysis results
gtf.update_batch_size(64);

# Reload is called after every group of update_* calls in this notebook
# (presumably it applies the updated settings to the experiment — TODO confirm)
gtf.Reload();

<a id='4'></a>
# Try different initial learning rates

In [None]:
# Compare initial learning rates by running a short experiment for each one
# on a small percentage of the dataset.

# Analysis Project Name
analysis_name = "analyse_learning_rates"

# Learning rates to explore
lrs = [0.1, 0.05, 0.01, 0.005, 0.0001];

# Num epochs for each experiment to run
epochs=10

# Percentage of original dataset to take in for experimentation
percent_data=10

# "keep_all" - Keep all the sub experiments created
# "keep_none" - Delete all sub experiments created
analysis = gtf.Analyse_Learning_Rates(analysis_name, lrs, percent_data, num_epochs=epochs, state="keep_none"); 


In [None]:
# Set the appropriate learning rate as per the analysis results
gtf.update_learning_rate(0.01);

# Reload is called after every group of update_* calls in this notebook
# (presumably it applies the updated settings to the experiment — TODO confirm)
gtf.Reload();

<a id='5'></a>
# Try different optimizers on partial dataset

In [None]:
# Compare optimizers by running a short experiment for each one
# on a small percentage of the dataset.

# Analysis Project Name
analysis_name = "analyse_optimizers";

# Optimizers to explore
optimizers = ["sgd", "adam", "nesterov_adam", "adagrad"];   # optimizer names only

# Num epochs for each experiment to run
epochs = 5;

# Percentage of original dataset to take in for experimentation
percent_data = 10;

# "keep_all" - Keep all the sub experiments created
# "keep_none" - Delete all sub experiments created
analysis = gtf.Analyse_Optimizers(analysis_name, optimizers, percent_data, num_epochs=epochs, state="keep_none"); 

In [None]:
# Set the appropriate optimizer as per the analysis results
# NOTE(review): the argument 0.001 is presumably the learning rate; if so it differs from the
# 0.01 chosen via update_learning_rate in the learning-rate step — confirm which value is intended.
gtf.optimizer_sgd(0.001);

# Reload is called after every group of update_* calls in this notebook
# (presumably it applies the updated settings to the experiment — TODO confirm)
gtf.Reload();

<a id='6'></a>
# Train

In [None]:
# Train the experiment with the model, batch size, learning rate and optimizer configured in the preceding cells
gtf.Train();

<a id='7'></a>
# Infer on Test Images

In [None]:
# Re-open the same project/experiment used for training, this time with eval_infer=True
# so that it can be used for inference.
gtf = prototype(verbose=1);
gtf.Prototype("project", "weed_species_classification", eval_infer=True);

In [None]:
from IPython.display import Image

# Keep the image path in one place so the classified file and the displayed file
# cannot drift apart (same pattern as the demo inference cells earlier in the notebook).
img_name = "weed_data/test/test1.jpg"
output = gtf.Infer(img_name=img_name);

# Last expression of the cell: render the image that was just classified
Image(filename=img_name)

In [None]:
from IPython.display import Image

# Keep the image path in one place so the classified file and the displayed file
# cannot drift apart (same pattern as the demo inference cells earlier in the notebook).
img_name = "weed_data/test/test2.jpg"
output = gtf.Infer(img_name=img_name);

# Last expression of the cell: render the image that was just classified
Image(filename=img_name)