## Abstract
Despite the fact that the agricultural sector is a national economic development priority in sub-Saharan Africa, crop pests and diseases have been the challenge affecting major food security crops like maize. 
Maize Leaf Blight, also known as Northern Corn Leaf Blight has become a menace in low land agro-ecologies, during the last decade. On the other hand, according to research, Maize Streak Disease which is caused by the Maize Streak Virus is regarded as the third most serious disease affecting maize in sub-Saharan Africa. 
The prominence of these diseases has greatly affected the yields of Africa’s most important food crop. 

## Classes
<div style="display: flex; justify-content: center;">
    <div style="text-align: center; margin-right: 20px;">
        <p>Healthy</p>
        <img src="sample/1621590060253.jpg" alt="Image 1" style="width: 200;">
    </div>
    <div style="text-align: center;">
        <p>Maize Streak Disease</p>
        <img src="sample/1621319276554.jpg" alt="Image 2" style="width: 200;">
    </div>
</div>


In [19]:
from keras.layers import Dense, Flatten
from keras.models import Model, load_model
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
import keras
import numpy as np
import os
import json
import mlflow
import mlflow.keras
from urllib.parse import urlparse


import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [3]:
# params
params = {
    "LEARNING_RATE": 0.01,
    "CLASSES": 2,
    "EPOCHS": 8,
    "INCLUDE_TOP": False,
    "BATCH_SIZE": 16,
    "WEIGHTS": "imagenet",
    "IMAGE_SIZE": [224, 224, 3],
    "DECAY": 1e-6,
    "MOMENTUM": 0.9
}

# mlflow configs
MLFLOW_TRACKING_URI = "https://dagshub.com/kalema3502/vggnet-transfer-learning-for-msv.mlflow"
MLFLOW_TRACKING_USERNAME = "kalema3502"
MLFLOW_TRACKING_PASSWORD = "fb3845efcc3b2e46a4157b1d2c977a21e02dd16e"

In [4]:
# data path
DATA = "data/"

In [5]:
# base model
base_model = VGG16(input_shape=params['IMAGE_SIZE'], weights=params['WEIGHTS'], include_top=params['INCLUDE_TOP'])


In [6]:
# base model summary
base_model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [7]:
# freeze hidden layers
for layer in base_model.layers:
    layer.trainable = False

In [8]:
# create custom output(dense) layer
output = Dense(params['CLASSES'], activation="softmax") (Flatten() (base_model.output))

In [9]:
# custom model
new_model = Model(inputs=base_model.input, outputs=output)
new_model.compile(
            optimizer=optimizers.SGD(learning_rate=params['LEARNING_RATE'], decay=params['DECAY'], momentum=params['MOMENTUM']),
            loss=keras.losses.CategoricalCrossentropy(),
            metrics=["accuracy"]
        )
new_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [10]:
# Augmentation
datagenerator_kwargs = dict(
            rescale=1./255,
            validation_split=0.20
        )
dataflow_kwargs = dict(
            target_size=params['IMAGE_SIZE'][:-1],
            batch_size=params['BATCH_SIZE'],
            interpolation="bilinear"
        )
train_datagenerator = ImageDataGenerator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=20,
                height_shift_range=20,
                shear_range=0.2,
                zoom_range=0.2,
                **datagenerator_kwargs
            )
validation_datagenerator = ImageDataGenerator(
               **datagenerator_kwargs
            )



In [11]:
# train test sets
train_set = train_datagenerator.flow_from_directory(
            directory=DATA,
            subset='training',
            shuffle=True,
            class_mode="categorical",
            **dataflow_kwargs
        )
validation_set = validation_datagenerator.flow_from_directory(
            directory=DATA,
            subset='validation',
            shuffle=True,
            class_mode="categorical",
            **dataflow_kwargs
        )

Found 960 images belonging to 2 classes.
Found 240 images belonging to 2 classes.


In [11]:
# training
from datetime import datetime
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau

os.makedirs('artifacts/', exist_ok=True)

steps_per_epoch = train_set.samples // train_set.batch_size
validation_steps = validation_set.samples // validation_set.batch_size

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-5)

checkpoint = ModelCheckpoint(filepath='artifacts/model.h5',
                             verbose=1,
                             save_best_only=True)

callbacks = [checkpoint, lr_reducer]
start = datetime.now()

new_model.fit_generator(generator=train_set,
                        validation_data=validation_set,
                        epochs=params['EPOCHS'],
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps,
                        callbacks=callbacks,
                        verbose=1)

duration = datetime.now() - start
print(f"Training completed in time: {duration}")

  new_model.fit_generator(generator=train_set,


Epoch 1/8


2024-04-25 09:07:44.689819: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2024-04-25 09:07:47.035976: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.
2024-04-25 09:07:47.475249: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.
2024-04-25 09:07:48.834124: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 51380224 exceeds 10% of free system memory.
2024-04-25 09:07:48.953045: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 102760448 exceeds 10% of free system memory.
2024-04-25 09:07:49.492470: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 102760448 exceeds 10



2024-04-25 09:15:10.988089: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 1: val_loss improved from inf to 1.51998, saving model to artifacts/model.h5
Epoch 2/8
Epoch 2: val_loss did not improve from 1.51998
Epoch 3/8
Epoch 3: val_loss did not improve from 1.51998
Epoch 4/8
Epoch 4: val_loss did not improve from 1.51998
Epoch 5/8
Epoch 5: val_loss improved from 1.51998 to 1.48110, saving model to artifacts/model.h5
Epoch 6/8
Epoch 6: val_loss did not improve from 1.48110
Epoch 7/8
Epoch 7: val_loss improved from 1.48110 to 1.25932, saving model to artifacts/model.h5
Epoch 8/8
Epoch 8: val_loss did not improve from 1.25932
Training completed in time: 1:52:42.307205


In [12]:
# model evaluation
best_model = load_model('artifacts/model.h5')

results = best_model.evaluate(validation_set)

scores = {'loss': results[0], 'accuracy': results[1]}

try:
    with open('scores.json', 'w') as json_file:
        json.dump(scores, json_file)
except IOError as e:
    raise IOError(f"An error occurred while writing to the file: {e}")

2024-04-25 12:10:40.221375: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2024-04-25 12:10:41.138712: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.
2024-04-25 12:10:41.441395: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.
2024-04-25 12:10:42.631832: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 102760448 exceeds 10% of free system memory.
2024-04-25 12:10:43.170877: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 102760448 exceeds 10% of free system memory.


 1/15 [=>............................] - ETA: 1:58 - loss: 6.1094e-07 - accuracy: 1.0000

2024-04-25 12:10:48.655310: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.




In [13]:
# mlflow configs
os.environ["MLFLOW_TRACKING_URI"]= MLFLOW_TRACKING_URI
os.environ["MLFLOW_TRACKING_USERNAME"]= MLFLOW_TRACKING_USERNAME
os.environ["MLFLOW_TRACKING_PASSWORD"]= MLFLOW_TRACKING_PASSWORD

In [14]:
# experiment tracking
mlflow.set_registry_uri(MLFLOW_TRACKING_URI)
tracking_url_type_store= urlparse(mlflow.get_tracking_uri()).scheme

with mlflow.start_run():
    mlflow.log_params(params)
    mlflow.log_metrics(
        {"loss": results[0], "accuracy": results[1]}
    )
    
    if tracking_url_type_store != "file":
        mlflow.keras.log_model(best_model, "model", registered_model_name="VGG16Model")
    else:
        mlflow.keras.log_model(best_model, "model")



INFO:tensorflow:Assets written to: /tmp/tmpe20yp8i3/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmpe20yp8i3/model/data/model/assets
Registered model 'VGG16Model' already exists. Creating a new version of this model...
2024/04/25 11:07:02 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 2
Created version '2' of model 'VGG16Model'.
