<a href="https://colab.research.google.com/github/Clint07-datascientist/ML_Pipeline_Summative/blob/main/notebook/ml_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Importing all necessary libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from sklearn.metrics import classification_report, confusion_matrix


In [10]:
# Dataset layout (one sub-folder per class):
#   <PROJECT_ROOT>/data/train_set/<class_name>/*
#   <PROJECT_ROOT>/data/test_set/<class_name>/*
PROJECT_ROOT = "/content/ML_Pipeline_Summative"
train_dir = f"{PROJECT_ROOT}/data/train_set"
test_dir = f"{PROJECT_ROOT}/data/test_set"

# Class labels, listed alphabetically. len(classes) sets the width of the
# model's softmax output layer.
classes = ['fall_armyworm', 'grasshopper', 'healthy', 'leaf_beetle',
           'leaf_blight', 'leaf_spot', 'streak_virus']

IMG_SIZE = 224    # images are resized to IMG_SIZE x IMG_SIZE pixels
BATCH_SIZE = 32   # images per generator batch
EPOCHS = 10       # number of training epochs

In [11]:
# Data generators
# Seed passed to the training iterator so batch shuffling and the random
# augmentations are repeatable from one run to the next.
SEED = 42

# Training images: MobileNetV2 preprocessing plus light augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    zoom_range=0.2,
    horizontal_flip=True
)

# Test images: preprocessing only, no augmentation.
test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    seed=SEED
)

# shuffle=False keeps the batch order aligned with test_gen.classes, which the
# evaluation cell relies on when comparing predictions with true labels.
test_gen = test_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

# Fail fast if the two directories disagree on the label -> index mapping, or
# if the number of discovered classes differs from the `classes` list that
# sizes the model's output layer.
assert train_gen.class_indices == test_gen.class_indices, (
    "Train and test directories do not contain the same class folders: "
    f"{train_gen.class_indices} vs {test_gen.class_indices}"
)
assert train_gen.num_classes == len(classes), (
    f"Found {train_gen.num_classes} class folders but `classes` lists {len(classes)}"
)


Found 19426 images belonging to 7 classes.
Found 5125 images belonging to 7 classes.


In [12]:
# Build the classifier: an ImageNet-pretrained MobileNetV2 backbone (without
# its original classification top) followed by a small dense head.
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3)
)
# Freeze the backbone so only the head layers below are trained.
base_model.trainable = False

# Head: pool the backbone feature maps, then two dropout-regularised dense
# layers ending in one softmax unit per class.
head_layers = [
    GlobalAveragePooling2D(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(len(classes), activation='softmax'),
]
model = Sequential([base_model, *head_layers])

# categorical_crossentropy matches the one-hot labels produced by the
# generators (class_mode='categorical').
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Train the model for EPOCHS epochs; the per-epoch metrics are kept in
# `history` for the plotting cell.
# NOTE(review): validation_data is the test generator, so the "validation"
# numbers and the later test-set evaluation come from the same images. Consider
# carving a separate validation split out of the training data.
history = model.fit(train_gen, epochs=EPOCHS, validation_data=test_gen)

  self._warn_if_super_not_called()


Epoch 1/10
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1140s[0m 2s/step - accuracy: 0.6098 - loss: 1.0293 - val_accuracy: 0.7093 - val_loss: 0.7418
Epoch 2/10
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1129s[0m 2s/step - accuracy: 0.7372 - loss: 0.6863 - val_accuracy: 0.7348 - val_loss: 0.6668
Epoch 3/10
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1117s[0m 2s/step - accuracy: 0.7448 - loss: 0.6449 - val_accuracy: 0.7339 - val_loss: 0.6457
Epoch 4/10
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1113s[0m 2s/step - accuracy: 0.7584 - loss: 0.6254 - val_accuracy: 0.7294 - val_loss: 0.6598
Epoch 5/10
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1154s[0m 2s/step - accuracy: 0.7574 - loss: 0.6201 - val_accuracy: 0.7266 - val_loss: 0.6471
Epoch 6/10
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1102s[0m 2s/step - accuracy: 0.7709 - loss: 0.5910 - val_accuracy: 0.7489 - val_loss: 0.6034
Epoch 7/10
[1m6

In [None]:
# Save the trained model in HDF5 format.
model_path = "/content/ML_Pipeline_Summative/models/maize_model.h5"
# Create the target directory up front so the save does not depend on it
# already existing in the project tree.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)


In [None]:
# Plot training vs. validation accuracy and loss, side by side.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(14, 5))

# One row per panel:
# (axes, train key, validation key, train label, validation label, title, y-label)
panels = [
    (acc_ax, 'accuracy', 'val_accuracy',
     'Train Accuracy', 'Validation Accuracy', "Model Accuracy", "Accuracy"),
    (loss_ax, 'loss', 'val_loss',
     'Train Loss', 'Validation Loss', "Model Loss", "Loss"),
]

for ax, train_key, val_key, train_label, val_label, title, y_label in panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.set_xlabel("Epochs")
    ax.set_ylabel(y_label)
    ax.legend()

fig.tight_layout()
plt.show()


In [None]:
# Evaluate the model on the test generator: per-class report + confusion matrix.
# Start from the first batch so predictions line up with test_gen.classes
# (the generator was created with shuffle=False).
test_gen.reset()
predictions = model.predict(test_gen)
y_pred = np.argmax(predictions, axis=1)
y_true = test_gen.classes
assert len(y_pred) == len(y_true), (
    f"Got {len(y_pred)} predictions for {len(y_true)} test images"
)

# Order the label names by their class index rather than relying on the
# dictionary's iteration order.
label_names = [
    name for name, _ in sorted(test_gen.class_indices.items(), key=lambda item: item[1])
]
# Pass the full label set explicitly so the report and the matrix always have
# one row/column per class, even if a class never occurs in y_true or y_pred.
label_ids = np.arange(len(label_names))

print(classification_report(y_true, y_pred, labels=label_ids, target_names=label_names))

cm = confusion_matrix(y_true, y_pred, labels=label_ids)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=label_names, yticklabels=label_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()


In [None]:
# Predicting on a single image
from tensorflow.keras.preprocessing import image


def predict_single(img_path):
    """Classify one image file, display it with the prediction, and return the result.

    Uses the notebook-level `model`, `label_names`, `IMG_SIZE` and
    `preprocess_input`, so the training and evaluation cells must have run.

    Args:
        img_path: Path to the image file to classify.

    Returns:
        A `(predicted_class, confidence)` tuple: the label name with the
        highest softmax score and that score as a float.
    """
    # Same resizing and preprocessing as the data generators apply.
    img = image.load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
    img_array = image.img_to_array(img)
    img_array = preprocess_input(img_array)
    img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension

    prediction = model.predict(img_array, verbose=0)
    best_idx = int(np.argmax(prediction))
    predicted_class = label_names[best_idx]
    confidence = float(np.max(prediction))

    plt.imshow(img)
    plt.title(f"Prediction: {predicted_class} ({confidence:.2f})")
    plt.axis("off")
    plt.show()

    return predicted_class, confidence


# Example: classify the first image found in the test set's leaf_blight folder.
# The path is built from test_dir so it follows the dataset location configured
# at the top of the notebook.
sample_dir = os.path.join(test_dir, "leaf_blight")
sample_images = []
if os.path.isdir(sample_dir):
    sample_images = sorted(
        name for name in os.listdir(sample_dir)
        if name.lower().endswith((".jpg", ".jpeg", ".png"))
    )

if sample_images:
    predict_single(os.path.join(sample_dir, sample_images[0]))
else:
    print(f"No sample image found in {sample_dir}")


In [5]:
# List the top-level contents of the project directory (shell command).
!ls /content/ML_Pipeline_Summative

activate_env.bat  api  data  notebook  README.md  requirements.txt  src  ui
