<a href="https://colab.research.google.com/github/Braingix/puter/blob/main/grain-pest-detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import os

data_url = "https://prod-dcd-datasets-cache-zipfiles.s3.eu-west-1.amazonaws.com/bwh3zbpkpv-1.zip"

def download_file(url, filename):
    response = requests.get(url, stream=True)
    with open(filename, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
    print(f"Downloaded {filename}, size: {os.path.getsize(filename) / (1024**3):.2f} GB")

download_file(data_url, "drive/MyDrive/CCMT_data.zip")

Downloaded drive/MyDrive/CCMT_data.zip, size: 7.86 GB


In [2]:
import os
import zipfile

def unzip_file(zip_path, extract_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print(f"Unzipped {zip_path} to {extract_path}")

unzip_file("datasets.zip", ".")

Unzipped datasets.zip to .


In [None]:
import os
import shutil

# Define base paths
raw_base = "drive/MyDrive/CCMT_dataset/Dataset for Crop Pest and Disease Detection/Raw Data/CCMT Dataset/Maize"
aug_base = "drive/MyDrive/CCMT_dataset/Dataset for Crop Pest and Disease Detection/CCMT Dataset-Augmented/Maize"
raw_dest = "drive/MyDrive/CCMT_dataset/raw_maize_subset"
aug_dest = "drive/MyDrive/CCMT_dataset/aug_maize_subset"

# Create directories
for dest in [raw_dest, aug_dest]:
    os.makedirs(os.path.join(dest, "infested"), exist_ok=True)
    os.makedirs(os.path.join(dest, "not_infested"), exist_ok=True)

# Filter raw data
for folder in ["fall armyworm", "grasshopper"]:
    src = os.path.join(raw_base, folder)
    if os.path.exists(src):
        for img in os.listdir(src):
            shutil.copy(os.path.join(src, img), os.path.join(raw_dest, "infested"))
src = os.path.join(raw_base, "healthy")
if os.path.exists(src):
    for img in os.listdir(src):
        shutil.copy(os.path.join(src, img), os.path.join(raw_dest, "not_infested"))

# Filter augmented data (train and test sets)
for split in ["train_set", "test_set"]:
    aug_maize_base = os.path.join(aug_base, split)
    if os.path.exists(aug_maize_base):
        for folder in ["fall armyworm", "grasshopper"]:
            src = os.path.join(aug_maize_base, folder)
            if os.path.exists(src):
                for img in os.listdir(src):
                    shutil.copy(os.path.join(src, img), os.path.join(aug_dest, "infested"))
        src = os.path.join(aug_maize_base, "healthy")
        if os.path.exists(src):
            for img in os.listdir(src):
                shutil.copy(os.path.join(src, img), os.path.join(aug_dest, "not_infested"))

# Count and size
for dest, name in [(raw_dest, "Raw"), (aug_dest, "Augmented")]:
    infested_count = len(os.listdir(os.path.join(dest, "infested")))
    not_infested_count = len(os.listdir(os.path.join(dest, "not_infested")))
    total_size = sum(os.path.getsize(os.path.join(root, f)) for root, _, files in os.walk(dest) for f in files)
    print(f"{name} - Infested: {infested_count}, Not Infested: {not_infested_count}, Size: {total_size / (1024**3):.2f} GB")


Raw - Infested: 285, Not Infested: 208, Size: 0.02 GB
Augmented - Infested: 1424, Not Infested: 1041, Size: 0.15 GB


In [15]:
import os
import shutil

# Define base path for test set
aug_base = "drive/MyDrive/CCMT_dataset/Dataset for Crop Pest and Disease Detection/CCMT Dataset-Augmented/Maize/test_set"
test_dest = "test_maize_subset"
os.makedirs(os.path.join(test_dest, "infested"), exist_ok=True)
os.makedirs(os.path.join(test_dest, "not_infested"), exist_ok=True)

# Filter test set
for folder in ["fall armyworm", "grasshopper"]:
    src = os.path.join(aug_base, folder)
    if os.path.exists(src):
        for img in os.listdir(src):
            shutil.copy(os.path.join(src, img), os.path.join(test_dest, "infested"))
src = os.path.join(aug_base, "healthy")
if os.path.exists(src):
    for img in os.listdir(src):
        shutil.copy(os.path.join(src, img), os.path.join(test_dest, "not_infested"))

# Count and size
infested_count = len(os.listdir(os.path.join(test_dest, "infested")))
not_infested_count = len(os.listdir(os.path.join(test_dest, "not_infested")))
total_size = sum(os.path.getsize(os.path.join(root, f)) for root, _, files in os.walk(test_dest) for f in files)
print(f"Test Set - Infested: {infested_count}, Not Infested: {not_infested_count}, Size: {total_size / (1024**3):.2f} GB")

Test Set - Infested: 0, Not Infested: 0, Size: 0.00 GB


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

aug_data_dir = "datasets/augmented/train_set"
aug_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.3)

aug_train_generator = aug_datagen.flow_from_directory(
    aug_data_dir, target_size=(224, 224), batch_size=32, class_mode='binary', subset='training'
)
aug_val_generator = aug_datagen.flow_from_directory(
    aug_data_dir, target_size=(224, 224), batch_size=32, class_mode='binary', subset='validation'
)

Found 1726 images belonging to 2 classes.
Found 739 images belonging to 2 classes.


In [4]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define paths
train_dir = 'datasets/augmented/train_set'

# Data generator for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2  # 20% of train set for validation
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='training'
)
val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)

# Build model
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Compile and train
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(train_generator, validation_data=val_generator, epochs=10)

# Save model
model.save('maize_pest_model_augmented.keras')
model.save('maize_pest_model_augmented.h5')
print("Model saved")

Found 3636 images belonging to 2 classes.
Found 909 images belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/10
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m217s[0m 2s/step - accuracy: 0.9245 - loss: 0.1848 - val_accuracy: 0.9879 - val_loss: 0.0319
Epoch 2/10
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m266s[0m 2s/step - accuracy: 0.9930 - loss: 0.0233 - val_accuracy: 0.9923 - val_loss: 0.0273
Epoch 3/10
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 2s/step - accuracy: 0.9971 - loss: 0.0106 - val_accuracy: 0.9912 - val_loss: 0.0270
Epoch 4/10
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m215s[0m 2s/step - accuracy: 0.9995 - loss: 0.0059 - val_accuracy: 0.9824 - val_loss: 0.0517
Epoch 5/10
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 2s/step - accuracy: 0.9993 - loss: 0.0037 - val_accuracy: 0.9923 - val_loss: 0.0293
Epoch 6/10
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 2s/step - accuracy: 0.9963 - loss: 0.0080 - val_accuracy: 0.9879 - val_loss: 0.0334
Epoch 7/10
[1m114/114



Model saved


Raw dataset

In [5]:
# Define test path
test_dir = 'datasets/augmented/test_set'

# Data generator for testing (no augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Load model
model = tf.keras.models.load_model('maize_pest_model_augmented.h5')

# Evaluate
test_eval = model.evaluate(test_generator)
print(f"Test Loss: {test_eval[0]}, Test Accuracy: {test_eval[1]}")

# Detailed metrics
predictions = model.predict(test_generator)
predicted_classes = (predictions > 0.5).astype(int)
true_classes = test_generator.classes

from sklearn.metrics import classification_report, confusion_matrix
print("Classification Report:")
print(classification_report(true_classes, predicted_classes, target_names=['not_infested', 'infested']))
print("Confusion Matrix:")
print(confusion_matrix(true_classes, predicted_classes))

Found 906 images belonging to 2 classes.




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 2s/step - accuracy: 0.9994 - loss: 0.0021
Test Loss: 0.003220242215320468, Test Accuracy: 0.9988962411880493
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 1s/step
Classification Report:
              precision    recall  f1-score   support

not_infested       1.00      1.00      1.00       695
    infested       1.00      1.00      1.00       211

    accuracy                           1.00       906
   macro avg       1.00      1.00      1.00       906
weighted avg       1.00      1.00      1.00       906

Confusion Matrix:
[[694   1]
 [  0 211]]


In [None]:
base_model_raw = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model_raw.trainable = False

x = base_model_raw.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)
raw_model = Model(inputs=base_model_raw.input, outputs=predictions)

raw_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
raw_history = raw_model.fit(raw_train_generator, validation_data=raw_val_generator, epochs=10)
raw_eval = raw_model.evaluate(raw_val_generator)
print(f"Raw Model - Loss: {raw_eval[0]}, Accuracy: {raw_eval[1]}")
raw_model.save('maize_pest_model_raw.h5')

UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7bc2c09bbd80>

In [14]:
from google.colab import drive
drive.mount('/drive')

Mounted at /drive


In [None]:
import matplotlib.pyplot as plt

# Plot accuracy
plt.plot(aug_history.history['accuracy'], label='Augmented Train')
plt.plot(aug_history.history['val_accuracy'], label='Augmented Val')
plt.plot(raw_history.history['accuracy'], label='Raw Train')
plt.plot(raw_history.history['val_accuracy'], label='Raw Val')
plt.title('Model Accuracy Comparison')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Plot loss
plt.plot(aug_history.history['loss'], label='Augmented Train')
plt.plot(aug_history.history['val_loss'], label='Augmented Val')
plt.plot(raw_history.history['loss'], label='Raw Train')
plt.plot(raw_history.history['val_loss'], label='Raw Val')
plt.title('Model Loss Comparison')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()