In [1]:
# Load packages we need
import sys
import os
import time

import numpy as np
import pandas as pd
import sklearn

import tensorflow as tf

from tensorflow.keras.datasets import fashion_mnist, cifar10, imdb

from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import keras


# import layers and callbacks we may use (may not be a complete list)
from tensorflow.keras.layers import Input, Flatten, Dense, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator


from tensorflow.python.keras.utils import layer_utils


from matplotlib import pyplot as plt
plt.rcParams.update({'font.size': 14})

# Load the TensorBoard notebook extension
#%load_ext tensorboard


# Let's check our software versions
print('------------')
print('### Python version: ' + __import__('sys').version)
print('### NumPy version: ' + np.__version__)
print('### Scikit-learn version: ' + sklearn.__version__)
print('### Tensorflow version: ' + tf.__version__)
print('------------')

def var_exists(var_name):
    return (var_name in globals() or var_name in locals())
from PIL import Image
from keras.preprocessing.image import load_img, img_to_array


------------
### Python version: 3.11.5 (main, Sep 11 2023, 08:31:25) [Clang 14.0.6 ]
### NumPy version: 1.26.4
### Scikit-learn version: 1.4.2
### Tensorflow version: 2.16.1
------------


In [17]:
# Define the directory containing the data
data_dir = 'archive/Corn (Maize)'

# Define the categories (folder names)
categories = ['Cercospora Leaf Spot', 'Common Rust ', 'Healthy', 'Northern Leaf Blight']

# Initialize lists to store filenames and labels
filenames = []
labels = []

# Iterate over categories to gather filenames and labels
for category_id, category in enumerate(categories):
    category_dir = os.path.join(data_dir, 'Train', category)
    print(category_dir)
    for filename in os.listdir(category_dir):
        if filename.endswith('.jpg') or filename.endswith('.png'):  # Assuming images are jpg or png format
            filenames.append(os.path.join(category_dir, filename))
            labels.append(category_id)

# Create a DataFrame from filenames and labels
df = pd.DataFrame({'filename': filenames, 'label': labels})

# Display the first few rows of the DataFrame
print(df.head())

# Convert integer labels to one-hot encoded vectors
num_classes = len(categories)
labels = to_categorical(labels, num_classes=num_classes)

# Split the data into training and validation sets
train_filenames, aux_file, train_labels, aux_labels = train_test_split(
    filenames, labels, test_size=0.2, random_state=42)

val_filenames, test_filenames, val_labels, test_labels = train_test_split(
    aux_file, aux_labels, test_size=0.5, random_state=42)



# Define a function to load and preprocess images
def load_and_preprocess_image(image_path):
    # Load image from file
    img = load_img(image_path, target_size=(224, 224))
    # Convert image to numpy array
    img_array = img_to_array(img)
    # Preprocess the image (e.g., normalization)
    img_array /= 255.0  # Normalize pixel values to [0, 1]
    return img_array

# Load and preprocess training images
X_train = np.array([load_and_preprocess_image(filename) for filename in train_filenames])
y_train = train_labels

# Load and preprocess validation images
X_val = np.array([load_and_preprocess_image(filename) for filename in val_filenames])
y_val = val_labels

X_test = np.array([load_and_preprocess_image(filename) for filename in test_filenames])
y_test = test_labels


# Shuffle the training data
train_indices = np.random.permutation(len(X_train))
X_train = X_train[train_indices]
y_train = y_train[train_indices]

# Shuffle the val data
val_indices = np.random.permutation(len(X_val))
X_val = X_val[val_indices]
y_val = y_val[val_indices]
                  
                  
# Check the shapes of the training and validation data
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of X_val:", X_val.shape)
print("Shape of y_val:", y_val.shape)
print("Shape of X_test", X_test.shape)
print("Shape of y_test", y_test.shape)

archive/Corn (Maize)/Train/Cercospora Leaf Spot
archive/Corn (Maize)/Train/Common Rust 
archive/Corn (Maize)/Train/Healthy
archive/Corn (Maize)/Train/Northern Leaf Blight
                                            filename  label
0  archive/Corn (Maize)/Train/Cercospora Leaf Spo...      0
1  archive/Corn (Maize)/Train/Cercospora Leaf Spo...      0
2  archive/Corn (Maize)/Train/Cercospora Leaf Spo...      0
3  archive/Corn (Maize)/Train/Cercospora Leaf Spo...      0
4  archive/Corn (Maize)/Train/Cercospora Leaf Spo...      0
Shape of X_train: (1823, 224, 224, 3)
Shape of y_train: (1823, 4)
Shape of X_val: (228, 224, 224, 3)
Shape of y_val: (228, 4)
Shape of X_test (228, 224, 224, 3)
Shape of y_test (228, 4)


In [18]:
def create_compile_cnn(input_shape=[224, 224, 3], num_outputs=4, verbose=False):
    model = Sequential(name='CIFAR-10--CNN')
    
    model.add(Conv2D(16, kernel_size=(3, 3), input_shape=input_shape, activation='relu', 
                     padding='same', strides=(1, 1), kernel_initializer='lecun_uniform', name='conv1'))
    model.add(Conv2D(16, kernel_size=(3, 3), activation='relu', 
                     padding='same', strides=(1, 1), kernel_initializer='lecun_uniform', name='conv2'))
    model.add(MaxPooling2D((2, 2), name='MaxPool1'))
    
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', 
                     padding='same', strides=(1, 1), kernel_initializer='lecun_uniform', name='conv3'))
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', 
                     padding='same', strides=(1, 1), kernel_initializer='lecun_uniform', name='conv4'))
    model.add(MaxPooling2D((2, 2), name='MaxPool2'))
    
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', 
                     padding='same', strides=(1, 1), kernel_initializer='lecun_uniform', name='conv5'))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', 
                     padding='same', strides=(1, 1), kernel_initializer='lecun_uniform', name='conv6'))
    model.add(MaxPooling2D((2, 2), name='MaxPool3'))
    
    model.add(Flatten(name='flatten'))
    
    model.add(Dense(64, activation='relu', kernel_initializer='lecun_uniform', name='dense1'))
    model.add(Dropout(0.25, name='drop1'))
    
    model.add(Dense(32, activation='relu', kernel_initializer='lecun_uniform', name='dense2'))
    model.add(Dropout(0.25, name='drop2'))
    
    model.add(Dense(num_outputs, activation='softmax', name='output'))
    
    opt = keras.optimizers.Adam(learning_rate=0.001)
    
    if verbose:
        model.summary()
    
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    return model

In [19]:
model = create_compile_cnn(verbose=True)

  super().__init__(


In [20]:
early_stop_cb = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
max_epochs = 15
batch_size = 64
    
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=max_epochs, batch_size=batch_size, 
                         shuffle=True, callbacks=[early_stop_cb])

Epoch 1/15
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 2s/step - accuracy: 0.6335 - loss: 0.8265 - val_accuracy: 0.8289 - val_loss: 0.4137
Epoch 2/15
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 1s/step - accuracy: 0.8764 - loss: 0.2861 - val_accuracy: 0.8947 - val_loss: 0.2865
Epoch 3/15
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.9052 - loss: 0.2087 - val_accuracy: 0.8991 - val_loss: 0.2169
Epoch 4/15
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.9289 - loss: 0.1861 - val_accuracy: 0.9518 - val_loss: 0.1442
Epoch 5/15
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 1s/step - accuracy: 0.9447 - loss: 0.1467 - val_accuracy: 0.9649 - val_loss: 0.1228
Epoch 6/15
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 1s/step - accuracy: 0.9524 - loss: 0.1327 - val_accuracy: 0.9737 - val_loss: 0.1262
Epoch 7/15
[1m29/29[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1510396d0>

In [21]:
# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 152ms/step - accuracy: 0.9719 - loss: 0.0811
Test accuracy: 0.9780701994895935


In [22]:
# Assuming you have already trained your model and have predictions
from sklearn.metrics import confusion_matrix

predictions = model.predict(X_test)
y_pred = np.argmax(predictions, axis=1)
y_true = np.argmax(y_test, axis=1)

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)

print("Confusion Matrix:")
print(conf_matrix)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 165ms/step
Confusion Matrix:
[[ 11   0   5]
 [  0 197   0]
 [  0   0  15]]
