In [1]:
# Import necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import keras
from keras.models import Sequential
from keras.layers import Conv2D , Dropout, MaxPooling2D, Flatten, Dense
from PIL.Image import core as image
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from google.cloud import storage

Using TensorFlow backend.


In [2]:
# Check to see that GPU is being used
# import tensorflow as tf
# tf.config.list_physical_devices('GPU')

[]

In [2]:
# Scan every class folder, decode each image, and check that it yields a
# (256, 256, 3) array. While scanning, count the images per crop/disease folder.
# Results left in the namespace:
#   diff_shape_count         - number of images whose array shape is unexpected
#   img_count                - total number of images visited
#   leaf_type_img_count_dict - folder name -> number of images in that folder
data_path = 'PlantVillage-Dataset/raw_image_data/color'
expected_shape = (256, 256, 3)
diff_shape_count = 0
img_count = 0
leaf_type_img_count = 0
leaf_type_img_count_dict = {}
# os.listdir returns entries in arbitrary order; sort so the printed report
# and the dictionary order are the same on every run.
for folder in sorted(os.listdir(data_path)):
    folder_path = os.path.join(data_path, folder)
    # Skip stray files that are not class folders: calling os.listdir on
    # them would raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        continue
    leaf_type_img_count = 0
    for img_name in sorted(os.listdir(folder_path)):
        img_loc = os.path.join(folder_path, img_name)
        # The context manager guarantees the file handle is released even if
        # decoding the image fails.
        with Image.open(img_loc) as img:
            img_shape = np.array(img).shape
        img_count += 1
        leaf_type_img_count += 1
        if img_shape != expected_shape:
            # Report every offending file so it can be inspected by hand.
            diff_shape_count += 1
            print(img_loc)
            print(img_shape)
    leaf_type_img_count_dict[folder] = leaf_type_img_count
print('Wrong Shape Image Count: %d' % (diff_shape_count))
print('Image Count: %d' % (img_count))
leaf_type_img_count_dict

Wrong Shape Image Count: 0
Image Count: 54304


{'Apple___Apple_scab': 630,
 'Apple___Black_rot': 621,
 'Apple___Cedar_apple_rust': 275,
 'Apple___healthy': 1645,
 'Blueberry___healthy': 1502,
 'Cherry_(including_sour)___Powdery_mildew': 1052,
 'Cherry_(including_sour)___healthy': 854,
 'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot': 513,
 'Corn_(maize)___Common_rust_': 1192,
 'Corn_(maize)___Northern_Leaf_Blight': 985,
 'Corn_(maize)___healthy': 1162,
 'Grape___Black_rot': 1180,
 'Grape___Esca_(Black_Measles)': 1383,
 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 1076,
 'Grape___healthy': 423,
 'Orange___Haunglongbing_(Citrus_greening)': 5507,
 'Peach___Bacterial_spot': 2297,
 'Peach___healthy': 360,
 'Pepper,_bell___Bacterial_spot': 997,
 'Pepper,_bell___healthy': 1477,
 'Potato___Early_blight': 1000,
 'Potato___Late_blight': 1000,
 'Potato___healthy': 152,
 'Raspberry___healthy': 371,
 'Soybean___healthy': 5090,
 'Squash___Powdery_mildew': 1835,
 'Strawberry___Leaf_scorch': 1109,
 'Strawberry___healthy': 456,
 'Tomato___Bac

In [3]:
# Define function to convert an image file into a pixel array
def image_to_array(image_loc):
    """Read the image stored at ``image_loc`` and return its pixels as a NumPy array.

    Parameters
    ----------
    image_loc : str
        Path of the image file to open.

    Returns
    -------
    numpy.ndarray
        The decoded pixel data. The shape is whatever the file contains; this
        function does not resize or convert the image.
    """
    # The context manager releases the file handle as soon as the pixels have
    # been copied into the array, even if decoding raises.
    with Image.open(image_loc) as img:
        return np.array(img)

In [4]:
# Define function to rescale the pixel values of an image from 0-255 to 0-1
def pixel_normalization(img_array):
    """Return a float32 copy of ``img_array`` with every value divided by 255.

    Parameters
    ----------
    img_array : numpy.ndarray
        Pixel array; values are presumably in the 0-255 range (not checked).

    Returns
    -------
    numpy.ndarray
        New float32 array of the same shape. The input is left unmodified.
    """
    as_float = img_array.astype('float32')
    return as_float / 255.0

In [5]:
# Define function to shift the pixel values of one image so their mean is zero
def pixel_centering(norm_img_arr):
    """Subtract the array's overall mean from every element.

    The mean is taken over all elements of ``norm_img_arr`` at once (a single
    scalar), not separately per axis.

    Parameters
    ----------
    norm_img_arr : numpy.ndarray
        Pixel array to center.

    Returns
    -------
    numpy.ndarray
        New array of the same shape; the input is left unmodified.
    """
    return norm_img_arr - norm_img_arr.mean()

In [6]:
# Run through image files and convert each one to a pixel array.
# Normalization and centering are NOT done here; they happen in later cells.
#   data_list   - one pixel array per image
#   target_list - the class label of each image (the name of its folder),
#                 in the same order as data_list
data_list = []
target_list = []
data_path = 'PlantVillage-Dataset/raw_image_data/color'
# os.listdir returns entries in arbitrary order. Sorting keeps the sample
# order fixed, so the seeded train/test split is the same on every machine.
for folder in sorted(os.listdir(data_path)):
    folder_path = os.path.join(data_path, folder)
    # Skip stray files that are not class folders: calling os.listdir on
    # them would raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        continue
    for img_name in sorted(os.listdir(folder_path)):
        img_loc = os.path.join(folder_path, img_name)
        data_list.append(image_to_array(img_loc))
        target_list.append(folder)

In [7]:
# Turn the Python lists of labels and per-image pixel arrays into NumPy arrays
target_array = np.asarray(target_list)
data_array = np.asarray(data_list)

In [8]:
# Rescale every image to the 0-1 range, one image at a time
norm_list = [pixel_normalization(raw_img) for raw_img in data_array]

In [9]:
# Center every normalized image on its own mean pixel value.
# Note: this only subtracts the mean; no division by a standard deviation is applied.
standardized_list = [pixel_centering(norm_img) for norm_img in norm_list]

In [10]:
# Combine the list of centered images into a single NumPy array
standardized_data_array = np.asarray(standardized_list)

In [11]:
# Train Test Split: hold out 25% of the images for testing.
# The per-class image counts differ widely, so stratify on the labels to give
# the train and test sets the same class proportions.
# random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(standardized_data_array, target_array,
                                                    test_size = .25, random_state = 14,
                                                    stratify = target_array)

In [15]:
# Save Train Test Splits with numpy
# train_test_dict = {'X_train' : X_train, 'X_test' : X_test, 'y_train' : y_train, 'y_test' : y_test}
# for key, val in train_test_dict.items():
#     np.save('%s.npy' % (key), val)

In [21]:
# Access numpy objects from bucket
# client = storage.Client()
# bucket_name = "capstone-image-classification-bucket"
# bucket = client.get_bucket(bucket_name)
# blobs = list(bucket.list_blobs())
# for blob in blobs:
#     blob.download_to_filename(blob.name)

In [30]:
# Load Train Test Splits with numpy
# X_train = None
# X_test = None
# y_train = None
# y_test = None
# train_test_dict = {'X_train' : X_train, 'X_test' : X_test, 'y_train' : y_train, 'y_test' : y_test}
# for key in train_test_dict.keys():
#     train_test_dict[key] = np.load('%s.npy' % (key))
# X_train = train_test_dict['X_train']
# X_test = train_test_dict['X_test']
# y_train = train_test_dict['y_train']
# y_test = train_test_dict['y_test']

In [13]:
# Training hyperparameters and size of the output layer
n_classes = 38     # number of target classes; presumably one per class folder - confirm
epochs = 10        # number of passes over the training data
batch_size = 100   # number of samples per gradient update

In [14]:
# Map the class-name strings in y_train / y_test to integer ids.
# The encoder is fitted on the folder names collected by the image-count cell.
target_class_list = list(leaf_type_img_count_dict)
le = LabelEncoder().fit(target_class_list)
# Despite its name, this holds the entries of le.classes_ (the fitted class labels).
target_class_int_list = [class_label for class_label in le.classes_]
# Both transforms are evaluated before either name is rebound.
y_train, y_test = le.transform(y_train), le.transform(y_test)

In [15]:
# Expand the integer class vectors into binary class matrices with n_classes columns
y_train, y_test = (
    keras.utils.to_categorical(class_ids, num_classes = n_classes)
    for class_ids in (y_train, y_test)
)

In [16]:
# Construct convolutional neural network architecture.
# Layers are applied in the order listed: two 3x3 convolutions, 2x2 max pooling,
# dropout, flatten, a 128-unit dense layer, dropout, and a softmax output
# with one unit per class.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(256,256,3)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
])

In [17]:
# Compile the model, train it, then report loss and accuracy on the test split
optimizer = keras.optimizers.Adadelta()
model.compile(optimizer=optimizer,
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy', 'mse'])

# NOTE: the test split is also passed as validation data during training.
fit_options = dict(batch_size = batch_size,
                   epochs = epochs,
                   verbose = 1,
                   validation_data = (X_test, y_test))
model.fit(X_train, y_train, **fit_options)

# score[0] is printed as the loss and score[1] as the accuracy.
score = model.evaluate(X_test, y_test, verbose = 0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Train on 40728 samples, validate on 13576 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.5941627931264687
Test accuracy: 0.8327931761741638
