In [1]:
# Import necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import keras
from keras.models import Sequential
from keras.layers import Conv2D , Dropout, MaxPooling2D, Flatten, Dense
from PIL.Image import core as image
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

Using TensorFlow backend.


In [2]:
# Audit the raw image folders before building any arrays:
#   * count every image and report any whose pixel array is not (256, 256, 3)
#   * count the images found in each crop/disease folder
data_path = 'PlantVillage-Dataset/raw_image_data/color'
expected_shape = (256, 256, 3)
diff_shape_count = 0
img_count = 0
leaf_type_img_count_dict = {}
for folder in os.listdir(data_path):
    folder_path = f'{data_path}/{folder}'
    # Per-folder tally starts fresh for every crop/disease folder.
    leaf_type_img_count = 0
    # Use `img_name` rather than `image` so the `image` alias imported from
    # PIL at the top of the notebook is not rebound by this loop.
    for img_name in os.listdir(folder_path):
        img_loc = f'{folder_path}/{img_name}'
        # The context manager releases the file handle once the pixels are read.
        with Image.open(img_loc) as img:
            img_shape = np.array(img).shape
        img_count += 1
        leaf_type_img_count += 1
        if img_shape != expected_shape:
            diff_shape_count += 1
            print(img_loc)
            print(img_shape)
    leaf_type_img_count_dict[folder] = leaf_type_img_count
print(f'Wrong Shape Image Count: {diff_shape_count}')
print(f'Image Count: {img_count}')
leaf_type_img_count_dict

Wrong Shape Image Count: 0
Image Count: 54304


{'Strawberry___healthy': 456,
 'Grape___Black_rot': 1180,
 'Potato___Early_blight': 1000,
 'Blueberry___healthy': 1502,
 'Corn_(maize)___healthy': 1162,
 'Tomato___Target_Spot': 1404,
 'Peach___healthy': 360,
 'Potato___Late_blight': 1000,
 'Tomato___Late_blight': 1909,
 'Tomato___Tomato_mosaic_virus': 373,
 'Pepper,_bell___healthy': 1477,
 'Orange___Haunglongbing_(Citrus_greening)': 5507,
 'Tomato___Leaf_Mold': 952,
 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 1076,
 'Cherry_(including_sour)___Powdery_mildew': 1052,
 'Apple___Cedar_apple_rust': 275,
 'Tomato___Bacterial_spot': 2127,
 'Grape___healthy': 423,
 'Tomato___Early_blight': 1000,
 'Corn_(maize)___Common_rust_': 1192,
 'Grape___Esca_(Black_Measles)': 1383,
 'Raspberry___healthy': 371,
 'Tomato___healthy': 1591,
 'Cherry_(including_sour)___healthy': 854,
 'Tomato___Tomato_Yellow_Leaf_Curl_Virus': 5357,
 'Apple___Apple_scab': 630,
 'Corn_(maize)___Northern_Leaf_Blight': 985,
 'Tomato___Spider_mites Two-spotted_spider_mite': 16

In [3]:
# Define function to convert an image file into a pixel array
def image_to_array(image_loc):
    """Read the image file at ``image_loc`` and return its pixels as a NumPy array.

    Parameters
    ----------
    image_loc : str or path-like
        Location of the image file; passed straight to ``Image.open``.

    Returns
    -------
    numpy.ndarray
        The array ``np.array`` builds from the opened image. No resizing or
        mode conversion happens here, so the shape follows the file itself.
    """
    # Open through a context manager so the file handle is closed as soon as
    # the pixel data has been copied into the array.
    with Image.open(image_loc) as img:
        return np.array(img)

In [5]:
# Run through image files and convert each one to a pixel array,
# collecting the arrays and their folder-name labels in parallel lists.
data_list = []
target_list = []
data_path = 'PlantVillage-Dataset/raw_image_data/color'
# os.listdir returns entries in arbitrary order; sorting fixes the sample
# order so a seeded train/test split picks the same images on every run.
for folder in sorted(os.listdir(data_path)):
    folder_path = f'{data_path}/{folder}'
    # `img_name` (not `image`) keeps the `image` alias imported from PIL
    # at the top of the notebook from being rebound.
    for img_name in sorted(os.listdir(folder_path)):
        img_loc = f'{folder_path}/{img_name}'
        data_list.append(image_to_array(img_loc))
        # The folder name is the class label for every image inside it.
        target_list.append(folder)
# Stack the per-image arrays and labels into single NumPy arrays.
data_array = np.array(data_list)
target_array = np.array(target_list)

In [5]:
# Train Test Split: hold out 25% of the samples, with a fixed seed for the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    data_array,
    target_array,
    test_size=0.25,
    random_state=70,
)

In [11]:
# Save Train Test Splits with numpy (one .npy file per split)
numpy_dir = '../Numpy_Objects'
# np.save does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs(numpy_dir, exist_ok=True)
train_test_dict = {'X_train' : X_train, 'X_test' : X_test, 'y_train' : y_train, 'y_test' : y_test}
for key, val in train_test_dict.items():
    np.save(f'{numpy_dir}/{key}.npy', val)

In [11]:
# Load Train Test Splits with numpy
# Each split is read back from its own .npy file and kept both in
# train_test_dict and under its usual variable name.
split_names = ('X_train', 'X_test', 'y_train', 'y_test')
train_test_dict = {}
for key in split_names:
    train_test_dict[key] = np.load(f'../Numpy_Objects/{key}.npy')
X_train, X_test, y_train, y_test = [train_test_dict[name] for name in split_names]

In [12]:
# Training settings used by the cells below
n_classes = 38    # number of target classes
epochs = 3        # number of training epochs
batch_size = 500  # samples per batch

In [13]:
# Label Encode Target Classes: map each class-name string to an integer code
# NOTE(review): the class names come from leaf_type_img_count_dict, so the
# image audit cell must have been run in this kernel before this one.
target_class_list = list(leaf_type_img_count_dict)
le = LabelEncoder().fit(target_class_list)
# The encoder's known classes, as a plain list.
target_class_int_list = list(le.classes_)
# NOTE(review): y_train / y_test are overwritten with their encoded form,
# so this cell is not meant to be run twice on the same labels.
y_train, y_test = le.transform(y_train), le.transform(y_test)

In [14]:
# Convert target class vectors to target class binary matrices
# (both splits go through the same to_categorical call with n_classes columns)
y_train, y_test = [
    keras.utils.to_categorical(labels, num_classes=n_classes)
    for labels in (y_train, y_test)
]

In [15]:
# Construct convolutional neural network architecture
# Two 3x3 convolutions -> 2x2 max-pool -> dropout -> flatten -> dense -> dropout -> softmax
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(256, 256, 3)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One softmax output per target class.
    Dense(n_classes, activation='softmax'),
])

In [None]:
# Compile the model, train it, then report loss and accuracy on the test split
model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

# The test split is also passed as validation data during training.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

# evaluate returns the loss followed by the compiled metric (accuracy).
score = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Train on 40728 samples, validate on 13576 samples
Epoch 1/3


### MNIST Examples

In [49]:
from keras.datasets import mnist

In [50]:
# Load the MNIST train/test pairs
# NOTE(review): this rebinds y_train / y_test, which the plant-disease cells
# above also use.
mnist_train, mnist_test = mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [52]:
# Display the shape of the MNIST training image array
x_train.shape

(60000, 28, 28)

In [53]:
# Display the MNIST training labels
y_train

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [None]:
# Settings for the MNIST example
# NOTE(review): this rebinds batch_size and epochs, which the plant-disease
# cells above also use.
batch_size = 100
# MNIST labels are the ten digits 0-9; 38 is the class count of the
# plant-disease dataset and looks carried over from the cells above.
num_classes = 10
epochs = 5