## Imports

In [1]:
import numpy as np
import tensorflow as tf
from os import listdir
import cv2 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.preprocessing import image
from keras.preprocessing.image import img_to_array, ImageDataGenerator
from keras.backend.tensorflow_backend import set_session
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.python.client import device_lib
import warnings

# Silence every Python warning for the rest of the session; note that this also hides
# deprecation notices raised through the `warnings` module.
warnings.filterwarnings('ignore')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Checking if TF is running on my GPU or CPU.
# `device_lib` is already imported in the imports cell at the top of the notebook,
# so it is not imported a second time here.
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2530937456958460448
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 6696213545
locality {
  bus_id: 1
  links {
  }
}
incarnation: 9961959412963312730
physical_device_desc: "device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1"
]


In [3]:
# This code solution from https://github.com/keras-team/keras/issues/4161 -- it will allow my GPU to dynamically
# grow its memory and not crash when fitting my models.
# `tf` and `set_session` are already imported in the imports cell at the top of the notebook,
# so they are not imported a second time here.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
                                    # (nothing gets printed in Jupyter, only if you run it standalone)
sess = tf.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default session for Keras

## Functions

In [4]:
# This code inspired by multiple different CV2 uses on Stackoverflow and Kaggle
image_size = (128, 128)  # Specify what size you want your images to be here (passed to cv2.resize)


def arrayify(image_directory):
    """Read one image file and return it as a resized array.

    Parameters
    ----------
    image_directory : str
        Path to a single image file (despite the name, this is a file path,
        not a folder).

    Returns
    -------
    numpy.ndarray or None
        * the image resized to ``image_size`` and passed through
          ``img_to_array`` when ``cv2.imread`` could read the file;
        * an empty array when ``cv2.imread`` returned ``None``;
        * ``None`` when any exception was raised while reading/resizing
          (the error itself is printed).
    """
    try:
        image = cv2.imread(image_directory)  # returns None instead of raising when unreadable
        if image is None:
            # Empty array rather than None/NaN so the caller can tell
            # "unreadable file" apart from "an exception happened".
            return np.array([])
        resized = cv2.resize(image, image_size)
        return img_to_array(resized)
    except Exception as error:
        # Report the actual error instance; the previous message interpolated
        # the `Exception` class itself, which hid what went wrong.
        print(f"Nope!! {error!r}")
        return None

In [5]:
# This code inspired by multiple different instances and examples of image preprocessing on
# Stackoverflow, Kaggle, and from General Assembly lesson examples
def process_images(image_directory):
    """Load every ``.jpg`` found one level below ``image_directory``.

    Each sub-folder of ``image_directory`` is treated as one class: every file
    in it whose path ends with ``.jpg`` (case-insensitive) is passed to
    ``arrayify`` and labelled with the sub-folder's name.

    Parameters
    ----------
    image_directory : str
        Root folder that contains one sub-folder per class.

    Returns
    -------
    tuple[list, list] or None
        ``(image_list, label_list)`` with matching order and length, or
        ``None`` if an unexpected error interrupted the load (the error is
        printed).

    Notes
    -----
    * Entries of ``image_directory`` that are not folders are skipped instead
      of aborting the whole load.
    * Images for which ``arrayify`` returns ``None`` or an empty array are
      skipped (with a message) so that every returned array is a real image
      and arrays/labels stay aligned.
    """
    image_list = []
    label_list = []
    try:
        print("Loading Images ༼ つ ◕_◕ ༽つ")
        # Build a filtered copy rather than removing entries from the list
        # while iterating over it.
        class_folders = [name for name in listdir(image_directory) if name != ".DS_Store"]
        for image_folder in class_folders:
            try:
                file_names = listdir(f"{image_directory}/{image_folder}/")
            except NotADirectoryError:
                # A stray file sitting next to the class folders: not a class.
                continue
            print(f"Processing {image_folder}...")
            for file_name in file_names:
                specific_image = f"{image_directory}/{image_folder}/{file_name}"
                # The extension check also filters out ".DS_Store" and other non-images.
                if not specific_image.lower().endswith(".jpg"):
                    continue
                resized_array = arrayify(specific_image)
                if resized_array is None or resized_array.size == 0:
                    # Unreadable/corrupt file: keeping it would break stacking
                    # the images into a single array later on.
                    print(f"Skipping unreadable image {specific_image}")
                    continue
                image_list.append(resized_array)
                label_list.append(image_folder)  # the folder name is the label
        print("Processing Complete")
        return image_list, label_list
    except Exception as error:
        # `except Exception` (not a bare except) so Ctrl-C / kernel interrupt
        # still stops a long load; print the actual error instance.
        print(f"Nope!! {error!r}")
        return None

## Processing Image Directories

In [6]:
# Load the PlantVillage images together with their folder-name labels.
plant_village_dir = "./Images/PlantVillage"
plant_village_arrays, plant_village_labels = process_images(plant_village_dir)

Loading Images ༼ つ ◕_◕ ༽つ
Processing Pepper__bell___Bacterial_spot...
Processing Pepper__bell___healthy...
Processing Potato___Early_blight...
Processing Potato___healthy...
Processing Potato___Late_blight...
Processing Tomato_Bacterial_spot...
Processing Tomato_Early_blight...
Processing Tomato_healthy...
Processing Tomato_Late_blight...
Processing Tomato_Leaf_Mold...
Processing Tomato_Septoria_leaf_spot...
Processing Tomato_Spider_mites_Two_spotted_spider_mite...
Processing Tomato__Target_Spot...
Processing Tomato__Tomato_mosaic_virus...
Processing Tomato__Tomato_YellowLeaf__Curl_Virus...
Processing Complete


In [7]:
# Report how many PlantVillage images were loaded and which labels they carry.
plant_village_count = len(plant_village_arrays)
plant_village_classes = set(plant_village_labels)
print(f"Processed {plant_village_count} Images from the Plantvillage Dataset, Size {image_size}")
print(f"Labels for the Plantvillage Dataset are as follows: \n {plant_village_classes}")

Processed 20636 Images from the Plantvillage Dataset, Size (128, 128)
Labels for the Plantvillage Dataset are as follows: 
 {'Tomato_Leaf_Mold', 'Potato___Late_blight', 'Tomato_Late_blight', 'Pepper__bell___Bacterial_spot', 'Tomato__Target_Spot', 'Tomato_Spider_mites_Two_spotted_spider_mite', 'Tomato__Tomato_mosaic_virus', 'Potato___Early_blight', 'Pepper__bell___healthy', 'Tomato_Early_blight', 'Tomato_healthy', 'Tomato_Septoria_leaf_spot', 'Potato___healthy', 'Tomato_Bacterial_spot', 'Tomato__Tomato_YellowLeaf__Curl_Virus'}


In [8]:
# Load the Rice images together with their folder-name labels.
rice_dir = './Images/Rice'
rice_arrays, rice_labels = process_images(rice_dir)

Loading Images ༼ つ ◕_◕ ༽つ
Processing rice_Brown_Spot...
Processing rice_Healthy...
Processing rice_Hispa...
Processing rice_Leaf_Blast...
Processing Complete


In [9]:
# Report how many Rice images were loaded and which labels they carry.
rice_count = len(rice_arrays)
rice_classes = set(rice_labels)
print(f"Processed {rice_count} Images from the Rice Dataset, Size {image_size}")
print(f"Labels for the Rice Dataset are as follows: \n {rice_classes}")

Processed 3355 Images from the Rice Dataset, Size (128, 128)
Labels for the Rice Dataset are as follows: 
 {'rice_Hispa', 'rice_Brown_Spot', 'rice_Leaf_Blast', 'rice_Healthy'}


In [10]:
# Merge both datasets into single lists: PlantVillage entries first, Rice entries second,
# so arrays and labels stay aligned index-for-index.
image_arrays = [*plant_village_arrays, *rice_arrays]
image_labels = [*plant_village_labels, *rice_labels]

In [11]:
# Turn the string labels into numeric indicator columns with LabelBinarizer:
# fit the encoder on the labels first, then transform those same labels.
lb = LabelBinarizer().fit(image_labels)
lb_image_labels = lb.transform(image_labels)


## ImageDataGenerator CNN with combined image datasets

In [12]:
# Set up the augmenting image generator.
# Resource used for these settings: https://fairyonice.github.io/Learn-about-ImageDataGenerator.html
augmentation_settings = dict(
    rotation_range=25,       # rotate each image slightly
    width_shift_range=0.1,   # small horizontal shift so the model sees off-centre leaves
    height_shift_range=0.1,  # same idea, vertically
    shear_range=0.2,         # shearing appears to slant/stretch the image; kept because it seems helpful
    zoom_range=0.2,          # add some zooming in on the image
    horizontal_flip=True,    # and flip images horizontally
)
imagedatagen = ImageDataGenerator(**augmentation_settings)

In [13]:
# Features (X) are the image arrays, targets (y) are the binarized labels.
X, y = image_arrays, lb_image_labels

In [14]:
# Stack the list of per-image arrays into one float16 ndarray so it can be normalized later on.
X = np.asarray(image_arrays, dtype=np.float16)

In [15]:
# Build a batch iterator over the full (un-split) X/y using the augmentation generator.
# NOTE(review): `generated_images` is not referenced again in the visible notebook -- training
# below builds its own iterator from X_train -- confirm whether this cell is still needed.
# NOTE(review): flow() may keep its own float32 copy of X (Keras' default dtype), which could
# contribute to the MemoryError seen when fitting -- TODO confirm.
# NOTE(review): `save_format` presumably only matters when `save_to_dir` is also given -- verify.
generated_images = imagedatagen.flow(X, y, save_format='jpg')

In [16]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [17]:
# Normalizing X_train and X_test from the 0-255 pixel range down to 0-1.
# The division happens in place (no second copy of the data in RAM), so the guard below
# keeps this cell idempotent: if it is re-run after the data was already scaled
# (max value <= 1), the arrays are left alone instead of being divided by 255 a second time.
for _split in (X_train, X_test):
    if _split.size and _split.max() > 1:
        _split /= 255

In [18]:
width = 128
height = 128
depth = 3
# One output unit per label column, taken from the binarized targets instead of a
# hard-coded 19, so the network keeps matching the data if classes are added or removed.
num_classes = y_train.shape[1]

# Model Instantiation
model = Sequential()

# Input: first convolution block (conv -> relu -> 3x3 max-pool -> dropout)
model.add(Conv2D(32, (3,3), input_shape = (width, height, depth)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(3,3)))
model.add(Dropout(0.25))

# Second Layer: convolution without pooling
model.add(Conv2D(64, (3,3)))
model.add(Activation("relu"))
model.add(Dropout(0.25))

# Third Layer: convolution followed by 2x2 max-pool
model.add(Conv2D(128, (3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

# Flatten It: feature maps -> one vector per image for the dense layers
model.add(Flatten())

# Fourth Layer
model.add(Dense(128))
model.add(Activation("relu"))

# Fifth Layer
model.add(Dense(64))
model.add(Activation("relu"))

# Final Output Layer: softmax over the classes
model.add(Dense(num_classes))
model.add(Activation("softmax"))


# Compile the Model (categorical cross-entropy pairs with the binarized multi-class targets)
model.compile(loss='categorical_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy'])

W0824 20:39:21.325320 14780 deprecation_wrapper.py:119] From C:\Users\JoeRo\Anaconda3\envs\keras_gpu\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0824 20:39:21.378179 14780 deprecation_wrapper.py:119] From C:\Users\JoeRo\Anaconda3\envs\keras_gpu\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0824 20:39:21.480926 14780 deprecation_wrapper.py:119] From C:\Users\JoeRo\Anaconda3\envs\keras_gpu\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0824 20:39:21.565677 14780 deprecation_wrapper.py:119] From C:\Users\JoeRo\Anaconda3\envs\keras_gpu\lib\site-packages\keras\backend\tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0824 20:39:21.5706

In [19]:
# Print the layer-by-layer summary (layer type, output shape, parameter count) of the model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 126, 126, 32)      896       
_________________________________________________________________
activation_1 (Activation)    (None, 126, 126, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 42, 42, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 42, 42, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 40, 40, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 40, 40, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 40, 40, 64)        0         
__________

In [20]:
# Train on augmented batches drawn from the training split and validate on the
# untouched test split after every epoch.
batch_size = 32
train_batches = imagedatagen.flow(X_train, y_train, batch_size=batch_size)
history = model.fit_generator(
    train_batches,
    steps_per_epoch=len(X_train) // batch_size,  # one pass over the training data per epoch
    epochs=25,
    validation_data=(X_test, y_test),
    verbose=1,
)

MemoryError: 

## While I wish I could get this ImageDataGenerator approach to work, it seems my rig sadly does not have enough RAM to pull it off.