This notebook is running on C Desktop although the data folder is placed in D. 

Make use of this root path when needed:

__D:\Kaggle Plant Path__

In [22]:
import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import pandas as pd
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt


from shutil import copyfile
from shutil import copy2

## Data Preprocessing

- ImageDataGenerator requires our images to be placed in subdirectories named after their respective labels. In our case, our images currently are unlabelled and are not divided into subdirectories. We are, however, given a csv file with the labels per image file.
- We must first create subdirectories followed by a loop that places each training image in its corrrect subdirectory.

In [23]:
#Let's get the labels by first importing the label csv
root = 'D:\Kaggle Plant Path'
label_csv = os.path.join(root,'train.csv')
df = pd.read_csv(label_csv)
df.head()

Unnamed: 0,image,labels
0,800113bb65efe69e.jpg,healthy
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex
2,80070f7fb5e2ccaa.jpg,scab
3,80077517781fb94f.jpg,scab
4,800cbf0ff87721f8.jpg,complex


In [24]:
a = list(df['labels'])
labels = list(np.unique(a))
labels

['complex',
 'frog_eye_leaf_spot',
 'frog_eye_leaf_spot complex',
 'healthy',
 'powdery_mildew',
 'powdery_mildew complex',
 'rust',
 'rust complex',
 'rust frog_eye_leaf_spot',
 'scab',
 'scab frog_eye_leaf_spot',
 'scab frog_eye_leaf_spot complex']

In [25]:
#Create primary directory and necessary subdirectories
train_dir = os.path.join(root,'plants-training')
to_create = [train_dir]
for label in labels: #creates list of all necessary subdirectories to make
    to_create.append(os.path.join(train_dir,label))

#loop to create directories using paths in the to_create list
'''for directory in to_create:
    try:
        os.mkdir(directory)
        print(directory, 'created')
    except:
        print(directory, 'creation failed')''';

In [26]:
#Create alias variables for subdirectories for ease of reference
'''var_list = ['complx', 'frog_eye_leaf_spot', 'frog_eye_leaf_spot_complx', 'healthy', 'powdery_mildew', 'powdery_mildew_complx','rust', 'rust_complx', 'rust_frog_eye_leaf_spot', 'scab', 'scab_frog_eye_leaf_spot', 'scab_frog_eye_leaf_spot_complx']
class_dir = to_create[1:] #list that includes only subdirectory paths of the labels
''';
#this allows me to avoid the /f error when manually assigning the string of the path to variables (ex: \\frog_leaf would turn to \x0rog_leaf)
'''for (variable,subdir_path) in zip(var_list,class_dir):
    exec("%s = %a" % (variable, subdir_path))''';

In [27]:
#loop over zip object of the file names and labels in train.csv and copy files to their respective subdirectories
files = list(df['image'])
y_train = list(df['labels'])

images_dir = os.path.join(root,'train_images')

'''
for (file_name,label) in zip(files,y_train):
    try:
        if label == 'complex':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path, os.path.join(complx,file_name))
        elif label == 'frog_eye_leaf_spot':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path, os.path.join(frog_eye_leaf_spot,file_name))
        elif label == 'frog_eye_leaf_spot complex':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path, os.path.join(frog_eye_leaf_spot_complx,file_name))
        elif label == 'healthy':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path, os.path.join(healthy,file_name))
        elif label ==  'powdery_mildew':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path, os.path.join(powdery_mildew,file_name))
        elif label == 'powdery_mildew complex':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path, os.path.join(powdery_mildew_complx,file_name))
        elif label == 'rust':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path,os.path.join(rust,file_name))
        elif label == 'rust complex':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path, os.path.join(rust_complx,file_name))
        elif label == 'rust frog_eye_leaf_spot':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path,os.path.join(rust_frog_eye_leaf_spot,file_name))
        elif label == 'scab':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path, os.path.join(scab,file_name))
        elif label == 'scab frog_eye_leaf_spot':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path, os.path.join(scab_frog_eye_leaf_spot,file_name))
        elif label == 'scab frog_eye_leaf_spot complex':
            file_path = os.path.join(images_dir,file_name)
            copy2(file_path, os.path.join(scab_frog_eye_leaf_spot_complx,file_name))
        else:
            print('yo what disease is this')
    except:
        print('failed to copy ', file_name, ' with label ', label)''';

In [28]:
#Create ImageDataGenerators and apply image processing arguments (should find 12 classes)
datagen = ImageDataGenerator(rescale = 1.0/255.,
                                   rotation_range = 40,
                                   width_shift_range = 0.2,
                                   height_shift_range = 0.2,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True,
                                   fill_mode = 'nearest',
                                   validation_split=0.25)
                                   
train_generator = datagen.flow_from_directory(train_dir,
                                                    subset='training',
                                                    target_size = (200,200),
                                                    batch_size = 32,
                                                    class_mode = 'sparse')
validation_generator = datagen.flow_from_directory(train_dir,
                                                    subset='validation',
                                                    target_size = (200,200),
                                                    batch_size = 32,
                                                    class_mode = 'sparse')
                                                   

STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=validation_generator.n//validation_generator.batch_size

Found 13977 images belonging to 12 classes.
Found 4655 images belonging to 12 classes.


In [29]:
#Define callback 

class myCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if(logs.get('acc') is not None and logs.get('acc')>0.75):
            print('Reached 75% training acc. Stopping Training!')
            self.model.stop_training = True

In [30]:
#Define and compile Model
'''callbacks = myCallback()
model = keras.models.Sequential([
    keras.layers.Conv2D(64, (2,2), activation='relu', input_shape=(200,200,3)),
    keras.layers.MaxPooling2D(2,2),
    keras.layers.Conv2D(32, (2,2), activation='relu'),
    keras.layers.MaxPooling2D(2,2),
    keras.layers.Conv2D(32,(2,2), activation='relu'),
    keras.layers.MaxPooling2D(2,2),
    keras.layers.Flatten(),
    keras.layers.Dense(1024,activation='relu'),
    keras.layers.Dropout(0.20),
    keras.layers.Dense(12,activation='softmax')
])

model.compile(optimizer = 'adam', 
             loss = 'sparse_categorical_crossentropy',
             metrics = ['accuracy'])''';


In [31]:
#Train Model
'''
history = model.fit(train_generator, 
                    epochs = 10,
                    validation_data = validation_generator,
                    callbacks = [callbacks])

#Create folder to save model in
model_folder = os.path.join(root,"model_v2")
os.mkdir(model_folder)
model.save(model_folder)
''';


In [32]:
#BONUS (for future ref): this is how you get the filenames taken by the generator
'''
train_files = train_generator.filenames
len(train_files)
'''

'\ntrain_files = train_generator.filenames\nlen(train_files)\n'

In [34]:
#Predict test data

test_dir = os.path.join(root,'test_images')

test_datagen = ImageDataGenerator(rescale = 1.0/255)

target_prediction_steps = 1

test_generator = test_datagen.flow_from_directory(test_dir,
                                                    target_size = (200,200),
                                                    batch_size = target_prediction_steps,
                                                    class_mode = None)

test_images = test_generator.filenames
num_samples = len(test_images)

model_path = os.path.join(root,'model_v2')
model = keras.models.load_model(os.path.join(model_path,'saved_model.pb'))

predictions = model.predict(test_generator, steps = num_samples)

Found 3 images belonging to 1 classes.


OSError: SavedModel file does not exist at: D:\Kaggle Plant Path\model_v2\saved_model.pb/{saved_model.pbtxt|saved_model.pb}

In [None]:
classes = list(train_generator.class_indices.keys()) # gives us the list of classes in the correct order
classes

In [None]:
predictions

In [None]:
#Let's return the respective labels instead of the probability values
max_predictions = predictions.argmax(axis=1) #shape of (3,)

label_predictions = [classes[predictions] for predictions in max_predictions]
label_predictions

In [None]:
#Place image names and predicted labels in pd dataframe to convert to csv
'''
test_images = os.listdir(os.path.join(test_dir,'testing'))
pred_arr = np.c_[test_images,label_predictions]
type(pred_arr)
''''''

In [None]:
#convert array to datafram 
'''
submission_df = pd.DataFrame(data = pred_arr,
                            index = None,
                             columns = ['image', 'labels'])
submission_df    
'''

In [None]:
#submission_df.to_csv('Chummi_Apple_Leaves_v2.csv', index = False)