In [1]:
from keras import applications, optimizers
from keras.layers import Dropout, Flatten, Dense, Input
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
import keras
import os, csv
import shutil
import numpy as np
import pandas as pd

Using TensorFlow backend.


In [None]:
# dimensions of our images.
img_width, img_height = 224, 224

train_data_dir = '../dataset/train'
test_data_dir = '../dataset/test'
validation_data_dir = '../dataset/validation'
data_dir = '../dataset-original'
nb_train_samples = 1792
nb_test_samples = 210
nb_validation_samples = 525

batch_size = 16
epochs = 20

In [None]:
def buildModel():
    # build the VGG16 network
    base_model = applications.VGG16(weights='imagenet', 
                                    include_top=False,
                                    input_tensor=Input(shape=(img_width, img_height, 3)))

    for layer in base_model.layers:
        layer.trainable = False
        
    top_model = base_model.output
    top_model = Flatten(name="Flatten")(top_model)
    top_model = Dense(512, activation='relu')(top_model)
    top_model = Dense(256, activation='relu')(top_model)
    top_model = Dense(6, activation='softmax')(top_model)
    
    model = Model(inputs=base_model.input, outputs=top_model)

    model.summary()
    
    model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
    
    return model

In [18]:
def createFolds():
    # create dataframe with the images filenames
    dataset_files = open('./dataset.csv', 'w+')
    writer = csv.writer(dataset_files)

    writer.writerow(['image','class'])
    for path, dirs, files in os.walk("./dataset-original"):
        for filename in files:
            writer.writerow([filename, os.path.basename(path)])

    # dataframe containing the filenames of the images (e.g., GUID filenames) and the classes
    df = pd.read_csv('./dataset.csv')
    df_y = df['class']
    df_x = df['image']

    skf = StratifiedKFold(n_splits = 5)
    i = 1

    for train_index, test_index in skf.split(df_x, df_y):
        x_train, x_test = df_x[train_index], df_x[test_index]
        y_train, y_test = df_y[train_index], df_y[test_index]

        train = pd.concat([x_train, y_train], axis=1)
        test = pd.concat([x_test, y_test], axis = 1)
        # take 20% of the training data from this fold for validation during training
        validation = test.sample(frac = 0.5)

        # make sure validation data does not include training data
        train = train[~train['image'].isin(list(validation['image']))]

        # copy the images according to the fold
        copy_images(train, 'training', i)
        copy_images(validation, 'validation', i)
        copy_images(test, 'test', i)
        i +=1

In [19]:
createFolds()



copying training files to ./dataset/training-1...
copying validation files to ./dataset/validation-1...
copying test files to ./dataset/test-1...
Error when copying .DS_Store: [Errno 2] No such file or directory: './dataset-original/dataset-original/.DS_Store'
copying training files to ./dataset/training-2...
copying validation files to ./dataset/validation-2...
copying test files to ./dataset/test-2...
copying training files to ./dataset/training-3...
Error when copying .DS_Store: [Errno 2] No such file or directory: './dataset-original/dataset-original/.DS_Store'
copying validation files to ./dataset/validation-3...
copying test files to ./dataset/test-3...
copying training files to ./dataset/training-4...
Error when copying .DS_Store: [Errno 2] No such file or directory: './dataset-original/dataset-original/.DS_Store'
copying validation files to ./dataset/validation-4...
copying test files to ./dataset/test-4...
copying training files to ./dataset/training-5...
Error when copying .D

In [17]:
# used to copy files according to each fold
def copy_images(dataframe, directory, fold):
    destination_directory = './dataset/{}-{}'.format(directory,fold)
    print('copying {} files to {}...'.format(directory, destination_directory))

    # remove all files from previous fold
    if os.path.exists(destination_directory):
        shutil.rmtree(destination_directory)

    # create folder for files from this fold
    if not os.path.exists(destination_directory):
        os.makedirs(destination_directory)

    # create subfolders for each class
    for image_class in set(list(dataframe['class'])):
        if not os.path.exists(destination_directory + '/' + image_class):
            os.makedirs(destination_directory + '/' + image_class)

    # copy files for this fold from a directory holding all the files
    for i, row in dataframe.iterrows():
        try:
            # this is the path to all of your images kept together in a separate folder
            path_from = './dataset-original/{}/{}'.format(row['class'],row['image'])
            path_to = "{}/{}".format(destination_directory, row['class'])
            
            # move from folder keeping all files to training, test, or validation folder (the "directory" argument)
            shutil.copy(path_from, path_to)
        except Exception as e:
            print("Error when copying {}: {}".format(row['image'], str(e)))

In [None]:
def generateData(batch_size):
    train_datagen = ImageDataGenerator(
        horizontal_flip=True,
        rescale=1. /255,
        rotation_range=30,
        shear_range=0.2,
        zoom_range=0.2)

    validation_datagen = ImageDataGenerator(rescale=1. /255)

    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical')

    validation_generator = validation_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical')
    
    return [train_generator,validation_generator]

In [None]:
def fineTuneModel(model, train_generator, validation_generator, epochs, batch_size):
    model.fit_generator(
        train_generator,
        steps_per_epoch=nb_train_samples // batch_size,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=nb_validation_samples // batch_size)
    
    return model

In [None]:
def getMetrics(model, batch_size):
    
    test_datagen = ImageDataGenerator(rescale=1. /255)

    test_generator = test_datagen.flow_from_directory(
        test_data_dir,
        target_size=(img_width,img_height),
        shuffle=False,
        batch_size=1,
        class_mode='categorical')
    
    predicted_results = model.predict_generator(test_generator, 
                                                steps = nb_test_samples)
    predicted_results = np.argmax(predicted_results, axis=1)
    targets = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]
    
    # confusion matrix
    print("CONFUSION MATRIX:")
    print(confusion_matrix(test_generator.classes, predicted_results))
    
    # classification report
    print("CLASSIFICATION REPORT:")
    print(classification_report(test_generator.classes, predicted_results, target_names=targets))

In [None]:
model = buildModel()
train_generator,test_generator = generateData(batch_size)
trained_model = fineTuneModel(model,train_generator, test_generator, epochs, batch_size)
metrics = getMetrics(trained_model, batch_size)