In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # basic plotting
import seaborn as sns # additional plotting functionality
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
import os
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn import metrics
import tensorflow as tf


In [None]:
def flow_from_dataframe(img_data_gen, in_df, path_col, y_col, **dflow_args):
    """Build a directory iterator and re-point it at the rows of a DataFrame.

    The iterator is first created by ``img_data_gen.flow_from_directory`` on
    the folder of the first path in ``in_df[path_col]``; its file list, labels
    and sample counts are then overwritten with the DataFrame's contents.

    Args:
        img_data_gen: object exposing ``flow_from_directory(directory, **kw)``.
        in_df: DataFrame holding one row per image.
        path_col: name of the column with the image paths.
        y_col: name of the column with the labels.
        **dflow_args: forwarded unchanged to ``flow_from_directory``.

    Returns:
        The patched iterator returned by ``flow_from_directory``.
    """
    image_paths = in_df[path_col].values
    n_images = in_df.shape[0]
    root_dir = os.path.dirname(image_paths[0])

    print('## Ignore next message from keras, values are replaced anyways')
    iterator = img_data_gen.flow_from_directory(
        root_dir,
        class_mode='sparse',
        **dflow_args
    )

    # Swap the directory scan results for the DataFrame's own contents.
    iterator.filenames = image_paths
    iterator.classes = np.stack(in_df[y_col].values)
    iterator.samples = n_images
    iterator.n = n_images
    # Rebuild the index array only after the sample count has been replaced.
    iterator._set_index_array()
    iterator.directory = ''  # since we have the full path

    print('Reinserting dataframe: {} images'.format(n_images))
    return iterator

In [None]:
# Label names; handed to the Keras generators via `classes=` and used to size
# the softmax output layer.
classes = [
    'Alex', 'Ananya', 'Kavisha', 'Mithun', 'Sahil',
    'Sonu', 'Tom', 'Dhruv', 'Ethan', 'Premal',
]

# Load the image index (columns 'Images' and 'Labels' are read further down)
# and report how many rows it contains.
csvFile = pd.read_csv('images_path.csv')
print(csvFile.shape[0])


In [None]:
# Stratified 70/30 split: every label keeps the same share in both partitions.
# The fixed random_state makes the split identical on every Restart & Run All.
train_x, test_x, train_y, test_y = train_test_split(
    csvFile['Images'],
    csvFile['Labels'],
    test_size=0.3,
    stratify=csvFile['Labels'],
    random_state=42,
)

print(len(train_x))
print(len(test_x))

train_data = {'Images': train_x, 'Labels': train_y}
# Must be built from the held-out part; using train_x/train_y here would make
# `test` an exact copy of `train`.
test_data = {'Images': test_x, 'Labels': test_y}

# A validation set could be carved out the same way; for now only a train and
# a test frame are created.
train = pd.DataFrame(train_data)
test = pd.DataFrame(test_data)

In [None]:
# Spatial size passed to the generators as `target_size`.
image_size = (299, 299)

# Per-image standardisation plus mild random geometric augmentation
# (horizontal flips, small shifts, rotation, shear and zoom).
augmentation_options = dict(
    samplewise_center=True,
    samplewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=False,
    height_shift_range=0.05,
    width_shift_range=0.1,
    rotation_range=5,
    shear_range=0.1,
    zoom_range=0.15,
)
core_idg = ImageDataGenerator(**augmentation_options)


In [None]:
# Arguments shared by every generator, so label encoding, image size and
# colour handling cannot drift apart between training and evaluation.
# (The third call previously spelled `class_mode` as `lass_mode`.)
flow_args = dict(
    directory=None,  # paths in 'Images' are used as-is -- presumably absolute; TODO confirm
    x_col='Images',
    y_col='Labels',
    class_mode='categorical',
    classes=classes,
    target_size=image_size,
    color_mode='rgb',
)

# Training stream: augmented images drawn from the training split only.
train_gen = core_idg.flow_from_dataframe(dataframe=train, batch_size=32, **flow_args)

# Evaluation pipeline: the same per-image standardisation the network is
# trained with, but no random augmentation. (rescale=1/255 would feed the
# model inputs on a different scale than it sees during training.)
test_datagen = ImageDataGenerator(samplewise_center=True,
                                  samplewise_std_normalization=True)

# Validation stream over the held-out split; order is fixed so results are
# comparable from epoch to epoch.
valid_gen = test_datagen.flow_from_dataframe(dataframe=test,
                                             batch_size=32,
                                             shuffle=False,
                                             **flow_args)

# One fixed in-memory batch of held-out images (at most 1024) used by the
# prediction / evaluation cells and to read the model's input shape.
test_X, test_Y = next(test_datagen.flow_from_dataframe(dataframe=test,
                                                       batch_size=1024,
                                                       shuffle=False,
                                                       **flow_args))

In [None]:


# Build the CNN: five conv -> max-pool -> dropout stages followed by three
# fully connected layers and a softmax classifier.
conv_stages = [
    # (filters, pool_size)
    (8, 2),
    (16, 2),
    (32, 2),
    (64, 2),
    (128, 3),
]
dense_widths = (3000, 2000, 1000)

model = Sequential()

for stage_idx, (n_filters, pool) in enumerate(conv_stages):
    conv_kwargs = dict(filters=n_filters, kernel_size=3, padding='same', activation='relu')
    if stage_idx == 0:
        # Only the first layer declares the input shape, taken from the sample batch.
        conv_kwargs['input_shape'] = test_X.shape[1:]
    model.add(Conv2D(**conv_kwargs))
    model.add(MaxPooling2D(pool_size=pool))
    model.add(Dropout(0.2))

# Fully connected head; the final softmax outputs one classification
# probability per entry in `classes`.
model.add(Flatten())
for n_units in dense_widths:
    model.add(Dense(n_units, activation='relu'))
    model.add(Dropout(0.2))
model.add(Dense(len(classes), activation='softmax'))

# Compile the model and print its layer summary.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
#checkpointer = ModelCheckpoint(filepath='weights.best.hdf5', verbose=1, save_best_only = True)
#callbacks_list = [checkpointer]
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='cnn_accuracy',
    save_weights_only=True,
    #monitor='accuracy',
    mode='max',
    verbose=1,
    save_best_only=True)

callbacks_list = [checkpoint]

In [None]:
model.fit_generator(generator = train_gen, steps_per_epoch = 2, epochs = 250, callbacks = callbacks_list,shuffle=True,verbose=1)

In [None]:
# Class-probability predictions for the in-memory evaluation batch.
model_predictions = model.predict(
    test_X,
    batch_size=32,
    verbose=1,
)

In [None]:
# Loss and accuracy of the current model on the in-memory evaluation batch.
print("Evaluate on test data")
results = model.evaluate(
    test_X,
    test_Y,
    batch_size=32,
)

In [None]:
# Per-class precision / recall / F1 on the in-memory evaluation batch.
y_pred = model.predict(test_X, batch_size=20, verbose=1)
y_pred_bool = np.argmax(y_pred, axis=1)  # predicted class index per image
print(y_pred_bool)

# Decode the one-hot labels into a NEW name: overwriting test_Y would make
# this cell fail on a second run and break the evaluate cell on re-run.
y_true = np.argmax(test_Y, axis=1)

# `labels` pins the full label set so the report has one row per entry in
# `classes` even when a class is missing from the batch; `target_names`
# prints the names instead of bare indices.
print(metrics.classification_report(y_true,
                                    y_pred_bool,
                                    labels=np.arange(len(classes)),
                                    target_names=classes))