In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, SGD
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras.callbacks import CSVLogger
import os
import matplotlib.pyplot as plt
from numpy import savetxt
import pandas as pd
import shutil

In [None]:
def move(validation_data_dir):
    """Sort the validation images into one sub-folder per class.

    Reads ``ground_truth.txt`` (``;``-separated, no header row; column 0 is
    matched against the image file name and column 1 is used as its class
    label) from ``validation_data_dir`` and moves every file whose name ends
    in ``jpg`` found under that directory into a sub-folder named after its
    label, with spaces and colons stripped from the label.

    Files without a ground-truth row are reported and left where they are,
    and files that already sit inside the folder of their own class are not
    moved again, so the function can be re-run without nesting folders.

    Args:
        validation_data_dir: Directory that holds the images and
            ``ground_truth.txt``.
    """
    gt = pd.read_csv(os.path.join(validation_data_dir, 'ground_truth.txt'),
                     sep=';', header=None)

    # Build the file-name -> label lookup once instead of filtering the whole
    # frame for every image. setdefault keeps the first row when a file name
    # is listed more than once.
    labels = {}
    for name, label in zip(gt[0], gt[1]):
        labels.setdefault(name, label)

    for root, dirs, files in os.walk(validation_data_dir):
        for file in files:
            if not file.endswith('jpg'):
                continue
            if file not in labels:
                # Previously this raised IndexError and aborted the whole run.
                print("no ground truth for {}, skipping".format(os.path.join(root, file)))
                continue

            # Folder name = label with every space and colon removed.
            img_class = str(labels[file]).replace(" ", "").replace(":", "")

            # Already sorted on an earlier run: do not create class/class/...
            if os.path.basename(root) == img_class:
                continue

            dir_ = os.path.join(root, img_class)
            if not os.path.exists(dir_):
                os.mkdir(dir_)
            shutil.move(os.path.join(root, file), os.path.join(dir_, file))
    print("done")
    
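# One-off preprocessing step: uncomment to sort the validation images into class folders.
# validation_data_dir is only assigned in a later cell, so define it before running this.
#move(validation_data_dir)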

In [None]:
validation_data_dir = 'data/sc5-test'
train_data_dir = 'data/sc5'


def _class_folders(data_dir):
    """Return the sorted names of the immediate sub-directories of ``data_dir``."""
    names = []
    for entry in os.listdir(data_dir):
        # A relative path is resolved against the current working directory,
        # so no explicit os.getcwd() prefix is needed.
        if os.path.isdir(os.path.join(data_dir, entry)):
            names.append(entry)
    names.sort()
    return names


# Class folders present in the validation set and in the training set.
val = _class_folders(validation_data_dir)
train = _class_folders(train_data_dir)



In [None]:
# Keep only the class folders that exist in both the training and the validation set.
to_keep = set(train) & set(val)


In [None]:
def move2():
    """Delete every sub-directory whose name is not in ``to_keep``.

    Processes the module-level ``validation_data_dir`` first, prints a
    separator line, then processes ``train_data_dir``. Only directories are
    removed (recursively, with ``shutil.rmtree``); plain files are left
    untouched. This is destructive and cannot be undone.
    """
    def _drop_unwanted(parent):
        # Remove each child directory of `parent` that is not a kept class.
        for name in os.listdir(parent):
            candidate = os.path.join(parent, name)
            if name not in to_keep and os.path.isdir(candidate):
                shutil.rmtree(candidate)

    _drop_unwanted(validation_data_dir)
    print("------")
    _drop_unwanted(train_data_dir)

In [None]:
train_data_dir = 'data/sc5'
validation_data_dir = 'data/sc5-test'

# Number of training images: every '.jpg' file anywhere below the training folder.
nb_train_samples = sum(
    1
    for _, _, filenames in os.walk(train_data_dir)
    for filename in filenames
    if filename.endswith('.jpg')
)

# Number of images used for testing (validation), counted the same way.
nb_validation_samples = sum(
    1
    for _, _, filenames in os.walk(validation_data_dir)
    for filename in filenames
    if filename.endswith('.jpg')
)

print("Traning: {}\nTesting: {}".format(nb_train_samples, nb_validation_samples))

In [None]:
# CNN MODEL
def VGCC18(n_classes, img_width, img_height):
    """Build a small convolutional classifier for RGB images.

    Architecture: three blocks of ``Conv2D(3x3) -> ReLU -> MaxPooling(2x2)``
    with 32, 32 and 64 filters, followed by ``Flatten``, ``Dense(64)`` + ReLU,
    ``Dropout(0.5)`` and a ``Dense(n_classes)`` output layer.

    The output uses a softmax activation so the ``n_classes`` scores form a
    probability distribution. That is the activation expected by a
    single-label, multi-class setup trained with categorical cross-entropy;
    independent per-class sigmoids do not sum to one.

    Args:
        n_classes: Number of output classes (units in the last Dense layer).
        img_width: First spatial dimension of the input tensor.
        img_height: Second spatial dimension of the input tensor.
            NOTE(review): Keras reads the spatial dimensions as (rows, cols),
            so ``img_width`` is effectively the number of rows here. Keep the
            same order in the generators' ``target_size``.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # Channel axis position depends on the configured backend data format.
    if K.image_data_format() == 'channels_first':
        input_shape = (3, img_width, img_height)
    else:
        input_shape = (img_width, img_height, 3)
    print(input_shape)

    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes))  # one unit per class
    # softmax (not sigmoid): classes are mutually exclusive
    model.add(Activation('softmax'))
    return model

In [None]:
# Size the images are resized to, in the order passed to target_size below.
img_width, img_height = 240, 800

# Build the network from the variables above (not repeated literals) so the
# model input and the generators' target_size cannot drift apart.
model = VGCC18(n_classes=len(to_keep),
               img_width=img_width,
               img_height=img_height)

# Write per-epoch metrics to training.log.
csv_logger = CSVLogger('training.log')

# NOTE(review): `lr` is kept as the argument name this notebook was written
# against; confirm it against the installed Keras release.
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.01),
              metrics=['accuracy'])


epochs = 5
batch_size = 200

# Ceil division, at least 1: plain `n // batch_size` yields 0 steps when a set
# is smaller than one batch and silently drops the final partial batch.
train_steps = max(1, -(-nb_train_samples // batch_size))
validation_steps = max(1, -(-nb_validation_samples // batch_size))

# The model has len(to_keep) outputs, so both generators are pinned to exactly
# those class folders, in the same order. Label indices then agree between
# training and validation even if other folders exist on disk.
class_names = sorted(to_keep)

## Data augmentation ##
# Training images: random rotation, zoom and horizontal flip, pixels scaled to [0, 1].
train_datagen = ImageDataGenerator(
    rotation_range=30,
    rescale=1. / 255,
    zoom_range=0.2,
    horizontal_flip=True)

# Validation images: only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Stream batches of resized images and one-hot labels from the class folders.
# training
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    classes=class_names,
    batch_size=batch_size,
    class_mode='categorical')
# validation: shuffle is disabled so the saved predictions follow the
# generator's file order and can be matched back to the images.
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    classes=class_names,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)

model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=[csv_logger],  # logging our progress
    verbose=1)

# Evaluation is best effort: a failure is reported but must not prevent the
# trained model from being saved below. Exception (not BaseException) is
# caught so Ctrl-C / kernel interrupt still stops the cell.
try:
    # Restart from the first batch before each pass over the validation set.
    validation_generator.reset()
    scores = model.evaluate_generator(validation_generator, validation_steps)
    validation_generator.reset()
    predict = model.predict_generator(validation_generator, validation_steps, verbose=1)
    savetxt('scores.txt', scores)
    savetxt('predictions.txt', predict)
except Exception as error:
    print('An exception occurred: {}'.format(error))

model.save_weights('my_model_weights_2.h5')  # saving weights for further analysis
model.save('my_model_2.h5')

In [None]:
# Display the number of training images counted earlier in the notebook.
nb_train_samples