In [22]:
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, SGD
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras.callbacks import CSVLogger
import os
import matplotlib.pyplot as plt
from numpy import savetxt
import pandas as pd
import shutil

In [24]:
# Root folders of the held-out (validation/test) images and of the training
# images. The validation folder is also where ground_truth.txt is read from.
validation_data_dir = 'data/sc5-test'
train_data_dir = 'data/sc5'

def move(validation_data_dir):
    '''
    Sort the validation images into one sub-folder per class.

    Reads ``ground_truth.txt`` (``;``-separated, no header: file name in
    column 0, class label in column 1) from ``validation_data_dir``, walks the
    tree and moves every file ending in ``jpg`` into a sub-folder of its
    current directory named after its label with spaces and colons removed.

    The function is safe to re-run: a file that already sits in a folder named
    after its class is left alone, and a file without a usable ground-truth
    row is skipped (and reported) instead of aborting the whole run.

    Parameters
    ----------
    validation_data_dir : str
        Directory holding ``ground_truth.txt`` and the images to sort.
    '''
    gt = pd.read_csv(os.path.join(validation_data_dir, 'ground_truth.txt'),
                     sep=';', header=None)

    # Build the file -> label table once instead of filtering the frame for
    # every image. For duplicated file names the first row wins.
    first_rows = gt.drop_duplicates(subset=[0])
    labels = dict(zip(first_rows[0], first_rows[1]))

    skipped = 0
    for root, dirs, files in os.walk(validation_data_dir):
        for file in files:
            if not file.endswith('jpg'):
                continue

            label = labels.get(file)
            if label is None or pd.isna(label):
                skipped += 1
                continue

            # "Lancia: fino 10m" -> "Lanciafino10m"
            img_class = str(label).replace(" ", "").replace(":", "")

            # Already filed under its class (e.g. on a second run): moving it
            # again would nest it as <class>/<class>/<file>.
            if os.path.basename(root) == img_class:
                continue

            dir_ = os.path.join(root, img_class)
            if not os.path.exists(dir_):
                os.mkdir(dir_)
            shutil.move(os.path.join(root, file), os.path.join(dir_, file))

    if skipped:
        print("skipped {} image(s) without a ground-truth label".format(skipped))
    print("done")
    
# Sort the validation images into per-class folders (moves files on disk).
move(validation_data_dir)

done


In [26]:
# Sub-folders (one per class) found directly under each dataset root,
# in alphabetical order.
val = sorted(
    entry for entry in os.listdir(validation_data_dir)
    if os.path.isdir(os.path.join(os.getcwd(), validation_data_dir, entry))
)
train = sorted(
    entry for entry in os.listdir(train_data_dir)
    if os.path.isdir(os.path.join(os.getcwd(), train_data_dir, entry))
)

# Classes present in BOTH datasets; the cell displays how many there are.
to_keep = set(val) & set(train)
len(to_keep)


18

In [20]:
def select_classes(data_dir=None, classes=None, min_samples=50):
    '''
    Drop under-populated classes from the set of classes to keep.

    Walks ``data_dir``; every directory whose name is in ``classes`` and that
    holds fewer than ``min_samples`` entries is removed from ``classes``
    (in place) and reported on stdout. Nothing on disk is modified.

    Parameters
    ----------
    data_dir : str, optional
        Tree to scan. Defaults to the notebook-level ``validation_data_dir``.
    classes : set of str, optional
        Set that is pruned in place. Defaults to the notebook-level
        ``to_keep``.
    min_samples : int, optional
        Minimum number of directory entries a class needs in order to stay.
    '''
    if data_dir is None:
        data_dir = validation_data_dir
    if classes is None:
        classes = to_keep

    for root, dirs, files in os.walk(data_dir):
        # basename() follows the OS path separator; splitting on "/" yields a
        # wrong name where os.walk joins paths with backslashes.
        c_name = os.path.basename(root)
        if c_name not in classes:
            continue

        n_entries = len(os.listdir(root))
        if n_entries < min_samples:
            classes.remove(c_name)
            print("deleted {} with {} samples".format(c_name, n_entries))

In [27]:
# Display the current contents of to_keep (class names kept for training).
to_keep

{'Alilaguna',
 'Ambulanza',
 'Barchino',
 'Gondola',
 'Lanciafino10m',
 'Lanciafino10mBianca',
 'Lanciafino10mMarrone',
 'Lanciamaggioredi10mBianca',
 'Motobarca',
 'Motopontonerettangolare',
 'MotoscafoACTV',
 'Mototopo',
 'Patanella',
 'Polizia',
 'Raccoltarifiuti',
 'Sandoloaremi',
 'Topa',
 'VaporettoACTV'}

In [28]:
def move2(validation_data_dir, train_data_dir, to_keep):
    '''
    Delete the class folders that do not appear in both datasets.

    Every direct sub-folder of ``validation_data_dir`` and then of
    ``train_data_dir`` whose name is not in ``to_keep`` is removed
    recursively. Plain files in the two roots are left alone. A ``------``
    separator is printed between the two passes.

    Parameters
    ----------
    validation_data_dir, train_data_dir : str
        Dataset roots containing one sub-folder per class.
    to_keep : collection of str
        Names of the class folders that must survive.

    Raises
    ------
    ValueError
        If ``to_keep`` is empty. Continuing would wipe every class folder in
        both datasets, which is never the intent of this clean-up.
    '''
    if not to_keep:
        raise ValueError("to_keep is empty: refusing to delete every class folder")

    _remove_unlisted_dirs(validation_data_dir, to_keep)
    print("------")
    _remove_unlisted_dirs(train_data_dir, to_keep)


def _remove_unlisted_dirs(data_dir, to_keep):
    '''Recursively delete each direct sub-folder of data_dir not named in to_keep.'''
    for name in os.listdir(data_dir):
        path = os.path.join(data_dir, name)
        if name not in to_keep and os.path.isdir(path):
            shutil.rmtree(path)
#move2(validation_data_dir, train_data_dir, to_keep)

------


In [29]:
# Dataset roots.
train_data_dir = 'data/sc5'
validation_data_dir = 'data/sc5-test'

# Training images: every file ending in .jpg anywhere below the training root.
nb_train_samples = 0
for root, dirs, files in os.walk(train_data_dir):
    nb_train_samples += sum(1 for name in files if name.endswith('.jpg'))

# Same count for the images held out for testing (validation).
nb_validation_samples = 0
for root, dirs, files in os.walk(validation_data_dir):
    nb_validation_samples += sum(1 for name in files if name.endswith('.jpg'))

print("Traning: {}\nTesting: {}".format(nb_train_samples, nb_validation_samples))

Traning: 3810
Testing: 1252


In [60]:
# CNN MODEL
def VGCC18(n_classes, img_width, img_height):
    '''
    Build the small convolutional classifier (returned uncompiled).

    Architecture: three stages of Conv2D(3x3) -> ReLU -> Dropout(0.1) ->
    MaxPooling(2x2) with 32, 64 and 64 filters, then Flatten ->
    Dense(80) -> ReLU -> Dropout(0.6) -> Dense(n_classes) -> softmax.

    Parameters
    ----------
    n_classes : int
        Number of output classes (units of the final Dense layer).
    img_width, img_height : int
        Spatial size of the input; combined with 3 channels in the order
        required by the backend's image data format.

    Returns
    -------
    keras.models.Sequential

    The chosen input shape is printed as a side effect.
    '''
    channels_first = K.image_data_format() == 'channels_first'
    input_shape = ((3, img_width, img_height) if channels_first
                   else (img_width, img_height, 3))
    print(input_shape)

    model = Sequential()
    for stage, n_filters in enumerate((32, 64, 64)):
        # Only the very first layer is given the input shape.
        if stage == 0:
            model.add(Conv2D(n_filters, (3, 3), input_shape=input_shape))
        else:
            model.add(Conv2D(n_filters, (3, 3)))
        model.add(Activation('relu'))
        model.add(Dropout(0.1))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head.
    head = (Flatten(), Dense(80), Activation('relu'), Dropout(0.6),
            Dense(n_classes), Activation('softmax'))
    for layer in head:
        model.add(layer)
    return model

def VGCC181(n_classes, img_width, img_height):
    '''
    Build a shallow two-convolution classifier (returned uncompiled).

    Architecture: two stages of Conv2D(20, 3x3) -> ReLU -> Dropout(0.5), then
    Flatten -> Dense(80) -> ReLU -> Dropout(0.5) -> Dense(n_classes) ->
    softmax.

    The input size and the number of outputs now follow the arguments; they
    used to be fixed at 200x200x3 and 18 regardless of what was passed in.

    Parameters
    ----------
    n_classes : int
        Number of output classes (units of the final Dense layer).
    img_width, img_height : int
        Spatial size of the input; combined with 3 channels in the order
        required by the backend's image data format.

    Returns
    -------
    keras.models.Sequential
    '''
    # Same channel-ordering rule as VGCC18.
    if K.image_data_format() == 'channels_first':
        input_shape = (3, img_width, img_height)
    else:
        input_shape = (img_width, img_height, 3)

    model = Sequential()

    model.add(Conv2D(20, (3, 3), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Conv2D(20, (3, 3)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Flatten())
    model.add(Dense(80))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes))
    model.add(Activation('softmax'))
    return model

In [61]:
# Size (in pixels) the generators resize every image to; square here.
img_width = img_height = 200

# One output unit per class that survived the train/validation intersection.
model = VGCC18(
    n_classes=len(to_keep),
    img_width=img_width,
    img_height=img_height,
)

model.summary()

(200, 200, 3)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_55 (Conv2D)           (None, 198, 198, 32)      896       
_________________________________________________________________
activation_85 (Activation)   (None, 198, 198, 32)      0         
_________________________________________________________________
dropout_58 (Dropout)         (None, 198, 198, 32)      0         
_________________________________________________________________
max_pooling2d_49 (MaxPooling (None, 99, 99, 32)        0         
_________________________________________________________________
conv2d_56 (Conv2D)           (None, 97, 97, 64)        18496     
_________________________________________________________________
activation_86 (Activation)   (None, 97, 97, 64)        0         
_________________________________________________________________
dropout_59 (Dropout)         (None, 97, 97, 64)        0      

In [None]:
#tracking our model: per-epoch metrics are appended to training.log
csv_logger = CSVLogger('training.log')

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.001),
              metrics=['accuracy'])


epochs = 10
batch_size = 32

##Data augmentation##
#for training: random rotation / zoom / horizontal flip, pixels scaled to [0, 1]
train_datagen = ImageDataGenerator(
    rotation_range = 30,
    rescale=1. / 255,
    zoom_range=0.2,
    horizontal_flip=True)

#for testing: only the rescaling, no augmentation
test_datagen = ImageDataGenerator(rescale=1. / 255)

#batch generators reading one sub-folder per class
#training
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode = 'categorical')
#testing
# shuffle=False keeps a fixed file order, so predictions made from this
# generator can be matched to validation_generator.filenames / .classes.
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode = 'categorical',
    shuffle=False)

# Derive the step counts from what the generators actually found. The previous
# hard-coded 2000 / 800 covered only part of the 3810 / 1252 images per epoch.
train_steps = max(1, train_generator.samples // batch_size)
validation_steps = max(1, validation_generator.samples // batch_size)

model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=[csv_logger], #logging our progress
    verbose = 1)

Found 3810 images belonging to 18 classes.
Found 1252 images belonging to 18 classes.
Epoch 1/10

In [18]:
# Evaluate on the validation set and dump the raw numbers to text files.
# NOTE(review): the rows of predictions.txt only line up with
# validation_generator.filenames if that generator was built with
# shuffle=False - verify before analysing them per image.
try:
    # pickle_safe=False was the default and the argument was renamed in later
    # Keras releases, so it is simply not passed any more.
    scores = model.evaluate_generator(validation_generator, nb_validation_samples // batch_size)
    predict = model.predict_generator(validation_generator, nb_validation_samples // batch_size, verbose=1)
    savetxt('scores.txt', scores)
    savetxt('predictions.txt', predict)
except Exception as error:
    # Exception, not BaseException: interrupting the kernel (KeyboardInterrupt)
    # must stop the cell instead of being reported as an evaluation error.
    print('An exception occurred: {}'.format(error))

model.save_weights('my_model_weights_2.h5') #saving weights for further analysis
model.save('my_model_2.h5')

Found 3272 images belonging to 7 classes.
Found 1125 images belonging to 7 classes.
Epoch 1/10
 3/20 [===>..........................] - ETA: 5:48 - loss: 1.9680 - acc: 0.2267

KeyboardInterrupt: 

In [None]:
# Training curves read back from the CSVLogger output: accuracy on top,
# loss below, each with the training and validation series.
history = pd.read_csv('training.log')

fig, (acc_ax, loss_ax) = plt.subplots(2, 1)
panels = (
    (acc_ax, 'acc', 'val_acc', 'model accuracy', 'accuracy'),
    (loss_ax, 'loss', 'val_loss', 'model loss', 'loss'),
)
for ax, train_col, val_col, title, ylabel in panels:
    ax.plot(history[train_col])
    ax.plot(history[val_col])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'test'], loc='lower right')
plt.show()

In [None]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications

# Size the generators resize every image to for the VGG16 experiment.
img_width, img_height = 240, 800

# Where the weights of the small classifier trained on top are written.
top_model_weights_path = 'bottleneck_fc_model.h5'

# Dataset roots.
train_data_dir = 'data/sc5'
validation_data_dir = 'data/sc5-test'

# Training images: every file ending in .jpg anywhere below the training root.
nb_train_samples = 0
for root, dirs, files in os.walk(train_data_dir):
    nb_train_samples += sum(1 for name in files if name.endswith('.jpg'))

# Same count for the images held out for testing (validation).
nb_validation_samples = 0
for root, dirs, files in os.walk(validation_data_dir):
    nb_validation_samples += sum(1 for name in files if name.endswith('.jpg'))

print("Traning: {}\nTesting: {}".format(nb_train_samples, nb_validation_samples))

epochs = 50
batch_size = 16

In [None]:
def save_bottlebeck_features():
    '''
    Run both datasets through VGG16 (no top layers) and cache the outputs.

    Reads the notebook-level ``train_data_dir``, ``validation_data_dir``,
    ``nb_train_samples``, ``nb_validation_samples``, ``img_width``,
    ``img_height`` and ``batch_size``. Writes
    ``bottleneck_features_train.npy`` and
    ``bottleneck_features_validation.npy`` in the working directory.

    Only ``n // batch_size`` full batches are predicted per dataset, so up to
    ``batch_size - 1`` trailing images are not represented in the saved
    arrays.
    '''
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')

    _save_bottleneck_features(model, datagen, train_data_dir,
                              nb_train_samples,
                              'bottleneck_features_train.npy')
    _save_bottleneck_features(model, datagen, validation_data_dir,
                              nb_validation_samples,
                              'bottleneck_features_validation.npy')


def _save_bottleneck_features(model, datagen, data_dir, n_samples, out_path):
    '''Predict features for the images under data_dir and save them to out_path.'''
    # shuffle=False: the feature rows must follow the generator's file order
    # so that labels can be re-attached afterwards.
    generator = datagen.flow_from_directory(
        data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)
    features = model.predict_generator(generator, n_samples // batch_size)
    # Pass the path, not open(path, 'w'): np.save needs a binary target, and
    # the text-mode handle used before fails on Python 3 and was never closed.
    np.save(out_path, features)


def train_top_model():
    '''
    Train a small dense classifier on the cached VGG16 bottleneck features.

    Loads ``bottleneck_features_train.npy`` and
    ``bottleneck_features_validation.npy``, rebuilds the integer class labels
    from the dataset folders, fits Flatten -> Dense(256, relu) ->
    Dropout(0.5) -> Dense(n_classes, softmax) and saves the weights to
    ``top_model_weights_path``.

    Replaces the earlier two-class recipe (half zeros / half ones labels, a
    single sigmoid unit, categorical cross-entropy), which does not match a
    multi-class dataset and could not run on Python 3 (float list
    multiplication, text-mode ``np.load``).

    Raises
    ------
    ValueError
        If a feature file has more rows than there are images on disk, or if
        the training and validation folders do not define the same classes.
    '''
    train_data = np.load('bottleneck_features_train.npy')
    validation_data = np.load('bottleneck_features_validation.npy')

    train_labels, train_classes = _bottleneck_labels(
        train_data_dir, len(train_data))
    validation_labels, validation_classes = _bottleneck_labels(
        validation_data_dir, len(validation_data))
    if train_classes != validation_classes:
        raise ValueError(
            "training and validation folders define different classes")

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(len(train_classes), activation='softmax'))

    # Sparse variant: the labels are integer class indices, not one-hot rows.
    model.compile(optimizer='rmsprop',
                  loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)


def _bottleneck_labels(data_dir, n_rows):
    '''
    Return (integer labels for the first n_rows images, class->index mapping).

    Assumes the features were extracted with flow_from_directory(shuffle=False)
    over the same folder, so row k of the feature array belongs to the k-th
    file in the generator's order.
    '''
    # Only the directory listing is needed; no image is loaded here.
    generator = ImageDataGenerator().flow_from_directory(
        data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    if n_rows > len(generator.classes):
        raise ValueError(
            "{} feature rows but only {} images under {}".format(
                n_rows, len(generator.classes), data_dir))
    # Feature extraction drops the last partial batch, hence the slice.
    return np.asarray(generator.classes[:n_rows]), generator.class_indices


# Compute and cache the VGG16 bottleneck features for both datasets.
# Training of the top model is currently disabled.
save_bottlebeck_features()
#train_top_model()

In [None]:
import numpy as np

In [None]:
# Inspect the array shape of one sample training image.
plt.imread('data/sc5/Gondola/20130410_122235_88375.jpg').shape

In [None]:
# Count the directories under the training root (the root included) that hold
# more than 200 entries.
ee = 0
for root, dirs, files in os.walk(train_data_dir):
    entry_count = len(os.listdir(root))
    if entry_count > 200:
        ee += 1
print(ee)

In [None]:
# NOTE(review): `root` is whatever the most recently executed os.walk loop
# left behind; this cell fails on a fresh kernel unless such a loop ran first.
len(os.listdir(root))

In [None]:
# Display the training-set root currently in use.
train_data_dir