In [5]:
#Basic imports
from sys import argv, stdout

#Local imports
from common import constants
from models import model_1
from model_utils import get_image_labels, get_label_ids, model_fit_data_feeder
from utils import list_files

# --- Run configuration ---
model_name = "model_1"
dataset = "train"
n_epochs = 2
batch_size = 16
n_images = 20  # handed to list_files as its second argument; replaced below by the real count
validation_split = 0.2

input_shape = constants.IMG_SHAPE
source_loc = constants.PROCESSED_DATASET_MAPPINGS[dataset]

# --- Labels and input files ---
image_labels = get_image_labels()
label_ids = get_label_ids()
num_classes = len(label_ids)
img_files = list_files(source_loc, n_images)
n_images = len(img_files)

# --- Train / validation partition ---
# The leading (1 - validation_split) share of the listing trains, the tail validates.
split_marker = int(n_images * (1 - validation_split))
train_set, validation_set = img_files[:split_marker], img_files[split_marker:]

# --- Model ---
model = model_1(input_shape, num_classes)
model.summary()

for subset in (train_set, validation_set):
    print(len(subset))

# --- Training ---
# Step counts are the number of batches needed to cover each subset once
# (ceiling division written with integer arithmetic).
train_feed = model_fit_data_feeder(
    "training", source_loc, train_set, batch_size, image_labels, label_ids)
train_steps = (len(train_set) + batch_size - 1) // batch_size

validation_feed = model_fit_data_feeder(
    "validation", source_loc, validation_set, batch_size, image_labels, label_ids)
validation_steps = (len(validation_set) + batch_size - 1) // batch_size

model.fit_generator(
    train_feed,
    steps_per_epoch=train_steps,
    epochs=n_epochs,
    validation_data=validation_feed,
    validation_steps=validation_steps)

# --- Persist the trained model ---
print("Saving model")
model.save('model.h5', overwrite=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_13 (Conv1D)           (None, 398, 32)           67232     
_________________________________________________________________
conv1d_14 (Conv1D)           (None, 396, 32)           3104      
_________________________________________________________________
max_pooling1d_7 (MaxPooling1 (None, 198, 32)           0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 198, 32)           0         
_________________________________________________________________
conv1d_15 (Conv1D)           (None, 196, 64)           6208      
_________________________________________________________________
conv1d_16 (Conv1D)           (None, 194, 64)           12352     
_________________________________________________________________
max_pooling1d_8 (MaxPooling1 (None, 97, 64)            0         
__________

In [None]:
def load_model_data(source_path, files, batch_size, class_name_map, label_dict, num_classes):
    """Endlessly yield ``([x], y)`` training batches built from ``files``.

    Each pass over the data reshuffles the file order, splits it into chunks
    with ``batch(files, batch_size)``, loads every chunk with
    ``load_dataset(source_path, batch_files)`` and divides the result by 255.
    Labels are looked up as ``class_name_map[label_dict[image]]`` and passed
    through ``to_categorical``.

    Args:
        source_path: Location handed to ``load_dataset``.
        files: Sequence of file names; each must be a key of ``label_dict``.
        batch_size: Chunk size handed to ``batch``.
        class_name_map: Mapping from class name to integer class id.
        label_dict: Mapping from file name to class name.
        num_classes: Forwarded to ``to_categorical`` as ``num_classes``.

    Yields:
        ``([x], y)`` where ``x`` is the scaled image batch and ``y`` is the
        output of ``to_categorical`` for that batch.

    Raises:
        ValueError: If ``files`` is empty. Without this guard the generator
            would spin forever inside ``while True`` without yielding.
    """
    if len(files) == 0:
        raise ValueError("load_model_data requires at least one file, got none")

    # Shuffle a private copy so the caller's list keeps its order.
    # NOTE(review): this assumes `shuffle` reorders in place (random.shuffle) - confirm the import.
    files = list(files)

    while True:
        shuffle(files)
        for batch_files in batch(files, batch_size):
            #Load images
            x = load_dataset(source_path, batch_files)

            #Normalize
            x = np.array(x/255)

            #Class ids for this batch, then categorical encoding
            y = [class_name_map[label_dict[image]] for image in batch_files]
            y = to_categorical(y, num_classes = num_classes)

            yield [x], y
    

In [None]:
# Smoke-test the batch generator on two known files.
files = ["0000e88ab.jpg", "000a6daec.jpg"]
batch_size = 2

# load_model_data loops `while True` and never finishes, so iterating over it
# with a for-loop would hang this cell. Pull exactly one batch instead.
x, y = next(load_model_data(TRAIN_SET_LOC, files, batch_size, CLASS_NAME_MAP, LABEL_DICT, NUM_CLASSES))
print(x[0].shape)

In [None]:
### Create label and class mapping for training set. ###

#Load labels: column 0 of each CSV row is the key, column 1 the value
LABEL_DICT = {}

# newline='' is what the csv module documents for files handed to csv.reader.
with open(LABEL_FILE_LOC, 'r', newline='') as handle:
    label_reader = csv.reader(handle)
    next(label_reader, None)  # skip the first row (presumably a header - confirm)

    for row in label_reader:
        # csv.reader yields [] for blank lines; skip anything without both columns.
        if len(row) < 2:
            continue
        LABEL_DICT[row[0]] = row[1]

#Classes
# Sorted so that class ids are the same in every kernel session. Iterating a
# set of strings has no stable order across interpreter runs, which would
# silently remap class ids between training and later use of a saved model.
CLASS_NAMES = sorted(set(LABEL_DICT.values()))
CLASS_NAME_MAP = {class_name: class_idx for class_idx, class_name in enumerate(CLASS_NAMES)}

NUM_CLASSES = len(CLASS_NAMES)

print("Number of classses: {count}".format(count = NUM_CLASSES))

In [None]:
### Preprocess train dataset ###
files = list(LABEL_DICT.keys())

# NOTE(review): 256 is the fifth positional argument of preprocess_raw_dataset;
# its meaning is not visible from this notebook - confirm and name it.
with tqdm(total = len(files), file=stdout) as progress_bar:
    preprocess_raw_dataset(RAW_TRAIN_SET_LOC, files, TRAIN_SET_LOC, IMG_SIZE, 256, progress_bar = progress_bar)

# Disabled scratch check, kept as comments. It used to be a bare triple-quoted
# string, which as the last expression of the cell was echoed as cell output.
# train_raw_files = ["0000e88ab.jpg"]
# image = imread(locate_img(RAW_TRAIN_SET_LOC, "0000e88ab.jpg"))
# resized = load_dataset(RAW_TRAIN_SET_LOC, train_raw_files)
# print(resized[0])

In [None]:
def display_img(source_path, label_dict, num_files = 10):
    """Load the first ``num_files`` labelled images and show a sample.

    The images are loaded with ``load_dataset``, converted with
    ``to_grayscale``, and their labels encoded with ``to_categorical``.
    The function then prints one image array, draws the first image with
    ``plt.imshow`` and prints one encoded label.

    Args:
        source_path: Location handed to ``load_dataset``.
        label_dict: Mapping from file name to class name; its first
            ``num_files`` keys select the images.
        num_files: Maximum number of images to load.

    Raises:
        ValueError: If no file is selected (empty ``label_dict`` or
            ``num_files`` of 0).

    Note:
        Class ids and the class count still come from the notebook globals
        ``CLASS_NAME_MAP`` and ``NUM_CLASSES``.
    """
    files = list(label_dict.keys())[:num_files]
    if not files:
        raise ValueError("display_img needs at least one labelled file to display")

    x = load_dataset(source_path, files)
    x = to_grayscale(x)

    # Look labels up in the label_dict argument (the original read the global
    # LABEL_DICT here and ignored the parameter).
    y = [CLASS_NAME_MAP[label_dict[image]] for image in files]
    y = to_categorical(y, num_classes = NUM_CLASSES)

    # The sample positions 3 and 4 are kept from the original but clamped, so
    # loading fewer than five files no longer raises IndexError.
    # NOTE(review): the printed array, the drawn image and the printed label
    # refer to three different samples (3, 0 and 4) - confirm this is intended.
    last_idx = len(files) - 1
    pixel_idx = min(3, last_idx)
    label_idx = min(4, last_idx)

    #Print sample
    plt.figure()

    print(x[pixel_idx])
    plt.imshow(x[0], cmap='gray')

    print(y[label_idx])

In [None]:
# Show a sample from the first 10 labelled training images.
# This cell used to repeat the body of display_img line for line; call the
# helper so there is a single copy of the loading/plotting logic.
display_img(TRAIN_SET_LOC, LABEL_DICT, num_files = 10)

In [None]:
### Create the model for gray-scale inputs ###
model = Sequential()

input_shape = IMG_SIZE

# Two Conv1D blocks (32 then 64 filters), each followed by pooling and dropout.
model.add(Conv1D(32, kernel_size = 3, activation='relu', input_shape=input_shape))
model.add(Conv1D(32, kernel_size = 3, activation='relu'))
model.add(MaxPool1D(pool_size=2))
model.add(Dropout(0.25))

model.add(Conv1D(64, kernel_size = 3, activation='relu'))
model.add(Conv1D(64, kernel_size = 3, activation='relu'))
model.add(MaxPool1D(pool_size=2))
model.add(Dropout(0.25))

# Classifier head: one softmax output per class.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(NUM_CLASSES, activation='softmax'))

#Compile the model
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#Print model summary
#print(model.summary())

#Training and validation sets
files = list(LABEL_DICT.keys())[:2048]
num_files = len(files)
batch_size = 128
validation_split = 0.2
split_marker = int(num_files*(1 - validation_split))
train_set = files[:split_marker]
validation_set = files[split_marker:]

# Batches needed to cover each subset once. Integer ceiling division is used
# because `/` yields a float in Python 3, while step counts are whole numbers.
train_steps = (len(train_set) + batch_size - 1) // batch_size
validation_steps = (len(validation_set) + batch_size - 1) // batch_size

#Train the model
model.fit_generator(
    load_model_data(TRAIN_SET_LOC, train_set, batch_size, CLASS_NAME_MAP, LABEL_DICT, NUM_CLASSES),
    steps_per_epoch = train_steps,
    epochs = 20,
    validation_data=load_model_data(TRAIN_SET_LOC, validation_set, batch_size, CLASS_NAME_MAP, LABEL_DICT, NUM_CLASSES),
    validation_steps=validation_steps)

In [None]:
### Create the model ###
model = Sequential()

input_shape = IMG_SIZE

# Initial pooling layer, applied directly to the input.
model.add(MaxPool2D((5, 5), (2, 2), 'valid', input_shape=input_shape))

# Five Conv2D + MaxPool2D stages with decreasing filter counts (128 -> 4).
model.add(Conv2D(128, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPool2D((5, 5), (2, 2), 'valid'))

model.add(Conv2D(64, kernel_size=3, activation='relu'))
model.add(MaxPool2D((5, 5), (2, 2), 'valid'))

model.add(Conv2D(32, kernel_size=3, activation='relu'))
model.add(MaxPool2D((5, 5), (2, 2), 'valid'))

model.add(Conv2D(8, kernel_size=3, activation='relu'))
model.add(MaxPool2D((5, 5), (2, 2), 'valid'))

model.add(Conv2D(4, kernel_size=3, activation='relu'))
model.add(MaxPool2D((5, 5), (2, 2), 'valid'))

# Classifier head: one softmax output per class.
model.add(Flatten())
model.add(Dense(NUM_CLASSES))
model.add(Activation('softmax'))

#Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

#Print model summary
# summary() prints the table itself; wrapping it in print() added a stray "None".
model.summary()

#Train the model
files = list(LABEL_DICT.keys())[:5096]
batch_size = 16

# Batches needed to cover `files` once. The previous `len(files)/batch_size + 1`
# was a float and scheduled one extra step whenever the count divided evenly.
train_steps = (len(files) + batch_size - 1) // batch_size

# NOTE(review): use_multiprocessing=True is combined with a plain Python
# generator here - confirm this is intended for the Keras version in use.
model.fit_generator(
    load_model_data(TRAIN_SET_LOC, files, batch_size, CLASS_NAME_MAP, LABEL_DICT, NUM_CLASSES),
    epochs = 20,
    steps_per_epoch = train_steps,
    use_multiprocessing = True)

# Disabled alternative that fits on in-memory chunks, kept as comments. It used
# to be a bare triple-quoted string, which was echoed as the cell's output.
# for files in batch(list(LABEL_DICT.keys()), 256):
#     x, y = load_image_set(TRAIN_SET_LOC, files, CLASS_NAME_MAP, LABEL_DICT, NUM_CLASSES)
#     model.fit(x, y, batch_size = 16, validation_split = 0.2, epochs=3)

#SVG(model_to_dot(model).create(prog='dot', format='svg'))

In [None]:
#Scratch
#img = imread(locate_train_img("0000e88ab.jpg"))

In [None]:
# NOTE(review): X_TRAIN is not assigned in any cell visible in this notebook, so
# this line raises NameError on a fresh kernel unless it is defined elsewhere.
print(X_TRAIN.shape)