In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import keras
from keras import models
from keras import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from test_human import get_image
from sklearn import metrics
import os
import PIL

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the proposals table. Later cells read its `tile` column (train/test split)
# and make_directory reads `id` and `crater` from each row.
data = pd.read_csv('proposals.csv')

In [3]:
# Split by tile: the *_24 tiles are held out, the *_25 tiles are used for fitting.
held_out_tiles = ['1_24', '2_24', '3_24']
fitting_tiles = ['1_25', '2_25', '3_25']

test = data[data['tile'].isin(held_out_tiles)]
train = data[data['tile'].isin(fitting_tiles)]

In [4]:
def make_directory(df, dirname):
    """Write the images referenced by `df` into a class-per-folder directory tree.

    Creates ``./<dirname>/0/`` and ``./<dirname>/1/`` (if they do not exist yet)
    and saves one ``<id>.bmp`` per row of `df` into the sub-folder matching the
    row's ``crater`` label. This is the layout consumed by
    ``ImageDataGenerator.flow_from_directory``.

    Args:
        df: DataFrame with an ``id`` column (passed to ``get_image`` and used as
            the file stem) and a ``crater`` column holding the label 0 or 1.
        dirname: name of the directory to create, relative to the current
            working directory.

    Raises:
        ValueError: if a row's ``crater`` value is neither 0 nor 1.
    """
    path = './{}/'.format(dirname)
    negative_dir = path + '0/'
    positive_dir = path + '1/'
    # Create the folders from Python rather than shelling out to `mkdir`:
    # portable, no shell interpolation of `dirname`, and real errors are raised
    # instead of being silently ignored. Existing folders are left alone.
    for class_dir in (negative_dir, positive_dir):
        if not os.path.isdir(class_dir):
            os.makedirs(class_dir)
    for _, row in df.iterrows():
        id_ = row['id']
        crater = row['crater']
        # Resolve the destination before loading the image so a bad label
        # fails fast with a message that identifies the offending row.
        if crater == 0:
            file_path = negative_dir
        elif crater == 1:
            file_path = positive_dir
        else:
            raise ValueError(
                'Unexpected crater label {!r} for id {!r}; expected 0 or 1'.format(crater, id_)
            )
        img = PIL.Image.fromarray(get_image(id_))
        # str() keeps the concatenation valid if ids are not already strings.
        img.save(file_path + str(id_) + '.bmp')
    return

In [5]:
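# One-time setup: uncomment and run once to write ./test/ and ./train/ to disk.
# The image generators below read from those directories, so they must exist
# before the rest of the notebook is run.
#make_directory(test, 'test')
#make_directory(train, 'train')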

In [14]:
batch_size = 16
width, height = 28, 28

# Both generators read single-channel images, resize them to (width, height),
# scale pixel values by 1/255 and yield binary labels taken from the
# sub-folder names.
flow_options = dict(
    color_mode='grayscale',
    class_mode='binary',
    target_size=(width, height),
    batch_size=batch_size,
)

traingen = ImageDataGenerator(rescale=1./255)
train_gen = traingen.flow_from_directory('./train/', **flow_options)

testgen = ImageDataGenerator(rescale=1./255)
test_gen = testgen.flow_from_directory('./test/', **flow_options)

Found 2750 images belonging to 2 classes.
Found 2160 images belonging to 2 classes.


In [15]:
# Restore the previously saved MNIST CNN and print its layer table.
mnist_model_path = './models/mnist_cnn2.hdf5'
mnist_model = models.load_model(mnist_model_path)
mnist_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 12, 12, 64)        256       
_________________________________________________________________
dropout_1 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
__________

In [28]:
# Transfer learning: start from the first 8 layers of the loaded MNIST model
# (the layer objects, and therefore their weights, are shared with mnist_model)
# and stack a fresh binary-classification head on top.
new_model = Sequential()
for layer in mnist_model.layers[:8]:
    new_model.add(layer)
# Freezing the earliest reused layers is left disabled; enable to train only
# the later layers.
#for layer in new_model.layers[:4]:
#    layer.trainable=False
# NOTE(review): Keras' Dropout argument is the fraction of units to DROP, so .9
# zeroes 90% of activations during training — confirm this is intended and not
# meant as a keep-probability.
new_model.add(Dropout(.9, name='newdrop1'))
new_model.add(Dense(256, activation='relu'))
new_model.add(BatchNormalization(name='newnorm'))
new_model.add(Dropout(.9, name='newdrop2'))
# Single sigmoid unit: one output in (0, 1), matching class_mode='binary'.
new_model.add(Dense(1, activation='sigmoid'))

In [29]:
# Plain SGD with a small step size; binary cross-entropy pairs with the
# single sigmoid output unit.
sgd_optimizer = SGD(lr=.001)
new_model.compile(
    optimizer=sgd_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
new_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 12, 12, 64)        256       
_________________________________________________________________
dropout_1 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
__________

In [30]:
# Save a checkpoint only when the monitored validation loss improves.
checkpointer = ModelCheckpoint('./models/pretrained_classifier1.hdf5', save_best_only=True, verbose=True)

# Step counts come from the generators that actually feed the model rather
# than from the `train`/`test` DataFrames: the image directories are built in a
# separate (normally commented-out) step, so the number of files on disk is
# the quantity that must match the number of batches drawn per epoch.
history = new_model.fit_generator(
        train_gen,
        steps_per_epoch=train_gen.samples // batch_size,
        epochs=100,
        validation_data=test_gen,
        validation_steps=test_gen.samples // batch_size,
        verbose=True,
        shuffle=True,
        callbacks=[checkpointer]
)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.42766, saving model to ./models/pretrained_classifier1.hdf5
Epoch 2/100
Epoch 00002: val_loss improved from 0.42766 to 0.33288, saving model to ./models/pretrained_classifier1.hdf5
Epoch 3/100
Epoch 00003: val_loss improved from 0.33288 to 0.32086, saving model to ./models/pretrained_classifier1.hdf5
Epoch 4/100
Epoch 00004: val_loss improved from 0.32086 to 0.31944, saving model to ./models/pretrained_classifier1.hdf5
Epoch 5/100
Epoch 00005: val_loss improved from 0.31944 to 0.31931, saving model to ./models/pretrained_classifier1.hdf5
Epoch 6/100
Epoch 00006: val_loss did not improve
Epoch 7/100
Epoch 00007: val_loss did not improve
Epoch 8/100
Epoch 00008: val_loss did not improve
Epoch 9/100
Epoch 00009: val_loss did not improve
Epoch 10/100
 30/171 [====>.........................] - ETA: 14s - loss: 0.5409 - acc: 0.7244

KeyboardInterrupt: 

In [37]:
# Baseline CNN trained from scratch, for comparison with the model built on
# the reused MNIST layers.
# Only the first layer declares the input shape; every later layer infers its
# input from the layer before it.
# NOTE(review): Keras' Dropout argument is the fraction of units to DROP, so 0.9
# zeroes 90% of activations during training — confirm this is intended and not
# meant as a keep-probability.
model = Sequential([
    BatchNormalization(input_shape=(width, height, 1)),
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Dropout(0.25),
    Flatten(),
    BatchNormalization(),
    Dropout(0.9),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.9),
    # Single sigmoid unit: one output in (0, 1), matching class_mode='binary'.
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer=SGD(lr=.001),
    metrics=['accuracy']
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_13 (Batc (None, 28, 28, 1)         4         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_14 (Batc (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
batch_normalization_15 (Batc (None, 12, 12, 64)        256       
_________________________________________________________________
dropout_10 (Dropout)         (None, 12, 12, 64)        0         
__________

In [38]:
# Save a checkpoint only when the monitored validation loss improves.
checkpointer = ModelCheckpoint('./models/classifier2.hdf5', save_best_only=True, verbose=True)

# Step counts come from the generators that actually feed the model rather
# than from the `train`/`test` DataFrames: the image directories are built in a
# separate (normally commented-out) step, so the number of files on disk is
# the quantity that must match the number of batches drawn per epoch.
history = model.fit_generator(
        train_gen,
        steps_per_epoch=train_gen.samples // batch_size,
        epochs=100,
        validation_data=test_gen,
        validation_steps=test_gen.samples // batch_size,
        verbose=True,
        shuffle=True,
        callbacks=[checkpointer]
)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.46742, saving model to ./models/classifier2.hdf5
Epoch 2/100
Epoch 00002: val_loss improved from 0.46742 to 0.40306, saving model to ./models/classifier2.hdf5
Epoch 3/100
Epoch 00003: val_loss improved from 0.40306 to 0.37957, saving model to ./models/classifier2.hdf5
Epoch 4/100

KeyboardInterrupt: 