In [1]:
import glob
import os

import imageio
import keras
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from tensorflow.image import rot90, crop_to_bounding_box
from tqdm import tqdm

Using TensorFlow backend.


In [2]:
# Root folder of the data set. Other cells build file names by appending
# sub-folder and file names directly to this string, so the trailing slash
# has to stay.
path = './data/zoo/galaxy-zoo-the-galaxy-challenge/'

In [3]:
if False:
    # One-off preprocessing, switched off so that re-running the notebook
    # skips it. For each source folder: open every JPEG, cut 108 px off the
    # top/left and 109 px off the bottom/right, shrink the remainder to
    # 69x69 and store it as a PNG that keeps the source file's base name.
    for jpg_pattern, target_dir in (('images_training_rev1/*.jpg', 'train/'),
                                    ('images_test_rev1/*.jpg', 'test/')):
        for jpg_name in tqdm(glob.glob(path + jpg_pattern)):
            picture = Image.open(jpg_name)
            width, height = picture.size
            cropped = picture.crop((108, 108, width - 109, height - 109))
            shrunk = cropped.resize((69, 69))
            # Base name without directory and without extension.
            stem = jpg_name.rsplit('/', 1)[-1].split('.', 1)[0]
            shrunk.save(path + target_dir + stem + '.png')

100%|██████████| 61578/61578 [27:29<00:00, 37.19it/s]  


In [8]:
def crop_rot(image, x0, y0, rot=0):
    """Rotate ``image`` by ``rot`` quarter-turns, then cut out a 45x45 window.

    Args:
        image: tensor handed to ``tf.image.rot90``.
        x0: forwarded as ``offset_height`` of the crop, i.e. the row at which
            the window starts (despite the ``x`` in the name).
        y0: forwarded as ``offset_width`` of the crop, i.e. the column at
            which the window starts.
        rot: number of 90-degree rotations, forwarded as ``k``.

    Returns:
        The rotated tensor cropped to a height and width of 45.
    """
    rotated = rot90(image, k=rot)
    return crop_to_bounding_box(
        rotated,
        offset_height=x0,
        offset_width=y0,
        target_height=45,
        target_width=45,
    )


main_input = keras.Input(shape=(69,69,3))


def _rotated_views(x0, y0):
    """Return four 45x45 views of ``main_input``, one per quarter-turn.

    Every view is a Lambda layer that runs ``crop_rot`` with the given crop
    offsets and ``rot`` = 0, 1, 2, 3.

    The parameters are passed through ``arguments=`` rather than captured by
    a ``lambda``: a lambda written inside the comprehension looks ``k`` up
    when it is *called*, not when it is created. The previous layers were
    therefore only correct because each one was called immediately; any later
    re-invocation of the stored function would have seen ``k == 3`` in all of
    them.

    Args:
        x0: row offset of the crop window (``crop_rot``'s ``x0``).
        y0: column offset of the crop window (``crop_rot``'s ``y0``).

    Returns:
        List of four tensors, in order of increasing rotation.
    """
    return [
        keras.layers.Lambda(
            crop_rot,
            output_shape=(45,45,3),
            arguments={'x0': x0, 'y0': y0, 'rot': k},
        )(main_input)
        for k in range(4)
    ]


# Four corner windows of the 69x69 input; an offset of 24 (= 69 - 45) pushes
# the 45-pixel window against the far edge.
top_left = _rotated_views(0, 0)
top_right = _rotated_views(0, 24)
bottom_left = _rotated_views(24, 0)
bottom_right = _rotated_views(24, 24)

# The convolution / pooling layers are instantiated once and then applied to
# every view, so all views go through the same layer objects.
conv_32 = keras.layers.Conv2D(filters=32, kernel_size=(6,6), activation='relu',
                              input_shape=(45,45,3))
maxpool_20 = keras.layers.MaxPooling2D(pool_size=2)
conv_64 = keras.layers.Conv2D(filters=64, kernel_size=(5,5), activation='relu')
maxpool_8 = keras.layers.MaxPooling2D(pool_size=2)
conv_128_0 = keras.layers.Conv2D(filters=128, kernel_size=(3,3), activation='relu')
conv_128_1 = keras.layers.Conv2D(filters=128, kernel_size=(3,3), activation='relu')
maxpool_2 = keras.layers.MaxPooling2D(pool_size=2)

# Order in which the shared layers are applied to each view.
shared_stack = (conv_32, maxpool_20, conv_64, maxpool_8,
                conv_128_0, conv_128_1, maxpool_2)

convolutions_output = []

for view in top_left + top_right + bottom_left + bottom_right:
    features = view
    for layer in shared_stack:
        features = layer(features)
    # Each view gets its own Flatten layer, created after the shared stack.
    convolutions_output.append(keras.layers.Flatten()(features))
    
# Join the flattened features of all views into one vector.
merge = keras.layers.Concatenate()(convolutions_output)

# Two dropout + 2048-unit dense stages, then dropout and the 37-unit output.
dropout_0 = keras.layers.Dropout(rate=0.5)(merge)
dense_0 = keras.layers.Dense(units=2048, activation='relu')(dropout_0)
dropout_1 = keras.layers.Dropout(rate=0.5)(dense_0)
dense_1 = keras.layers.Dense(units=2048, activation='relu')(dropout_1)
dropout_2 = keras.layers.Dropout(rate=0.5)(dense_1)
main_output = keras.layers.Dense(units=37, activation='relu')(dropout_2)

# Assemble and compile the model, write a diagram of it to model.png and
# print the layer table.
big_model = keras.Model(inputs=[main_input], outputs=[main_output])
big_model.compile(loss='mse', optimizer='adam')
keras.utils.plot_model(
    big_model,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=False,
)
big_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 69, 69, 3)    0                                            
__________________________________________________________________________________________________
lambda_81 (Lambda)              (None, 45, 45, 3)    0           input_6[0][0]                    
__________________________________________________________________________________________________
lambda_82 (Lambda)              (None, 45, 45, 3)    0           input_6[0][0]                    
__________________________________________________________________________________________________
lambda_83 (Lambda)              (None, 45, 45, 3)    0           input_6[0][0]                    
__________________________________________________________________________________________________
lambda_84 

In [3]:
# Load the pre-shrunk training images in ascending GalaxyID order.
#
# glob.glob() returns files in arbitrary order, but the labels are later
# matched to the images purely by position. Sorting by the numeric id in the
# file name makes row i of train_set deterministic, and train_ids records
# which galaxy each row belongs to.
# NOTE(review): the label table appears to be in ascending GalaxyID order
# (see the output of solutions.head()); confirm this, or select the label
# rows explicitly with solutions.loc[train_ids].
# The file stems are expected to be integer GalaxyIDs; int() raises
# ValueError for any other name.
train_files = sorted(
    (int(os.path.splitext(os.path.basename(name))[0]), name)
    for name in glob.glob(path+'train/*.png')
)
train_ids = [galaxy_id for galaxy_id, _ in train_files]

# Size the buffer from the files actually found: a hard-coded row count
# either overflows or leaves uninitialised rows when the folder differs.
# NOTE(review): 8-bit image data would also fit in uint8 at half the memory;
# confirm nothing relies on int16 before changing the dtype.
train_set = np.empty((len(train_files), 69, 69, 3), dtype='int16')
for i, (_, imagename) in enumerate(tqdm(train_files)):
    train_set[i] = imageio.imread(imagename)

100%|██████████| 61578/61578 [19:01<00:00, 53.94it/s]


In [15]:
# Read the label table and use the GalaxyID column as its index; the last
# line previews the first rows.
solutions = pd.read_csv(path+'training_solutions_rev1.csv').set_index('GalaxyID')
solutions.head(n=5)

Unnamed: 0_level_0,Class1.1,Class1.2,Class1.3,Class2.1,Class2.2,Class3.1,Class3.2,Class4.1,Class4.2,Class5.1,Class5.2,Class5.3,Class5.4,Class6.1,Class6.2,Class7.1,Class7.2,Class7.3,Class8.1,Class8.2,Class8.3,Class8.4,Class8.5,Class8.6,Class8.7,Class9.1,Class9.2,Class9.3,Class10.1,Class10.2,Class10.3,Class11.1,Class11.2,Class11.3,Class11.4,Class11.5,Class11.6
GalaxyID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
100008,0.383147,0.616853,0.0,0.0,0.616853,0.038452,0.578401,0.418398,0.198455,0.0,0.104752,0.512101,0.0,0.054453,0.945547,0.201463,0.181684,0.0,0.0,0.027226,0.0,0.027226,0.0,0.0,0.0,0.0,0.0,0.0,0.279952,0.138445,0.0,0.0,0.092886,0.0,0.0,0.0,0.325512
100023,0.327001,0.663777,0.009222,0.031178,0.632599,0.46737,0.165229,0.591328,0.041271,0.0,0.236781,0.160941,0.234877,0.189149,0.810851,0.0,0.135082,0.191919,0.0,0.0,0.140353,0.0,0.048796,0.0,0.0,0.012414,0.0,0.018764,0.0,0.131378,0.45995,0.0,0.591328,0.0,0.0,0.0,0.0
100053,0.765717,0.177352,0.056931,0.0,0.177352,0.0,0.177352,0.0,0.177352,0.0,0.11779,0.059562,0.0,0.0,1.0,0.0,0.741864,0.023853,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100078,0.693377,0.238564,0.068059,0.0,0.238564,0.109493,0.129071,0.189098,0.049466,0.0,0.0,0.113284,0.12528,0.320398,0.679602,0.408599,0.284778,0.0,0.0,0.0,0.096119,0.096119,0.0,0.128159,0.0,0.0,0.0,0.0,0.094549,0.0,0.094549,0.189098,0.0,0.0,0.0,0.0,0.0
100090,0.933839,0.0,0.066161,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029383,0.970617,0.494587,0.439252,0.0,0.0,0.0,0.0,0.0,0.0,0.029383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Positional split: the first 50000 rows are used for training, everything
# after them for validation. Images and labels are cut at the same position.
split_at = 50000
X_train, X_valid = train_set[:split_at], train_set[split_at:]
y_train, y_valid = solutions.iloc[:split_at], solutions.iloc[split_at:]

In [17]:
# Checkpoint: writes weights only, and only when the monitored value improves.
checkpoint = keras.callbacks.ModelCheckpoint(
    'best_weights.h5',
    save_best_only=True,
    save_weights_only=True,
)
# Early stopping with a patience of 20 epochs; verbose=1 reports when it fires.
early_stop = keras.callbacks.EarlyStopping(patience=20, verbose=1)
callbacks = [checkpoint, early_stop]

big_model.fit(
    x=X_train,
    y=y_train,
    batch_size=100,
    epochs=100,
    shuffle=True,
    validation_data=(X_valid, y_valid),
    callbacks=callbacks,
)

Instructions for updating:
Use tf.cast instead.
Train on 50000 samples, validate on 11578 samples
Epoch 1/100
Epoch 2/100

KeyboardInterrupt: 

In [None]:
# Drop the training arrays before the test buffer is allocated.
del train_set
del X_train
del X_valid

# Load the pre-shrunk test images in ascending GalaxyID order.
#
# glob.glob() returns files in arbitrary order, so without sorting there is
# no way to tell which galaxy a given prediction row belongs to. test_ids
# holds the GalaxyID for every row of test_set (and therefore for every row
# of the predictions made from it).
# The file stems are expected to be integer GalaxyIDs; int() raises
# ValueError for any other name.
test_files = sorted(
    (int(os.path.splitext(os.path.basename(name))[0]), name)
    for name in glob.glob(path+'test/*.png')
)
test_ids = [galaxy_id for galaxy_id, _ in test_files]

# Size the buffer from the files actually found instead of a fixed count.
test_set = np.empty((len(test_files), 69, 69, 3), dtype='int16')
for i, (_, imagename) in enumerate(tqdm(test_files)):
    test_set[i] = imageio.imread(imagename)

In [None]:
# Predict with the network built earlier. It is bound to the name
# `big_model`; the visible cells define nothing called `model`, so the
# previous call failed with NameError.
# NOTE(review): this uses whatever weights are currently in memory. The
# weights written to 'best_weights.h5' by the checkpoint callback are not
# reloaded here; confirm which set is intended.
submission = big_model.predict(test_set)

In [None]:
# Wrap the raw prediction array in a DataFrame and preview its first rows.
tmp = pd.DataFrame(data=submission)
tmp.head(n=5)

In [None]:
# Per-column summary statistics of the predictions, as a quick sanity check.
tmp.describe()