# Hackathon

Some utilities

## Import Utils

In [1]:
!pip install --upgrade pip

Requirement already up-to-date: pip in /usr/local/lib/python3.5/dist-packages


In [2]:
!pip install keras



In [20]:
import keras
import h5py as h5
import numpy as np

# HDF5 file whose 'S2' / 'TOP_LANDCOVER' datasets feed the training and
# validation generators.
PATH_DATA = '../full.h5'
# HDF5 file the prediction generator reads its 'S2' samples from.
PATH_PREDICT_WITHOUT_GT = '../pred_eighties_from_full_1_without_gt.h5'
# Output HDF5 file written by build_h5_pred_file.
PATH_SUBMIT = 'pred_from_full_Mostafa_Paul.h5'

In [2]:
# Samples per batch; shared by every generator and batch-count computation below.
BATCH_SIZE = 64
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, BatchNormalization, Activation, MaxPooling2D
import keras.layers.normalization 
from keras.callbacks import Callback
%load_ext autoreload
%autoreload 2

In [3]:
def get_idxs(h5_path):
    """Return the sample indices available in an HDF5 file.

    Parameters
    ----------
    h5_path : str
        Path to an HDF5 file containing an ``'S2'`` dataset.

    Returns
    -------
    range
        ``range(n)`` where ``n`` is ``len()`` of the ``'S2'`` dataset.
    """
    # Open explicitly read-only and release the handle as soon as the length
    # is known; the previous version never closed the file and relied on the
    # library's default open mode.
    with h5.File(h5_path, 'r') as f:
        n_samples = len(f['S2'])
    return range(n_samples)

def shuffle_idx(sample_idxs):
    """Return the given indices as a new list in random order.

    The ordering is drawn from NumPy's global random state.
    """
    permuted = np.random.permutation(sample_idxs)
    return [idx for idx in permuted]

def split_train_val(sample_idxs, proportion):
    """Split a sequence into a leading train part and a trailing validation part.

    ``proportion`` is the fraction of samples assigned to validation; the cut
    position is ``int((1 - proportion) * len(sample_idxs))``.

    Returns
    -------
    tuple
        ``(train_idxs, val_idxs)`` as two slices of ``sample_idxs``.
    """
    cut = int((1. - proportion) * len(sample_idxs))
    train_part = sample_idxs[:cut]
    val_part = sample_idxs[cut:]
    return train_part, val_part

def get_batch_count(idxs, batch_size):
    """Return how many batches of ``batch_size`` are needed to cover ``idxs``.

    A trailing partial batch counts as one extra batch.
    """
    full_batches, leftover = divmod(len(idxs), batch_size)
    if leftover > 0:
        return int(full_batches) + 1
    return int(full_batches)

In [4]:
def generator(h5_path, batch_size, idxs, window=10000):
    """Yield ``(X, Y)`` training batches forever from an HDF5 file.

    The indices are shuffled once, then the generator repeatedly picks a random
    contiguous window of the shuffled list, reshuffles that window and walks
    through it batch by batch.

    Parameters
    ----------
    h5_path : str
        HDF5 file containing the ``'S2'`` and ``'TOP_LANDCOVER'`` datasets.
    batch_size : int
        Maximum number of samples per yielded batch.
    idxs : sequence of int
        Sample indices this generator is allowed to draw from.
    window : int, optional
        Number of shuffled indices covered between two re-draws
        (default 10000, the previously hard-coded value).

    Yields
    ------
    tuple
        ``(X, Y)`` where ``X`` is the selected ``'S2'`` rows as an array and
        ``Y`` is the matching ``'TOP_LANDCOVER'`` rows one-hot encoded over
        23 classes.

    Raises
    ------
    ValueError
        If ``idxs`` is empty (the loop would otherwise spin without yielding).
    """
    idxs = shuffle_idx(idxs)
    if len(idxs) == 0:
        raise ValueError("generator() needs at least one sample index")

    # Largest admissible window start. It is 0 when there are fewer indices
    # than one window, which used to make randint() raise; the "+ 1" below
    # also lets the last index be drawn, which the old bound excluded.
    max_start = max(len(idxs) - window, 0)

    # The context manager closes the file when the generator is closed or
    # garbage-collected.
    with h5.File(h5_path, 'r') as f:
        while True:
            rd = np.random.randint(max_start + 1)
            my_idxs = shuffle_idx(idxs[rd:rd + window])
            batch_count = get_batch_count(my_idxs, batch_size)
            for b in range(batch_count):
                # h5py fancy indexing needs the index list in increasing order.
                batch_idxs = sorted(my_idxs[b * batch_size:(b + 1) * batch_size])
                X = f['S2'][batch_idxs, :, :, :]
                Y = f['TOP_LANDCOVER'][batch_idxs, :]
                yield np.array(X), keras.utils.np_utils.to_categorical(np.array(Y), 23)

In [5]:
# All sample indices of the training file, shuffled once so that the split
# below is random, then divided 80 % train / 20 % validation.
idxs = get_idxs(PATH_DATA)
shuffled_idxs = shuffle_idx(idxs)
train_idxs, val_idxs = split_train_val(shuffled_idxs, 0.2)

In [6]:
# Endless batch generators over the two disjoint index sets, plus the number
# of batches needed to cover each set once.
# NOTE(review): the fit call further down hard-codes its step counts and does
# not use train_batch_count / val_batch_count.
train_gen = generator(PATH_DATA, BATCH_SIZE, train_idxs)
train_batch_count = get_batch_count(train_idxs, BATCH_SIZE)

val_gen = generator(PATH_DATA, BATCH_SIZE, val_idxs)
val_batch_count = get_batch_count(val_idxs, BATCH_SIZE)

In [7]:
# Batches per full pass over the train / validation indices.
print(train_batch_count, val_batch_count)

233728 58432


# Model instantiation

In [8]:
# Shape of one sample handed to Input() by every model cell below.
# Presumably (height, width, bands) of an 'S2' patch — TODO confirm.
input_shape = (16,16,4)

In [25]:


from keras.models import Model
from keras.layers import Input, Conv3D, Conv2D, Dropout, MaxPooling2D, Flatten, Activation, AveragePooling2D, concatenate, add



# Plain CNN classifier: two convolutional stages followed by a dense head.
inp = Input(shape=input_shape)

# Each stage is two Conv2D(3x3) -> BatchNormalization -> ReLU units followed
# by a 2x2 max pooling; the stages use 32 and 64 filters respectively.
x = inp
for n_filters in (32, 64):
    for _unit in range(2):
        x = Conv2D(n_filters, (3, 3))(x)
        x = BatchNormalization(axis=-1)(x)
        x = Activation("relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

# Dense head: 256 hidden units with dropout, then a 23-way softmax.
x = Flatten()(x)
x = Dense(256)(x)
x = BatchNormalization()(x)
x = Activation("relu")(x)
x = Dropout(0.2)(x)
x = Dense(23)(x)
x = Activation('softmax')(x)

model = Model(inp, x)

# Alternative optimizer that was tried: keras.optimizers.rmsprop(lr=0.001, decay=1e-6)
optim = keras.optimizers.Adam(lr=0.0001)

model.compile(loss='categorical_crossentropy',
              optimizer=optim,
              metrics=['accuracy'])


In [26]:
from keras.models import Model
from keras.layers import Input, Conv3D, Conv2D, Dropout, MaxPooling2D, Flatten, Activation, AveragePooling2D, concatenate, add



# Variant of the CNN in which each 3x3 convolution is followed by a 1x1
# convolution and both outputs are concatenated before pooling.
# NOTE: this cell rebinds `model`, replacing the network built in the cell above.
inp = Input(shape = input_shape)

# Stage 1: 3x3 convolution on the input ...
x1 = Conv2D(32, (3,3))(inp)
x1 = BatchNormalization(axis=-1)(x1)
x1 = Activation("relu")(x1)

# ... then a 1x1 convolution on top of it.
x2 = Conv2D(32, (1,1))(x1)
x2 = BatchNormalization(axis=-1)(x2)
x2 = Activation("relu")(x2)


# Join both stage-1 feature maps, then downsample.
conc1 = concatenate([x1, x2])
conc1 = MaxPooling2D(pool_size = (2,2))(conc1)


# Stage 2: same 3x3 / 1x1 pattern with 64 filters.
x3 = Conv2D(64, (3,3))(conc1)
x3 = BatchNormalization(axis=-1)(x3)
x3 = Activation("relu")(x3)

x4 = Conv2D(64, (1,1))(x3)
x4 = BatchNormalization(axis=-1)(x4)
x4 = Activation("relu")(x4)

conc2 = concatenate([x3, x4])
conc2 = MaxPooling2D(pool_size = (2,2))(conc2)

# Dense head: 256 hidden units with dropout, then a 23-way softmax.
x5 = Flatten()(conc2)


x5 = Dense(256)(x5)
x5 = BatchNormalization()(x5)
x5 = Activation("relu")(x5)
x5 = Dropout(0.2)(x5)
x5 = Dense(23)(x5)
x5 = Activation('softmax')(x5)

model = Model(inp, x5)

optim = keras.optimizers.Adam(lr=0.0001)
# Alternative optimizer that was tried:
#optim = keras.optimizers.rmsprop(lr=0.001, decay=1e-6)

model.compile(optimizer=optim,
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [9]:
from keras.models import Model
from keras.layers import Input, Conv3D, Conv2D, Dropout, MaxPooling2D, Flatten, Activation, AveragePooling2D, concatenate, add


# Densely connected variant: every convolution uses 'same' padding so that its
# output can be concatenated with the tensors that preceded it.
# This is the network that is trained and saved further down (as `model2`).
inp = Input(shape = input_shape)

x1 = Conv2D(32, (3,3), padding = 'same')(inp)
x1 = BatchNormalization(axis=-1)(x1)
x1 = Activation("relu")(x1)

# First conv output joined with the raw input.
conc0 = concatenate([x1, inp])

# NOTE(review): this convolution reads x1, whereas the later ones read the
# running concatenation (conc1, conc2, conc3) — confirm conc0 was not intended.
x2 = Conv2D(32, (3,3), padding = 'same')(x1)
x2 = BatchNormalization(axis=-1)(x2)
x2 = Activation("relu")(x2)


conc1 = concatenate([conc0, x2])
#conc1 = MaxPooling2D(pool_size = (2,2))(conc1) #remove Maxpooling


x3 = Conv2D(32, (3,3), padding = 'same')(conc1)
x3 = BatchNormalization(axis=-1)(x3)
x3 = Activation("relu")(x3)

# First downsampling, applied to everything accumulated so far.
conc2 = concatenate([x3, conc1])
conc2 = MaxPooling2D(pool_size = (2,2))(conc2)

x4 = Conv2D(64, (3,3), padding = 'same')(conc2)
x4 = BatchNormalization(axis=-1)(x4)
x4 = Activation("relu")(x4)

conc3 = concatenate([conc2, x4])

x5 = Conv2D(64, (3,3), padding = 'same')(conc3) # change to 128
x5 = BatchNormalization(axis=-1)(x5)
x5 = Activation("relu")(x5)

conc4 = concatenate([x5, conc3])

# Second downsampling before the dense head.
conc4 = MaxPooling2D(pool_size = (2,2))(conc4)

# add 256

# Dense head: 512 hidden units with dropout, then a 23-way softmax.
out = Flatten()(conc4)


out = Dense(512)(out)
out = BatchNormalization()(out)
out = Activation("relu")(out)
out = Dropout(0.2)(out)
out = Dense(23)(out)
out = Activation('softmax')(out)

model2 = Model(inp, out)

optim = keras.optimizers.Adam(lr=0.0001)
# Alternative optimizer that was tried:
#optim = keras.optimizers.rmsprop(lr=0.001, decay=1e-6)

model2.compile(optimizer=optim,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [11]:
# Print the layer table of the densely connected network.
model2.summary()
from keras.callbacks import EarlyStopping
# Early-stopping callback watching the validation loss with a patience of 3.
# NOTE(review): it only takes effect if passed to the training call via `callbacks=`.
cback = EarlyStopping(monitor ='val_loss', patience = 3, mode = 'min')

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 16, 16, 4)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 16, 16, 32)   1184        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 16, 16, 32)   128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 16, 16, 32)   0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

# Fit

In [12]:
# Train model2 for up to 15 epochs of 100 batches each, validating on 20
# batches of val_gen after every epoch.
# - `validation_steps` is the Keras 2 name of the former `nb_val_samples`
#   argument, which only worked through the legacy-argument shim and emitted a
#   warning.
# - `cback` (EarlyStopping on val_loss) was created above but never passed in;
#   it is now actually used.
history = model2.fit_generator(
    train_gen,
    steps_per_epoch=100,
    epochs=15,
    verbose=1,
    validation_data=val_gen,
    validation_steps=20,
    callbacks=[cback]
)

  """Entry point for launching an IPython kernel.
  """Entry point for launching an IPython kernel.


Epoch 1/15

  % delta_t_median)




  % delta_t_median)


Epoch 2/15

  % delta_t_median)


Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
 13/100 [==>...........................] - ETA: 6:10 - loss: 1.3717 - acc: 0.5481

KeyboardInterrupt: 

In [31]:
# Layer table of whichever network is currently bound to `model`
# (several cells above rebind that name, so this depends on execution order).
model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 16, 16, 4)    0                                            
__________________________________________________________________________________________________
conv2d_26 (Conv2D)              (None, 16, 16, 32)   1184        input_6[0][0]                    
__________________________________________________________________________________________________
batch_normalization_30 (BatchNo (None, 16, 16, 32)   128         conv2d_26[0][0]                  
__________________________________________________________________________________________________
activation_34 (Activation)      (None, 16, 16, 32)   0           batch_normalization_30[0][0]     
__________________________________________________________________________________________________
conv2d_27 

In [13]:
# Persist the trained model2 to disk; the prediction section reloads it from
# this same path with load_model.
model2.save("../shity_model.dqf")

## Prediction routines

Here are some helper routines for running predictions and building a submission file.

In [16]:
import os 
def prediction_generator(h5_path, batch_size, idxs):
    """Yield the ``'S2'`` samples of an HDF5 file batch by batch, once.

    Parameters
    ----------
    h5_path : str
        HDF5 file containing an ``'S2'`` dataset.
    batch_size : int
        Maximum number of samples per yielded batch.
    idxs : sequence of int
        Sample indices to read, consumed in consecutive slices.

    Yields
    ------
    numpy.ndarray
        The ``'S2'`` rows selected by one slice of ``idxs``.

    Notes
    -----
    Indices are sorted inside each batch, so rows come back in increasing
    index order within a batch even if ``idxs`` is not sorted.
    """
    batch_count = get_batch_count(idxs, batch_size)

    # The context manager closes the file once the last batch has been
    # produced (or the generator is discarded); the handle used to stay open.
    with h5.File(h5_path, 'r') as f:
        for b in range(batch_count):
            # h5py fancy indexing needs the index list in increasing order.
            batch_idxs = sorted(idxs[b * batch_size:(b + 1) * batch_size])
            X = f['S2'][batch_idxs, :, :, :]
            yield np.array(X)

def build_h5_pred_file(pred, h5_output_path):
    """Write predicted labels to a fresh HDF5 file.

    Any existing file at ``h5_output_path`` is deleted first. The file gets a
    single ``"TOP_LANDCOVER"`` dataset of shape ``(len(pred), 1)`` whose first
    column holds ``pred``.

    Parameters
    ----------
    pred : sequence
        One predicted label per sample.
    h5_output_path : str
        Destination path of the HDF5 file.

    Returns
    -------
    int
        Always ``1``.
    """
    if os.path.exists(h5_output_path):
        os.remove(h5_output_path)

    # `with` guarantees the file is flushed and closed even if the write
    # below raises; previously a failure left the handle open.
    with h5.File(h5_output_path, 'w') as f:
        # NOTE(review): no dtype is given, so the library's default dataset
        # dtype is used — confirm the submission format accepts it.
        top_landcover_submit = f.create_dataset("TOP_LANDCOVER", (len(pred), 1), maxshape=(None, 1))
        top_landcover_submit[:, 0] = pred

    return 1

In [17]:
from keras.models import load_model
# Reload the network saved by the training section; from here on `model`
# refers to that persisted model2.
model = load_model("../shity_model.dqf")

In [18]:
# Check that the reloaded network has the expected layers.
model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 16, 16, 4)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 16, 16, 32)   1184        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 16, 16, 32)   128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 16, 16, 32)   0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

In [19]:
# Predict every sample of the unlabeled file, in file order.
pred_idx = get_idxs(PATH_PREDICT_WITHOUT_GT)
print(len(pred_idx))
pred_gen = prediction_generator(PATH_PREDICT_WITHOUT_GT, BATCH_SIZE, pred_idx)
# One step per batch so that the finite generator is consumed exactly once.
prediction = model.predict_generator(pred_gen, steps=get_batch_count(pred_idx, BATCH_SIZE), verbose=1)
# Should match the number of indices printed above.
print(len(prediction))
# Predicted class = position of the largest of the 23 outputs; written to PATH_SUBMIT.
build_h5_pred_file(np.argmax(prediction, axis = 1), PATH_SUBMIT)

241700
241700


1

## Some ideas for monitoring

In [None]:
def gt_generator(h5_path, batch_size, idxs):
    """Yield the one-hot ground-truth labels of an HDF5 file batch by batch, once.

    Parameters
    ----------
    h5_path : str
        HDF5 file containing a ``'TOP_LANDCOVER'`` dataset.
    batch_size : int
        Maximum number of samples per yielded batch.
    idxs : sequence of int
        Sample indices to read, consumed in consecutive slices.

    Yields
    ------
    numpy.ndarray
        The selected ``'TOP_LANDCOVER'`` rows one-hot encoded over 23 classes.
    """
    batch_count = get_batch_count(idxs, batch_size)

    # The context manager closes the file once the last batch has been
    # produced (or the generator is discarded); the handle used to stay open.
    with h5.File(h5_path, 'r') as f:
        for b in range(batch_count):
            # h5py fancy indexing needs the index list in increasing order.
            batch_idxs = sorted(idxs[b * batch_size:(b + 1) * batch_size])
            Y = f['TOP_LANDCOVER'][batch_idxs, :]
            yield keras.utils.np_utils.to_categorical(np.array(Y), 23)

# NOTE(review): PATH_PREDICT_WITH_GT is not defined anywhere in the visible
# notebook (only PATH_PREDICT_WITHOUT_GT is) — it must be set before this cell
# can run.
gt_gen = gt_generator(PATH_PREDICT_WITH_GT, BATCH_SIZE, pred_idx)
# Collect every one-hot batch, then stack them row-wise into a single array.
gt = []
for elem in gt_gen:
    gt.append(elem)
gt = np.vstack(gt)

In [None]:
import matplotlib.pyplot as plt
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a header line and draw a confusion matrix on the current figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix; rows are shown as true labels, columns as
        predicted labels.
    classes : sequence
        Tick labels, one per row/column.
    normalize : bool, optional
        If True, each row is divided by its sum before plotting. Rows whose
        sum is zero are shown as all zeros.
    title : str, optional
        Figure title.
    cmap : matplotlib colormap, optional
        Colormap passed to ``imshow``.
    """
    cm = np.asarray(cm)

    if normalize:
        # A class that never occurs as ground truth has an all-zero row;
        # dividing by its sum used to fill that row with NaN.
        row_sums = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_sums,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_sums != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        fmt = '.2f'
    elif np.issubdtype(cm.dtype, np.integer):
        fmt = 'd'
    else:
        # Float-typed counts (such as the matrix returned by
        # clean_confusion_matrix) cannot be formatted with 'd'.
        fmt = '.0f'

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2. if cm.size else 0.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black", fontsize=7)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
def clean_confusion_matrix(confusion_matrix, classes):
    """Drop the classes that appear neither as truth nor as prediction.

    A class index is kept when its row sum plus its column sum is non-zero.

    Parameters
    ----------
    confusion_matrix : numpy.ndarray, 2-D
        Square matrix indexed by class position.
    classes : sequence
        Only its length is used: it gives the number of class positions to scan.

    Returns
    -------
    tuple
        ``(reduced_matrix, kept_indices)`` where ``reduced_matrix`` is a float
        array restricted to the kept rows and columns, and ``kept_indices`` is
        the list of retained class positions.
    """
    kept = [
        c for c in range(len(classes))
        if np.sum(confusion_matrix[:, c]) + np.sum(confusion_matrix[c, :]) != 0
    ]
    # Select the kept rows and kept columns in one step; the result is float,
    # as in the row-by-row fill of a pre-allocated float array.
    reduced = confusion_matrix[np.ix_(kept, kept)].astype(float)
    return reduced, kept

In [None]:
%matplotlib notebook
from sklearn.metrics import confusion_matrix
# Turn the one-hot ground truth and the model outputs into class ids.
y_true = np.argmax(gt, axis=1)
y_pred = np.argmax(prediction, axis = 1)

# Build the confusion matrix over all 23 labels, drop the classes that never
# occur, and plot the result with row normalization.
real_cnf_matrix, real_classes = clean_confusion_matrix(confusion_matrix(y_true, y_pred, labels= range(23)), range(23))
plot_confusion_matrix(real_cnf_matrix, classes = real_classes, normalize=True)

In [None]:
# Overall accuracy from the (unnormalized) confusion matrix:
# correctly classified samples (the diagonal) over all samples.
somme = np.trace(real_cnf_matrix)
somme_t = real_cnf_matrix.sum()
somme/somme_t


In [21]:
import pandas as pd
# Export the predicted class ids as CSV, next to the HDF5 submission.
y_pred = np.argmax(prediction, axis = 1)
PATH_SUBMIT1 = "result_bfm_3.csv"
# Single TOP_LANDCOVER column; the DataFrame index is written as the "ID" column.
df2 = pd.DataFrame(y_pred, columns=['TOP_LANDCOVER'])
df2.to_csv(PATH_SUBMIT1, index_label="ID")