# Gleam
Gleam (« lueur ») est le nom donné à ce notebook, le produit de ce travail de bachelor. Il permet d'entraîner un réseau de neurones convolutif sur des images satellites nocturnes dans le but de prédire la répartition de la population sur une vaste région avec une précision supérieure à celle de toute autre source de données disponible.

## Région d'intérêt
La région qui nous intéresse le plus dans ce travail est la **Colombie**. Le conflit armé colombien s'est apaisé durant ces cinq dernières années, ce qui a engendré un fort déplacement de la population vers la forêt amazonienne, devenue moins dangereuse. Les recensements disponibles ne permettent ni de quantifier ni de localiser avec précision ces mouvements de population. Les images satellites nocturnes récentes, en revanche, peuvent révéler ces informations.

## Fichiers
Les jeux de données en entrée doivent être des **rasters** au format GeoTIFF possédant deux couches. La première couche est l'image satellite, la seconde doit contenir les données de densité de population (habitants par pixel).

# Imports

In [27]:
import os
import time

import keras.callbacks
import keras.layers.convolutional as conv
import keras.layers.core as core
import keras.models as models
import numpy as np
import rasterio
from keras import optimizers
from sklearn.model_selection import KFold

# Parameters

In [28]:
# GeoTIFF used for training; the preprocessing step reads band 1 as the input
# image and band 2 as the population target.
training_dataset = '../../data/lightpop_merged/2015_brazil.tif'

# Number of passes over the training data for each fold.
nb_epoch = 200

# Side length, in pixels, of the square tiles fed to the network.
input_tile_size = 32

# Step, in pixels, between two consecutive tile origins. Equal to
# input_tile_size for testing purposes (no overlap); otherwise set to 8.
input_tile_offset = 32

# Preprocessing

In [29]:

# Cut the training raster into square tiles and build the (X, Y) training set:
# X holds the band-1 tiles, Y the total of band 2 over each tile.
print('opening raster')

# The context manager guarantees the dataset is closed even if a read fails.
with rasterio.open(training_dataset) as raster:
    matrix_x = raster.read(1)  # band 1: network input
    matrix_y = raster.read(2)  # band 2: population values summed per tile

n_rows, n_cols = matrix_x.shape

X = []
Y = []
# Slide the window column by column, then row by row. The "+ 1" makes the
# upper bound inclusive so a tile ending exactly on the raster edge is kept.
for col in range(0, n_cols - input_tile_size + 1, input_tile_offset):
    for row in range(0, n_rows - input_tile_size + 1, input_tile_offset):
        pop = np.sum(matrix_y[row: row + input_tile_size, col: col + input_tile_size])
        # only use tiles that have people living on it
        if pop > 0:
            X.append(matrix_x[row: row + input_tile_size, col: col + input_tile_size])
            Y.append(pop)

matrix_x, matrix_y = None, None  # free some memory

if not X:
    # Fail with a clear message instead of an obscure axis error in expand_dims below.
    raise ValueError('no populated tile found in ' + training_dataset)

X, Y = np.array(X), np.array(Y)
X = np.expand_dims(X, axis=3)  # add the color channel as a new dimension

print('input shape (observations, obs_width, obs_height, channels) : ' + str(X.shape))

opening raster
input shape (observations, obs_width, obs_height, channels) : (39157, 32, 32, 1)


# Training

In [None]:
# Train the CNN with 4-fold cross-validation and report the aggregated
# test-fold metrics (MSE, MAE and sum of absolute errors).
model_birthday = time.strftime("%Y-%m-%d_%H-%M-%S", time.gmtime())  # used to identify generated files (logs and models)

print('configuring cnn')

# kernel size for every convolution layer
kernel_size = (3, 3)

# input dimensions
img_count, img_rows, img_cols, img_channel_count = X.shape

# k-fold split
kfold = KFold(n_splits=4, shuffle=True, random_state=None)

# initialize statistics aggregate
kfold_mse = []
kfold_mae = []
kfold_sae = []

# Every fold saves to this same path, so the file on disk ends up holding the
# model of the last fold. The prediction step loads it from there.
model_path = 'models/' + model_birthday + '.h5'
# Make sure the output directory exists before spending time on training.
os.makedirs('models', exist_ok=True)

print('logs will be saved to logs/' + model_birthday)

for fold, (train, test) in enumerate(kfold.split(X, Y), start=1):
    # A fresh, untrained network is built for every fold.
    cnn = models.Sequential()
    cnn.add(conv.Convolution2D(filters=32, kernel_size=kernel_size, activation="relu", padding='same',
                               input_shape=(img_rows, img_cols, img_channel_count)))

    cnn.add(conv.MaxPooling2D(strides=(2, 2)))

    cnn.add(conv.Convolution2D(filters=64, kernel_size=kernel_size, activation="relu", padding='same'))

    cnn.add(conv.MaxPooling2D(strides=(2, 2)))

    cnn.add(conv.Convolution2D(filters=128, kernel_size=kernel_size, activation="relu", padding='same'))

    cnn.add(conv.MaxPooling2D(strides=(2, 2)))

    cnn.add(core.Flatten())
    cnn.add(core.Dropout(0.5))
    # NOTE(review): no activation is passed here, so Dense(64) -> Dense(1) forms
    # a purely linear head; confirm this is intended.
    cnn.add(core.Dense(64))
    cnn.add(core.Dense(1))

    cnn.summary()
    cnn.compile(loss="mean_squared_error", optimizer=optimizers.Adam(lr=0.02, decay=0.0), metrics=["mse", "mae"])

    # logs for tensorboard: one sub-directory per fold so that the runs of the
    # different folds are not mixed together in the same event directory
    tensorboard = keras.callbacks.TensorBoard(log_dir="logs/" + model_birthday + "/fold_" + str(fold))

    # reduce learning rate when we stopped learning anything
    rlrp = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=15, verbose=1, mode='auto',
                                             min_lr=0.00001)

    print('training ...')

    cnn.fit(X[train], Y[train], batch_size=1024, epochs=nb_epoch, verbose=2, callbacks=[tensorboard, rlrp],
            sample_weight=None)

    cnn.save(model_path)

    print('model saved to ' + model_path)

    # evaluate() returns [loss, mse, mae], in the order given to compile().
    # Indexing by position avoids depending on the metric names, which differ
    # between Keras versions.
    scores = cnn.evaluate(X[test], Y[test], verbose=2, batch_size=1024)
    fold_mse, fold_mae = scores[1], scores[2]
    kfold_mse.append(fold_mse)
    kfold_mae.append(fold_mae)
    # sum of absolute errors over the test fold = mean absolute error * fold size
    kfold_sae.append(fold_mae * len(Y[test]))

print('done !')
print('K-fold validation results :')
print('Mean squared error : %.2f (std %.2f)' % (np.mean(kfold_mse), np.std(kfold_mse)))
print('Mean absolute error : %.2f (std %.2f)' % (np.mean(kfold_mae), np.std(kfold_mae)))
print('Sum of absolute errors : %.2f (std %.2f)' % (np.mean(kfold_sae), np.std(kfold_sae)))

configuring cnn
logs will be saved to logs/2018-07-04_19-52-38
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_91 (Conv2D)           (None, 32, 32, 32)        320       
_________________________________________________________________
max_pooling2d_85 (MaxPooling (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_92 (Conv2D)           (None, 16, 16, 64)        18496     
_________________________________________________________________
max_pooling2d_86 (MaxPooling (None, 8, 8, 64)          0         
_________________________________________________________________
conv2d_93 (Conv2D)           (None, 8, 8, 128)         73856     
_________________________________________________________________
max_pooling2d_87 (MaxPooling (None, 4, 4, 128)         0         
_________________________________________________________________
flatten_42 (F

Epoch 58/200
 - 1s - loss: 1166713195.1167 - mean_squared_error: 1166713195.1167 - mean_absolute_error: 8072.8101
Epoch 59/200
 - 1s - loss: 1495981043.7697 - mean_squared_error: 1495981043.7697 - mean_absolute_error: 8317.2677
Epoch 60/200
 - 1s - loss: 2401860914.5122 - mean_squared_error: 2401860914.5122 - mean_absolute_error: 8645.1609
Epoch 61/200
 - 1s - loss: 2199968410.1518 - mean_squared_error: 2199968410.1518 - mean_absolute_error: 8568.7772
Epoch 62/200
 - 1s - loss: 2470144680.2825 - mean_squared_error: 2470144680.2825 - mean_absolute_error: 9047.3187
Epoch 63/200
 - 1s - loss: 1832191261.1920 - mean_squared_error: 1832191261.1920 - mean_absolute_error: 8569.0856
Epoch 64/200
 - 1s - loss: 1943642430.8079 - mean_squared_error: 1943642430.8079 - mean_absolute_error: 8375.1442
Epoch 65/200
 - 1s - loss: 3198273348.7400 - mean_squared_error: 3198273348.7400 - mean_absolute_error: 9006.8213
Epoch 66/200
 - 1s - loss: 3895954891.0949 - mean_squared_error: 3895954891.0949 - mean_

Epoch 129/200
 - 1s - loss: 941793873.0575 - mean_squared_error: 941793873.0575 - mean_absolute_error: 7101.1491
Epoch 130/200
 - 1s - loss: 728956438.9744 - mean_squared_error: 728956438.9744 - mean_absolute_error: 6853.6922
Epoch 131/200
 - 1s - loss: 783179013.9800 - mean_squared_error: 783179013.9800 - mean_absolute_error: 6979.3213
Epoch 132/200
 - 1s - loss: 723648977.2972 - mean_squared_error: 723648977.2972 - mean_absolute_error: 6747.9808
Epoch 133/200
 - 1s - loss: 846870211.2210 - mean_squared_error: 846870211.2210 - mean_absolute_error: 6886.0574
Epoch 134/200
 - 1s - loss: 806211475.0930 - mean_squared_error: 806211475.0930 - mean_absolute_error: 6990.5834
Epoch 135/200
 - 1s - loss: 952563873.8056 - mean_squared_error: 952563873.8056 - mean_absolute_error: 7041.0539
Epoch 136/200
 - 1s - loss: 826774020.5657 - mean_squared_error: 826774020.5657 - mean_absolute_error: 6964.6706
Epoch 137/200
 - 1s - loss: 1045515602.9949 - mean_squared_error: 1045515602.9949 - mean_absolut

Epoch 200/200
 - 1s - loss: 560818546.1788 - mean_squared_error: 560818546.1788 - mean_absolute_error: 6360.0828
model saved to models/2018-07-04_19-52-38.h5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_94 (Conv2D)           (None, 32, 32, 32)        320       
_________________________________________________________________
max_pooling2d_88 (MaxPooling (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_95 (Conv2D)           (None, 16, 16, 64)        18496     
_________________________________________________________________
max_pooling2d_89 (MaxPooling (None, 8, 8, 64)          0         
_________________________________________________________________
conv2d_96 (Conv2D)           (None, 8, 8, 128)         73856     
_________________________________________________________________
max_pooling2d_90 (MaxPooling (None, 4, 4, 128)    

Epoch 57/200
 - 1s - loss: 2578140224.6102 - mean_squared_error: 2578140224.6102 - mean_absolute_error: 8947.8570
Epoch 58/200
 - 1s - loss: 2320514174.3786 - mean_squared_error: 2320514174.3786 - mean_absolute_error: 9038.2258
Epoch 59/200
 - 1s - loss: 3435752404.2408 - mean_squared_error: 3435752404.2408 - mean_absolute_error: 9692.3188
Epoch 60/200
 - 1s - loss: 2038644457.8589 - mean_squared_error: 2038644457.8589 - mean_absolute_error: 8944.1723
Epoch 61/200
 - 1s - loss: 2111764076.8227 - mean_squared_error: 2111764076.8227 - mean_absolute_error: 8824.9583
Epoch 62/200
 - 1s - loss: 2179568935.6797 - mean_squared_error: 2179568935.6797 - mean_absolute_error: 9256.0779
Epoch 63/200
 - 1s - loss: 2150579267.6786 - mean_squared_error: 2150579267.6786 - mean_absolute_error: 9162.1626
Epoch 64/200
 - 1s - loss: 1874447850.8003 - mean_squared_error: 1874447850.8003 - mean_absolute_error: 8771.6225
Epoch 65/200
 - 1s - loss: 1769053732.2277 - mean_squared_error: 1769053732.2277 - mean_

Epoch 129/200
 - 1s - loss: 702858991.2983 - mean_squared_error: 702858991.2983 - mean_absolute_error: 6839.5028
Epoch 130/200
 - 1s - loss: 844564631.7625 - mean_squared_error: 844564631.7625 - mean_absolute_error: 7016.2806
Epoch 131/200
 - 1s - loss: 676928206.7142 - mean_squared_error: 676928206.7142 - mean_absolute_error: 6827.1797
Epoch 132/200
 - 1s - loss: 653978188.7791 - mean_squared_error: 653978188.7791 - mean_absolute_error: 6725.6305
Epoch 133/200
 - 1s - loss: 696689993.7630 - mean_squared_error: 696689993.7630 - mean_absolute_error: 6765.5115
Epoch 134/200
 - 1s - loss: 718618529.4296 - mean_squared_error: 718618529.4296 - mean_absolute_error: 6931.3034
Epoch 135/200
 - 1s - loss: 733762043.7461 - mean_squared_error: 733762043.7461 - mean_absolute_error: 6954.1398
Epoch 136/200
 - 1s - loss: 824494248.9000 - mean_squared_error: 824494248.9000 - mean_absolute_error: 6966.3232
Epoch 137/200
 - 1s - loss: 680852909.0232 - mean_squared_error: 680852909.0232 - mean_absolute_

# Prediction


In [None]:
# Predict the population of every tile of a light raster with the trained
# model, spread each tile's prediction over its pixels according to their
# brightness, and write the result as a single-band GeoTIFF.
prediction_dataset = '../../data/lightrasters_noaa/2012_south_america.tif'

print('loading model')

cnn = models.load_model("models/" + model_birthday + ".h5")

print('opening raster')

# Read the band once; the context manager closes the dataset even on error.
with rasterio.open(prediction_dataset) as raster:
    band = raster.read(1)
    profile = raster.profile

# The output holds one band of float32 values (see the write below), so the
# profile must declare that dtype rather than inherit the input raster's.
profile.update(count=1, dtype=rasterio.float32)
height, width = band.shape

# preprocess
# Top-left (row, col) corner of every tile, column by column. The list is built
# once and reused for writing, so predictions cannot get out of step with
# their tiles. The "+ 1" keeps tiles that end exactly on the raster edge.
tile_origins = [
    (row, col)
    for col in range(0, width - input_tile_size + 1, input_tile_size)
    for row in range(0, height - input_tile_size + 1, input_tile_size)
]
if not tile_origins:
    # Fail with a clear message instead of an obscure axis error in expand_dims below.
    raise ValueError('raster is smaller than one tile : ' + prediction_dataset)

testX = np.array([band[row: row + input_tile_size, col: col + input_tile_size] for row, col in tile_origins])
testX = np.expand_dims(testX, axis=3)

print('generating raster')

predicted_tiles = cnn.predict(testX, verbose=0)

# Pixels not covered by a tile, and tiles without any light, stay at 0.
predicted_raster = np.zeros(shape=(height, width))
for (row, col), predicted_pop in zip(tile_origins, predicted_tiles):
    in_tile = band[row: row + input_tile_size, col: col + input_tile_size]
    tile_max = np.max(in_tile)
    if tile_max <= 0:
        continue

    weights = in_tile / tile_max  # normalize visible light between 0 and 1 to avoid overflows
    weights = np.exp(weights) - 1  # visible light is perceived logarithmically => counteract with exp
    weights = weights / np.sum(weights)  # the sum of all weights must be 1
    predicted_raster[row: row + input_tile_size, col: col + input_tile_size] = predicted_pop * weights

# Make sure the output directory exists before writing.
os.makedirs('predictions', exist_ok=True)

with rasterio.open('predictions/' + model_birthday + '.tif', 'w', **profile) as dst:
    dst.write(predicted_raster.astype(rasterio.float32), 1)
print("prediction saved to predictions/" + model_birthday + '.tif')

print('prediction done !')
