## Import libraries

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications.inception_v3 import InceptionV3

from utils import *

## Load training and test Sentinel data

In [136]:
with open("s1_colnames.txt", "r") as f:
    s1_colnames = f.read().split(',')

In [153]:
s1_train = pd.read_csv('data/s1/train_dataset.csv', names = s1_colnames)
s1_test = pd.read_csv('data/s1/test_dataset.csv', names = s1_colnames)

In [160]:
with open("s2_colnames.txt", "r") as f:
    s2_colnames = f.read().split(',')

In [161]:
s2_train = pd.read_csv('data/s2/train_dataset.csv', names = s2_colnames)
s2_test = pd.read_csv('data/s2/test_dataset.csv', names = s2_colnames)

## Calclulate class weights to use for training

In [145]:
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight(
                                        class_weight = "balanced",
                                        classes = np.unique(s2_train.label),
                                        y = s2_train.label                                                 
                                    )
class_weights = dict(zip(np.unique(s2_train.label), class_weights))

## Get monthly mean values of Sentinel-2 bands

In [164]:
bands =  ['B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B11', 'B12']
monthly_train = []
s2_train_full = []
for b in tqdm(bands):
    df = s2_train.filter(regex=b)
    s2_train_full.append(df)
    df.columns = [pd.to_datetime(x.split("_")[-1]).date().month  for x in df.columns]
    df = df.groupby(level=0, axis=1).apply(lambda x: x.apply(np.mean, axis=1))
    df = df.drop(columns = [3])
    monthly_train.append(df)
monthly_train = np.dstack(monthly_train)
s2_train_full = np.dstack(s2_train_full)

monthly_test = []
s2_test_full = []
for b in tqdm(bands):
    df = s2_test.filter(regex=b)
    s2_test_full.append(df)
    df.columns = [pd.to_datetime(x.split("_")[-1]).date().month  for x in df.columns]
    df = df.groupby(level=0, axis=1).apply(lambda x: x.apply(np.mean, axis=1))
    df = df.drop(columns = [3])
    monthly_test.append(df)
monthly_test = np.dstack(monthly_test)
s2_test_full = np.dstack(s2_test_full)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [03:05<00:00, 18.51s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:20<00:00,  2.05s/it]


In [165]:
# Ensure correct order of S1 data
s1_train = np.dstack([s1_train.filter(regex='VH_SAR'), s1_train.filter(regex='VV_SAR'), 
                      s1_train.filter(regex='VH_COH'), s1_train.filter(regex='VV_COH')])
s1_test = np.dstack([s1_test.filter(regex='VH_SAR'), s1_test.filter(regex='VV_SAR'), 
                      s1_test.filter(regex='VH_COH'), s1_test.filter(regex='VV_COH')])

## Stack S1 and S2  together
The final shape of the data is (n_samples, n_dates, n_bands).
n_dates is equal to the total number of monthes, e.g. 7
n_bands is equal to 14 with corresponds to 10 bands of Sentinel-2 and the 4 features of Sentinel-1 (VV/VH backscatter and coherence)

In [168]:
X_train = np.dstack((monthly_train, s1_train))
# X_train = s2_train_full.copy()
X_test = np.dstack((monthly_test, s1_test))
# X_test = s2_test_full.copy()
y_train = pd.get_dummies(s2_train.label).values
y_test = pd.get_dummies(s2_test.label).values
ids_train = s2_train.iloc[:,1].values
ids_test = s2_test.iloc[:,1].values
X_train, X_val, y_train, y_val, ids_train, ids_val = train_test_split(
            X_train, y_train, ids_train, test_size=0.1, random_state=420)
print(X_train.shape, X_val.shape)
print(y_train.shape, y_val.shape)

(30002, 7, 14) (3334, 7, 14)
(30002, 2) (3334, 2)


## Train a custom lightweight model on the Satellite data

In [169]:

lr = 0.001
sat_model_filename = "lstm_att"
if os.path.exists('models'):
    os.mkdir('models')
model_path = "models/{}.h5".format(sat_model_filename)

checkpoint = ModelCheckpoint(model_path, monitor='val_loss', 
                             verbose=1, save_best_only=True, save_weights_only=False, mode='auto')  

early = EarlyStopping(monitor='val_acc', min_delta=0.001, patience=10, verbose=0, mode='auto')
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, min_lr=0.0001)

dropout = 0.5
layers = [256, 128]
batch_size = 256
n_steps = X_train.shape[1]
mdl = model_train(X_train, y_train, X_val, y_val, file_path, n_features = 14, n_steps = n_steps,
                  scale = scale, batch_size = batch_size, n_classes = 2,class_weights = None,
                  layers = layers, dropout = dropout, lr = lr)
y_pred = np.argmax(mdl.predict(X_test), axis = 1)
print(classification_report(np.argmax(y_test, axis = 1), y_pred, digits = 4))

2022-06-13 12:36:39.284401: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2022-06-13 12:36:39.285281: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.


Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.38656, saving model to model.h5
Epoch 2/100
Epoch 00002: val_loss did not improve from 0.38656
Epoch 3/100
Epoch 00003: val_loss improved from 0.38656 to 0.31749, saving model to model.h5
Epoch 4/100
Epoch 00004: val_loss improved from 0.31749 to 0.30178, saving model to model.h5
Epoch 5/100
Epoch 00005: val_loss improved from 0.30178 to 0.28662, saving model to model.h5
Epoch 6/100
Epoch 00006: val_loss improved from 0.28662 to 0.27633, saving model to model.h5
Epoch 7/100
Epoch 00007: val_loss improved from 0.27633 to 0.25810, saving model to model.h5
Epoch 8/100
Epoch 00008: val_loss did not improve from 0.25810
Epoch 9/100
Epoch 00009: val_loss did not improve from 0.25810
Epoch 10/100
Epoch 00010: val_loss did not improve from 0.25810
Epoch 11/100
Epoch 00011: val_loss improved from 0.25810 to 0.24087, saving model to model.h5
Epoch 12/100
Epoch 00012: val_loss did not improve from 0.24087
Epoch 13/100
Epoch 00013: val_loss 

Epoch 26/100
Epoch 00026: val_loss did not improve from 0.20898
Epoch 27/100
Epoch 00027: val_loss improved from 0.20898 to 0.20436, saving model to model.h5
Epoch 28/100
Epoch 00028: val_loss did not improve from 0.20436
Epoch 29/100
Epoch 00029: val_loss improved from 0.20436 to 0.20382, saving model to model.h5
Epoch 30/100
Epoch 00030: val_loss did not improve from 0.20382
Epoch 31/100
Epoch 00031: val_loss improved from 0.20382 to 0.20211, saving model to model.h5
Epoch 32/100
Epoch 00032: val_loss did not improve from 0.20211
Epoch 33/100
Epoch 00033: val_loss improved from 0.20211 to 0.19640, saving model to model.h5
Epoch 34/100
Epoch 00034: val_loss improved from 0.19640 to 0.19535, saving model to model.h5
Epoch 35/100
Epoch 00035: val_loss improved from 0.19535 to 0.19278, saving model to model.h5
Epoch 36/100
Epoch 00036: val_loss did not improve from 0.19278
Epoch 37/100
Epoch 00037: val_loss did not improve from 0.19278
Epoch 38/100
Epoch 00038: val_loss did not improve f

Epoch 51/100
Epoch 00051: val_loss did not improve from 0.17853
Epoch 52/100
Epoch 00052: val_loss did not improve from 0.17853
Epoch 53/100
Epoch 00053: val_loss improved from 0.17853 to 0.17837, saving model to model.h5
Epoch 54/100
Epoch 00054: val_loss did not improve from 0.17837
Epoch 55/100
Epoch 00055: val_loss did not improve from 0.17837
Epoch 56/100
Epoch 00056: val_loss did not improve from 0.17837
Epoch 57/100
Epoch 00057: val_loss did not improve from 0.17837
Epoch 58/100
Epoch 00058: val_loss did not improve from 0.17837
Epoch 59/100
Epoch 00059: val_loss did not improve from 0.17837
Epoch 60/100
Epoch 00060: val_loss did not improve from 0.17837

Epoch 00060: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 61/100
Epoch 00061: val_loss did not improve from 0.17837
Epoch 62/100
Epoch 00062: val_loss did not improve from 0.17837
Epoch 63/100
Epoch 00063: val_loss did not improve from 0.17837
Epoch 64/100
Epoch 00064: val_loss did not improve from 

## Create generators to train a CNN on the street-level images

In [170]:

IMG_SIZE = 150
BATCH_SIZE = 64
NUM_CLASSES = 2
train_dir = "data/streetLevel_patches/train/"
test_dir = "data/streetLevel_patches/test/"

train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)
train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(IMG_SIZE,IMG_SIZE),
        batch_size=BATCH_SIZE,
        color_mode='rgb',
        shuffle=True,
        class_mode='binary',
        subset="training",
        classes=['Grassland', 'Non_Grassland'])

test_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.5
)

val_generator = test_datagen.flow_from_directory(
    test_dir, 
    target_size=(IMG_SIZE,IMG_SIZE),
    batch_size=BATCH_SIZE,
    color_mode='rgb',
    shuffle=True,
    class_mode='binary',
    subset="training",
    classes=['Grassland', 'Non_Grassland']
)

test_generator = test_datagen.flow_from_directory(
    test_dir, 
    target_size=(IMG_SIZE,IMG_SIZE),
    batch_size=BATCH_SIZE,
    color_mode='rgb',
    shuffle=False,
    class_mode='binary',
    subset="validation",
    classes=['Grassland', 'Non_Grassland']
)


Found 2904 images belonging to 2 classes.
Found 364 images belonging to 2 classes.
Found 363 images belonging to 2 classes.


## Calclulate class weights

In [171]:
total_train_grass = len(os.listdir(train_dir + 'Grassland'))
total_train_nongrass = len(os.listdir(train_dir + 'Non_Grassland'))
class_weights = class_weight.compute_class_weight('balanced',classes = [0,1], 
                                                  y=[0]*total_train_grass + [1]*total_train_nongrass)
class_weights = {i : class_weights[i] for i in range(2)}

## Fine-tune a pre-trained InceptionV3 network 

In [172]:

model_filename = "inceptionv3"

model_path = "models/{}.h5".format(model_filename)
checkpoint = ModelCheckpoint(model_path, monitor='val_loss', 
                             verbose=1, save_best_only=True, save_weights_only=False, mode='auto')  

early = EarlyStopping(monitor='val_acc', min_delta=0.001, patience=10, verbose=0, mode='auto')
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, min_lr=0.0001)

EPOCHS = 100
base_model = InceptionV3(input_shape = (150, 150, 3), include_top = False, weights = 'imagenet')

for layer in base_model.layers:
    layer.trainable = False

x = tf.keras.layers.Flatten()(base_model.output)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(512, activation='relu')(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.models.Model(base_model.input, x)

model.compile(optimizer = tf.keras.optimizers.SGD(lr=0.01, momentum=0.9), 
              loss = 'binary_crossentropy',metrics = ['acc', f1_m, recall_m, precision_m])
inceptv3_hist = model.fit(train_generator,
    epochs = EPOCHS, 
    steps_per_epoch = train_generator.samples//BATCH_SIZE, 
    validation_data = val_generator, 
    validation_steps = val_generator.samples//BATCH_SIZE,  
    verbose = 1, 
    callbacks = [checkpoint, early, reduce_lr], 
    class_weight = class_weights,
)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.28979, saving model to /nfs/data2/IVMSP_callisto/Images/binary/models/inceptionv3.h5
Epoch 2/100
Epoch 00002: val_loss improved from 0.28979 to 0.28946, saving model to /nfs/data2/IVMSP_callisto/Images/binary/models/inceptionv3.h5
Epoch 3/100
Epoch 00003: val_loss did not improve from 0.28946
Epoch 4/100
Epoch 00004: val_loss did not improve from 0.28946
Epoch 5/100
Epoch 00005: val_loss did not improve from 0.28946
Epoch 6/100
Epoch 00006: val_loss did not improve from 0.28946
Epoch 7/100
Epoch 00007: val_loss did not improve from 0.28946
Epoch 8/100
Epoch 00008: val_loss did not improve from 0.28946
Epoch 9/100
Epoch 00009: val_loss did not improve from 0.28946
Epoch 10/100
Epoch 00010: val_loss did not improve from 0.28946
Epoch 11/100
Epoch 00011: val_loss did not improve from 0.28946
Epoch 12/100
Epoch 00012: val_loss did not improve from 0.28946
Epoch 13/100
Epoch 00013: val_loss improved from 0.28946 to 0.27140, saving mod

In [269]:
# New data generators for prediction of all images

train_datagen_p = ImageDataGenerator(
        rescale=1./255)
train_datagen_p = train_datagen_p.flow_from_directory(
        train_dir,
        target_size=(IMG_SIZE,IMG_SIZE),
        batch_size=1,
        color_mode='rgb',
        shuffle=False,
        class_mode='binary',
        subset="training",
        classes=['Grassland', 'Maize'])

test_datagen_p = ImageDataGenerator(
    rescale=1./255,
)

test_datagen_p = test_datagen_p.flow_from_directory(
    test_dir, 
    target_size=(IMG_SIZE,IMG_SIZE),
    batch_size=1,
    color_mode='rgb',
    shuffle=False,
    class_mode='binary',
    subset="training",
    classes=['Grassland', 'Maize']
)

filenames_train = train_datagen_p.filenames
nb_samples_train = len(filenames_train)
filenames_test = test_datagen_p.filenames
nb_samples_test = len(filenames_test)

predict_train = model.predict_generator(train_datagen_p,steps = nb_samples_train)
predict_test = model.predict_generator(test_datagen_p,steps = nb_samples_test)

Found 7848 images belonging to 2 classes.
Found 2254 images belonging to 2 classes.


## Combine predictions of satellite data and street-level images

In [187]:
mapillary = pd.read_csv('parcel_annotations.csv ')

In [205]:
# Here we get the result from the ground level for all instances
probs = list(predict_train.flatten()) + list(predict_test.flatten())
names = [int(x.split("/")[1].split("_")[0]) for x in filenames_train + filenames_test]
direction = [x.split("/")[1].split("_")[1].split('.')[0] for x in filenames_train + filenames_test]
results = pd.DataFrame([names, direction, probs]).T
results.columns = ['image_id', 'direction', 'inception_prob']

In [207]:
# Then the results are merged with the rest of the information we have for each image
# in order to link it with the corresponing parcel
results = pd.merge(mapillary, results, on = ['image_id', 'direction'], how = 'inner')

In [247]:
# We now exctract the prediction for all instaces from the space level
X_all = np.vstack([X_train, X_val, X_test])
ids_all = list(ids_train) + list(ids_val) + list(ids_test)
predictions_all = mdl.predict(X_all)
sat_preds = pd.DataFrame([ids_all, predictions_all[:,0]]).T
sat_preds.columns = ['id', 'sat_prob']

In [260]:
# and finally we merge in a single DataFrame the two different predictions
space2ground = pd.merge(results, sat_preds, on = ['id'], how = 'inner')

In [263]:
space2ground['ground_pred'] = 'Grassland'
space2ground['space_pred'] = 'Grassland'
space2ground.loc[space2ground.inception_prob > 0.5, 'ground_pred'] = 'Non_Grassland'
space2ground.loc[space2ground.sat_prob > 0.5, 'space_pred'] = 'Non_Grassland'
space2ground.loc[space2ground.label != 'Grassland'] = 'Non_Grassland'

In [267]:
space2ground[space2ground.space_pred != space2ground.label]

Unnamed: 0,id,label,image_date,image_id,direction,inception_prob,sat_prob,ground_pred,space_pred
803,18649,Grassland,2017-05-07,5458128290896369,right,0.133706,0.624789,Grassland,Non_Grassland
804,18649,Grassland,2017-05-07,3696418997152258,right,0.302674,0.624789,Grassland,Non_Grassland
3198,17328,Grassland,2017-07-30,2826001634381413,left,0.579689,0.822648,Non_Grassland,Non_Grassland
3199,17328,Grassland,2017-07-30,500336977985307,left,0.573389,0.822648,Non_Grassland,Non_Grassland
3250,8605,Grassland,2017-05-17,4275714419175593,left,0.439639,0.587774,Grassland,Non_Grassland
3251,8605,Grassland,2017-04-02,212647310321771,right,0.524626,0.587774,Non_Grassland,Non_Grassland
3252,8605,Grassland,2017-04-02,283619173500861,right,0.518648,0.587774,Non_Grassland,Non_Grassland
3253,8605,Grassland,2017-04-02,500827311102460,right,0.854017,0.587774,Non_Grassland,Non_Grassland
4719,3938,Grassland,2017-10-10,215088676792486,left,0.999936,0.991723,Non_Grassland,Non_Grassland
4720,3938,Grassland,2017-10-10,2001635236653224,right,0.99993,0.991723,Non_Grassland,Non_Grassland
