In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 
from itertools import permutations, combinations, cycle
import os 
from random import sample, shuffle 
import gc 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import cv2 as cv
import sklearn.metrics
import re
from efficientnet import tfkeras as efn 
from pathlib import Path
import rasterio 
from rasterio import features

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.metrics import Recall
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, EarlyStopping, CSVLogger, TensorBoard, ReduceLROnPlateau
import tensorflow as tf 
import segmentation_models as sm

print(tf.__version__)
print(sm.__version__)

from tensorflow import keras
from tqdm import tqdm 

import matplotlib.pyplot as plt

from shapely.geometry import Polygon
from shapely import wkt

from typing import List, Tuple
from slacker import Slacker
slack = Slacker('xoxp-406617419703-407736556887-975525827328-1c7c24b94d95408268b84ada0b16d937')


TRAIN_COMMON_PATH = Path('train/AOI_11_Rotterdam')
TRAIN_SAR_PATH = TRAIN_COMMON_PATH/'SAR-Intensity'
TRAIN_GT_PATH = TRAIN_COMMON_PATH/'train_ground_truth'
TEST_SAR_PATH = None #TODO
TEST_GT_PATH = None #TODO

FILENAME_PATTERN = re.compile('SN6_Train_AOI_11_Rotterdam_SAR-Intensity_(\d*_\d*_tile_\d*).tif')


Segmentation Models: using `tf.keras` framework.
2.1.0
1.0.1


In [2]:
def preprocess_to_display(x, n_channels, normalize=True):
    preprocessed = x.copy()
    if normalize:
        preprocessed = preprocessed/preprocessed.max()
    if n_channels > 1:
        preprocessed = np.moveaxis(preprocessed[:3], 0, -1)
    else: 
        preprocessed = preprocessed[0]
    return preprocessed

def get_sar_imagery_statistics(path):
    array = get_array_from_tiff(path)
    means = array.mean(axis=(1,2))
    stds = array.std(axis=(1,2))
    return means, stds

def get_array_from_tiff(path):
    with rasterio.open(path) as src: 
        im = src.read()
        tsm = src.transform
    return im, tsm

def get_id_from_filename(filename):
    return FILENAME_PATTERN.match(filename)[1]

def get_polygons_in_image(rstr_filename):
    image_id = get_id_from_filename(rstr_filename)
    return buildings.loc[buildings['ImageId']==image_id,'PolygonWKT_Pix']

In [3]:
buildings = pd.read_csv(TRAIN_COMMON_PATH/'SummaryData/SN6_Train_AOI_11_Rotterdam_Buildings.csv',engine='python')
buildings.head()

Unnamed: 0,ImageId,TileBuildingId,PolygonWKT_Pix,Mean_Building_Height,Median_Building_Height,StdDev_Building_Height
0,20190822070610_20190822070846_tile_3721,0,"POLYGON ((299.6396801332012 349.3765436094254,...",9.962397,9.96,0.006495
1,20190822070610_20190822070846_tile_3721,1,"POLYGON ((115.5360228798818 339.845588516444, ...",2.81,2.81,0.0
2,20190822070610_20190822070846_tile_3721,2,"POLYGON ((768.9086768317502 329.8960437048227,...",14.42,14.42,0.0
3,20190822070610_20190822070846_tile_3721,3,"POLYGON ((755.8174585120287 330.0953964963555,...",14.42,14.42,0.0
4,20190822070610_20190822070846_tile_3721,4,"POLYGON ((392.8786215754226 335.6222213506699,...",8.59,8.59,0.0


In [4]:
!ls train/AOI_11_Rotterdam

PAN	PS-RGBNIR  SAR-Intensity      SummaryData	 train_ground_truth
PS-RGB	RGBNIR	   SAR-Intensity.zip  geojson_buildings


In [5]:
TRAIN_FRAC = 0.8
N_TRAIN = 3401
BATCH_SIZE = 32
STEP_PER_EPOCH = int(TRAIN_FRAC*N_TRAIN)//BATCH_SIZE+1
VAL_STEPS_PER_EPOCH = int((1-TRAIN_FRAC)*N_TRAIN)//BATCH_SIZE+1
N_EPOCHS = 40
LOG_DIR = 'logs/'
MODELS_DIR = "models/"
LOAD_MODEL = True
DEBUG = False
if DEBUG:
    STEP_PER_EPOCH= 10
    VAL_STEPS_PER_EPOCH= 10

In [6]:
TRAINING_DATASET_SIZE=200840
#TRAINING_DATASET_SIZE =200
HEIGHT = 128
WIDTH = 128

class SpaceNetPipeline:
    def __init__(self, shuffle=False, batch_type='multiple_images', batch_size=BATCH_SIZE, train_val_frac = 0.8, verbose=False):
        
        #assert not (mode=='test' and shuffle==True), 'Error: in test mode, the values should not be shuffled.'

        self.batch_size = batch_size 
        self.image_path = {'fit':TRAIN_SAR_PATH,
                           'test':TEST_SAR_PATH}
        self.gt_path = TRAIN_GT_PATH
        
        self.orientations = pd.read_csv('train/AOI_11_Rotterdam/SummaryData/SAR_orientations.txt',sep=' ', header=None)
        self.orientations.columns = ["image_timestamps", "orientation"]
        self.verbose = verbose
        self.generators = {}
        self.data_ids = {}
        self.transforms = {}
        self.results = {}
        
        fit_image_files = os.listdir(self.image_path['fit'])
        if shuffle == True: 
            shuffle(fit_image_files)
        n_train = int(train_val_frac*len(fit_image_files))
        self.data_ids["train"] = fit_image_files[:n_train]
        self.data_ids["validation"] = fit_image_files[n_train:]
        if batch_type == 'full_image':
            self.generators["train"] = cycle((x for x in fit_image_files[:n_train]))
            self.generators['validation'] = cycle((x for x in fit_image_files[n_train:]))
        elif batch_type == 'multiple_images':    
            self.generators["train"] = cycle((fit_image_files[i:i+batch_size] for i in range(0, len(fit_image_files[:n_train]), batch_size))) 
            self.generators['validation'] = cycle((fit_image_files[i:i+batch_size] for i in range(0, len(fit_image_files[:n_train]), batch_size))) 

        self.data_ids["test"] = None
        self.generators["test"] = None
        
    def print_if_verbose(self, *args, status='always'):
        if self.verbose and status=='always':
            print(*args)
        if self.verbose=='debug' and status=='debug':
            print(*args)

    def normalize(self, batch, normalization_type=None):
        if normalization_type is None:
            normalized_batch=batch 
        elif normalization_type=='divide':
            normalized_batch=batch/255
        return normalized_batch

    def get_xy_image(self, im_id, mode='train'):
        print("1.1")
        x_image, tsm = get_array_from_tiff(self.image_path['fit']/im_id)
        if mode in ('train','validation'):
            print("1.2")
            y_image, tsm = get_array_from_tiff(self.gt_path/im_id)
            y_image = y_image[0]
        else:
            print("1.3")
            y_image = None
        return x_image, y_image, tsm

    def process_x_batch_list(self, x_batch_list):
        x_resized = np.ndarray(shape=(len(x_batch_list),HEIGHT,WIDTH,3))
        for i in range(len(x_batch_list)):
            for j in range(3):
                x_resized[i,...,j] = cv.resize(x_batch_list[i][j],dsize=(WIDTH,HEIGHT))
        x_batch_normalized = self.normalize(x_resized)
        del x_batch_list
        del x_resized
        return x_batch_normalized
    
    def process_y_batch_list(self, y_batch_list):
        y_resized = np.ndarray(shape=(len(y_batch_list),HEIGHT,WIDTH))
        for i in range(len(y_batch_list)):
            y_resized[i] = cv.resize(y_batch_list[i],dsize=(WIDTH,HEIGHT))
        y_fixed_orientation = y_resized
        #y_fixed_orientation = np.flip(np.rot90(y_resized, k=4, axes=(1,2)))
        y_boolean = np.uint8(y_fixed_orientation > 0)
        y_expanded = np.expand_dims(y_boolean,axis=-1)
        del y_batch_list
        del y_boolean
        del y_fixed_orientation
        del y_resized
        return y_expanded
        
    def flow(self, mode: str ="train", with_ground_truth = True, height: int =137,width: int =236):
        '''Run the generator '''
        c = 0
        self.transforms[mode] = []
        while True:
            image_ids_to_get = next(self.generators[mode])
            x_batch_list = []
            y_batch_list = []
            
            for im_id in image_ids_to_get:
                image_orientation = self.orientations.loc[self.orientations["image_timestamps"]==re.match('.*(\d{14}_\d{14})',im_id)[1],"orientation"]
                self.print_if_verbose("\n INFO - image_ids_to_get:", im_id,"\n",status='always')
                self.print_if_verbose("\n INFO - image_orientation:",image_orientation)

                x_image, y_image, tsm = self.get_xy_image(im_id)
                self.transforms[mode].append(tsm)
                x_batch_list.append(x_image)

                if mode in ('train','validation'):
                    self.print_if_verbose("\n INFO - current mode ", mode, status="debug")
                    y_batch_list.append(y_image)
            print("Processing x list")
            x_batch_processed = self.process_x_batch_list(x_batch_list)
            
            if mode in ('train','validation'):
                print("Processing y list")
                y_batch_processed = self.process_y_batch_list(y_batch_list)
                #self.print_if_verbose(f"\n INFO - Yielding train data n°{c}/{self.batch_size/self.files_size['train']}")
                yield x_batch_processed, y_batch_processed
            else:
                c+=1
                if c % 10==0:
                    print(f"\n INFO - Step n°{c} ")
                #self.print_if_verbose(f"\n INFO - Yielding {mode} data n°{c}/{self.batch_size/self.files_size[mode]}")
                yield x_batch_processed
            gc.collect()
    
    def get_callbacks(self):
        checkpoint = ModelCheckpoint(MODELS_DIR+"model_weights-{epoch:02d}-{val_loss:.4f}--{val_iou_score:.4f}.hdf5", 
                                     monitor='val_loss', 
                                     verbose=1, 
                                     save_best_only=False, 
                                     mode='min')

        early_stopping = EarlyStopping(monitor='val_loss',
                                      min_delta=0,
                                      patience=5,
                                      verbose=0, mode='auto')
        csv_logger = CSVLogger(LOG_DIR+'training.log')

        reduce_lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                      patience=3, min_lr=0.0001)
        
        self.callbacks = [checkpoint, early_stopping, csv_logger, reduce_lr_on_plateau]
        #self.callbacks = []
    
    def get_model(self,weights_path=None):
        self.model = sm.Unet()
        self.model.compile(
            'Adam',
            loss='binary_crossentropy',
            metrics=[sm.metrics.iou_score],
        )
        if weights_path is not None:
            self.model.load_weights(weights_path)
            
    def fit(self):
        if self.model is None: 
            raise ValueError("Model is not defined yet.")
        self.print_if_verbose("\n INFO - Training...")
        self.model.fit_generator(
           self.flow(mode="train"),
           epochs=N_EPOCHS,
           steps_per_epoch=STEP_PER_EPOCH,
           validation_steps=VAL_STEPS_PER_EPOCH,
           validation_data=self.flow(mode='validation'),
           callbacks = self.callbacks
    )
        
    def get_polygons_from_predictions(self, mode, threshold = 0.5):
        polygons = []
        print(len(self.transforms[mode]))
        for i,pred in tqdm(enumerate(self.raw_predictions)):
            print(i)
            #boolean_image = np.uint8(pred[...,0] > threshold)
            boolean_image = pred[...,0]
            pols = features.shapes(boolean_image, transform=self.transforms[mode][i])
            #polygons.append([pol[0]['coordinates'][0] for pol in pols])
            polygons.append([Polygon(pol[0]['coordinates'][0]) for pol in pols])
        self.result_polygons = [x.wkt for sublist in polygons for x in sublist]
    
    def predict(self, mode='train'):
        if self.model is None: 
            raise ValueError("Model is not defined yet.")
        
        print("\n INFO - Predictions...")
        self.raw_predictions = self.model.predict(
            self.flow(mode=mode,with_ground_truth=True),steps=100
        )
        print("predictions.shape", self.raw_predictions.shape)
        print("\n INFO - Vectorizations...")
        self.get_polygons_from_predictions(mode)
        self.get_confidende()
    
    def run_pipeline(self, fit=True, predict_train = True, predict_validation = True, predict_test = False):
        self.get_model(weights_path='models/model_weights-01-0.1455.hdf5')
        if fit:
            self.get_callbacks()
            self.fit()
        if predict_train:
            self.print_if_verbose("\n INFO - Prediction on the train set")        
            self.results['train'] = self.predict(mode='train')
        if predict_validation:
            self.print_if_verbose("\n INFO - Prediction on the validation set")        
            self.results['validation'] = self.predict(mode='validation')
        if predict_test:
            self.print_if_verbose("\n INFO - Prediction on the test set")        
            self.results['test'] = self.predict(mode='test')


In [None]:
sn_pipeline = SpaceNetPipeline(verbose=False, batch_size=8)
sn_pipeline.run_pipeline(fit=False)


 INFO - Predictions...
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x

1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
1.1
1.2
Processing x list
Processing y list
1.1


0it [00:00, ?it/s]

Processing x list
Processing y list
predictions.shape (800, 128, 128, 1)

 INFO - Vectorizations...
808
0


1it [00:01,  1.04s/it]

1


2it [00:01,  1.01s/it]

2


3it [00:03,  1.05s/it]

3


4it [00:04,  1.09s/it]

4


5it [00:05,  1.06s/it]

5


6it [00:06,  1.02it/s]

6


7it [00:06,  1.14it/s]

7


8it [00:07,  1.20it/s]

8


9it [00:08,  1.26it/s]

9


10it [00:08,  1.29it/s]

10


11it [00:09,  1.36it/s]

11


12it [00:10,  1.40it/s]

12


13it [00:10,  1.40it/s]

13


14it [00:11,  1.40it/s]

14


15it [00:12,  1.47it/s]

15


16it [00:13,  1.39it/s]

16


17it [00:13,  1.33it/s]

17


18it [00:14,  1.28it/s]

18


19it [00:15,  1.31it/s]

19


20it [00:16,  1.33it/s]

20


21it [00:16,  1.31it/s]

21


22it [00:17,  1.32it/s]

22


23it [00:18,  1.32it/s]

23


24it [00:19,  1.36it/s]

24


25it [00:19,  1.37it/s]

25


26it [00:20,  1.38it/s]

26


27it [00:21,  1.44it/s]

27


28it [00:21,  1.50it/s]

28


29it [00:22,  1.52it/s]

29


30it [00:22,  1.85it/s]

30


31it [00:23,  2.00it/s]

31


32it [00:23,  2.34it/s]

32


33it [00:23,  2.43it/s]

33


34it [00:23,  2.73it/s]

34


35it [00:24,  2.86it/s]

35


36it [00:24,  2.80it/s]

36


37it [00:24,  3.04it/s]

37


38it [00:25,  2.97it/s]

38


39it [00:25,  3.20it/s]

39


40it [00:25,  3.33it/s]

40


41it [00:26,  3.03it/s]

41


42it [00:26,  3.24it/s]

42


43it [00:26,  2.97it/s]

43


44it [00:27,  3.05it/s]

44


45it [00:27,  2.92it/s]

45


46it [00:27,  2.90it/s]

46


47it [00:28,  3.05it/s]

47


48it [00:28,  2.48it/s]

48


49it [00:29,  2.70it/s]

49


50it [00:29,  2.64it/s]

50


51it [00:29,  2.86it/s]

51


52it [00:30,  2.58it/s]

52


53it [00:30,  2.04it/s]

53


54it [00:31,  2.23it/s]

54


55it [00:31,  2.50it/s]

55


56it [00:32,  2.41it/s]

56


57it [00:32,  2.64it/s]

57


58it [00:32,  2.30it/s]

58


59it [00:33,  2.35it/s]

59


60it [00:33,  2.29it/s]

60


61it [00:34,  2.52it/s]

61


62it [00:34,  2.21it/s]

62


63it [00:34,  2.46it/s]

63


64it [00:35,  2.64it/s]

64


65it [00:35,  2.51it/s]

65


66it [00:36,  2.28it/s]

66


67it [00:36,  2.38it/s]

67


68it [00:37,  2.24it/s]

68


69it [00:37,  2.26it/s]

69


70it [00:38,  2.04it/s]

70


71it [00:38,  2.14it/s]

71


72it [00:39,  2.15it/s]

72


73it [00:39,  2.24it/s]

73


74it [00:39,  2.17it/s]

74


75it [00:40,  1.95it/s]

75


76it [00:41,  1.73it/s]

76


77it [00:43,  1.01s/it]

77


78it [00:43,  1.19it/s]

78


79it [00:44,  1.40it/s]

79


80it [00:44,  1.61it/s]

80


81it [00:44,  1.80it/s]

81


82it [00:45,  1.73it/s]

82


83it [00:46,  1.75it/s]

83


84it [00:46,  1.72it/s]

84


85it [00:47,  1.79it/s]

85


86it [00:47,  1.76it/s]

86


87it [00:48,  1.38it/s]

87


88it [00:49,  1.31it/s]

88


89it [00:59,  3.52s/it]

89


In [None]:
def safe_log(x):
    y = x.copy()
    y[y==0]=1
    return 10*np.log10(y)

In [None]:
#img = next(fit_generator.flow(mode='validation'))

fig, ax = plt.subplots(2,3, figsize=(15,9))
pred = model.predict(img[0])
ax = ax.ravel()
for i in range(3):
    ax[i].imshow(img[0][0,...,i],cmap='gist_gray',vmin=15,vmax=60)
    ax[3+i].imshow(safe_log(img[0][0,...,i]),cmap='gist_gray',vmin=11,vmax=18)


1) Méthode 1: 
- Lors de la pred, on sauve les rasters
- On vectorise après raster par raster 

2) Méthode 2: 
- Pendant le training, on garde les transforme
- Après la pred  
- 