In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 
from itertools import permutations, combinations, cycle
import os 
from random import sample, shuffle 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import cv2 as cv
import sklearn.metrics
import re
from efficientnet import tfkeras as efn 
from pathlib import Path
import rasterio 


from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.metrics import Recall
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, EarlyStopping, CSVLogger, TensorBoard, ReduceLROnPlateau
import segmentation_models as sm
from tensorflow import keras

import matplotlib.pyplot as plt

from shapely.geometry import Polygon
from shapely import wkt

from typing import List, Tuple
from slacker import Slacker
slack = Slacker('xoxp-406617419703-407736556887-975525827328-1c7c24b94d95408268b84ada0b16d937')

Segmentation Models: using `tf.keras` framework.


In [2]:
import tensorflow as tf 
print(tf.__version__)
print(sm.__version__)

2.1.0
1.0.1


In [3]:
TRAIN_COMMON_PATH = Path('train/AOI_11_Rotterdam')
TRAIN_SAR_PATH = TRAIN_COMMON_PATH/'SAR-Intensity'
TRAIN_GT_PATH = TRAIN_COMMON_PATH/'train_ground_truth'
TEST_SAR_PATH = None #TODO
TEST_GT_PATH = None #TODO

FILENAME_PATTERN = re.compile('SN6_Train_AOI_11_Rotterdam_SAR-Intensity_(\d*_\d*_tile_\d*).tif')


In [4]:
def preprocess_to_display(x, n_channels, normalize=True):
    preprocessed = x.copy()
    if normalize:
        preprocessed = preprocessed/preprocessed.max()
    if n_channels > 1:
        preprocessed = np.moveaxis(preprocessed[:3], 0, -1)
    else: 
        preprocessed = preprocessed[0]
    return preprocessed

def get_sar_imagery_statistics(path):
    array = get_array_from_tiff(path)
    means = array.mean(axis=(1,2))
    stds = array.std(axis=(1,2))
    return means, stds

def get_array_from_tiff(path):
    with rasterio.open(path) as file: 
        im = file.read()
    return im

In [5]:
buildings = pd.read_csv(TRAIN_COMMON_PATH/'SummaryData/SN6_Train_AOI_11_Rotterdam_Buildings.csv',engine='python')
buildings

Unnamed: 0,ImageId,TileBuildingId,PolygonWKT_Pix,Mean_Building_Height,Median_Building_Height,StdDev_Building_Height
0,20190822070610_20190822070846_tile_3721,0,"POLYGON ((299.6396801332012 349.3765436094254,...",9.962397,9.96,0.006495
1,20190822070610_20190822070846_tile_3721,1,"POLYGON ((115.5360228798818 339.845588516444, ...",2.810000,2.81,0.000000
2,20190822070610_20190822070846_tile_3721,2,"POLYGON ((768.9086768317502 329.8960437048227,...",14.420000,14.42,0.000000
3,20190822070610_20190822070846_tile_3721,3,"POLYGON ((755.8174585120287 330.0953964963555,...",14.420000,14.42,0.000000
4,20190822070610_20190822070846_tile_3721,4,"POLYGON ((392.8786215754226 335.6222213506699,...",8.590000,8.59,0.000000
...,...,...,...,...,...,...
214677,20190822133333_20190822133635_tile_7758,5,"POLYGON ((241.8162563492078 165.4673625379801,...",5.970000,5.97,0.000000
214678,20190822133333_20190822133635_tile_7758,6,"POLYGON ((431.9838383866008 146.2224273793399,...",10.380000,10.38,0.000000
214679,20190822133333_20190822133635_tile_7758,7,"POLYGON ((128.6518265847117 111.3799640219659,...",3.870000,3.87,0.000000
214680,20190822133333_20190822133635_tile_7758,8,"POLYGON ((415.3016002546065 98.69750475697219,...",11.710000,11.71,0.000000


In [6]:
def get_id_from_filename(filename):
    return FILENAME_PATTERN.match(filename)[1]

def get_polygons_in_image(rstr_filename):
    image_id = get_id_from_filename(rstr_filename)
    return buildings.loc[buildings['ImageId']==image_id,'PolygonWKT_Pix']

In [7]:
len(pd.unique(buildings['ImageId']))

3401

In [8]:
TRAINING_DATASET_SIZE=200840
#TRAINING_DATASET_SIZE =200
HEIGHT = 256
WIDTH = 256



class ImageGen:
    def __init__(self, mode="fit", shuffle=False, batch_type='full_image', batch_size=32, train_val_frac = 0.8, verbose=False):
        
        assert not (mode=='test' and shuffle==True), 'Error: in test mode, the values should not be shuffled.'

        self.batch_size = batch_size 
        if mode == "fit":
            self.image_path = TRAIN_SAR_PATH
            self.gt_path = TRAIN_GT_PATH
        if mode == "test":
            self.image_path = TEST_SAR_PATH
            self.gt_path = TEST_GT_PATH
        
        self.generators = {}
        self.orientations = pd.read_csv('train/AOI_11_Rotterdam/SummaryData/SAR_orientations.txt',sep=' ', header=None)
        self.orientations.columns = ["image_timestamps","orientation"]
        self.verbose = verbose
        
        image_files = os.listdir(self.image_path)
        if shuffle == True: 
            shuffle(image_files)
        if batch_type == 'full_image':
            if mode == 'fit':
                n_train = int(train_val_frac*len(image_files))
                self.generators["train"] = cycle((x for x in image_files[:n_train]))
                self.generators['validation'] = cycle((x for x in image_files[n_train:]))
            elif mode == "test":
                raise ValueError("Test mode not implemented yet.")
        elif batch_type == 'multiple_images':
            n_train = int(train_val_frac*len(image_files))            
            self.generators["train"] = cycle((image_files[i:i+batch_size] for i in range(0, len(image_files[:n_train]), batch_size))) 
            self.generators['validation'] = cycle((image_files[i:i+batch_size] for i in range(0, len(image_files[:n_train]), batch_size))) 
            
    def print_if_verbose(self, *args, status='always'):
        if self.verbose and status=='always':
            print(*args)
        if self.verbose=='debug' and status=='debug':
            print(*args)

    def normalize(self,batch,normalization_type=None):
        if normalization_type is None:
            normalized_batch=batch 
        elif normalization_type=='divide':
            normalized_batch=batch/255
        return normalized_batch

    def get_x_image(self, im_id):
        batch = get_array_from_tiff(self.image_path/im_id)
        x_batch = np.ndarray(shape=(4,HEIGHT,WIDTH))
        for i in range(4):
            x_batch[i] = cv.resize(batch[i],dsize=(WIDTH,HEIGHT))
        x_batch = self.normalize(x_batch)
        x_batch = np.expand_dims(np.rollaxis(x_batch, 0, 3),axis=0)[...,:3]
        return x_batch 

    def get_y_image(self, im_id):
        batch = np.rollaxis(get_array_from_tiff(self.gt_path/im_id)[0], 0, 2)
        batch = np.rot90(batch, k=3)
        batch = np.flip(batch,axis=1)
        batch = cv.resize(batch,dsize=(WIDTH,HEIGHT))
        batch = np.uint8(batch > 0)
        batch = np.expand_dims(batch,axis=0)
        batch = np.expand_dims(batch,axis=-1)
        return batch 
    
    #def merge_as_batch(self):
        
    def flow(self, mode: str ="train", height: int =137,width: int =236):
        '''Run the generator '''
        c = 0
        while True:
            image_ids_to_get = next(self.generators[mode])
            x_batch = []
            y_batch = []
            for im_id in image_ids_to_get:
                image_orientation = self.orientations.loc[self.orientations["image_timestamps"]==re.match('.*(\d{14}_\d{14})',im_id)[1],"orientation"]
                self.print_if_verbose("\n INFO - image_ids_to_get:", im_id,"\n",status='always')
                self.print_if_verbose("\n INFO - image_orientation:",image_orientation)
                x_image = self.get_x_image(im_id)
                x_batch.append(x_image)
                if mode in ('train','validation'):
                    self.print_if_verbose("\n INFO - current mode ",mode,status="debug")
                    y_image = self.get_y_image(im_id)
                    y_batch.append(y_image)
            x_batch = np.concatenate(x_batch)
            y_batch = np.concatenate(y_batch)
            if mode in ('train','validation'):
                yield x_batch, y_batch
            elif mode=='test':
                c+=1
                print(f"\n INFO - Yielding test data n°{c}/{self.batch_size/self.files_size['test']}")
                yield x_batch

In [9]:
model = sm.Unet()
model.compile(
    'Adam',
    loss='binary_crossentropy',
    metrics=[sm.metrics.iou_score],
)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [10]:
TRAIN_FRAC = 0.8
N_TRAIN = 3401
BATCH_SIZE = 32
STEP_PER_EPOCH = int(TRAIN_FRAC*N_TRAIN)//BATCH_SIZE+1
VAL_STEPS_PER_EPOCH = int((1-TRAIN_FRAC)*N_TRAIN)//BATCH_SIZE+1
N_EPOCHS = 40
LOG_DIR = 'logs/'
MODELS_DIR = "models/"
LOAD_MODEL = True
DEBUG = False
if DEBUG:
    STEP_PER_EPOCH= 10
    VAL_STEPS_PER_EPOCH= 10

In [11]:
fit_generator = ImageGen(verbose=False, batch_type="multiple_images", batch_size=BATCH_SIZE)

In [12]:
a = next(fit_generator.flow(mode='validation'))

In [13]:
a[0].shape

(32, 256, 256, 3)

In [14]:
!ls -lt models/

total 31211368
-rw-r--r-- 1 root root 285357208 Apr 10 16:58 model_weights-06-0.2983--0.2848.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 16:37 model_weights-05-0.2993--0.2814.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 16:18 model_weights-04-0.2914--0.2852.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 16:00 model_weights-03-0.2880--0.2829.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 15:42 model_weights-02-0.2782--0.2860.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 15:24 model_weights-01-0.2771--0.2840.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 13:42 model_weights-06-0.3033--0.2724.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 13:23 model_weights-05-0.3020--0.2683.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 13:05 model_weights-04-0.2942--0.2714.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 12:48 model_weights-03-0.2916--0.2681.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 12:30 model_weights-02-0.2824--0.2718.hdf5
-rw-r--r-- 1 root root 285357208 Apr 10 12:12 model_weights-

In [15]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, EarlyStopping, CSVLogger, TensorBoard, ReduceLROnPlateau

checkpoint = ModelCheckpoint(MODELS_DIR+"model_weights-{epoch:02d}-{val_loss:.4f}--{val_iou_score:.4f}.hdf5", 
                             monitor='val_loss', 
                             verbose=1, 
                             save_best_only=False, 
                             mode='min')

early_stopping = EarlyStopping(monitor='val_loss',
                              min_delta=0,
                              patience=5,
                              verbose=0, mode='auto')
csv_logger = CSVLogger(LOG_DIR+'training.log')

reduce_lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=3, min_lr=0.0001)

In [16]:
if LOAD_MODEL:
    model.load_weights(f'{MODELS_DIR}/model_weights-15-0.1981.hdf5')

In [None]:













































































 fit mo
model.fit_generator(
   fit_generator.flow(mode='train'),
   epochs=N_EPOCHS,
   steps_per_epoch=STEP_PER_EPOCH,
   validation_steps=VAL_STEPS_PER_EPOCH,
   validation_data=fit_generator.flow(mode='validation'),
   callbacks = [
                #checkpoint, 
                #early_stopping, 
                #csv_logger, 
                #reduce_lr_on_plateau
                ]
)

Instructions for updating:
Please use Model.fit, which supports generators.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 86 steps, validate for 22 steps
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
14/86 [===>..........................] - ETA: 4:08 - loss: 0.0405 - iou_score: 0.7119

In [None]:
def safe_log(x):
    y = x.copy()
    y[y==0]=1
    return 10*np.log10(y)

In [None]:
#img = next(fit_generator.flow(mode='validation'))

fig, ax = plt.subplots(2,3, figsize=(15,9))
pred = model.predict(img[0])
ax = ax.ravel()
for i in range(3):
    ax[i].imshow(img[0][0,...,i],cmap='gist_gray',vmin=15,vmax=60)
    ax[3+i].imshow(safe_log(img[0][0,...,i]),cmap='gist_gray',vmin=11,vmax=18)


In [None]:
img = next(fit_generator.flow(mode='validation'))

fig, ax = plt.subplots(2,3, figsize=(20,20))
pred = model.predict(img[0])
for i in range(3):
    ax[0,i].imshow(img[0][0,...,i],cmap='gist_gray',vmin=25,vmax=60)

ax[1,0].imshow(pred[0,...,0],cmap='gist_gray')
ax[1,1].imshow(img[1][0,...,0],cmap='gist_gray')

In [None]:
img = next(fit_generator.flow(mode='validation'))
fig, ax = plt.subplots(2,1, figsize=(30,30))
ax[0].imshow(img[0][0,...,0],cmap='gist_gray',vmin=10,vmax=60)
ax[1].imshow(img[1][0,...,0],cmap='Oranges',alpha=1)
ax[1].imshow(safe_log(img[0][0,...,0]),cmap='Blues_r',alpha=0.8, vmin=12, vmax=18)

In [None]:
help(np.rot90)

In [None]:
np.unique(img[1][0,...])

In [None]:
plt.imshow(img[1][0,...] > 0)

In [None]:
plt.imshow(img[0][0,...])

In [None]:
plt.imshow(img[1][0,...])

In [None]:
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Conv2D

In [None]:
plt.imshow(img[1][0,...])

In [None]:
a=

In [None]:
new_input = Input(shape=(900,900,4))
first_layer = Conv2D(3,(3,3))(new_input)
other_layers= model.get_layer("block1_conv1")(first_layer)

#Model(new_input)

In [None]:
first_layer(new_input)

In [None]:
model.to_json()

In [None]:
#model.layers.pop(0)
model.layers

newInput = Input(batch_shape=(4,900,900,1))
newOutputs = model
newModel = Model(newInput, newOutputs)

newModel.summary()

In [None]:
model.summary()

In [None]:
model.layers

In [None]:
with rasterio.open('/Users/a955nd/Programming/axa_climate/managing_provider/data-provider-docs/fastcat_validate_da/tests/data/integration/test_gsi_jackson_26022020_with_satellite_images/raw_bucket/92e5c126-e9d0-423a-95b6-f5284a8e22c8_satellite_raw_iceye_1581871651_0_v0.tiff') as f: 
    a=f.read()
    b=f.meta
    with rasterio.open('')

In [None]:
plt.imshow(a[0])