In [1]:
import tensorflow as tf
import tensorflow_hub as hub


In [2]:
import tifffile

In [3]:
def get_lulc_class(path):
  """Return the second-to-last '/'-separated component of ``path``.

  For a path shaped like ``.../<class_dir>/<file>`` this is ``<class_dir>``.
  Raises IndexError when ``path`` contains no '/'.
  """
  return path.split('/')[-2]

In [4]:
def multispectral_to_rgb(raster, optical_maximum = 2000):
  """Build an integer RGB composite from a channels-last raster.

  The channels at index 3, 2 and 1 of ``raster`` are used as red, green
  and blue respectively. Values are divided by ``optical_maximum``,
  scaled to the 0-255 range, rounded and clipped.

  Returns an array of shape (rows, cols, 3) with dtype ``int``.
  """
  red, green, blue = (raster[:, :, band] for band in (3, 2, 1))
  composite = np.stack([red, green, blue], axis=2)

  # Normalise first, then stretch to 0..255 and round to the nearest integer.
  stretched = np.around((composite / optical_maximum) * 255)

  # Anything brighter than optical_maximum (or negative) is clamped.
  return np.clip(stretched, 0, 255).astype(int)

In [5]:
# Maps integer land-cover class codes to human-readable class names.
# NOTE(review): the codes look like the Copernicus Global Land Cover
# discrete classification — confirm against the label source.
# Fixed: codes 113 and 123 previously read "Needle Leaft Forest".
classDictionary = {30: 'Herbaceous Vegetation',
20: 'Shrubs',
40: 'Agricultural Land',
50: 'Urban Areas',
60: 'Bare Earth/Sparse Vegetation',
70: 'Snow and Ice',
80: 'Permanent Water Bodies',
90: 'Herbaceous Wetland',
100: 'Moss and Lichen',
111: 'Closed Evergreen Needle Leaf Forest',
112: 'Closed Evergreen Broad Leaf Forest',
113: 'Closed Deciduous Needle Leaf Forest',
114: 'Closed Deciduous Broad Leaf Forest',
115: 'Closed Mixed Forest',
116: 'Other Closed Forest',
121: 'Open Evergreen Needle Leaf Forest',
122: 'Open Evergreen Broad Leaf Forest',
123: 'Open Deciduous Needle Leaf Forest',
124: 'Open Deciduous Broad Leaf Forest',
125: 'Open Mixed Forest',
126: 'Other Open Forest',
200: 'Oceans, Seas'}

In [6]:
def rescale_image(raster):
  """Rescale ``raster`` to the range [-1, 1] around half of its maximum.

  NaNs are replaced via ``np.nan_to_num`` before the maximum is taken.
  Each value is then mapped to ``(value - max/2) / (max/2)``; any
  non-finite result (e.g. when the maximum is 0) is again passed through
  ``np.nan_to_num`` and the output is clipped to [-1, 1].
  """
  cleaned = np.nan_to_num(raster)
  half_range = np.nanmax(cleaned) / 2
  centred = (cleaned - half_range) / half_range
  return np.clip(np.nan_to_num(centred), -1, 1)

In [7]:
def rio_to_channels_last(raster):
  """Move the leading axis of a 3-D array to the end: (0, 1, 2) -> (1, 2, 0)."""
  new_axis_order = (1, 2, 0)
  return raster.transpose(new_axis_order)

In [8]:
def get_array(path):
  """Read the TIFF file at ``path`` with tifffile and return the result as-is."""
  # No axis reordering is applied here; the array keeps whatever layout
  # tifffile.imread produced.
  return tifffile.imread(path)

In [9]:
def get_image_paths(top_level_path):
  """Collect the ``.tif`` files located one folder below ``top_level_path``.

  Every entry directly under ``top_level_path`` is treated as a folder and
  searched (non-recursively) for ``*.tif`` files.

  Returns a list of path strings in the order produced by ``glob``; each
  file appears exactly once.
  """
  img_paths = []
  for ec_dir in glob.glob(top_level_path + '/*'):
    # Anchor the pattern inside this folder. The previous pattern
    # (ec_dir + '*/*.tif') also matched sibling folders whose names start
    # with the same prefix (e.g. '1' and '10'), so their files were
    # collected more than once.
    img_paths.extend(glob.glob(ec_dir + '/*.tif'))
  return img_paths


In [12]:
import glob
import random
import numpy as np

In [14]:
# Load the image folder and merge all of its splits into a single dataset
# whose labels are one-hot encoded.
# NOTE(review): `tfds` is not imported in any visible cell (presumably
# `import tensorflow_datasets as tfds`) — confirm and add it to the imports.
folder = 'latamSatData/datasetRGB'
test_dataset = tfds.ImageFolder(folder)
test_dataset = test_dataset.as_dataset(as_supervised=True, batch_size=32)
# Replace the integer label of every split with a 19-way one-hot vector.
for k in test_dataset.keys():
    test_dataset[k] = test_dataset[k].map(lambda x, y: (x, tf.one_hot(y, depth=19)))
# Start from the split keyed '0' and append every other split to it.
base_dataset = test_dataset['0']
for idx, k in enumerate(test_dataset.keys()):
    if k != '0':
        base_dataset = base_dataset.concatenate(test_dataset[k])
    
    
# Only base_dataset is kept; the per-split mapping is dropped.
del(test_dataset)


generating paths


2023-10-03 10:39:03.189535: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2023-10-03 10:39:03.189561: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2023-10-03 10:39:03.189568: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2023-10-03 10:39:03.189634: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-10-03 10:39:03.189666: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [15]:
# Generator consumed one item at a time by the feature-extraction loop below.
# NOTE(review): `dataset` is not defined in any visible cell, so this fails
# on a fresh kernel — confirm where it is created.
whole_ds_gen = dataset.random_image_generator(supervised=True, rgb=False, normalise=True, one_hot=False)

In [16]:
# Batch size used when deriving steps_epoch / steps_validation below.
ds_batch_size = 32

In [17]:
def prepare_for_training(dataset, batch_size=32, cache='CachedDataset.cache'):
    """Turn ``dataset`` into an endlessly repeating stream of full batches.

    Args:
        dataset: object exposing ``repeat()`` and ``batch()`` (e.g. a
            ``tf.data.Dataset``).
        batch_size: number of elements per batch; incomplete trailing
            batches are dropped.
        cache: accepted for API compatibility but currently unused — no
            caching, shuffling or prefetching is applied.

    Returns:
        The repeated, batched dataset.
    """
    repeated = dataset.repeat()
    return repeated.batch(batch_size, drop_remainder=True)

In [18]:
# Repeat and batch the validation stream.
# NOTE(review): `val_img_generator` is not created in any visible cell, and
# because the name is reassigned from itself, re-running this cell batches
# the already-batched dataset again. The `cache` argument is ignored by
# prepare_for_training (and 'valCahce' looks like a typo for 'valCache').
val_img_generator = prepare_for_training(val_img_generator, cache='valCahce.cache')


In [19]:
# Repeat and batch the training stream.
# NOTE(review): `train_img_generator` is not created in any visible cell, and
# re-running this cell batches the already-batched dataset again.
train_img_generator = prepare_for_training(train_img_generator, cache='trainCache.cache')


In [20]:
# Repeat and batch the test stream.
# NOTE(review): `test_img_generator` is not created in any visible cell, and
# re-running this cell batches the already-batched dataset again.
test_img_generator = prepare_for_training(test_img_generator, cache='testCache.cache')

# Pull a single batch from the validation stream and show the shape of its
# second element (the labels when the stream yields (inputs, labels) pairs).
testreturn = next(val_img_generator.as_numpy_iterator())
testreturn[1].shape

(32, 19)

In [21]:
# Size of the last axis of the sampled input batch (the channel count for
# channels-last data).
num_channels = testreturn[0].shape[-1]

In [22]:
import matplotlib.pyplot as plt

In [23]:
num_channels

13

In [24]:
# Total number of image paths, and the size of one percent of them.
# NOTE(review): `dataset` is not defined in any visible cell.
img_path_len = len(dataset.img_paths)
# np.floor returns a float, so every quantity derived from img_path_pct
# (steps_epoch, steps_validation) is a float as well.
img_path_pct = np.floor(img_path_len/100)


In [25]:
# Number of epochs passed to m.fit in recursive_train.
num_epochs = 20

In [26]:
# Batches per epoch: 60% of the image paths divided by the batch size, minus one.
# NOTE(review): this is a float because img_path_pct is a float; Keras
# documents steps_per_epoch as an integer — consider wrapping in int().
steps_epoch = (img_path_pct*60 // ds_batch_size) - 1 

In [27]:
steps_epoch

5882.0

In [28]:
# Validation batches: 10% of the image paths divided by the batch size, minus one.
# NOTE(review): recursive_train hard-codes validation_steps = 25 and does not
# read this value — confirm which one is intended.
steps_validation = (img_path_pct*10 // ds_batch_size) -1

In [29]:
steps_validation

979.0

In [30]:
#train_ds = prepare_for_training(img_generator, batch_size=64)

In [31]:
def make_model(input_shape, num_classes):
    """Build a small Xception-style classifier with residual connections.

    Args:
        input_shape: shape of one input sample, without the batch axis.
        num_classes: number of target classes. Exactly 2 yields a single
            sigmoid unit; any other value yields a ``num_classes``-way
            softmax.

    Returns:
        An uncompiled ``tf.keras.Model``. Its second-to-last layer is the
        Dropout layer that feeds the final Dense classifier.
    """
    layers = tf.keras.layers
    inputs = tf.keras.Input(shape=input_shape)

    # Entry block: strided convolution -> batch norm -> ReLU.
    x = layers.Conv2D(128, 3, strides=2, padding="same")(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    shortcut = x  # tensor fed into the next residual projection

    for filters in (256, 512, 768):
        # Two (ReLU -> separable conv -> batch norm) stages per block.
        for _ in range(2):
            x = layers.Activation("relu")(x)
            x = layers.SeparableConv2D(filters, 3, padding="same")(x)
            x = layers.BatchNormalization()(x)

        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # A strided 1x1 convolution brings the shortcut to the same spatial
        # size and channel count as x before the two are summed.
        projected = layers.Conv2D(filters, 1, strides=2, padding="same")(shortcut)
        x = layers.add([x, projected])
        shortcut = x

    # Exit block.
    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.GlobalAveragePooling2D()(x)

    if num_classes == 2:
        units, activation = 1, "sigmoid"
    else:
        units, activation = num_classes, "softmax"

    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(units, activation=activation)(x)
    return tf.keras.Model(inputs, outputs)


# Instantiate the classifier for 64x64 inputs with 13 channels and 19 classes.
m = make_model(input_shape=(64,64,13), num_classes=19)
# Rendering the graph requires pydot and graphviz to be installed; without
# them plot_model only prints an installation hint.
tf.keras.utils.plot_model(m, show_shapes=True)


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [32]:

# Labels are one-hot vectors, hence categorical cross-entropy; the model runs
# in graph mode (run_eagerly=False).
m.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
    run_eagerly=False,
)

In [33]:
import os

In [34]:
# Checkpoint file lives in the current working directory.
model_name = "satellite-classification_xception"
model_path = f"{model_name}.h5"
# Only overwrite the checkpoint when the monitored metric improves.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    model_path,
    save_best_only=True,
    verbose=1,
)

In [35]:
# Warm-start from an existing checkpoint when one is available. This stays
# best-effort (a missing or incompatible file must not stop the notebook),
# but the reason is now reported instead of being silently discarded, and
# KeyboardInterrupt / SystemExit are no longer swallowed.
try:
    m.load_weights(model_path)
except Exception as err:
    print(
        f"Could not load weights from {model_path!r} "
        f"({type(err).__name__}: {err}); continuing with freshly initialised weights."
    )

In [36]:

def recursive_train():
    """Fit the global model ``m`` and return the Keras training history.

    Reads the module-level ``train_img_generator``, ``test_img_generator``,
    ``num_epochs``, ``steps_epoch`` and ``model_checkpoint``.

    Returns:
        Whatever ``m.fit`` returns, or ``None`` when training raised an
        exception (the error is printed rather than silently dropped).
        KeyboardInterrupt is no longer swallowed.
    """
    try:
        # NOTE(review): validation runs on test_img_generator with a fixed
        # 25 steps, while val_img_generator and steps_validation exist in
        # this notebook — confirm which pair is intended.
        return m.fit(
            train_img_generator,
            validation_data=test_img_generator,
            verbose=1,
            epochs=num_epochs,
            steps_per_epoch=steps_epoch,
            validation_steps=25,
            callbacks=[model_checkpoint],
        )
    except Exception as err:
        print(f"Training stopped early: {type(err).__name__}: {err}")
        return None
        

# Training is currently disabled: the call below is commented out, so this
# loop only prints the banner and the weights come from the saved checkpoint.
for i in range(1):
    print('starting epoch', i)
    #recursive_train()


starting epoch 0


In [37]:
# The checkpoint must exist at this point; unlike the earlier best-effort
# load, a failure here is allowed to propagate.
m.load_weights(model_path)

# Second model sharing m's input that stops at m's second-to-last layer
# (the Dropout layer in front of the final Dense classifier in make_model).
featureExtraction = tf.keras.Model(
    inputs=m.input,
    outputs=m.layers[-2].output,
)


In [38]:
from cleanlab import Datalab


In [None]:
def _audit_chunk(images, labels, probabilities, features, tag):
    """Run cleanlab's Datalab on one chunk and save its label issues.

    Args:
        images: list of image batches (each with a leading batch axis of 1).
        labels: list of labels, one per image.
        probabilities: list of model outputs, one per image.
        features: list of feature-extractor outputs, one per image.
        tag: value used to name the output file ``{tag}_issues.csv``.

    Returns:
        ``(lab, label_issues)`` — the Datalab object and the label-issue table.
    """
    data_to_clean = {'Images': np.squeeze(np.array(images)), 'Labels': np.array(labels)}
    lab = Datalab(data=data_to_clean, label_name="Labels", image_key="Images")
    lab.find_issues(
        pred_probs=np.squeeze(np.array(probabilities)),
        features=np.squeeze(np.array(features)),
    )
    label_issues = lab.get_issues("label")
    label_issues.to_csv(f"{tag}_issues.csv")
    return lab, label_issues


feats_2 = []
classifications_2 = []
imgs_2 = []
pred_probs = []
# NOTE(review): the hard-coded start index presumably resumes an earlier run
# that covered the items before 70001 — confirm. Items come from
# whole_ds_gen one at a time, so `i` is a counter rather than a lookup key.
for i in range(70001, img_path_len):
    # With supervised=True the generator yields (image, label) pairs.
    img, label = next(whole_ds_gen)
    img = np.expand_dims(img, 0)  # add the batch axis expected by predict
    _f = featureExtraction.predict(img, verbose=0)
    pred = m.predict(img, verbose=0)
    feats_2.append(_f)
    imgs_2.append(img)
    pred_probs.append(pred)
    classifications_2.append(label)

    if i % 1000 == 0:
        print(i, 'done')
    if i % 10000 == 0:
        # Audit and persist the current chunk, then start a fresh one so
        # memory use stays bounded.
        lab, label_issues = _audit_chunk(imgs_2, classifications_2, pred_probs, feats_2, i)
        feats_2 = []
        classifications_2 = []
        imgs_2 = []
        pred_probs = []

# Flush whatever is left after the last full chunk. Skipped when nothing is
# pending (the loop did not run, or it ended exactly on a chunk boundary):
# previously that case crashed on empty arrays or on an undefined `i`.
if imgs_2:
    lab, label_issues = _audit_chunk(imgs_2, classifications_2, pred_probs, feats_2, i)



2023-10-03 10:39:07.793830: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-10-03 10:39:08.987484: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


71000 done
72000 done
73000 done
74000 done
75000 done
76000 done
77000 done
78000 done
79000 done
80000 done
Finding label issues ...
Finding outlier issues ...
Fitting OOD estimator based on provided features ...
Finding near_duplicate issues ...
Finding non_iid issues ...
Finding dark, light, low_information, odd_aspect_ratio, odd_size, grayscale, blurry images ...


  0%|          | 0/10000 [00:00<?, ?it/s]

Error in checking for image issues: 'list' object has no attribute 'size'

Audit complete. 8671 issues found in the dataset.
81000 done
82000 done
83000 done
84000 done
85000 done
86000 done
87000 done
88000 done


In [None]:
# Deliberate stop: `break` outside a loop is a SyntaxError, which halts
# "Run All" here so the cells below are only executed by hand.
break

In [None]:
# Display the Datalab object left over from the most recent audited chunk.
lab
#label_issues_df = label_issues.query("is_label_issue").sort_values("label_score")


In [None]:
# Deliberate stop: `break` outside a loop is a SyntaxError, which halts
# "Run All" here so the export cell below is only executed by hand.
break

In [None]:
# Combine the features/labels of two extraction passes and persist them.
# NOTE(review): `feats` and `classifications` are not defined in any visible
# cell (presumably left over from an earlier pass) — confirm.
# NOTE(review): the extraction loop empties feats_2 / classifications_2 after
# every 10000-item chunk, so at this point they only hold the items gathered
# since the last chunk boundary, not the whole second pass.
feats_full = feats+ feats_2
classifications_full = classifications + classifications_2

import pickle
# Features and labels go to separate pickle files in the working directory.
with open('featuresPickle.pickle', 'wb') as wf:
    pickle.dump(feats_full, wf)

with open('classesPickle.pickle', 'wb') as wc:
    pickle.dump(classifications_full, wc)

In [None]:
print('Done!')