In [25]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import h5py
import tensorflow as tf
import os

from src.utils import to_rgb
from src.satellite_images.storage import SentinelDataset, SentinelDatasetIterator
from src.mask.mask_dataset import MaskDataset, MaskDatasetIterator
from src.mask.utils import apply_mask_to_image_series, apply_mask_to_image

%load_ext autoreload
%autoreload 2

data_path = '../../../kornmo-data-files/raw-data/crop-classification-data/'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [26]:
print("Reading data")
data = gpd.read_file(f"{data_path}/training_data.gpkg")
data['orgnr'] = data['orgnr'].astype(int)
data['year'] = data['year'].astype(int)

print("Reading masks")
masks = MaskDataset(f"{data_path}/small_masks_train.h5")

print("Reading satellite images")
satellite_imgs = SentinelDataset(f"{data_path}/small_images_train.h5")

print(f"Loaded {len(masks.labels)} masks")
print(f"Loaded {len(satellite_imgs.labels) * 30} images")

Reading data
Reading masks
Reading satellite images
Loaded 86079 masks
Loaded 2582370 images


In [27]:
data = data.loc[data['planted'] != 'erter']
data = data.loc[data['planted'] != 'rughvete']
data = data.loc[data['planted'] != 'oljefro']
data = data.loc[data['planted'] != 'rug']
data.drop(data[data['area'] < 1500].index, inplace = True)

# print(969923173 in data['orgnr'].unique())
# print(2017 in data.loc[data['orgnr'] == 969923173]['year'].unique())
# label = data.loc[data['orgnr'] == 969923173]
# print(label)

In [28]:

labels = list(set(data['planted']))
print(labels)
print(pd.Series(list(data['planted'])).value_counts())
def add_labels(orgnr, year, data_arg):
    orgnr = int(orgnr[:9])
    year = int(year)
    if orgnr in data['orgnr'].unique() and year in data.loc[data['orgnr'] == orgnr]['year'].unique():
        label = data.loc[data['orgnr'] == orgnr].loc[data['year'] == year]['planted'].iloc[0]
        index = labels.index(label)
        arr = [0 for _ in range(0, len(labels))]
        arr[index] = 1
        return {'class': arr}


['havre', 'hvete', 'bygg']
bygg     76231
havre    20512
hvete    18102
dtype: int64


In [29]:
train, val = satellite_imgs.to_iterator().split(rand_seed='corn')


train = train.with_data(add_labels, show_progress=True)
val = val.with_data(add_labels, show_progress=True)
masks_it = masks.get_iterator()
mask_dict = {}

for orgnr, year, mask in masks_it:
    mask_dict[f'{orgnr}/{year}'] = mask


# train = train.filter(lambda orgnr, year, _,__: f"{orgnr}/{year}" in mask_dict)
# val = val.filter(lambda orgnr, year, _,__: f"{orgnr}/{year}" in mask_dict)

print(f"train samples: {len(train)}")
print(f"val samples: {len(val)}")

#7737, 1937

100%|██████████| 68863/68863 [03:59<00:00, 287.01it/s]
100%|██████████| 17216/17216 [01:14<00:00, 230.32it/s]


train samples: 67970
val samples: 17000


In [None]:
import random

# def train_generator():
#     for orgnr, year, imgs, label in train:
#         for img in imgs[5:20]:
#             img = apply_mask_to_image(mask_dict[f'{orgnr}/{year}'], img, image_size=16)
#             yield img, label['class']
#
#
# def val_generator():
#     for orgnr, year, imgs, label in val:
#         for img in imgs[5:20]:
#             img = apply_mask_to_image(mask_dict[f'{orgnr}/{year}'], img, image_size=16)
#             yield img, label['class']


def train_generator():
    for orgnr, year, imgs, label in train:
        # imgs = apply_mask_to_image_series(mask_dict[f'{orgnr}/{year}'], imgs[4:20], image_size=16)
        imgs = imgs[4:20]
        yield imgs, label['class']


def val_generator():
    for orgnr, year, imgs, label in val:
        # imgs = apply_mask_to_image_series(mask_dict[f'{orgnr}/{year}'], imgs[4:20], image_size=16)
        imgs = imgs[4:20]
        yield imgs, label['class']



In [45]:
i = 0
for vale in val:
    if i > 10:
        break
    i += 1
    print(vale[2][()])

[[[[3.01422530e-03 2.90657439e-03 2.78354479e-03 ... 4.19838524e-03
    1.13802384e-03 1.18415994e-03]
   [3.01422530e-03 2.86043829e-03 2.79892349e-03 ... 4.19838524e-03
    1.10726644e-03 1.16878124e-03]
   [2.96808920e-03 2.87581699e-03 2.70665129e-03 ... 4.09073433e-03
    1.07650903e-03 1.12264514e-03]
   ...
   [2.95271050e-03 2.66051519e-03 2.62975779e-03 ... 4.18300654e-03
    1.21491734e-03 1.23029604e-03]
   [2.95271050e-03 2.64513649e-03 2.56824298e-03 ... 4.18300654e-03
    1.19953864e-03 1.24567474e-03]
   [2.95271050e-03 2.62975779e-03 2.52210688e-03 ... 4.18300654e-03
    1.18415994e-03 1.24567474e-03]]

  [[3.01422530e-03 2.90657439e-03 2.76816609e-03 ... 4.19838524e-03
    1.18415994e-03 1.24567474e-03]
   [3.01422530e-03 2.87581699e-03 2.78354479e-03 ... 4.19838524e-03
    1.15340254e-03 1.24567474e-03]
   [2.96808920e-03 2.84505959e-03 2.73740869e-03 ... 4.09073433e-03
    1.07650903e-03 1.18415994e-03]
   ...
   [2.95271050e-03 2.70665129e-03 2.62975779e-03 ... 4.18

In [46]:


# train_dataset = tf.data.Dataset.from_generator(
#     train_generator,
#     output_types=(tf.dtypes.float64, tf.dtypes.int64),
#     output_shapes=(( 16, 16, 12), 7)
# )
#
# val_dataset = tf.data.Dataset.from_generator(
#     val_generator,
#     output_types=(tf.dtypes.float64, tf.dtypes.int64),
#     output_shapes=(( 16, 16, 12), 7)
# )

train_dataset = tf.data.Dataset.from_generator(
    train_generator,
    output_types=(tf.dtypes.float64, tf.dtypes.int64),
    output_shapes=((16, 16, 16, 12), 3)
)

val_dataset = tf.data.Dataset.from_generator(
    val_generator,
    output_types=(tf.dtypes.float64, tf.dtypes.int64),
    output_shapes=((16, 16, 16, 12), 3)
)



In [47]:
from sklearn.utils import class_weight
def CNN(input_dim, output_dim):
    input_layer = layers.Input(shape=input_dim)
    y = layers.Conv2D(16, (3, 3), activation=tf.nn.relu, padding='same')(input_layer)
    y = layers.MaxPool2D((2, 2))(y)
    y = layers.Conv2D(32, (3, 3), activation=tf.nn.relu, padding='same')(y)
    y = layers.MaxPool2D((2, 2))(y)
    y = layers.Conv2D(64, (3, 3), activation=tf.nn.relu, padding='same')(y)
    y = layers.MaxPool2D((2, 2))(y)
    y = layers.Flatten()(y)
    y = layers.Dense(output_dim, activation=tf.nn.relu)(y)
    return models.Model(inputs=[input_layer], outputs=[y], name="SingleImageCNN")

classes = list(data['planted'])

class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(classes), y=classes)
class_weights = dict(zip(np.array([0, 1, 2]), class_weights))

print(class_weights)


{0: 0.5021797781305068, 1: 1.8663059022360895, 2: 2.114775531248849}


In [51]:
from keras.optimizer_v2.learning_rate_schedule import ExponentialDecay
from tensorflow.python.data import AUTOTUNE
from keras import models
from keras.applications.densenet import layers
from keras.models import load_model
from tensorflow import optimizers
from src.kornmo.pyimagesearch.callbacks.epochcheckpoint import EpochCheckpoint
from src.kornmo.pyimagesearch.callbacks.trainingmonitor import TrainingMonitor

plotPath = os.path.sep.join(["training", "hybrid_more_features.png"])
jsonPath = os.path.sep.join(["training", "hybrid_more_features.json"])
callbacks = [
	EpochCheckpoint('./training', every=1, startAt=0),
	TrainingMonitor(plotPath, jsonPath=jsonPath, startAt=0)
]


restart = True
if restart:
    cnn_net = CNN((16, 16, 12), 64)
    input_cnn = layers.Input(shape=(16, 16, 16, 12), name="cnn_input")

    cnn = layers.TimeDistributed(cnn_net)(input_cnn)
    cnn = layers.GRU(128, return_sequences=False)(cnn)
    cnn = layers.Flatten()(cnn)
    cnn = layers.Dense(128)(cnn)
    cnn = layers.Dense(3, activation='softmax')(cnn)
    cnn = models.Model(inputs=input_cnn, outputs=cnn, name="CNN")

    lr_schedule = ExponentialDecay(
        initial_learning_rate=1e-5,
        decay_steps=1000,
        decay_rate=0.9)
    cnn.compile(
        optimizer=optimizers.Adam(learning_rate=lr_schedule),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['categorical_accuracy']
    )
    # callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)


    cnn_history = cnn.fit(
            train_dataset.take(10000).batch(32).prefetch(2),
            validation_data=val_dataset.batch(32).prefetch(2),
            epochs=100,
            verbose=1,
            callbacks=callbacks,
            class_weight=class_weights
    )
else:
    cnn_net = load_model('./training/epoch_2.hdf5')

    cnn_history = cnn_net.fit(
        train_dataset.take(10000).batch(32).prefetch(2),
        validation_data=val_dataset.batch(32).prefetch(2),
        epochs=100,
        verbose=1,
        callbacks=callbacks
    )

# restart = True
# if restart:
#     input_layer = layers.Input(shape=(16, 16, 12))
#     cnn = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(input_layer)
#     cnn = layers.MaxPooling2D((2, 2))(cnn)
#     cnn = layers.Dropout(0.2)(cnn)
#     cnn = layers.Conv2D(32, (3, 3), strides=(2,2), activation='relu', padding='same')(cnn)
#     cnn = layers.Flatten()(cnn)
#     cnn = layers.Dense(32, activation="relu")(cnn)
#     cnn = layers.Dropout(0.2)(cnn)
#     cnn = layers.Dense(7, activation='softmax')(cnn)
#
#
#
#
#     cnn = models.Model(inputs=[input_layer], outputs=cnn, name="cnn_pure")
#     cnn.compile(
#         optimizer=optimizers.Adam(),
#         loss=tf.keras.losses.CategoricalCrossentropy(),
#         metrics=['categorical_accuracy']
#     )
#     history = cnn.fit(
#         train_dataset.batch(32).prefetch(buffer_size=AUTOTUNE),
#         validation_data=val_dataset.batch(32).prefetch(buffer_size=AUTOTUNE),
#         epochs=100,
#         callbacks=callbacks
#     )
# else:
#
#     cnn_net = load_model('./training/epoch_4.hdf5')
#
#     cnn_history = cnn_net.fit(
#         train_dataset.take(10000).batch(32).prefetch(2),
#         validation_data=val_dataset.batch(32).prefetch(2),
#         epochs=100,
#         verbose=1,
#         callbacks=callbacks
#     )


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import classification_report

cnn_net = load_model('./training/epoch_16.hdf5')
res = cnn_net.predict(val_dataset, verbose=1)