In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import h5py
import tensorflow as tf
import os

from src.utils import to_rgb
from src.satellite_images.storage import SentinelDataset, SentinelDatasetIterator
from src.mask.mask_dataset import MaskDataset, MaskDatasetIterator
from src.mask.utils import apply_mask_to_image_series, apply_mask_to_image

%load_ext autoreload
%autoreload 2

# Directory that holds the crop-classification inputs read by the next cell
# (training_data.gpkg and the mask / image .h5 files). The path is relative,
# so it resolves against the notebook's working directory.
data_path = '../../../kornmo-data-files/raw-data/crop-classification-data/'

In [None]:
print("Reading data")
data = gpd.read_file(f"{data_path}/training_data.gpkg")
# Both lookup keys are stored as plain ints so they compare equal to the
# int(...) values built in add_labels.
for key_column in ('orgnr', 'year'):
    data[key_column] = data[key_column].astype(int)

print("Reading masks")
masks = MaskDataset(f"{data_path}/small_masks_train.h5")

print("Reading satellite images")
satellite_imgs = SentinelDataset(f"{data_path}/small_images_train.h5")

mask_count = len(masks.labels)
print(f"Loaded {mask_count} masks")
# NOTE(review): the factor 30 is presumably the number of images stored per
# label entry - confirm against SentinelDataset.
image_count = len(satellite_imgs.labels) * 30
print(f"Loaded {image_count} images")

In [None]:
# data = data.loc[data['planted'] != 'erter']
# data = data.loc[data['planted'] != 'rughvete']
# data = data.loc[data['planted'] != 'oljefro']
# data = data.loc[data['planted'] != 'rug']

# print(969923173 in data['orgnr'].unique())
# print(2017 in data.loc[data['orgnr'] == 969923173]['year'].unique())
# label = data.loc[data['orgnr'] == 969923173]
# print(label)

In [None]:

# Class vocabulary used for the one-hot targets built in add_labels.
# It is sorted so the class -> index mapping is the same on every kernel start:
# the iteration order of a bare set() of strings depends on Python's per-process
# hash randomisation, which would silently permute the target columns between
# runs and make them disagree with checkpoints saved by an earlier run.
labels = sorted(set(data['planted']))
print(labels)
# Number of rows per crop class in the training table.
print(data['planted'].value_counts())
def add_labels(orgnr, year, data_arg):
    """Build the one-hot crop-class target for one (orgnr, year) sample.

    Args:
        orgnr: String identifier; only its first nine characters are used and
            they must parse as an integer organisation number.
        year: Season of the sample; anything accepted by ``int()``.
        data_arg: Not used by this function. It is part of the signature
            because the function is handed to ``with_data`` as a callback
            (presumably that call supplies the sample's current data - confirm).

    Returns:
        ``{'class': one_hot}`` where ``one_hot`` is a list of ``len(labels)``
        ints with a single 1 at the index of the sample's ``planted`` value in
        the module-level ``labels`` list, or ``None`` when the module-level
        ``data`` table has no row for that organisation number and year.

    Raises:
        ValueError: if the ``planted`` value of the matching row is not in
            ``labels`` (raised by ``list.index``).
    """
    orgnr = int(orgnr[:9])
    year = int(year)

    # A single combined mask scans the table once per call and avoids chaining
    # two boolean .loc selections whose indexes have to be re-aligned.
    matches = data.loc[(data['orgnr'] == orgnr) & (data['year'] == year), 'planted']
    if matches.empty:
        return None

    # If several rows match, the first one wins.
    one_hot = [0] * len(labels)
    one_hot[labels.index(matches.iloc[0])] = 1
    return {'class': one_hot}


In [None]:
# Seeded split of the satellite image series into two partitions.
train_iterator, val_iterator = satellite_imgs.to_iterator().split(rand_seed='corn')

# Attach the result of add_labels to every sample of each partition.
train = train_iterator.with_data(add_labels, show_progress=True)
val = val_iterator.with_data(add_labels, show_progress=True)

# Index every mask by "<orgnr>/<year>", the key the generators use for lookup.
masks_it = masks.get_iterator()
mask_dict = {f'{orgnr}/{year}': mask for orgnr, year, mask in masks_it}


# Disabled: presumably keeps only the samples that have a mask - confirm before re-enabling.
# train = train.filter(lambda orgnr, year, _,__: f"{orgnr}/{year}" in mask_dict)
# val = val.filter(lambda orgnr, year, _,__: f"{orgnr}/{year}" in mask_dict)

print(f"train samples: {len(train)}")
print(f"val samples: {len(val)}")

# NOTE(review): presumably the (train, val) sample counts from an earlier run - confirm.
#7737, 1937

In [None]:
import random

def _masked_image_generator(samples, first_image=5, last_image=20, image_size=16):
    """Yield one ``(masked_image, one_hot_label)`` pair per selected image.

    Args:
        samples: Iterable of ``(orgnr, year, imgs, label)`` tuples, where
            ``imgs`` is sliceable and ``label`` has a ``'class'`` entry.
        first_image: Start of the slice taken from every ``imgs`` series.
        last_image: End (exclusive) of that slice.
        image_size: Forwarded to ``apply_mask_to_image`` as ``image_size``.

    Raises:
        KeyError: if ``mask_dict`` has no ``"<orgnr>/<year>"`` entry for a sample.
    """
    for orgnr, year, imgs, label in samples:
        # The mask depends only on (orgnr, year), so it is fetched once per
        # sample instead of once per image.
        mask = mask_dict[f'{orgnr}/{year}']
        for img in imgs[first_image:last_image]:
            yield apply_mask_to_image(mask, img, image_size=image_size), label['class']


def train_generator():
    """Yield masked training images with their labels.

    ``train`` is read when the generator is iterated, so every call starts a
    new pass over whatever ``train`` is bound to at that moment.
    """
    yield from _masked_image_generator(train)


def val_generator():
    """Yield masked validation images with their labels.

    ``val`` is read when the generator is iterated, so every call starts a
    new pass over whatever ``val`` is bound to at that moment.
    """
    yield from _masked_image_generator(val)


# def train_generator():
#     for orgnr, year, imgs, label in train:
#         imgs = apply_mask_to_image_series(mask_dict[f'{orgnr}/{year}'], imgs[5:20])
#         yield imgs, label['class']
#
#
# def val_generator():
#     for orgnr, year, imgs, label in val:
#         imgs = apply_mask_to_image_series(mask_dict[f'{orgnr}/{year}'], imgs[5:20])
#         yield imgs, label['class']
#


In [None]:
# Sanity check: print only the first element produced by `val`, then stop.
# Nothing is printed when `val` yields no elements.
for vale in val:
    print(vale)
    break

In [None]:


# Element description shared by both datasets: a float64 tensor of shape
# (16, 16, 12) paired with an int64 tensor declared with shape 3.
generator_spec = dict(
    output_types=(tf.dtypes.float64, tf.dtypes.int64),
    output_shapes=((16, 16, 12), 3),
)

train_dataset = tf.data.Dataset.from_generator(train_generator, **generator_spec)
val_dataset = tf.data.Dataset.from_generator(val_generator, **generator_spec)

# print(f"training samples: {len(train_dataset)}")
# print(f"Validation samples: {len(val_dataset)}")

In [None]:

def cnn(input_dim, output_dim):
    """Build the "SingleImageCNN" functional model.

    The network is three stages of a 3x3 same-padded ReLU convolution followed
    by 2x2 max pooling (16, 32 and 64 filters), then a flatten and a ReLU dense
    layer with ``output_dim`` units.

    Note: ``layers`` and ``models`` are resolved when the function is called;
    in this notebook they are imported in a later cell, which must have run
    before this function is called.

    Args:
        input_dim: Shape passed to ``layers.Input``.
        output_dim: Number of units of the final dense layer.

    Returns:
        The ``models.Model`` wrapping the input layer and the dense output.
    """
    input_layer = layers.Input(shape=input_dim)

    features = input_layer
    for filter_count in (16, 32, 64):
        features = layers.Conv2D(filter_count, (3, 3), activation=tf.nn.relu, padding='same')(features)
        features = layers.MaxPool2D((2, 2))(features)

    flattened = layers.Flatten()(features)
    embedding = layers.Dense(output_dim, activation=tf.nn.relu)(flattened)
    return models.Model(inputs=[input_layer], outputs=[embedding], name="SingleImageCNN")

In [None]:
from tensorflow.python.data import AUTOTUNE
from keras import models
from keras.applications.densenet import layers
from keras.models import load_model
from tensorflow import optimizers
from src.kornmo.pyimagesearch.callbacks.epochcheckpoint import EpochCheckpoint
from src.kornmo.pyimagesearch.callbacks.trainingmonitor import TrainingMonitor

# Output files handed to TrainingMonitor, both inside the "training" directory.
plotPath = os.path.join("training", "hybrid_more_features.png")
jsonPath = os.path.join("training", "hybrid_more_features.json")

# NOTE(review): startAt=0 presumably means "training starts from epoch 0";
# confirm it is still right when resuming from a saved checkpoint.
checkpoint_callback = EpochCheckpoint('./training', every=1, startAt=0)
monitor_callback = TrainingMonitor(plotPath, jsonPath=jsonPath, startAt=0)
callbacks = [checkpoint_callback, monitor_callback]

#
# restart = True
# if restart:
#     cnn_net = cnn((16, 16, 12), 64)
#     input_cnn = layers.Input(shape=(15, 16, 16, 12), name="cnn_input")
#
#     cnn = layers.TimeDistributed(cnn_net)(input_cnn)
#     cnn = layers.GRU(128, return_sequences=False)(cnn)
#     cnn = layers.Flatten()(cnn)
#     cnn = layers.Dense(128)(cnn)
#     cnn = layers.Dense(3, activation='softmax')(cnn)
#
#     cnn = models.Model(inputs=input_cnn, outputs=cnn, name="CNN")
#
#     cnn.compile(
#         optimizer=optimizers.Adam(),
#         loss=tf.keras.losses.CategoricalCrossentropy(),
#         metrics=['categorical_accuracy']
#     )
#     # callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
#
#
#     cnn_history = cnn.fit(
#             train_dataset.take(10000).batch(32).prefetch(2),
#             validation_data=val_dataset.batch(32).prefetch(2),
#             epochs=10,
#             verbose=1,
#             callbacks=callbacks
#     )
# else:
#     cnn_net = load_model('./training/epoch_2.hdf5')
#
#     cnn_history = cnn_net.fit(
#         train_dataset.take(10000).batch(32).prefetch(2),
#         validation_data=val_dataset.batch(32).prefetch(2),
#         epochs=10,
#         verbose=1,
#         callbacks=callbacks
#     )

# True: build and train a new model. False: continue from a saved checkpoint.
restart = True
if restart:
    input_layer = layers.Input(shape=(16, 16, 12))

    # Layers are created in the order in which they are applied.
    layer_stack = [
        layers.Conv2D(16, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.2),
        layers.Conv2D(32, (3, 3), strides=(2,2), activation='relu', padding='same'),
        layers.Flatten(),
        layers.Dense(32, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(3, activation='softmax'),
    ]

    # NOTE(review): rebinding `cnn` here replaces the cnn() builder function
    # defined in an earlier cell.
    cnn = input_layer
    for layer in layer_stack:
        cnn = layer(cnn)

    cnn = models.Model(inputs=[input_layer], outputs=cnn, name="cnn_pure")
    cnn.compile(
        optimizer=optimizers.Adam(),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['categorical_accuracy']
    )

    training_batches = train_dataset.batch(32).prefetch(buffer_size=AUTOTUNE)
    validation_batches = val_dataset.batch(32).prefetch(buffer_size=AUTOTUNE)
    history = cnn.fit(
        training_batches,
        validation_data=validation_batches,
        epochs=100,
        callbacks=callbacks
    )
else:
    # Continue training the model stored in the epoch_4 checkpoint file.
    cnn_net = load_model('./training/epoch_4.hdf5')

    # Only the first 10000 training elements are used on this path; they are
    # taken before batching.
    resumed_training_batches = train_dataset.take(10000).batch(32).prefetch(2)
    resumed_validation_batches = val_dataset.batch(32).prefetch(2)
    cnn_history = cnn_net.fit(
        resumed_training_batches,
        validation_data=resumed_validation_batches,
        epochs=100,
        verbose=1,
        callbacks=callbacks
    )


In [None]:
from sklearn.metrics import classification_report

# Load the epoch_16 checkpoint file and run it over the validation dataset.
# val_dataset yields single (16, 16, 12) images. Keras treats every element of
# a tf.data.Dataset as one batch, so the dataset must be batched before
# predict(), the same way the fit() calls batch it with .batch(32). Without
# batching, each image reaches the model without a leading batch dimension.
cnn_net = load_model('./training/epoch_16.hdf5')
res = cnn_net.predict(val_dataset.batch(32), verbose=1)