In [1]:
# Label CSV filenames, grouped by split.
# The "*_sample" files are the defaults used below; the "*_full" names point at
# the v1 label files that list all chips.
label_file_path_train = 'DRC_labels_SAB_train_sample.csv'
label_file_path_train_full = 'DRC_labels_SAB_train_v1.csv'

label_file_path_val = 'DRC_labels_SAB_val_sample.csv'
label_file_path_val_full = 'DRC_labels_SAB_val_v1.csv'

### Download training files from S3

In [None]:
import boto3
import pandas as pd
import os

In [None]:
# Where the chips live on S3: bucket plus the key prefix shared by every chip.
bucket_name = 'canopy-production-ml'
base_key = 'chips/model2_s2cloudless/training_v2/null/'

# Local folder the chips are downloaded into; change to whichever folder you want.
training_dir = './training_chips/'

In [None]:
# Read the train and validation label tables.
# To download all chips instead of the sample subset, swap in
# label_file_path_train_full / label_file_path_val_full here.
train_labels, val_labels = (
    pd.read_csv(csv_path)
    for csv_path in (label_file_path_train, label_file_path_val)
)

train_labels.head()

In [None]:
def download_training_chip(s3, bucket_name, base_key, path, dest_dir):
    """
    Downloads a single training chip from s3 to the local disk.

    The chip is saved under dest_dir at the same relative path it has in the
    label file, e.g. path '12/chip_3.tif' ends up at '<dest_dir>/12/chip_3.tif'.

    s3: s3 client (any object exposing download_file(bucket, key, filename))
    bucket_name: Name of s3 bucket
    base_key: The s3 key prefix shared by all training chips
    path: Path as listed in the label file ('/'-separated, relative to base_key)
    dest_dir: Local root folder to download the chip into; created if missing

    Returns the local file path the chip was written to.
    """
    # Mirror the label-file path under dest_dir. os.path.join makes this work
    # whether or not dest_dir ends with a path separator.
    local_path = os.path.join(dest_dir, *path.split('/'))

    # makedirs (rather than mkdir) also creates dest_dir itself and any nested
    # subfolders; exist_ok avoids failing on folders created by earlier chips.
    parent_dir = os.path.dirname(local_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # S3 keys always use '/', so the key is built by plain concatenation.
    s3_key = base_key + path

    # download_file needs the target *file* name, not the folder to put it in.
    s3.download_file(bucket_name, s3_key, local_path)
    return local_path

In [None]:
s3 = boto3.client('s3')

# Fetch every chip listed in the training labels first, then every chip in the
# validation labels; both sets go into the same local folder.
for labels in (train_labels, val_labels):
    for path in labels['paths']:
        download_training_chip(s3, bucket_name, base_key, path, training_dir)

### Load data

In [2]:
from dataloader import DataLoader



In [3]:
# Band numbers 1 through 12; change to whichever bands you want to use.
bands = list(range(1, 13))

In [4]:
# Folder holding the training images; change to wherever you stored them.
# NOTE: this reassigns training_dir, replacing the download folder set earlier.
training_dir = 'D:/canopy_data/s2cloudless_new_model'

# Build the loader from the label files and the selected bands.
gen = DataLoader(
    bands=bands,
    label_file_path_train=label_file_path_train,
    label_file_path_val=label_file_path_val,
    training_dir=training_dir,
)

label_file_path_train: DRC_labels_SAB_train_sample.csv
labels_file_val: DRC_labels_SAB_val_sample.csv
No data augmentation. Please set augment to True if you want to augment training dataset
Training on 1797 images
Validation on 180 images 


### Train model

In [5]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow_addons.metrics import F1Score

In [6]:
# Import basic model code; feel free to modify extensively
from sample_model import define_model

In [7]:
numclasses = 2

# Height x width x channels, with one channel per selected band.
input_shape = (100, 100, len(bands))

model = define_model(numclasses, input_shape)

(None, 100, 100, 12)
(None, 100, 100, 3)
(None, 2)


In [8]:
model_loss = CategoricalCrossentropy()

# Metrics reported for each epoch; swap in different ones if you want.
# The class_id=0 variants are reported separately under the "SAB_" names.
metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Precision(class_id=0, name='SAB_precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.Recall(class_id=0, name='SAB_recall'),
    F1Score(num_classes=numclasses, name="f1_score"),
]

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=model_loss,
    metrics=metrics,
)

In [9]:
# Number of passes over the training dataset; 3 is only a starting point.
epochs = 3

# Train on the loader's training dataset, evaluating on its validation dataset.
history = model.fit(
    gen.training_dataset,
    epochs=epochs,
    validation_data=gen.validation_dataset,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3
