In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to the imported
# project modules are picked up before each cell executes, without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Setup 

from Helpers import *
from DataCollection import *
import pandas as pd
from ModelBuilding import *
from tensorflow import feature_column

# MLHub credentials and run-size configuration.
# If you need to run this yourself, export RADIANT_API_KEY (or MLHUB_API_KEY directly)
# before starting the kernel, or manually set os.environ['MLHUB_API_KEY'] here.
# Never commit a real key to the notebook.
import os
import warnings

if 'RADIANT_API_KEY' in os.environ:
    # Mirror the project-specific variable into the name MLHub is configured with.
    os.environ['MLHUB_API_KEY'] = os.environ['RADIANT_API_KEY']
elif 'MLHUB_API_KEY' not in os.environ:
    # Neither variable is available: say so clearly instead of dying with a bare KeyError.
    warnings.warn(
        "No MLHub API key found: set RADIANT_API_KEY or MLHUB_API_KEY; "
        "MLHub requests are expected to fail until a key is configured."
    )
# else: MLHUB_API_KEY was already exported, so it is left untouched.

# Number of tiles to download for the test set.
NUM_TEST_TILES = 1

In [None]:
# Look up the label collection and print its catalogue metadata.
collection_id = 'ref_landcovernet_v1_labels'
collection = client.get_collection(collection_id)

# (printed heading, metadata key) pairs, in display order.
for heading, field in (
    ('Description', 'description'),
    ('License', 'license'),
    ('DOI', 'sci:doi'),
    ('Citation', 'sci:citation'),
):
    print(f'{heading}: {collection[field]}')

In [None]:
# Fetch a single item from the collection and list the classes it declares.
# Every class name encountered is accumulated in all_classes for later cells.
items = client.list_collection_items(collection_id, limit=1)

first_item = next(items, None)
if first_item is None:
    # An empty result would otherwise surface as a bare StopIteration.
    raise RuntimeError("Collection {!r} returned no items".format(collection_id))
all_classes = []

label_classes = first_item['properties']['label:classes']
for label_class in label_classes:
    print("Classes for {}".format(label_class["name"]))
    for c in sorted(label_class['classes']):
        # One indented bullet per class name.
        print(" - {}".format(c))
        all_classes.append(c)

In [None]:
# Collect training data
# One get_items query per class name (max_items=1 each), so every class is requested.
trainItems = [
    get_items(collection_id, classes=[class_name], max_items=1)
    for class_name in all_classes
]

trainIds = []
for class_items in trainItems:
    for entry in class_items:
        entry_id = entry['id']
        if entry_id in trainIds:
            # The same item was already fetched through another class query.
            continue
        download_labels_and_source(
            entry,
            assets=['labels','B02','B03','B04','B08','source_dates','CLD','SCL'],
            output_dir='./data/train',
        )
        trainIds.append(entry_id)

print("Downloaded {} items".format(len(trainIds)))

In [None]:
from Helpers import FindTilesAndChips

# Tally the class-label counts over every (tile, chip) found under data/train
# and every date listed for that chip.
tcs = FindTilesAndChips('data/train')
allCounts = dict.fromkeys(ClassIntMap.values(), 0)  # every known class starts at zero

for tile, chip in tcs:
    parser = DateCSVParser(tile, chip, prefix='data/train')
    for _idx, date in parser.GetDates():
        label_counts = CountClassLabels(tile, chip, date, prefix='data/train')
        for label, num in label_counts:
            allCounts[label] += num

print(allCounts)

In [None]:
# Retrieve test items. Limit these to things NOT in the training set
testItems = get_items(collection_id, classes=None, max_items=None)

testIds = []
for candidate in testItems:
    candidate_id = candidate['id']
    already_used = candidate_id in testIds or candidate_id in trainIds
    if not already_used:
        # Record the id first, then fetch its labels and source imagery.
        testIds.append(candidate_id)
        download_labels_and_source(
            candidate,
            assets=['labels','B02','B03','B04','B08','source_dates','CLD','SCL'],
            output_dir='./data/test',
        )
    # Checked after every item: stop once enough test tiles are recorded.
    if len(testIds) >= NUM_TEST_TILES:
        break

In [None]:
# Create the datasets for testing and training
dfc = DFCreator()

# Training split: build the frame, then wrap it as a TF dataset labelled by "classification".
tcs = FindTilesAndChips('data/train')
trainFrame = dfc.GenDF(tcs, prefix='data/train')
trainSet = tfdf.keras.pd_dataframe_to_tf_dataset(trainFrame, label="classification")

# Test split: same two steps against data/test.
testFrame = dfc.GenDF(FindTilesAndChips('data/test'), 'data/test')
testSet = tfdf.keras.pd_dataframe_to_tf_dataset(testFrame, label="classification")

In [None]:
# Create the model and fit to training set
model = tfdf.keras.RandomForestModel()

# Track accuracy so the later evaluate() call reports it.
model.compile(metrics=['accuracy'])

# The training dataset is the first (x) argument of fit().
model.fit(trainSet)

In [None]:
# Evaluate the model
evaluation = model.evaluate(testSet, return_dict=True)

# Print each metric from the returned dict on its own line.
for metric_name in evaluation:
    print("{}: {}".format(metric_name, evaluation[metric_name]))