# Covermap Comparison

**Author:** Adam Yang (ayang115@umd.edu)

**Description:** Compares datasets against test sets

**Crop Maps Used:** Harvest Togo & Kenya, Copernicus Land Cover, ESA CCI Land Cover Africa, GLAD Global Cropland Extent

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import os
import geemap
from pathlib import Path
from sklearn.metrics import classification_report 
from shapely.geometry import box

In [2]:
import ee

ee.Authenticate()

ee.Initialize()


Successfully saved authorization token.


## **Section 0** - Setup

**Functions**

In [3]:
# Remaps classes to crop/noncrop 
def map_values(val, value_for_crop):
    if val == value_for_crop:
        return 1
    else:
        return 0

In [4]:
# Function used in map function to extract from feature collection
def raster_extraction(image, resolution, f_collection):
    feature = image.sampleRegions(
        collection = f_collection,
        scale = resolution
    )
    return feature

In [5]:
# Convert sklearn classification report dict to 
def report_to_row(dataset, report, df):
    new_report = pd.DataFrame(data = {
        "dataset": dataset, 
        "accuracy": report["accuracy"], 
        "crop_f1": report["1"]["f1-score"], 
        "crop_support": report["1"]["support"], 
        "noncrop_support": report["0"]["support"], 
        "crop_precision": report["1"]["precision"], 
        "crop_recall": report["1"]["recall"], 
        "noncrop_precision": report["0"]["precision"], 
        "noncrop_recall": report["0"]["recall"]
        }, index=[0])
    
    return pd.concat([df, new_report])

In [6]:
# Creates ee.Feature from longitude and latitude coordinates from a dataframe
def create_point(row):
    geom = ee.Geometry.Point(row["lon"], row["lat"])
    prop = dict(row)

    return ee.Feature(geom, prop)

**Retrieve Test Data**

In [7]:
TEST_COUNTRIES = ["Kenya", "Togo", "Tanzania_CEO_2019"]
DATA_PATH = "../data/datasets/"

In [8]:
datasets_path = Path(DATA_PATH).glob("*")

target_paths = [p for p in datasets_path if p.stem in TEST_COUNTRIES]

In [9]:
test_data = pd.DataFrame(columns=["lat", "lon", "test_class", "ee_pts", "country"])

test_set = []
for p in target_paths:
    # Set dict key name
    key = p.stem

    # Read in data and extract test values and points 
    df = pd.read_csv(p)
    df = df.loc[df["subset"] == "testing"]
    df = df[["lat", "lon", "class_probability"]]

    # Create earth engine geometry points
    df["ee_pts"] = df.apply(create_point, axis=1)

    # Recast points as 1 or 0 (threshold = 0.5)
    df["test_class"] = df["class_probability"].apply(lambda x: 1 if x > 0.5 else 0)

    df["country"] = key

    test_set.append(df)

test_data = pd.concat(test_set)

In [10]:
test_data.reset_index(inplace=True)
test_data.drop("index", axis=1, inplace=True)

In [11]:
test_data.head()

Unnamed: 0,lat,lon,class_probability,ee_pts,test_class,country
0,0.725122,34.386282,0.0,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya
1,0.621939,34.466496,0.0,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya
2,0.459661,34.090158,0.0,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya
3,0.226497,34.054859,0.0,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya
4,0.096834,33.951959,0.0,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya


**Create dataframe for results**

In [12]:
results = {}

for p in target_paths:
    key = p.stem
    results[key] = pd.DataFrame(columns=["dataset", "accuracy", "crop_f1", "crop_support", "noncrop_support", 
        "crop_precision", "crop_recall", "noncrop_precision", "noncrop_recall"])


In [13]:
test_coll = ee.FeatureCollection(test_data["ee_pts"].tolist())

## **Section 1** - Harvest Data

**Harvest Togo**

In [14]:
harvest_togo = ee.Image("projects/sat-io/open-datasets/nasa-harvest/togo_cropland_binary").sampleRegions(collection=test_coll)
h_togo_sampled = geemap.ee_to_gdf(harvest_togo)

In [15]:
test_data["harvest_togo"] = pd.merge(test_data, h_togo_sampled, on=["lat", "lon"], how="left")["b1"]

**Harvest Kenya**

In [16]:
harvest_kenya = ee.Image("projects/sat-io/open-datasets/nasa-harvest/kenya_cropland_binary").sampleRegions(collection=test_coll)
h_kenya_sampled = geemap.ee_to_gdf(harvest_kenya)

In [17]:
test_data["harvest_kenya"] = pd.merge(test_data, h_kenya_sampled, on=["lat", "lon"], how="left")["b1"]

**Harvest Tanzania**

In [18]:
harvest_tanzania = ee.Image("users/adadebay/Tanzania_cropland_2019").sampleRegions(collection=test_coll)
h_tanzania_sampled = geemap.ee_to_gdf(harvest_tanzania)
h_tanzania_sampled["b1"] = h_tanzania_sampled["b1"].apply(lambda x: 1 if x>0.5 else 0) 

In [19]:
test_data["harvest_tanzania"] = pd.merge(test_data, h_tanzania_sampled, on=["lat", "lon"], how="left")["b1"]

***TESTING ZONE***

In [20]:
#Country bboxes (xmin, xmax, ymin, ymax)

country_bbox = {
    "Kenya": (33.8935689697, 41.8550830926, -4.67677, 5.506),
    "Togo": (5.926547, 11.1395102, -0.1439746,	1.8087605),
    "Tanzania": (29.3399975929, -11.7209380022, 40.31659, -0.95),
}

In [21]:
tan = h_tanzania_sampled.reset_index()
ken = h_kenya_sampled.reset_index()

In [22]:
ken = h_kenya_sampled.clip((-5.083, 24.283, 4.884, 41.878))

  np.nanmin(b[:, 0]),  # minx
  np.nanmin(b[:, 1]),  # miny
  np.nanmax(b[:, 2]),  # maxx
  np.nanmax(b[:, 3]),  # maxy


In [23]:
ken

Unnamed: 0,geometry,b1,class_probability,lat,lon


## **Section 2** - Copernicus, ESA, GLAD

In [24]:
# Create earth engine points for section 2
test_coll = ee.FeatureCollection(test_data["ee_pts"].tolist())

**Copernicus Land Cover** ([Earth Engine](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_Landcover_100m_Proba-V-C3_Global#description))

In [25]:
# Load copernicus data
copernicus = ee.ImageCollection("COPERNICUS/Landcover/100m/Proba-V-C3/Global")
cop_results = copernicus.select("discrete_classification").filterDate("2019-01-01", "2020-01-01").map(lambda x: raster_extraction(x, 100, test_coll)).flatten()

In [26]:
cop_sampled = geemap.ee_to_gdf(cop_results)
cop_sampled["cop_class"] = cop_sampled["discrete_classification"].apply(lambda x: map_values(x, 40))
cop_sampled.head()

Unnamed: 0,geometry,class_probability,discrete_classification,lat,lon,cop_class
0,,0.0,126,0.725122,34.386282,0
1,,0.0,122,0.621939,34.466496,0
2,,0.0,50,0.459661,34.090158,0
3,,0.0,40,0.226497,34.054859,1
4,,0.0,20,0.096834,33.951959,0


In [27]:
test_data["cop"] = pd.merge(test_data, cop_sampled, on=["lat", "lon"], how="left")["cop_class"]

**ESA World Cover** ([Earth Engine](https://developers.google.com/earth-engine/datasets/catalog/ESA_WorldCover_v100)) 

In [28]:
esa = ee.ImageCollection("ESA/WorldCover/v100")
esa_results = esa.filterBounds(test_coll).map(lambda x: raster_extraction(x, 10, test_coll)).flatten()

In [29]:
esa_sampled = geemap.ee_to_gdf(esa_results)
esa_sampled["esa_class"] = esa_sampled["Map"].apply(lambda x: map_values(x, 40))
esa_sampled.head()

Unnamed: 0,geometry,Map,class_probability,lat,lon,esa_class
0,,20,0.0,0.725122,34.386282,0
1,,20,0.0,0.621939,34.466496,0
2,,50,0.0,0.459661,34.090158,0
3,,30,0.0,0.226497,34.054859,0
4,,30,0.0,0.096834,33.951959,0


In [30]:
test_data["esa"] = pd.merge(test_data, esa_sampled, on=["lat", "lon"], how="left")["esa_class"]

**GLAD Global** ([Earth Engine](https://glad.earthengine.app/view/global-cropland-dynamics))

In [31]:
glad = ee.ImageCollection("users/potapovpeter/Global_cropland_2019")
glad_results = glad.filterBounds(test_coll).map(lambda x: raster_extraction(x, 30, test_coll)).flatten()

In [32]:
glad_sampled = geemap.ee_to_gdf(glad_results)
glad_sampled.head()

Unnamed: 0,geometry,b1,class_probability,lat,lon
0,,1,0.0,0.725122,34.386282
1,,0,0.0,0.621939,34.466496
2,,0,0.0,0.459661,34.090158
3,,0,0.0,0.226497,34.054859
4,,0,0.0,0.096834,33.951959


In [33]:
test_data['glad'] = pd.merge(test_data, glad_sampled, on=["lat", "lon"], how="left")["b1"]

In [34]:
test_data

Unnamed: 0,lat,lon,class_probability,ee_pts,test_class,country,harvest_togo,harvest_kenya,harvest_tanzania,cop,esa,glad
0,0.725122,34.386282,0.00,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya,,1.0,,0,0.0,1
1,0.621939,34.466496,0.00,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya,,1.0,,0,0.0,0
2,0.459661,34.090158,0.00,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya,,,,0,0.0,0
3,0.226497,34.054859,0.00,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya,,0.0,,1,0.0,0
4,0.096834,33.951959,0.00,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Kenya,,0.0,,0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
3889,6.472810,1.502436,0.75,"ee.Feature({\n ""functionInvocationValue"": {\n...",1,Togo,1.0,,,1,0.0,0
3890,11.037647,0.048311,1.00,"ee.Feature({\n ""functionInvocationValue"": {\n...",1,Togo,1.0,,,1,1.0,1
3891,9.011538,1.493877,0.00,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Togo,0.0,,,1,0.0,0
3892,9.138645,1.391047,0.00,"ee.Feature({\n ""functionInvocationValue"": {\n...",0,Togo,0.0,,,0,0.0,0


## **Evaluation Results**

**Compute results**

In [35]:
for country, df in test_data.groupby("country"):
    for dataset in ["cop", "esa", "glad", "harvest_togo", "harvest_kenya", "harvest_tanzania"]:
        # If country is non-empty
        if not pd.isnull(df[dataset]).all() or not np.isnan(np.unique(df[dataset])[1]):
            print(country + ": " + dataset)
            # Remove na values
            temp = df[["test_class", dataset]].dropna()
            if len(temp) > 10:
                report = classification_report(temp["test_class"], temp[dataset], output_dict= True)     

            results[country] = report_to_row(dataset, report, results[country])

Kenya: cop
Kenya: esa
Kenya: glad
Kenya: harvest_kenya
Kenya: harvest_tanzania
Tanzania_CEO_2019: cop
Tanzania_CEO_2019: esa
Tanzania_CEO_2019: glad
Tanzania_CEO_2019: harvest_kenya
Tanzania_CEO_2019: harvest_tanzania
Togo: cop
Togo: esa
Togo: glad
Togo: harvest_togo


**Kenya**

In [36]:
results["Kenya"]

Unnamed: 0,dataset,accuracy,crop_f1,crop_support,noncrop_support,crop_precision,crop_recall,noncrop_precision,noncrop_recall
0,cop,0.913148,0.937824,571,258,0.925043,0.950963,0.884298,0.829457
0,esa,0.44994,0.34104,571,258,0.975207,0.206655,0.360169,0.988372
0,glad,0.849216,0.885636,571,258,0.927203,0.847636,0.716612,0.852713
0,harvest_kenya,0.952722,0.968054,520,178,0.974659,0.961538,0.891892,0.926966
0,harvest_tanzania,0.952722,0.968054,520,178,0.974659,0.961538,0.891892,0.926966


**Togo**

In [37]:
results["Togo"]

Unnamed: 0,dataset,accuracy,crop_f1,crop_support,noncrop_support,crop_precision,crop_recall,noncrop_precision,noncrop_recall
0,cop,0.732394,0.541063,107,248,0.56,0.523364,0.8,0.822581
0,esa,0.794366,0.613757,107,248,0.707317,0.542056,0.820513,0.903226
0,glad,0.783099,0.596859,107,248,0.678571,0.53271,0.815498,0.891129
0,harvest_togo,0.746479,0.656489,107,248,0.554839,0.803738,0.895,0.721774


**Tanzania**

In [38]:
results["Tanzania_CEO_2019"]

Unnamed: 0,dataset,accuracy,crop_f1,crop_support,noncrop_support,crop_precision,crop_recall,noncrop_precision,noncrop_recall
0,cop,0.811439,0.630513,629,2081,0.578249,0.693164,0.901329,0.847189
0,esa,0.84003,0.66561,629,2009,0.663507,0.667727,0.895761,0.893977
0,glad,0.8369,0.6464,629,2081,0.650564,0.642289,0.892293,0.895723
0,harvest_kenya,0.791277,0.562092,104,217,0.877551,0.413462,0.775735,0.97235
0,harvest_tanzania,0.784224,0.643703,431,1103,0.600402,0.693735,0.872587,0.819583
