# Test ee.Classifiers

In [33]:
import ee
import pandas as pd
from shapely import wkt
from tqdm.auto import tqdm
import numpy as np
import geedim as gd
import geopandas as gpd
import os
import geemap

# Initialize Earth Engine
ee.Initialize()

# Load the CSV file using pandas
csv_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping/inputs-outputs/Sentinel-2_SR_training_data.csv'
df = pd.read_csv(csv_path)
df['geometry'] = df['geometry'].apply(wkt.loads)
df = df.dropna().reset_index(drop=True)
df

ModuleNotFoundError: No module named 'geemap'

In [2]:
# fc = ee.FeatureCollection(df.apply(lambda row: ee.Feature(ee.Geometry.Point([, ]), row.to_dict()), axis=1))
# Create a list of ee.Feature objects from the DataFrame
features = []
for index, row in df.iterrows():
    dict = dict = row.to_dict()
    if 'geometry' in dict.keys(): 
        del dict['geometry']
    point = ee.Geometry.Point(row['geometry'].coords.xy[0][0], row['geometry'].coords.xy[1][0])
    feature = ee.Feature(point, dict)
    features.append(feature)

# Create an ee.FeatureCollection from the list of features
fc = ee.FeatureCollection(features)

# Define bands to use as input features
bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'NDSI']  

In [3]:
# Define classifiers to test
classifiers = [
    ee.Classifier.smileRandomForest(10).train(features=fc, classProperty='Class', inputProperties=bands),
    ee.Classifier.libsvm().train(features=fc, classProperty='Class', inputProperties=bands),
    ee.Classifier.minimumDistance().train(features=fc, classProperty='Class', inputProperties=bands),
    ee.Classifier.smileCart().train(features=fc, classProperty='Class', inputProperties=bands),
    ee.Classifier.smileNaiveBayes().train(features=fc, classProperty='Class', inputProperties=bands),

]

classifier_names = [
    'Random Forest',
    'Support Vector Machine', 
    'Minimum Distance',
    'CART',
    'NaiveBayes'
]

In [4]:
# -----Assess classifier accuracy using K-folds cross-validation
num_folds = 10

# mean_accuracy = ee.List([])
for name, classifier in list(zip(classifier_names, classifiers)):

    print(name)
    
    # Perform K-fold cross-validation
    accuracy_list = ee.List([])
    for fold in tqdm(range(num_folds)):
        
        # Split the data into training and testing datasets
        testing_data = fc.filter(ee.Filter.eq('fold', fold))
        training_data = fc.filter(ee.Filter.neq('fold', fold))
    
        # Train the classifier using the training data
        trained_classifier = classifier.train(training_data, 'Class', bands)
    
        # Assess the accuracy of the classifier using the testing data
        test_accuracy = testing_data.classify(trained_classifier).errorMatrix('Class', 'classification')
        accuracy = ee.Number(test_accuracy.accuracy())
        
        # Append the accuracy to the accuracy list
        accuracy_list = accuracy_list.add(accuracy)

    # Calculate the mean accuracy using a custom reduction function
    print(accuracy_list.getInfo())

    # Print the mean accuracy
    # print("Mean Accuracy:", mean_accuracy.getInfo())
    print(' ')

# -----Identify best classifier
# ibest = np.argwhere(mean_accuracy==np.nanmin(mean_accuracy))
# best_classifier = classifiers[ibest]
# best_classifier_name = classifier_names[ibest]
# print('Best classifier: ', best_classifier_name)

Random Forest


  0%|          | 0/10 [00:00<?, ?it/s]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
 
Support Vector Machine


  0%|          | 0/10 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [6]:
# -----Assess classifier accuracy by setting aside 20% of the data
split = 0.8
for name, classifier in list(zip(classifier_names, classifiers)):

    print(name)
        
    training_data = fc.randomColumn('split').filter(ee.Filter.lt('split', split))
    testing_data = fc.randomColumn('split').filter(ee.Filter.gte('split', split))
    
    # Train the classifier using the training data
    trained_classifier = classifier.train(training_data, 'Class', bands)
    
    # Assess the accuracy of the classifier using the testing data
    test_accuracy = testing_data.classify(trained_classifier).errorMatrix('Class', 'classification')
    
    # Print the overall accuracy
    print("Overall Accuracy:", test_accuracy.accuracy().getInfo())
    print(' ')

Random Forest
Overall Accuracy: 0.9931300987548304
 
Support Vector Machine
Overall Accuracy: 0.9729497638471447
 
Minimum Distance
Overall Accuracy: 0.9132674967797338
 
CART
Overall Accuracy: 0.9948475740661228
 
NaiveBayes
Overall Accuracy: 0.2121082009446114
 


In [11]:
# -----Retrain best classifier with full dataset
classifier_trained = ee.Classifier.smileRandomForest(10).train(features=fc, classProperty='Class', inputProperties=bands)

In [35]:
def query_gee_for_imagery(aoi_utm, dataset, date_start, date_end, cloud_cover_max, mask_clouds):
    
    # -----Reformat AOI for image filtering
    # reproject CRS from AOI to WGS
    aoi_wgs = aoi_utm.to_crs('EPSG:4326')
    # prepare AOI for querying geedim (AOI bounding box)
    region = {'type': 'Polygon',
              'coordinates': [[[aoi_wgs.geometry.bounds.minx[0], aoi_wgs.geometry.bounds.miny[0]],
                               [aoi_wgs.geometry.bounds.maxx[0], aoi_wgs.geometry.bounds.miny[0]],
                               [aoi_wgs.geometry.bounds.maxx[0], aoi_wgs.geometry.bounds.maxy[0]],
                               [aoi_wgs.geometry.bounds.minx[0], aoi_wgs.geometry.bounds.maxy[0]],
                               [aoi_wgs.geometry.bounds.minx[0], aoi_wgs.geometry.bounds.miny[0]]
                               ]]}

    # -----Query GEE for imagery
    print('Querying GEE for ' + dataset + ' imagery...')
    if dataset == 'Landsat':
        # Landsat 8
        im_col_gd_8 = gd.MaskedCollection.from_name('LANDSAT/LC08/C02/T1_L2').search(start_date=date_start,
                                                                                     end_date=date_end,
                                                                                     region=region,
                                                                                     cloudless_portion=100 - cloud_cover_max,
                                                                                     mask=mask_clouds,
                                                                                     fill_portion=70)
        # Landsat 9
        im_col_gd_9 = gd.MaskedCollection.from_name('LANDSAT/LC09/C02/T1_L2').search(start_date=date_start,
                                                                                     end_date=date_end,
                                                                                     region=region,
                                                                                     cloudless_portion=100 - cloud_cover_max,
                                                                                     mask=mask_clouds,
                                                                                     fill_portion=70)
    elif dataset == 'Sentinel-2_TOA':
        im_col_gd = gd.MaskedCollection.from_name('COPERNICUS/S2_HARMONIZED').search(start_date=date_start,
                                                                                     end_date=date_end,
                                                                                     region=region,
                                                                                     cloudless_portion=100 - cloud_cover_max,
                                                                                     mask=mask_clouds,
                                                                                     fill_portion=70)
    elif dataset == 'Sentinel-2_SR':
        im_col_gd = gd.MaskedCollection.from_name('COPERNICUS/S2_SR_HARMONIZED').search(start_date=date_start,
                                                                                        end_date=date_end,
                                                                                        region=region,
                                                                                        cloudless_portion=100 - cloud_cover_max,
                                                                                        mask=mask_clouds,
                                                                                        fill_portion=70)
    else:
        print("'dataset' variable not recognized. Please set to 'Landsat', 'Sentinel-2_TOA', or 'Sentinel-2_SR'. "
              "Exiting...")
        return 'N/A'

    im_col_ee = im_col_gd.ee_collection

    # Add NDSI band
    def add_ndsi_band(image):
        ndsi = image.expression(
            '(green - swir) / (green + swir)', 
            {
                'green': image.select('B3'),  
                'swir': image.select('B11')   
            })
        return image.addBands(ndsi.rename('NDSI'))
    im_col_ee = im_col_ee.map(add_ndsi_band)

    return im_col_ee, region

In [50]:
# -----Query GEE for Sentinel-2 SR images over Wolverine
# load AOI
AOI_path = ('/Users/raineyaberle/Google Drive/My Drive/Research/CryoGARS-Glaciology/Advising/student-research/Alexandra-Friel/snow_cover_mapping_application/study-sites/Wolverine/AOIs/')
AOI_fn = 'Wolverine_RGI_outline.shp'
AOI = gpd.read_file(os.path.join(AOI_path, AOI_fn))
# query gee for imagery
date_start = '2021-09-01'
date_end = '2021-10-01'
dataset = "Sentinel-2_SR"
cloud_cover_max = 100
mask_clouds = True
image_collection, region = query_gee_for_imagery(AOI, dataset, date_start, date_end, cloud_cover_max, mask_clouds)
print('Images found = ', image_collection.size().getInfo())

# -----Classify images
def classify_image(image):
    # Classify the image using the trained classifier
    classified = image.classify(classifier_trained)
    # Return the image with classification results as a band
    return image.addBands(classified.rename('classification'))

# Apply the classifier to the entire image collection
print('Classifying images...')
classified_collection = image_collection.map(classify_image)
print('Done')
# Print the classified image collection
# print("Classified Image Collection:", classified_collection.getInfo())

Querying GEE for Sentinel-2_SR imagery...
Images found =  6
Classifying images...
Done


In [52]:
from IPython.display import Image

# Create a URL to the styled image for a region around France.
url = classified_collection.first().select('classification').getThumbUrl({
    'min': 1, 
    'max': 5, 
    'region': region,
    # 'palette': ['cyan', 'cyan', 'blue', 'orange', 'gray']  
})

# Display the thumbnail land surface temperature in France.
print('\nPlease wait while the thumbnail loads, it may take a moment...')
Image(url=url, width=300, height=500)


Please wait while the thumbnail loads, it may take a moment...
