In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import rasterio as rio
from shapely.geometry import Polygon
from mpl_toolkits.basemap import Basemap  
import matplotlib.pyplot as plt
from shapely.ops import unary_union
import os
import glob
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import geopandas as gpd
import pandas as pd
import glob

def calculate_IoU(both, total1, total2):
    """Return the intersection-over-union of two sets given their sizes.

    Parameters
    ----------
    both : number
        Size of the intersection (items present in both sets).
    total1, total2 : number
        Sizes of the first and second set, each including the shared items.

    Returns
    -------
    number
        ``both / (total1 + total2 - both)``, or 0 when that union is zero,
        mirroring how ``calculate_F1`` in this notebook handles empty inputs.
    """
    union = total1 + total2 - both
    try:
        return both / union
    except ZeroDivisionError:
        # Both sets are empty: report 0 instead of aborting the cell.
        return 0

# Load the label layers that the comparison cells below read as module-level globals.
# NOTE(review): the paths are relative and use Windows-style backslashes; they
# presumably only resolve when the notebook runs from the project root on Windows - confirm.
all_DEMs =  gpd.read_file('Plotting\\labels\\all_DEM_labels.geojson')
all_OP =  gpd.read_file('Plotting\\labels\\all_OP_labels.geojson')
common_DEMs = gpd.read_file('Plotting\\labels\\common_DEM_labels.geojson')
# NOTE(review): `common_OT` is read from 'all_OT_labels.geojson', whereas `common_DEMs`
# comes from a 'common_' file - confirm this is the intended source.
common_OT = gpd.read_file('Plotting\\labels\\all_OT_labels.geojson')

_________________
F1: DEM and OT in overlapping area

In [2]:
import pandas as pd
import geopandas as gpd
import glob

# Define a function to calculate the F1 score
def calculate_F1(num_both, num_total_ot, num_total_dem):
    """Compute the F1 score from a shared count and two totals.

    Precision is taken as ``num_both / num_total_ot`` and recall as
    ``num_both / num_total_dem``; the result is their harmonic mean.

    Returns 0 when either total is zero or when precision and recall
    sum to zero, so the function never divides by zero.
    """
    # An empty label set leaves precision or recall undefined.
    if not (num_total_ot != 0 and num_total_dem != 0):
        return 0

    ot_rate = num_both / num_total_ot
    dem_rate = num_both / num_total_dem
    rate_sum = ot_rate + dem_rate

    # No overlap at all: the harmonic mean would be 0/0.
    return 0 if rate_sum == 0 else 2 * ot_rate * dem_rate / rate_sum

regions = ['herschel', 'peel', 'gydan', 'kolguev']

# Per-region counts, kept so the overall F1 is computed from pooled totals
# rather than by averaging the per-region scores.
overall_num_both_OT_DEM = []
overall_num_total_OT = []
overall_num_total_DEM = []

# One dict per table row. The DataFrame is built once after the loop instead of
# being grown with pd.concat from an empty frame, which is deprecated in pandas
# and re-copies the table on every iteration.
result_rows = []

# The label layers are the same for every region; the spatial joins below
# restrict them to the region being processed.
DEMs = common_DEMs
OT = common_OT

for region in regions:
    # Read all footprints of the region into a single GeoDataFrame.
    # NOTE(review): these paths use Windows separators, and the directory is
    # spelled 'Footprints' here but 'footprints' for the study area - this only
    # works on a case-insensitive file system; confirm the real directory name.
    footprint_files = glob.glob(f'Results\\{region}\\Footprints\\footprint_{region}_*.geojson')
    if not footprint_files:
        # Fail with a clear message instead of a confusing error from the overlay below.
        raise FileNotFoundError(f'No footprint files found for region {region!r}')
    region_footprints = pd.concat(
        [gpd.read_file(file) for file in footprint_files],
        ignore_index=True,
    )

    # Common labelling area: the part of the study area covered by footprints.
    study_area = gpd.read_file(f'Results\\{region}\\footprints\\{region}_area.geojson')
    common_labelling_area = gpd.overlay(region_footprints, study_area, how='intersection')

    # Keep labels that touch the common area, then dissolve and explode so that
    # duplicated or overlapping labels collapse into one row per connected polygon.
    # 'index_right' is dropped because the next sjoin creates a column of that name.
    DEM_common_lab = (
        gpd.sjoin(DEMs, common_labelling_area, how="inner", predicate='intersects')
        .dissolve()
        .explode(index_parts=True)
        .drop(columns=['index_right'])
    )
    OT_common_lab = (
        gpd.sjoin(OT, common_labelling_area, how="inner", predicate='intersects')
        .dissolve()
        .explode(index_parts=True)
        .drop(columns=['index_right'])
    )

    # OT polygons that intersect at least one DEM polygon (one row per OT geometry).
    both_OT_DEM = gpd.sjoin(
        OT_common_lab, DEM_common_lab, how="inner", predicate='intersects'
    ).drop_duplicates(subset=['geometry'])

    num_total_OT = len(OT_common_lab)
    num_total_DEM = len(DEM_common_lab)
    num_both_OT_DEM = len(both_OT_DEM)
    # NOTE(review): `num_both_OT_DEM` counts OT-side polygons, yet it is also
    # subtracted from the DEM total. If one DEM polygon intersects several OT
    # polygons (or vice versa) 'Only DEM' is off and can even go negative -
    # confirm matches are one-to-one or count DEM-side matches separately.
    num_DEM_only = num_total_DEM - num_both_OT_DEM
    num_OT_only = num_total_OT - num_both_OT_DEM
    F1 = calculate_F1(num_both_OT_DEM, num_total_OT, num_total_DEM)

    result_rows.append({
        'Test Site': region,
        'F1 Score': F1,
        'Only DEM': num_DEM_only,
        'Only OT': num_OT_only,
        'Both': num_both_OT_DEM
    })

    overall_num_both_OT_DEM.append(num_both_OT_DEM)
    overall_num_total_OT.append(num_total_OT)
    overall_num_total_DEM.append(num_total_DEM)

# Overall F1 score from the pooled counts of all regions
overall_both = sum(overall_num_both_OT_DEM)
overall_total_OT = sum(overall_num_total_OT)
overall_total_DEM = sum(overall_num_total_DEM)
overall_F1 = calculate_F1(overall_both, overall_total_OT, overall_total_DEM)

# The 'Overall' counts equal the column sums of the per-region rows.
result_rows.append({
    'Test Site': 'Overall',
    'F1 Score': overall_F1,
    'Only DEM': overall_total_DEM - overall_both,
    'Only OT': overall_total_OT - overall_both,
    'Both': overall_both
})

results_df = pd.DataFrame(
    result_rows,
    columns=['Test Site', 'F1 Score', 'Only DEM', 'Only OT', 'Both'],
).set_index('Test Site')

# Display the DataFrame
print('Different RTS labeled by OT and DEM in Overlapping Areas:')
print(results_df)


Different RTS labeled by OT and DEM in Overlapping Areas:
           F1 Score Only DEM Only OT Both
Test Site                                
herschel   0.285714       17      33   10
peel       0.700000       16      14   35
gydan      0.260870       20      14    6
kolguev    0.541667       18       4   13
Overall    0.484848       71      65   64


_____________
F1: DEM and OP in study area

In [3]:
import pandas as pd
import geopandas as gpd

# Define a function to calculate the F1 score
def calculate_F1(num_both, num_total_op, num_total_dem):
    """Compute the F1 score from a shared count and the OP and DEM totals.

    Precision is ``num_both / num_total_op``, recall is
    ``num_both / num_total_dem``, and the result is their harmonic mean.
    Returns 0 if either total is zero or if precision + recall is zero.

    Note: this redefines (and therefore shadows) the ``calculate_F1`` from the
    previous cell; only the name of the second parameter differs.
    """
    # With no labels on one side, precision or recall cannot be computed.
    for total in (num_total_op, num_total_dem):
        if total == 0:
            return 0

    share_of_op = num_both / num_total_op
    share_of_dem = num_both / num_total_dem

    if share_of_op + share_of_dem != 0:
        return 2 * share_of_op * share_of_dem / (share_of_op + share_of_dem)
    # Nothing matched: avoid 0/0 in the harmonic mean.
    return 0

regions = ['herschel', 'peel', 'gydan', 'kolguev']

# Per-region counts, kept so the overall F1 is computed from pooled totals
# rather than by averaging the per-region scores.
overall_num_both_OP_DEM = []
overall_num_total_OP = []
overall_num_total_DEM = []

# One dict per table row. The DataFrame is built once after the loop instead of
# being grown with pd.concat from an empty frame, which is deprecated in pandas
# and re-copies the table on every iteration.
result_rows = []

# all_DEMs and all_OP must already be loaded by an earlier cell. They are the
# same for every region; the spatial joins below restrict them per region.
DEMs = all_DEMs
OP = all_OP

for region in regions:
    # Labels are compared inside the whole study area of the region
    # (footprints are not used in this cell).
    # NOTE(review): the path uses Windows separators - confirm the notebook is
    # only meant to run on Windows from the project root.
    study_area = gpd.read_file(f'Results\\{region}\\footprints\\{region}_area.geojson')

    # Keep labels that touch the study area, then dissolve and explode so that
    # duplicated or overlapping labels collapse into one row per connected polygon.
    # 'index_right' is dropped because the next sjoin creates a column of that name.
    DEM_common_lab = (
        gpd.sjoin(DEMs, study_area, how="inner", predicate='intersects')
        .dissolve()
        .explode(index_parts=True)
        .drop(columns=['index_right'])
    )
    OP_common_lab = (
        gpd.sjoin(OP, study_area, how="inner", predicate='intersects')
        .dissolve()
        .explode(index_parts=True)
        .drop(columns=['index_right'])
    )

    # OP polygons that intersect at least one DEM polygon (one row per OP geometry).
    both_OP_DEM = gpd.sjoin(
        OP_common_lab, DEM_common_lab, how="inner", predicate='intersects'
    ).drop_duplicates(subset=['geometry'])

    num_total_OP = len(OP_common_lab)
    num_total_DEM = len(DEM_common_lab)
    num_both_OP_DEM = len(both_OP_DEM)
    # NOTE(review): `num_both_OP_DEM` counts OP-side polygons, yet it is also
    # subtracted from the DEM total. If one DEM polygon intersects several OP
    # polygons (or vice versa) 'Only DEM' is off and can even go negative -
    # confirm matches are one-to-one or count DEM-side matches separately.
    num_DEM_only = num_total_DEM - num_both_OP_DEM
    num_OP_only = num_total_OP - num_both_OP_DEM
    F1 = calculate_F1(num_both_OP_DEM, num_total_OP, num_total_DEM)

    result_rows.append({
        'Test Site': region,
        'F1 Score': F1,
        'Only DEM': num_DEM_only,
        'Only OP': num_OP_only,
        'Both': num_both_OP_DEM
    })

    overall_num_both_OP_DEM.append(num_both_OP_DEM)
    overall_num_total_OP.append(num_total_OP)
    overall_num_total_DEM.append(num_total_DEM)

# Overall F1 score from the pooled counts of all regions
overall_both = sum(overall_num_both_OP_DEM)
overall_F1 = calculate_F1(overall_both, sum(overall_num_total_OP), sum(overall_num_total_DEM))

# 'Only DEM' and 'Only OP' for the overall row equal the column sums of the per-region rows.
overall_only_DEM = sum(overall_num_total_DEM) - overall_both
overall_only_OP = sum(overall_num_total_OP) - overall_both

result_rows.append({
    'Test Site': 'Overall',
    'F1 Score': overall_F1,
    'Only DEM': overall_only_DEM,
    'Only OP': overall_only_OP,
    'Both': overall_both
})

# Build the table in one go and index it by 'Test Site'
results_df = pd.DataFrame(
    result_rows,
    columns=['Test Site', 'F1 Score', 'Only DEM', 'Only OP', 'Both'],
).set_index('Test Site')

# Display the DataFrame
print(results_df)


           F1 Score Only DEM Only OP Both
Test Site                                
herschel   0.444444        9      76   34
peel       0.720812       37      73  142
gydan      0.432432       21      63   32
kolguev    0.474227       29      22   23
Overall    0.583333       96     234  231
