# Accuracy Assessment

### Import packages

In [2]:
import geopandas as gpd
import rasterio
import os
import numpy as np
from rasterio.sample import sample_gen
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd

#### Overall accuracy

In [3]:
# Define the paths
classified_image_path = r'Z:\Data\CLASSIFIED'
classified_imagelist = [os.path.join(classified_image_path, file) for file in os.listdir(classified_image_path) if file.endswith('.TIF') or file.endswith('.tif')]

validation_shapefile_path = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\Train_Val\Validation_data\validation_shapefile.shp'

# Load validation shapefile as a geodataframe
validation_gdf = gpd.read_file(validation_shapefile_path)

# Initialize lists to store overall metrics
all_validation_values = []
all_classified_values = []

# Initialize lists to store detailed comparison data
comparison_data = []

# Loop through each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)
    
    # Open the classified image
    with rasterio.open(classified_image_file) as classified_image:
        # Generate samples of the classified image values at validation point locations
        validation_values = []
        classified_values = []
        
        for _, row in validation_gdf.iterrows():
            coords = [(row.geometry.x, row.geometry.y)]
            sampled_values = list(sample_gen(classified_image, coords))
            
            if sampled_values:
                classified_value = sampled_values[0][0]
                
                # Ensure classified values are between 1 and 8, ignoring 0 or 255 (which may be NoData)
                if 1 <= classified_value <= 8:
                    validation_value = row['ID']  # Assuming 'ID' is the ground truth column name
                    classified_values.append(classified_value)
                    validation_values.append(validation_value)
                    
                    # Store comparison data
                    comparison_data.append({'Validation Value': validation_value, 'Classified Value': classified_value})
                else:
                    # Handle NoData or invalid classified values
                    print(f"Skipping invalid value {classified_value} at coordinates {coords}")

        # Append to overall lists
        all_validation_values.extend(validation_values)
        all_classified_values.extend(classified_values)

# Calculate overall accuracy
accuracy = accuracy_score(all_validation_values, all_classified_values)
conf_matrix = confusion_matrix(all_validation_values, all_classified_values)

# Calculate User's and Producer's Accuracy
def calculate_accuracy(conf_matrix):
    total = np.sum(conf_matrix)
    user_accuracy = np.diag(conf_matrix) / np.sum(conf_matrix, axis=0)
    producer_accuracy = np.diag(conf_matrix) / np.sum(conf_matrix, axis=1)
    
    return user_accuracy, producer_accuracy

user_accuracy, producer_accuracy = calculate_accuracy(conf_matrix)

# Calculate Kappa Coefficient
def calculate_kappa(conf_matrix):
    total = np.sum(conf_matrix)
    sum_po = np.sum(np.diag(conf_matrix))
    sum_pe = np.sum(np.sum(conf_matrix, axis=0) * np.sum(conf_matrix, axis=1)) / total
    kappa = (sum_po - sum_pe) / (total - sum_pe)
    return kappa

kappa = calculate_kappa(conf_matrix)

print("Overall Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)

# Print User's Accuracy
print("\nUser's Accuracy:")
for i, acc in enumerate(user_accuracy):
    print(f"Class {i + 1}: {acc:.2f}")

# Print Producer's Accuracy
print("\nProducer's Accuracy:")
for i, acc in enumerate(producer_accuracy):
    print(f"Class {i + 1}: {acc:.2f}")

# Print Kappa Coefficient
print("\nKappa Coefficient:")
print(f"{kappa:.2f}")

# Create and print comparison DataFrame
comparison_df = pd.DataFrame(comparison_data)
print("\nComparison Table:")
print(comparison_df)


Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_16_2020_1377_RGB_RECT_projected_classified.tif
Skipping invalid value 255 at coordinates [(689963.5349000003, 6570128.567)]
Skipping invalid value 255 at coordinates [(690554.9042999996, 6594323.7953)]
Skipping invalid value 255 at coordinates [(701864.3657, 6588811.1657)]
Skipping invalid value 255 at coordinates [(689763.1364000002, 6570196.1107)]
Skipping invalid value 255 at coordinates [(690387.2967999997, 6594332.7356)]
Skipping invalid value 255 at coordinates [(690942.4511000002, 6599787.0709)]
Skipping invalid value 255 at coordinates [(690883.9029000001, 6580352.031)]
Skipping invalid value 255 at coordinates [(679980.5745999999, 6602618.4019)]
Skipping invalid value 255 at coordinates [(708731.5776000004, 6592517.9952)]
Skipping invalid value 255 at coordinates [(698056.7525000004, 6591147.1521)]
Skipping invalid value 255 at coordinates [(698960.8870000001, 6568254.0181)]
Skipping invalid value 255 at coordinates [(708500.1

#### Inter accuracy assessment

In [7]:
# Initialize lists to store overall metrics
all_validation_values = []
all_classified_values = []

# Initialize lists to store detailed comparison data
comparison_data = []

# Define groups
def group_class(value):
    if value in [1, 2, 3]:  # Invasive alien plants
        return 'Invasive Alien'
    elif value in [4, 5, 6, 7, 8]:  # Non-invasive plants
        return 'Non-Invasive'
    else:
        return 'Unknown'

# Loop through each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)
    
    # Open the classified image
    with rasterio.open(classified_image_file) as classified_image:
        # Generate samples of the classified image values at validation point locations
        validation_values = []
        classified_values = []
        
        for _, row in validation_gdf.iterrows():
            coords = [(row.geometry.x, row.geometry.y)]
            sampled_values = list(sample_gen(classified_image, coords))
            
            if sampled_values:
                classified_value = sampled_values[0][0]
                
                # Ensure classified values are between 1 and 8, ignoring 0 or 255 (NoData or out of range values)
                if 1 <= classified_value <= 8:
                    validation_value = row['ID']  # Assuming 'ID' is the ground truth column name
                    classified_values.append(classified_value)
                    validation_values.append(validation_value)
                    
                    # Store comparison data
                    comparison_data.append({
                        'Validation Value': validation_value,
                        'Classified Value': classified_value,
                        'Grouped Validation': group_class(validation_value),
                        'Grouped Classified': group_class(classified_value)
                    })
        
        # Append to overall lists
        all_validation_values.extend(validation_values)
        all_classified_values.extend(classified_values)

# Group the values
grouped_validation_values = [group_class(value) for value in all_validation_values]
grouped_classified_values = [group_class(value) for value in all_classified_values]

# Calculate overall accuracy for the grouped classes
accuracy = accuracy_score(grouped_validation_values, grouped_classified_values)
conf_matrix = confusion_matrix(grouped_validation_values, grouped_classified_values, labels=['Invasive Alien', 'Non-Invasive'])

# Calculate Kappa Coefficient
def calculate_kappa(conf_matrix):
    total = np.sum(conf_matrix)
    sum_po = np.sum(np.diag(conf_matrix))  # Observed agreement
    sum_pe = np.sum(np.sum(conf_matrix, axis=0) * np.sum(conf_matrix, axis=1)) / total  # Expected agreement
    kappa = (sum_po - sum_pe) / (total - sum_pe) if (total - sum_pe) != 0 else 0
    return kappa

kappa = calculate_kappa(conf_matrix)

# Print results
print("Overall Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)

# Print Kappa Coefficient
print("\nKappa Coefficient:")
print(f"{kappa:.2f}")

# Create and print comparison DataFrame
comparison_df = pd.DataFrame(comparison_data)
print("\nComparison Table:")
print(comparison_df)


Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_16_2020_1377_RGB_RECT_projected_classified.tif
Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_17_2020_1377_RGB_RECT_projected_classified.tif
Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_18_2020_1377_RGB_RECT_projected_classified.tif
Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_19_2020_1377_RGB_RECT_projected_classified.tif
Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_20_2020_1377_RGB_RECT_projected_classified.tif
Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_21_2020_1377_RGB_RECT_projected_classified.tif
Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_22_2020_1377_RGB_RECT_projected_classified.tif
Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_23_2020_1377_RGB_RECT_projected_classified.tif
Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_24_2020_1377_RGB_RECT_projected_classified.tif
Processing: Z:\Data\CLASSIFIED\with_indices_3028DB_25_2020_1377_RGB_RECT_projected_classified.tif
Processing: Z:\Data\

#### Intra accuracy assessment

In [4]:
# Define the paths
classified_image_path = r'Z:\Data\CLASSIFIED'
classified_imagelist = [os.path.join(classified_image_path, file) for file in os.listdir(classified_image_path) if file.endswith('.TIF') or file.endswith('.tif')]

# Initialize lists to store overall metrics
all_validation_values = []
all_classified_values = []

# Initialize lists to store detailed comparison data
comparison_data = []

# Define groups
def group_class(value):
    if value == 1:  # Black Wattle
        return 'Invasive Alien 1'  # Black Wattle
    elif value == 2:  # Gum
        return 'Invasive Alien 2'  # Gum
    elif value == 3:  # Silver Wattle
        return 'Invasive Alien 3'  # Silver Wattle
    elif value in [4, 5, 6, 7, 8]:  # Non-invasive plants
        return 'Non-Invasive'
    else:
        return 'Unknown'

# Loop through each TIFF file in the specified directory
for classified_image_file in classified_imagelist:
    print(f'Processing file: {classified_image_file}')
    
    # Open the classified image
    with rasterio.open(classified_image_file) as classified_image:
        # Generate samples of the classified image values at validation point locations
        validation_values = []
        classified_values = []
        
        for _, row in validation_gdf.iterrows():
            coords = [(row.geometry.x, row.geometry.y)]
            sampled_values = list(sample_gen(classified_image, coords))
            if sampled_values:
                classified_value = sampled_values[0][0]
                
                # Ensure classified values are between 1 and 8, ignoring 0 or 255 (which may be NoData)
                if classified_value in [1, 2, 3, 4, 5, 6, 7, 8]:
                    validation_value = row['ID']  # Assuming 'ID' is the ground truth column name
                    classified_values.append(classified_value)
                    validation_values.append(validation_value)
                    
                    # Store comparison data
                    comparison_data.append({
                        'Validation Value': validation_value,
                        'Classified Value': classified_value,
                        'Grouped Validation': group_class(validation_value),
                        'Grouped Classified': group_class(classified_value)
                    })
        
        # Append to overall lists
        all_validation_values.extend(validation_values)
        all_classified_values.extend(classified_values)

# Group the values
grouped_validation_values = [group_class(value) for value in all_validation_values]
grouped_classified_values = [group_class(value) for value in all_classified_values]

# Calculate overall accuracy for the grouped classes
accuracy = accuracy_score(grouped_validation_values, grouped_classified_values)
conf_matrix = confusion_matrix(
    grouped_validation_values,
    grouped_classified_values,
    labels=[
        'Invasive Alien 1',  # Black Wattle
        'Invasive Alien 2',  # Gum
        'Invasive Alien 3',  # Silver Wattle
        'Non-Invasive'       # Classes 4-8
    ]
)

# Calculate Kappa Coefficient
def calculate_kappa(conf_matrix):
    total = np.sum(conf_matrix)
    sum_po = np.sum(np.diag(conf_matrix))  # Observed agreement
    sum_pe = np.sum(np.sum(conf_matrix, axis=0) * np.sum(conf_matrix, axis=1)) / total  # Expected agreement
    kappa = (sum_po - sum_pe) / (total - sum_pe) if (total - sum_pe) != 0 else 0
    return kappa

kappa = calculate_kappa(conf_matrix)

print("Overall Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)

# Print Kappa Coefficient
print("\nKappa Coefficient:")
print(f"{kappa:.2f}")

# Create and print comparison DataFrame
comparison_df = pd.DataFrame(comparison_data)
print("\nComparison Table:")
print(comparison_df)


Processing file: Z:\Data\CLASSIFIED\with_indices_3028DB_16_2020_1377_RGB_RECT_projected_classified.tif
Processing file: Z:\Data\CLASSIFIED\with_indices_3028DB_17_2020_1377_RGB_RECT_projected_classified.tif
Processing file: Z:\Data\CLASSIFIED\with_indices_3028DB_18_2020_1377_RGB_RECT_projected_classified.tif
Processing file: Z:\Data\CLASSIFIED\with_indices_3028DB_19_2020_1377_RGB_RECT_projected_classified.tif
Processing file: Z:\Data\CLASSIFIED\with_indices_3028DB_20_2020_1377_RGB_RECT_projected_classified.tif
Processing file: Z:\Data\CLASSIFIED\with_indices_3028DB_21_2020_1377_RGB_RECT_projected_classified.tif
Processing file: Z:\Data\CLASSIFIED\with_indices_3028DB_22_2020_1377_RGB_RECT_projected_classified.tif
Processing file: Z:\Data\CLASSIFIED\with_indices_3028DB_23_2020_1377_RGB_RECT_projected_classified.tif
Processing file: Z:\Data\CLASSIFIED\with_indices_3028DB_24_2020_1377_RGB_RECT_projected_classified.tif
Processing file: Z:\Data\CLASSIFIED\with_indices_3028DB_25_2020_1377_RGB_