# Accuracy Assessment

### Importing libraries

In [1]:
import geopandas as gpd
import rasterio
import numpy as np
from rasterio.sample import sample_gen
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd

#### Overall classification

In [3]:
# Define the paths
classified_image_file = r'C:\Users\User\Documents\DataFusion\DataFusion\Classified\datafusion_classified10042014.tif'
validation_shapefile_path = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\Train_Val\Validation_data\validation_shapefile.shp'

# Load validation shapefile as a geodataframe
validation_gdf = gpd.read_file(validation_shapefile_path)

# Initialize lists to store overall metrics
all_validation_values = []
all_classified_values = []

# Initialize lists to store detailed comparison data
comparison_data = []

# Open the classified image
with rasterio.open(classified_image_file) as classified_image:
    # Generate samples of the classified image values at validation point locations
    validation_values = []
    classified_values = []
    
    for _, row in validation_gdf.iterrows():
        coords = [(row.geometry.x, row.geometry.y)]
        sampled_values = list(sample_gen(classified_image, coords))
        if sampled_values:
            classified_value = sampled_values[0][0]
            validation_value = row['ID']  # Assuming 'ID' is the ground truth column name
            classified_values.append(classified_value)
            validation_values.append(validation_value)
            
            # Store comparison data
            comparison_data.append({'Validation Value': validation_value, 'Classified Value': classified_value})
    
    # Append to overall lists
    all_validation_values.extend(validation_values)
    all_classified_values.extend(classified_values)

# Calculate overall accuracy
accuracy = accuracy_score(all_validation_values, all_classified_values)
conf_matrix = confusion_matrix(all_validation_values, all_classified_values)

# Calculate User's and Producer's Accuracy
def calculate_accuracy(conf_matrix):
    total = np.sum(conf_matrix)
    num_classes = conf_matrix.shape[0]
    
    user_accuracy = np.diag(conf_matrix) / np.sum(conf_matrix, axis=0)
    producer_accuracy = np.diag(conf_matrix) / np.sum(conf_matrix, axis=1)
    
    return user_accuracy, producer_accuracy

user_accuracy, producer_accuracy = calculate_accuracy(conf_matrix)

# Calculate Kappa Coefficient
def calculate_kappa(conf_matrix):
    total = np.sum(conf_matrix)
    sum_po = np.sum(np.diag(conf_matrix))
    sum_pe = np.sum(np.sum(conf_matrix, axis=0) * np.sum(conf_matrix, axis=1)) / total
    kappa = (sum_po - sum_pe) / (total - sum_pe)
    return kappa

kappa = calculate_kappa(conf_matrix)

print("Overall Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)

# Print User's Accuracy
print("\nUser's Accuracy:")
for i, acc in enumerate(user_accuracy):
    print(f"Class {i + 1}: {acc:.2f}")

# Print Producer's Accuracy
print("\nProducer's Accuracy:")
for i, acc in enumerate(producer_accuracy):
    print(f"Class {i + 1}: {acc:.2f}")

# Print Kappa Coefficient
print("\nKappa Coefficient:")
print(f"{kappa:.2f}")

# Create and save comparison DataFrame
comparison_df = pd.DataFrame(comparison_data)
comparison_csv_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\comparison_data.csv'
comparison_df.to_csv(comparison_csv_path, index=False)

print(f"\nComparison Table saved to: {comparison_csv_path}")


Overall Accuracy: 0.9861111111111112
Confusion Matrix:
[[9 0 0 0 0 0 0 0]
 [0 8 1 0 0 0 0 0]
 [0 0 9 0 0 0 0 0]
 [0 0 0 9 0 0 0 0]
 [0 0 0 0 9 0 0 0]
 [0 0 0 0 0 9 0 0]
 [0 0 0 0 0 0 9 0]
 [0 0 0 0 0 0 0 9]]

User's Accuracy:
Class 1: 1.00
Class 2: 1.00
Class 3: 0.90
Class 4: 1.00
Class 5: 1.00
Class 6: 1.00
Class 7: 1.00
Class 8: 1.00

Producer's Accuracy:
Class 1: 1.00
Class 2: 0.89
Class 3: 1.00
Class 4: 1.00
Class 5: 1.00
Class 6: 1.00
Class 7: 1.00
Class 8: 1.00

Kappa Coefficient:
0.98


OSError: Cannot save file into a non-existent directory: 'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6'

#### Inter accuracy assessment

In [4]:
# Initialize lists to store overall metrics
all_validation_values = []
all_classified_values = []

# Initialize lists to store detailed comparison data
comparison_data = []

# Define groups
def group_class(value):
    if value in [1, 2, 3]:  # Invasive alien plants
        return 'Invasive Alien'
    elif value in [4, 5, 6, 7, 8]:  # Non-invasive plants
        return 'Non-Invasive'
    else:
        return 'Unknown'


# Open the classified image
with rasterio.open(classified_image_file) as classified_image:
    # Generate samples of the classified image values at validation point locations
    validation_values = []
    classified_values = []
    
    for _, row in validation_gdf.iterrows():
        coords = [(row.geometry.x, row.geometry.y)]
        sampled_values = list(sample_gen(classified_image, coords))
        if sampled_values:
            classified_value = sampled_values[0][0]
            validation_value = row['ID']  # Assuming 'ID' is the ground truth column name
            classified_values.append(classified_value)
            validation_values.append(validation_value)
            
            # Store comparison data
            comparison_data.append({
                'Validation Value': validation_value,
                'Classified Value': classified_value,
                'Grouped Validation': group_class(validation_value),
                'Grouped Classified': group_class(classified_value)
            })
    
    # Append to overall lists
    all_validation_values.extend(validation_values)
    all_classified_values.extend(classified_values)

# Group the values
grouped_validation_values = [group_class(value) for value in all_validation_values]
grouped_classified_values = [group_class(value) for value in all_classified_values]

# Calculate overall accuracy for the grouped classes
accuracy = accuracy_score(grouped_validation_values, grouped_classified_values)
conf_matrix = confusion_matrix(grouped_validation_values, grouped_classified_values, labels=['Invasive Alien', 'Non-Invasive'])

# Calculate Kappa Coefficient
def calculate_kappa(conf_matrix):
    total = np.sum(conf_matrix)
    sum_po = np.sum(np.diag(conf_matrix))  # Observed agreement
    sum_pe = np.sum(np.sum(conf_matrix, axis=0) * np.sum(conf_matrix, axis=1)) / total  # Expected agreement
    kappa = (sum_po - sum_pe) / (total - sum_pe) if (total - sum_pe) != 0 else 0
    return kappa

kappa = calculate_kappa(conf_matrix)

print("Overall Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)

# Print Kappa Coefficient
print("\nKappa Coefficient:")
print(f"{kappa:.2f}")

# Create and print comparison DataFrame
comparison_df = pd.DataFrame(comparison_data)
print("\nComparison Table:")
print(comparison_df)


Overall Accuracy: 1.0
Confusion Matrix:
[[27  0]
 [ 0 45]]

Kappa Coefficient:
1.00

Comparison Table:
    Validation Value  Classified Value Grouped Validation Grouped Classified
0                1.0                 1     Invasive Alien     Invasive Alien
1                6.0                 6       Non-Invasive       Non-Invasive
2                6.0                 6       Non-Invasive       Non-Invasive
3                1.0                 1     Invasive Alien     Invasive Alien
4                6.0                 6       Non-Invasive       Non-Invasive
..               ...               ...                ...                ...
67               8.0                 8       Non-Invasive       Non-Invasive
68               5.0                 5       Non-Invasive       Non-Invasive
69               7.0                 7       Non-Invasive       Non-Invasive
70               8.0                 8       Non-Invasive       Non-Invasive
71               5.0                 5       Non-I

#### Intra accuracy assessment

In [5]:
# Initialize lists to store overall metrics
all_validation_values = []
all_classified_values = []

# Initialize lists to store detailed comparison data
comparison_data = []

# Define groups
def group_class(value):
    if value == 1:  # Black Wattle
        return 'Invasive Alien '  # Black Wattle
    elif value == 2:  # Gum
        return 'Invasive Alien 2'  # Gum
    elif value == 3:  # Silver Wattle
        return 'Invasive Alien 3'  # Silver Wattle
    elif value in [4, 5, 6, 7, 8]:  # Non-invasive plants
        return 'Non-Invasive'
    else:
        return 'Unknown'
    
# Open the classified image
with rasterio.open(classified_image_file) as classified_image:
    # Generate samples of the classified image values at validation point locations
    validation_values = []
    classified_values = []
    
    for _, row in validation_gdf.iterrows():
        coords = [(row.geometry.x, row.geometry.y)]
        sampled_values = list(sample_gen(classified_image, coords))
        if sampled_values:
            classified_value = sampled_values[0][0]
            validation_value = row['ID']  # Assuming 'ID' is the ground truth column name
            classified_values.append(classified_value)
            validation_values.append(validation_value)
            
            # Store comparison data
            comparison_data.append({
                'Validation Value': validation_value,
                'Classified Value': classified_value,
                'Grouped Validation': group_class(validation_value),
                'Grouped Classified': group_class(classified_value)
            })
    
    # Append to overall lists
    all_validation_values.extend(validation_values)
    all_classified_values.extend(classified_values)

# Group the values
grouped_validation_values = [group_class(value) for value in all_validation_values]
grouped_classified_values = [group_class(value) for value in all_classified_values]

# Calculate overall accuracy for the grouped classes
accuracy = accuracy_score(grouped_validation_values, grouped_classified_values)
conf_matrix = confusion_matrix(
    grouped_validation_values,
    grouped_classified_values,
    labels=[
        'Invasive Alien 1',  # Black Wattle
        'Invasive Alien 2',  # Gum
        'Invasive Alien 3',  # Silver Wattle
        'Non-Invasive'       # Classes 4-8
    ]
)

# Calculate Kappa Coefficient
def calculate_kappa(conf_matrix):
    total = np.sum(conf_matrix)
    sum_po = np.sum(np.diag(conf_matrix))  # Observed agreement
    sum_pe = np.sum(np.sum(conf_matrix, axis=0) * np.sum(conf_matrix, axis=1)) / total  # Expected agreement
    kappa = (sum_po - sum_pe) / (total - sum_pe) if (total - sum_pe) != 0 else 0
    return kappa

kappa = calculate_kappa(conf_matrix)

print("Overall Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)

# Print Kappa Coefficient
print("\nKappa Coefficient:")
print(f"{kappa:.2f}")

# Create and print comparison DataFrame
comparison_df = pd.DataFrame(comparison_data)
print("\nComparison Table:")
print(comparison_df)


Overall Accuracy: 0.9861111111111112
Confusion Matrix:
[[ 0  0  0  0]
 [ 0  8  1  0]
 [ 0  0  9  0]
 [ 0  0  0 45]]

Kappa Coefficient:
0.96

Comparison Table:
    Validation Value  Classified Value Grouped Validation Grouped Classified
0                1.0                 1    Invasive Alien     Invasive Alien 
1                6.0                 6       Non-Invasive       Non-Invasive
2                6.0                 6       Non-Invasive       Non-Invasive
3                1.0                 1    Invasive Alien     Invasive Alien 
4                6.0                 6       Non-Invasive       Non-Invasive
..               ...               ...                ...                ...
67               8.0                 8       Non-Invasive       Non-Invasive
68               5.0                 5       Non-Invasive       Non-Invasive
69               7.0                 7       Non-Invasive       Non-Invasive
70               8.0                 8       Non-Invasive       Non-In