# Importing libraries

In [3]:
import geopandas as gpd
import rasterio
import numpy as np
from rasterio.sample import sample_gen
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd

### Accuracy Assessment

In [10]:
# Define the paths
classified_image_file = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\SPOT_Classified\SPOT_added_indices_classified.TIF'

validation_shapefile_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\ValidationData\SPOT\SPOT_val_projected.shp'

# Load validation shapefile as a geodataframe
validation_gdf = gpd.read_file(validation_shapefile_path)

# Initialize lists to store overall metrics
all_validation_values = []
all_classified_values = []

# Initialize lists to store detailed comparison data
comparison_data = []

# Open the classified image
with rasterio.open(classified_image_file) as classified_image:
    # Generate samples of the classified image values at validation point locations
    validation_values = []
    classified_values = []
    
    for _, row in validation_gdf.iterrows():
        coords = [(row.geometry.x, row.geometry.y)]
        sampled_values = list(sample_gen(classified_image, coords))
        if sampled_values:
            classified_value = sampled_values[0][0]
            validation_value = row['ID']  # Assuming 'ID' is the ground truth column name
            classified_values.append(classified_value)
            validation_values.append(validation_value)
            
            # Store comparison data
            comparison_data.append({'Validation Value': validation_value, 'Classified Value': classified_value})
    
    # Append to overall lists
    all_validation_values.extend(validation_values)
    all_classified_values.extend(classified_values)

# Calculate overall accuracy
accuracy = accuracy_score(all_validation_values, all_classified_values)
conf_matrix = confusion_matrix(all_validation_values, all_classified_values)

# Calculate User's and Producer's Accuracy
def calculate_accuracy(conf_matrix):
    total = np.sum(conf_matrix)
    num_classes = conf_matrix.shape[0]
    
    user_accuracy = np.diag(conf_matrix) / np.sum(conf_matrix, axis=0)
    producer_accuracy = np.diag(conf_matrix) / np.sum(conf_matrix, axis=1)
    
    return user_accuracy, producer_accuracy

user_accuracy, producer_accuracy = calculate_accuracy(conf_matrix)

# Calculate Kappa Coefficient
def calculate_kappa(conf_matrix):
    total = np.sum(conf_matrix)
    sum_po = np.sum(np.diag(conf_matrix))
    sum_pe = np.sum(np.sum(conf_matrix, axis=0) * np.sum(conf_matrix, axis=1)) / total
    kappa = (sum_po - sum_pe) / (total - sum_pe)
    return kappa

kappa = calculate_kappa(conf_matrix)

print("Overall Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)

# Print User's Accuracy
print("\nUser's Accuracy:")
for i, acc in enumerate(user_accuracy):
    print(f"Class {i + 1}: {acc:.2f}")

# Print Producer's Accuracy
print("\nProducer's Accuracy:")
for i, acc in enumerate(producer_accuracy):
    print(f"Class {i + 1}: {acc:.2f}")

# Print Kappa Coefficient
print("\nKappa Coefficient:")
print(f"{kappa:.2f}")

# Create and save comparison DataFrame
comparison_df = pd.DataFrame(comparison_data)
comparison_csv_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\comparison_data.csv'
comparison_df.to_csv(comparison_csv_path, index=False)

print(f"\nComparison Table saved to: {comparison_csv_path}")


Overall Accuracy: 0.6979166666666666
Confusion Matrix:
[[ 9  1  2  0  0  0  0  0]
 [ 0 12  0  0  0  0  0  0]
 [ 1  3  6  0  0  2  0  0]
 [ 0  0  0 11  0  0  0  1]
 [ 0  0  0  0 10  0  2  0]
 [ 1  0  1  0  1  9  0  0]
 [ 0  0  0  1  3  1  6  1]
 [ 0  0  0  3  4  0  1  4]]

User's Accuracy:
Class 1: 0.82
Class 2: 0.75
Class 3: 0.67
Class 4: 0.73
Class 5: 0.56
Class 6: 0.75
Class 7: 0.67
Class 8: 0.67

Producer's Accuracy:
Class 1: 0.75
Class 2: 1.00
Class 3: 0.50
Class 4: 0.92
Class 5: 0.83
Class 6: 0.75
Class 7: 0.50
Class 8: 0.33

Kappa Coefficient:
0.65

Comparison Table saved to: C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\comparison_data.csv


## Inter accuracy assessment

In [8]:
# Initialize lists to store overall metrics
all_validation_values = []
all_classified_values = []

# Initialize lists to store detailed comparison data
comparison_data = []

# Define groups
def group_class(value):
    if value in [1, 2, 3]:  # Invasive alien plants
        return 'Invasive Alien'
    elif value in [4, 5, 6, 7, 8]:  # Non-invasive plants
        return 'Non-Invasive'
    else:
        return 'Unknown'


# Open the classified image
with rasterio.open(classified_image_file) as classified_image:
    # Generate samples of the classified image values at validation point locations
    validation_values = []
    classified_values = []
    
    for _, row in validation_gdf.iterrows():
        coords = [(row.geometry.x, row.geometry.y)]
        sampled_values = list(sample_gen(classified_image, coords))
        if sampled_values:
            classified_value = sampled_values[0][0]
            validation_value = row['ID']  # Assuming 'ID' is the ground truth column name
            classified_values.append(classified_value)
            validation_values.append(validation_value)
            
            # Store comparison data
            comparison_data.append({
                'Validation Value': validation_value,
                'Classified Value': classified_value,
                'Grouped Validation': group_class(validation_value),
                'Grouped Classified': group_class(classified_value)
            })
    
    # Append to overall lists
    all_validation_values.extend(validation_values)
    all_classified_values.extend(classified_values)

# Group the values
grouped_validation_values = [group_class(value) for value in all_validation_values]
grouped_classified_values = [group_class(value) for value in all_classified_values]

# Calculate overall accuracy for the grouped classes
accuracy = accuracy_score(grouped_validation_values, grouped_classified_values)
conf_matrix = confusion_matrix(grouped_validation_values, grouped_classified_values, labels=['Invasive Alien', 'Non-Invasive'])

# Calculate Kappa Coefficient
def calculate_kappa(conf_matrix):
    total = np.sum(conf_matrix)
    sum_po = np.sum(np.diag(conf_matrix))  # Observed agreement
    sum_pe = np.sum(np.sum(conf_matrix, axis=0) * np.sum(conf_matrix, axis=1)) / total  # Expected agreement
    kappa = (sum_po - sum_pe) / (total - sum_pe) if (total - sum_pe) != 0 else 0
    return kappa

kappa = calculate_kappa(conf_matrix)

print("Overall Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)

# Print Kappa Coefficient
print("\nKappa Coefficient:")
print(f"{kappa:.2f}")

# Create and print comparison DataFrame
comparison_df = pd.DataFrame(comparison_data)
print("\nComparison Table:")
print(comparison_df)


Overall Accuracy: 0.9583333333333334
Confusion Matrix:
[[34  2]
 [ 2 58]]

Kappa Coefficient:
0.91

Comparison Table:
    Validation Value  Classified Value Grouped Validation Grouped Classified
0                3.0                 3     Invasive Alien     Invasive Alien
1                4.0                 4       Non-Invasive       Non-Invasive
2                3.0                 2     Invasive Alien     Invasive Alien
3                7.0                 6       Non-Invasive       Non-Invasive
4                6.0                 6       Non-Invasive       Non-Invasive
..               ...               ...                ...                ...
91               3.0                 3     Invasive Alien     Invasive Alien
92               2.0                 2     Invasive Alien     Invasive Alien
93               3.0                 1     Invasive Alien     Invasive Alien
94               1.0                 1     Invasive Alien     Invasive Alien
95               7.0               

### Intra accuracy assessment

In [9]:
# Initialize lists to store overall metrics
all_validation_values = []
all_classified_values = []

# Initialize lists to store detailed comparison data
comparison_data = []

# Define groups
def group_class(value):
    if value in [1, 2, 3]:  # Separate invasive alien classes
        return f'Invasive Alien {value}'
    elif value in [4, 5, 6, 7, 8]:  # Non-invasive plants
        return 'Non-Invasive'
    else:
        return 'Unknown'
    
# Open the classified image
with rasterio.open(classified_image_file) as classified_image:
    # Generate samples of the classified image values at validation point locations
    validation_values = []
    classified_values = []
    
    for _, row in validation_gdf.iterrows():
        coords = [(row.geometry.x, row.geometry.y)]
        sampled_values = list(sample_gen(classified_image, coords))
        if sampled_values:
            classified_value = sampled_values[0][0]
            validation_value = row['ID']  # Assuming 'ID' is the ground truth column name
            classified_values.append(classified_value)
            validation_values.append(validation_value)
            
            # Store comparison data
            comparison_data.append({
                'Validation Value': validation_value,
                'Classified Value': classified_value,
                'Grouped Validation': group_class(validation_value),
                'Grouped Classified': group_class(classified_value)
            })
    
    # Append to overall lists
    all_validation_values.extend(validation_values)
    all_classified_values.extend(classified_values)

# Group the values
grouped_validation_values = [group_class(value) for value in all_validation_values]
grouped_classified_values = [group_class(value) for value in all_classified_values]

# Calculate overall accuracy for the grouped classes
accuracy = accuracy_score(grouped_validation_values, grouped_classified_values)
conf_matrix = confusion_matrix(grouped_validation_values, grouped_classified_values, labels=[f'Invasive Alien {i}' for i in [1, 2, 3]] + ['Non-Invasive'])

# Calculate Kappa Coefficient
def calculate_kappa(conf_matrix):
    total = np.sum(conf_matrix)
    sum_po = np.sum(np.diag(conf_matrix))  # Observed agreement
    sum_pe = np.sum(np.sum(conf_matrix, axis=0) * np.sum(conf_matrix, axis=1)) / total  # Expected agreement
    kappa = (sum_po - sum_pe) / (total - sum_pe) if (total - sum_pe) != 0 else 0
    return kappa

kappa = calculate_kappa(conf_matrix)

print("Overall Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)

# Print Kappa Coefficient
print("\nKappa Coefficient:")
print(f"{kappa:.2f}")

# Create and print comparison DataFrame
comparison_df = pd.DataFrame(comparison_data)
print("\nComparison Table:")
print(comparison_df)

Overall Accuracy: 0.6041666666666666
Confusion Matrix:
[[ 0  0  0  0]
 [ 0  0  0  0]
 [ 0  0  0  0]
 [ 1  0  1 58]]

Kappa Coefficient:
0.00

Comparison Table:
    Validation Value  Classified Value  Grouped Validation Grouped Classified
0                3.0                 3  Invasive Alien 3.0   Invasive Alien 3
1                4.0                 4        Non-Invasive       Non-Invasive
2                3.0                 2  Invasive Alien 3.0   Invasive Alien 2
3                7.0                 6        Non-Invasive       Non-Invasive
4                6.0                 6        Non-Invasive       Non-Invasive
..               ...               ...                 ...                ...
91               3.0                 3  Invasive Alien 3.0   Invasive Alien 3
92               2.0                 2  Invasive Alien 2.0   Invasive Alien 2
93               3.0                 1  Invasive Alien 3.0   Invasive Alien 1
94               1.0                 1  Invasive Alien 1.0  