### Import functions

In [1]:
# Import required functions
import os
import rasterio
import fiona
import json
import csv
from rasterio.merge import merge
from rasterio.enums import Resampling
from rasterio.features import rasterize
from rasterio.transform import from_origin
from rasterio import Affine
from sklearn.metrics import accuracy_score, confusion_matrix
from rasterio.plot import show
from rasterio.mask import mask
from rasterio.plot import show
import matplotlib.pyplot as plt
import geopandas as gpd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

### This code loops through a folder to count the number of tiff files

In [5]:
# Define the path to the root folder
root_folder = r'D:\uMzi scene\Test'

# Initialize a counter for the number of TIFF files
tif_count = 0

# Iterate through all the directories and files in the root folder
for dirpath, dirnames, filenames in os.walk(root_folder):
    # Iterate through the files in the current directory
    for filename in filenames:
        # Check if the file has a TIFF extension
        if filename.endswith('.tif'):
            # Increment the counter if it's a TIFF file
            tif_count += 1

# Print the total count of TIFF files found
print("Total TIFF files found:", tif_count)

Total TIFF files found: 1


### This code cell calculates and adds indices to each image in the folder 

In [6]:
# Define the directory containing the images
images_dir = root_folder
print(images_dir)

# Create a folder to save the updated images if it doesn't exist
output_folder = os.path.join(images_dir, 'updated_images')
print(output_folder)
os.makedirs(output_folder, exist_ok=True)


D:\uMzi scene\Test
D:\uMzi scene\Test\updated_images


In [7]:
# Define custom band names for the added indices
band_names = ['GI', 'IRG', 'NGRDI', 'VARI', 'VDVI', 'WBI']

# Loop through each folder in the directory
for folder_name in os.listdir(images_dir):
    folder_path = os.path.join(images_dir, folder_name)
    
    # Check if the item is a directory
    if os.path.isdir(folder_path) and folder_name != 'updated_images':  # Skip processing the 'updated_images' folder
        print(f"Processing folder '{folder_name}':")
        
        # Loop through each image file in the folder
        for image_file in os.listdir(folder_path):
            # Construct full path to the image file
            image_path = os.path.join(folder_path, image_file)
            
            # Check if it's a file and if it ends with .tif or .TIF
            if os.path.isfile(image_path) and (image_file.endswith('.tif') or image_file.endswith('.TIF')):
                try:
                    # Open the image
                    with rasterio.open(image_path) as img:
                        # Read all bands
                        full_img = img.read()
                       
                        # Extract bands
                        red_band = full_img[0, :, :]
                        green_band = full_img[1, :, :]
                        blue_band = full_img[2, :, :]

                        # Calculate vegetation indices
                        # 1) Greenness Index (GI) (Green/Red)
                        GI = np.divide(green_band, red_band, out=np.zeros_like(green_band, dtype=float), where=red_band != 0)

                        # 2) IRG (Red-Green)
                        IRG = (red_band - green_band)

                        # 3) NGRDI (Green-Red)/(Green + Red)
                        NGRDI = np.divide((green_band - red_band),
                                          (green_band + red_band),
                                          where=(green_band + red_band) != 0)

                        # 4) Calculate VARI (Green-Red)/(Green+Red+Blue)
                        VARI = np.divide((green_band - red_band),
                                         (green_band + red_band + blue_band),
                                         out=np.zeros_like(green_band, dtype=float),  # Specify dtype=float
                                         where=(green_band + red_band + blue_band) != 0)

                        # 5) VDVI (2* Green-Red-Blue)/(2*Green+Red+Blue)
                        VDVI = np.divide((2 * green_band - red_band - blue_band),
                                         (2 * green_band + red_band + blue_band),
                                         out=np.zeros_like(green_band, dtype=float),  # Specify dtype=float
                                         where=(2 * green_band + red_band + blue_band) != 0)

                        # 6) WBI (Blue-Red)/(Blue+Red)
                        WBI = np.divide((blue_band - red_band),
                                        (blue_band + red_band),
                                        out=np.zeros_like(blue_band, dtype=float),  # Specify dtype=float
                                        where=(blue_band + red_band) != 0)

                        # Add the calculated indices as new bands to the image
                        indices = [GI, IRG, NGRDI, VARI, VDVI, WBI]
                        updated_img = np.concatenate((full_img, np.stack(indices)), axis=0)

                        # Update band names in metadata profile
                        profile = img.profile
                        if 'descriptions' not in profile:
                            profile['descriptions'] = [''] * img.count
                        band_names_all = profile['descriptions'] + band_names
                        profile.update(count=profile['count'] + len(indices), dtype='float16', descriptions=band_names_all)

                        # Output path for the updated image
                        output_filename = f'updated_{image_file}'
                        output_path = os.path.join(output_folder, output_filename)

                        # Write the modified image array to a new raster file
                        with rasterio.open(output_path, 'w', **profile) as dst:
                            dst.write(updated_img.astype('float16'))  # Convert to float16 before writing

                        print(f"Indices added to the image and saved as {output_path}")
                except Exception as e:
                    print(f"Error processing '{image_file}': {e}")
            else:
                print(f"Skipping '{image_file}' as it's not a valid TIFF file.")
    else:
        print(f"Skipping '{folder_name}' as it's not a directory or it's the 'updated_images' folder.")

    


Processing folder 'dd':
Skipping '3028DD_13_2020_1377_RGB_RECT.tfw' as it's not a valid TIFF file.
Error processing '3028DD_13_2020_1377_RGB_RECT.tif': Unable to allocate 31.9 GiB for an array with shape (9, 23395, 20351) and data type float64
Skipping '3028DD_13_2020_1377_RGB_RECT.tif.aux.xml' as it's not a valid TIFF file.
Skipping '3028DD_13_2020_1377_RGB_RECT_METADATA.XML' as it's not a valid TIFF file.
Skipping 'updated_images' as it's not a directory or it's the 'updated_images' folder.


### This cell prints out band numbers of each image to see if they were added as bands

In [8]:
# Define the directory containing the updated images
images_dir = r'D:\uMzi scene\Test\updated_images'

# Loop through each image in the directory
for image_file in os.listdir(images_dir):
    if image_file.endswith('.TIF') or image_file.endswith('.tif'):  # Check for both upper and lower case extensions
       
        # Open the image
        with rasterio.open(os.path.join(images_dir, image_file)) as img:
            # Print the number of bands
            print(f"Image '{image_file}' has {img.count} bands.")

### This code cell extracts pixel values from the images with the added VIs

In [7]:
# Define paths
pntsshp_path = r'D:\From_Thas pc\ValidationData\RGB\VAL.shp'

image_dir = r'D:\uMzi scene\Test\updated_images'

# Get the first TIFF file from the directory
tiff_files = [file for file in os.listdir(image_dir) if file.lower().endswith('.tif')]
if not tiff_files:
    raise FileNotFoundError("No TIFF files found in the specified directory.")
tiff_file = tiff_files[0]

# Open shapefile and extract points' coordinates and attributes
with fiona.open(pntsshp_path, 'r') as shapefile:
    points = [[(point['geometry']['coordinates'][0], point['geometry']['coordinates'][1]),
               (int(point['properties']['ID']), point['properties']['Val_id']),
               (point['properties']['X'], point['properties']['Y'])] for point in shapefile]

# Initialize dictionary to store band pixel values associated with each point
point_pixel_values = {}

# Process the TIFF file
image_path = os.path.join(image_dir, tiff_file)
with rasterio.open(image_path) as src:
    imagename = os.path.splitext(os.path.basename(image_path))[0]  # Remove the file extension
    
    # Iterate over each band in the image
    for band in range(1, src.count + 1):
        # Iterate over each point and extract pixel values
        for point in points:
            row, col = src.index(point[0][0], point[0][1])  # Find the point within the raster image
            values = src.read(band, window=((row, row + 1), (col, col + 1)))  # Read the band values

            if values.size != 0:  # Check if bands are valid/not empty
                # Prepare key for the current point
                point_key = f"Point_id: {point[1][1]}, X: {point[2][0]}, Y: {point[2][1]}, Class_ID: {point[1][0]}"
                if point_key not in point_pixel_values:
                    point_pixel_values[point_key] = {}

                # Add pixel values to the dictionary
                point_pixel_values[point_key][f'{imagename}_Band_{band}'] = values[0][0]

# Print point_pixel_values
for point_key, pixel_values in point_pixel_values.items():
    print(f"Point: {point_key}, Pixel Values: {pixel_values}")



### This cell changes point_pixel_value dictionary to a dataframe

In [8]:
# Create a list of dictionaries for DataFrame creation
data_list = []

for point_key, pixel_values in point_pixel_values.items():
    data_dict = {}
    # Extracting Point ID, X, Y, and Class ID from point_key
    point_id = point_key.split(',')[0].split(':')[1].strip()  
    x_coord = point_key.split(',')[1].split(':')[1].strip()
    y_coord = point_key.split(',')[2].split(':')[1].strip()
    class_id = point_key.split(',')[3].split(':')[1].strip()
    
    # Add extracted information as separate columns
    data_dict['Point'] = point_id
    data_dict['X'] = x_coord
    data_dict['Y'] = y_coord
    data_dict['Class_ID'] = class_id
    
    # Add band pixel values as columns
    data_dict.update(pixel_values)

    data_list.append(data_dict)

# Create DataFrame from list of dictionaries
pixelvalues_df = pd.DataFrame(data_list)

# Print the DataFrame (optional)
print(pixelvalues_df)

# Save the DataFrame to a CSV file
csv_file_path = r'D:\uMzi scene\Test\Band_values\pixel_values.csv'
pixelvalues_df.to_csv(csv_file_path, index=False)

print(f'DataFrame successfully saved to {csv_file_path}')


Empty DataFrame
Columns: []
Index: []
DataFrame successfully saved to C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\Imagery\uMzimAOI\SPOT6\Band_values\pixel_values.csv


In [9]:
# Read the CSV file into a DataFrame
#pixelvalues_df = pd.read_csv(r'E:\SPOT6_7\SPOT6_7\BandValues\pixel_values.csv')

# Display the DataFrame
#print(pixelvalues_df)

### This cell block trains the random forest classifier 

In [10]:
# Define features 
features = pixelvalues_df[['updated_SPOT6_clipped_Band_1', 'updated_SPOT6_clipped_Band_2', 'updated_SPOT6_clipped_Band_3', 'updated_SPOT6_clipped_Band_4', 'updated_SPOT6_clipped_Band_5', 'updated_SPOT6_clipped_Band_6', 'updated_SPOT6_clipped_Band_7', 'updated_SPOT6_clipped_Band_8', 'updated_SPOT6_clipped_Band_9', 'updated_SPOT6_clipped_Band_10']]

# Define target (Class ID column)
target = pixelvalues_df['Class_ID']

# Convert features and target columns to arrays
features_array = features.values
target_array = target.values

# Train the classifier using RandomForest with 500 trees
classifier = RandomForestClassifier(n_estimators=500)
classifier.fit(features_array, target_array)

KeyError: "None of [Index(['updated_SPOT6_clipped_Band_1', 'updated_SPOT6_clipped_Band_2',\n       'updated_SPOT6_clipped_Band_3', 'updated_SPOT6_clipped_Band_4',\n       'updated_SPOT6_clipped_Band_5', 'updated_SPOT6_clipped_Band_6',\n       'updated_SPOT6_clipped_Band_7', 'updated_SPOT6_clipped_Band_8',\n       'updated_SPOT6_clipped_Band_9', 'updated_SPOT6_clipped_Band_10'],\n      dtype='object')] are in the [columns]"

### This cell block performs the classification on each image in the folder using the trained classifer in the previous cell

In [None]:
# Define the directory where you want to save the classified images
output_folder = r"D:\uMzi scene\Test\Classified"

# Check if the output folder exists, if not, create it
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Define the directory containing the image
image_dir = r'D:\uMzi scene\Test\updated_images'
umzim_boundary = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzimScene\roi\ROI.shp'

# Find the single .tif or .TIF file in the directory
image_path = None
for filename in os.listdir(image_dir):
    if filename.lower().endswith('.tif'):
        image_path = os.path.join(image_dir, filename)
        break

if not image_path:
    raise FileNotFoundError("No .tif file found in the specified directory.")

# Read the ROI shapefile
with fiona.open(umzim_boundary, 'r') as shapefile:
    shapes = [feature["geometry"] for feature in shapefile]

# Open the image using rasterio
with rasterio.open(image_path) as src:
    # Mask the image using the ROI shapefile
    out_image, out_transform = mask(src, shapes, crop=True)
    out_meta = src.meta.copy()
    out_meta.update({
        "driver": "GTiff",
        "height": out_image.shape[1],
        "width": out_image.shape[2],
        "transform": out_transform,
        "count": src.count
    })

    # Get the shape of the masked image
    rows, cols = out_image.shape[1], out_image.shape[2]

    # Define batch size (adjust as needed based on memory constraints)
    batch_size = 1000  # You can adjust this value

    # Initialize the classification result array for the entire image
    classification_result = np.zeros((rows, cols), dtype=np.uint8)

    # Iterate over the image in batches
    for row_start in range(0, rows, batch_size):
        for col_start in range(0, cols, batch_size):
            row_end = min(row_start + batch_size, rows)
            col_end = min(col_start + batch_size, cols)

            # Read the batch of bands
            bands = [out_image[band_idx - 1, row_start:row_end, col_start:col_end] 
                     for band_idx in range(1, src.count + 1)]

            # Stack the bands into a single array
            stacked_bands = np.stack(bands, axis=-1)

            # Reshape the array to 2D (rows, columns) for classification
            reshaped_bands = stacked_bands.reshape(-1, src.count)

            # Mask out no data values (pixels with a value of 0)
            valid_pixels_mask = np.all(reshaped_bands != 0, axis=1)
            valid_pixels = reshaped_bands[valid_pixels_mask]

            if valid_pixels.size > 0:
                # Predict using the trained classifier (replace this with your classifier)
                predicted_labels = classifier.predict(valid_pixels)

                # Create a temporary result array for this batch
                batch_result = np.zeros((row_end - row_start, col_end - col_start), dtype=np.uint8)

                # Fill in the valid pixels with the predicted labels
                batch_result.reshape(-1)[valid_pixels_mask] = predicted_labels

                # Write the batch result to the corresponding window in the full classification result array
                classification_result[row_start:row_end, col_start:col_end] = batch_result

    # Construct the output file path
    output_filename = os.path.basename(image_path).replace('.tif', '_classified.tif')
    output_path = os.path.join(output_folder, output_filename)

    # Prepare metadata for the classified image
    meta = src.meta.copy()
    meta.update({
        'driver': 'GTiff',
        'dtype': 'uint8',  # Ensure data type is appropriate for classification results
        'count': 1,  # Single band
        'compress': 'lzw',  # Compression method (Lempel-Ziv-Welch)
        'nodata': 0,  # Optional: Set nodata value
        'crs': src.crs,  # Use the same CRS as the input raster
        'transform': out_transform,  # Use the transform of the masked image
    })

    # Write the full classified image to a new GeoTIFF file
    with rasterio.open(output_path, 'w', **meta) as dst:
        dst.write(classification_result.astype('uint8'), 1)

print("Classification completed and classified image saved successfully!")


### Accuracy Assessment

In [None]:
import os
import rasterio
import geopandas as gpd
import numpy as np
import pandas as pd
from rasterio.features import rasterize
from rasterio.transform import from_origin
from sklearn.metrics import accuracy_score, confusion_matrix

# Define the paths
classified_image_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\SPOT 6_7\Classified_withIndices'
classified_imagelist = [os.path.join(classified_image_path, file) for file in os.listdir(classified_image_path) if file.endswith('.TIF') or file.endswith('.tif')]

validation_shapefile_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\ValidationData\Val_projected.shp'
umzim_boundary = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\Fieldwork\uMzimvubuFieldTrip\uMzimvubuBoundary.shp'

# Initialize variables to store overall metrics
all_ground_truth_labels = []
all_classified_values = []

# Loop over each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)
    
    # Open classified image
    with rasterio.open(classified_image_file) as classified_image:
        
        # Read classified image
        classified_image_data = classified_image.read(1)

        # Load validation shapefile as a geodataframe
        validation_gdf = gpd.read_file(validation_shapefile_path)
        
        # Filter validation points falling within the raster boundary
        raster_boundary = classified_image.bounds
        valid_points = validation_gdf.cx[raster_boundary.left:raster_boundary.right, 
                                         raster_boundary.bottom:raster_boundary.top]

        # Check if there are any valid geometries
        if not valid_points.empty and not valid_points.geometry.is_empty.all():
            # Rasterize validation points onto classified image to extract pixel values
            transform = from_origin(classified_image.bounds.left, classified_image.bounds.top,
                                    classified_image.res[0], classified_image.res[1])
            rasterized_validation = rasterize([(geom, 1) for geom in valid_points.geometry],
                                               out_shape=classified_image_data.shape,
                                               transform=transform,
                                               dtype='uint8')

            # Extract pixel values from classified image corresponding to validation points
            classified_values = classified_image_data[rasterized_validation == 1]

            # Get ground truth labels corresponding to valid points
            ground_truth_labels = valid_points['ID']

            # Append to overall lists
            all_ground_truth_labels.extend(ground_truth_labels)
            all_classified_values.extend(classified_values)
        else:
            print("No valid geometries found for rasterization within the raster boundary.")

# Calculate overall accuracy metrics
overall_accuracy = accuracy_score(all_ground_truth_labels, all_classified_values)
conf_matrix = confusion_matrix(all_ground_truth_labels, all_classified_values)

# Convert the confusion matrix to a pandas DataFrame for formatting
class_names = np.unique(np.concatenate((all_ground_truth_labels, all_classified_values)))
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)

# Add additional columns for total count and total precision for each class
conf_matrix_df['All'] = conf_matrix_df.sum(axis=1)
conf_matrix_df.loc['All'] = conf_matrix_df.sum()

# Specify the save location
#output_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\EMIT\AccuracyAss\Overallaccuracy_results.xlsx'

# Write the results to an Excel file
#with pd.ExcelWriter(output_path) as writer:
    #conf_matrix_df.to_excel(writer, sheet_name='Confusion Matrix')

# Print results
print("Overall Accuracy:", overall_accuracy)
print("Confusion Matrix:")
print(conf_matrix_df)


## Inter accuracy assessment

In [None]:
#Define paths

classified_image_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\SPOT 6_7\Classified_withIndices'
classified_imagelist = [os.path.join(classified_image_path, file) for file in os.listdir(classified_image_path) if file.endswith('.TIF') or file.endswith('.tif')]


validation_shapefile_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\ValidationData\Val_projected.shp'


# Initialize variables to store overall metrics
all_ground_truth_labels = []
all_classified_values = []

# Mapping function to aggregate classes
def aggregate_class(class_id):
    if 1 <= class_id <= 6:
        return 1  # Invasive alien class
    elif 7 <= class_id <= 15:
        return 2  # Non-invasive alien class
    else:
        return 0  # Undefined class (if any other IDs are present)

# Loop over each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)
    
    # Open classified image
    with rasterio.open(classified_image_file) as classified_image:
        
        # Read classified image
        classified_image_data = classified_image.read(1)

        # Load validation shapefile as a geodataframe
        validation_gdf = gpd.read_file(validation_shapefile_path)
        
        # Filter validation points falling within the raster boundary
        raster_boundary = classified_image.bounds
        valid_points = validation_gdf.cx[raster_boundary.left:raster_boundary.right, 
                                         raster_boundary.bottom:raster_boundary.top]

        # Rasterize validation points onto classified image to extract pixel values
        transform = from_origin(classified_image.bounds.left, classified_image.bounds.top,
                                classified_image.res[0], classified_image.res[1])
        rasterized_validation = rasterize([(geom, 1) for geom in valid_points.geometry],
                                           out_shape=classified_image_data.shape,
                                           transform=transform,
                                           dtype='uint8')

        # Extract pixel values from classified image corresponding to validation points
        classified_values = classified_image_data[rasterized_validation == 1]

        # Get ground truth labels corresponding to valid points
        ground_truth_labels = valid_points['ID']

        # Map ground truth and classified values to aggregated classes
        ground_truth_labels_aggregated = [aggregate_class(label) for label in ground_truth_labels]
        classified_values_aggregated = [aggregate_class(value) for value in classified_values]

        # Filter out undefined classes (if any)
        filtered_ground_truth = [gt for gt in ground_truth_labels_aggregated if gt != 0]
        filtered_classified = [cl for gt, cl in zip(ground_truth_labels_aggregated, classified_values_aggregated) if gt != 0]

        # Append to overall lists
        all_ground_truth_labels.extend(filtered_ground_truth)
        all_classified_values.extend(filtered_classified)

# Calculate overall accuracy metrics for aggregated classes
overall_accuracy = accuracy_score(all_ground_truth_labels, all_classified_values)
conf_matrix = confusion_matrix(all_ground_truth_labels, all_classified_values, labels=[1, 2])

# Convert the confusion matrix to a pandas DataFrame for formatting
class_names = ['Invasive Alien', 'Non-Invasive Alien']
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)

# Add additional columns for total count and total precision for each class
conf_matrix_df['All'] = conf_matrix_df.sum(axis=1)
conf_matrix_df.loc['All'] = conf_matrix_df.sum()

# Write the results to an Excel file
#output_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\EMIT\AccuracyAss\Interaccuracy_results.xlsx'

# Write the results to an Excel file
#with pd.ExcelWriter(output_path) as writer:
    #overall_accuracy_df.to_excel(writer, sheet_name='Overall Accuracy', index=False)
    #conf_matrix_df.to_excel(writer, sheet_name='Confusion Matrix')

# Print results
print("Overall Accuracy:", overall_accuracy)
print("Confusion Matrix:")
print(conf_matrix_df)


### Intra accuracy assessment

In [None]:
#Define paths
classified_image_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\SPOT 6_7\Classified_withIndices'
classified_imagelist = [os.path.join(classified_image_path, file) for file in os.listdir(classified_image_path) if file.endswith('.TIF') or file.endswith('.tif')]

validation_shapefile_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\ValidationData\Val_projected.shp'


# Initialize variables to store overall metrics
all_ground_truth_labels = []
all_classified_values = []

# Mapping function to aggregate classes
def aggregate_class(class_id):
    if 1 <= class_id <= 6:
        return class_id  # Keep individual classes 1 to 6
    elif 7 <= class_id <= 15:
        return 7  # Group classes 7 to 15 together as class 7
    else:
        return 0  # Undefined class (if any other IDs are present)

# Loop over each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)
    
    # Open classified image
    with rasterio.open(classified_image_file) as classified_image:
        
        # Read classified image
        classified_image_data = classified_image.read(1)

        # Load validation shapefile as a geodataframe
        validation_gdf = gpd.read_file(validation_shapefile_path)
        
        # Filter validation points falling within the raster boundary
        raster_boundary = classified_image.bounds
        valid_points = validation_gdf.cx[raster_boundary.left:raster_boundary.right, 
                                         raster_boundary.bottom:raster_boundary.top]

        # Rasterize validation points onto classified image to extract pixel values
        transform = from_origin(classified_image.bounds.left, classified_image.bounds.top,
                                classified_image.res[0], classified_image.res[1])
        rasterized_validation = rasterize([(geom, 1) for geom in valid_points.geometry],
                                           out_shape=classified_image_data.shape,
                                           transform=transform,
                                           dtype='uint8')

        # Extract pixel values from classified image corresponding to validation points
        classified_values = classified_image_data[rasterized_validation == 1]

        # Get ground truth labels corresponding to valid points
        ground_truth_labels = valid_points['ID']

        # Map ground truth and classified values to aggregated classes
        ground_truth_labels_aggregated = [aggregate_class(label) for label in ground_truth_labels]
        classified_values_aggregated = [aggregate_class(value) for value in classified_values]

        # Filter out undefined classes (if any)
        filtered_ground_truth = [gt for gt in ground_truth_labels_aggregated if gt != 0]
        filtered_classified = [cl for gt, cl in zip(ground_truth_labels_aggregated, classified_values_aggregated) if gt != 0]

        # Append to overall lists
        all_ground_truth_labels.extend(filtered_ground_truth)
        all_classified_values.extend(filtered_classified)

# Calculate overall accuracy metrics for aggregated classes
overall_accuracy = accuracy_score(all_ground_truth_labels, all_classified_values)
conf_matrix = confusion_matrix(all_ground_truth_labels, all_classified_values, labels=[1, 2, 3, 4, 5, 6, 7])

# Convert the confusion matrix to a pandas DataFrame for formatting
class_names = ['Alien_Black Wattle', 'Alien_Gum', 'Alien_Other', 'Alien_Pine', 'Alien Poplar', 'Alien Silver Wattle', 'Non-Invasive Alien']
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)

# Add additional columns for total count and total precision for each class
conf_matrix_df['All'] = conf_matrix_df.sum(axis=1)
conf_matrix_df.loc['All'] = conf_matrix_df.sum()

# Create a Pandas DataFrame for overall accuracy
overall_accuracy_df = pd.DataFrame([overall_accuracy], columns=['Overall Accuracy'])

# Specify the save location
#output_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\EMIT\AccuracyAss\Intraaccuracy_results.xlsx'

# Write the results to an Excel file
#with pd.ExcelWriter(output_path) as writer:
    #overall_accuracy_df.to_excel(writer, sheet_name='Overall Accuracy', index=False)
    #conf_matrix_df.to_excel(writer, sheet_name='Confusion Matrix')

# Print results
print("Overall Accuracy:", overall_accuracy)
print("Confusion Matrix:")
print(conf_matrix_df)
