## Import packages

In [1]:
# Import packages
import argparse
import os
#import geoviews
import warnings
import rasterio as rio
import rasterio
import pandas as pd
import fiona
import matplotlib.pyplot as plt
import numpy.ma as ma
import numpy as np
import xarray as xr
import rioxarray as rxr
from shapely.geometry import mapping, box
import geopandas as gpd
import earthpy as et
import earthpy.spatial as es
import earthpy.plot as ep
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from rasterio.features import rasterize
from rasterio.transform import from_origin
from rasterio.mask import mask

warnings.simplefilter('ignore')

In [2]:
import math
from osgeo import gdal
import hvplot.xarray
import hvplot.pandas
import holoviews as hv
import rasterio as rio
#from modules.emit_tools import emit_xarray

### This code defines the image directory and creates the output folder for the updated images

In [7]:
# Source directory holding the clipped EMIT GeoTIFF(s).
images_dir = r'E:\uMzi scene\data\EMIT\CLIPPED'

# Index-augmented copies of each image are written to this sub-folder.
output_folder = os.path.join(images_dir, 'updated_images')

# Echo both locations so the run log records where data is read and written.
for location in (images_dir, output_folder):
    print(location)

# Create the output folder up front; no error if it already exists.
os.makedirs(output_folder, exist_ok=True)

E:\uMzi scene\data\EMIT\CLIPPED
E:\uMzi scene\data\EMIT\CLIPPED\updated_images


### Code to calculate number of bands

In [10]:
# Directory holding the clipped source image(s).
images_dir = r'E:\uMzi scene\data\EMIT\CLIPPED'

# Report the band count of every GeoTIFF ('.TIF' or '.tif') found in the directory.
for image_file in os.listdir(images_dir):
    if not image_file.endswith(('.TIF', '.tif')):
        continue  # not a TIFF; nothing to report

    image_location = os.path.join(images_dir, image_file)
    with rasterio.open(image_location) as img:
        print(f"Image '{image_file}' has {img.count} bands.")

Image 'emit.tif' has 286 bands.


### This code cell calculates and adds indices to each image in the folder

In [8]:
# Names of the six index bands appended to each image, in write order.
band_names = ['GI', 'IRG', 'NGRDI', 'VARI', 'VDVI', 'WBI']

# Zero-based positions of the bands treated as red, green and blue.
# NOTE(review): the input here is a 286-band image; confirm that bands 0, 1 and 2
# really are the red/green/blue wavelengths these indices expect.
RED_BAND_INDEX, GREEN_BAND_INDEX, BLUE_BAND_INDEX = 0, 1, 2


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator element-wise, with 0.0 where the denominator is 0.

    Supplying ``out`` matters: without it, elements excluded by ``where`` are left
    uninitialised rather than zero.
    """
    return np.divide(numerator, denominator,
                     out=np.zeros_like(numerator, dtype=float),
                     where=denominator != 0)


def _compute_rgb_indices(red_band, green_band, blue_band):
    """Compute the six indices from float band arrays.

    Returns a list ``[GI, IRG, NGRDI, VARI, VDVI, WBI]`` matching ``band_names``.
    """
    GI = _safe_ratio(green_band, red_band)                          # Green / Red
    IRG = red_band - green_band                                     # Red - Green
    NGRDI = _safe_ratio(green_band - red_band,                      # (G - R) / (G + R)
                        green_band + red_band)
    VARI = _safe_ratio(green_band - red_band,                       # (G - R) / (G + R + B)
                       green_band + red_band + blue_band)
    VDVI = _safe_ratio(2 * green_band - red_band - blue_band,       # (2G - R - B) / (2G + R + B)
                       2 * green_band + red_band + blue_band)
    WBI = _safe_ratio(blue_band - red_band,                         # (B - R) / (B + R)
                      blue_band + red_band)
    return [GI, IRG, NGRDI, VARI, VDVI, WBI]


# `images_dir` and `output_folder` are the values set by the earlier directory cell.
for image_file in os.listdir(images_dir):
    # Construct full path to the image file
    image_path = os.path.join(images_dir, image_file)

    # Only regular files with a .tif extension (any letter case) are processed
    if os.path.isfile(image_path) and image_file.lower().endswith('.tif'):
        try:
            with rasterio.open(image_path) as img:
                # Read all bands: array of shape (band, row, col)
                full_img = img.read()

                # Work in float so unsigned-integer rasters cannot wrap around
                # in the subtractions below.
                red_band = full_img[RED_BAND_INDEX].astype(float)
                green_band = full_img[GREEN_BAND_INDEX].astype(float)
                blue_band = full_img[BLUE_BAND_INDEX].astype(float)

                # Append the indices after the original bands
                indices = _compute_rgb_indices(red_band, green_band, blue_band)
                updated_img = np.concatenate((full_img, np.stack(indices)), axis=0)

                # Existing band descriptions (None for unnamed bands) followed by the new names
                band_names_all = [name or '' for name in img.descriptions] + band_names

                # Output profile: more bands, stored as float32
                profile = img.profile
                profile.update(count=img.count + len(indices), dtype='float32')

                # Output path for the updated image
                output_filename = f'updated_{image_file}'
                output_path = os.path.join(output_folder, output_filename)

                # Write the pixels, then attach the band descriptions on the dataset
                # itself (a 'descriptions' profile key does not set them).
                with rasterio.open(output_path, 'w', **profile) as dst:
                    dst.write(updated_img.astype('float32'))
                    for band_number, band_label in enumerate(band_names_all, start=1):
                        if band_label:
                            dst.set_band_description(band_number, band_label)

                print(f"Indices added to the image and saved as {output_path}")
        except Exception as e:
            # Best effort: report the failure and carry on with the next file.
            print(f"Error processing '{image_file}': {e}")
    else:
        print(f"Skipping '{image_file}' as it's not a valid TIFF file.")

Indices added to the image and saved as E:\uMzi scene\data\EMIT\CLIPPED\updated_images\updated_emit.tif
Skipping 'mosaic.vrt' as it's not a valid TIFF file.
Skipping 'tif_list.txt' as it's not a valid TIFF file.
Skipping 'updated_images' as it's not a valid TIFF file.


### This cell prints the number of bands in each updated image to confirm that the indices were added as new bands

In [9]:
# Point `images_dir` at the folder of index-augmented images.
# Note: this rebinds `images_dir`, which the later CSV-export cell also reads.
images_dir = r'E:\uMzi scene\data\EMIT\CLIPPED\updated_images'

# Report the band count of every GeoTIFF ('.TIF' or '.tif') found in the directory.
for image_file in os.listdir(images_dir):
    if not image_file.endswith(('.TIF', '.tif')):
        continue  # not a TIFF; nothing to report

    image_location = os.path.join(images_dir, image_file)
    with rasterio.open(image_location) as img:
        print(f"Image '{image_file}' has {img.count} bands.")

Image 'updated_emit.tif' has 292 bands.


### This code cell extracts pixel values from the images with the added VIs

In [11]:
# Define paths
pntsshp_path = r'E:\uMzi scene\Training_Data\EMIT\Training_Projected.shp'
imagesdir = r'E:\uMzi scene\data\EMIT\CLIPPED\updated_images'

# Full paths of all TIFF images in the directory. The extension match is
# case-insensitive so '.TIF' files are picked up, as in the other cells.
imagelist = [os.path.join(imagesdir, file) for file in os.listdir(imagesdir)
             if file.lower().endswith('.tif')]

# Not used below; retained so the name stays defined for any cell that expects it.
imgvals = {}

# Read the training points. Each entry is
#   [(geometry x, geometry y), (class ID as int, Val_id), (X attribute, Y attribute)]
with fiona.open(pntsshp_path, 'r') as shapefile:
    points = [[(point['geometry']['coordinates'][0], point['geometry']['coordinates'][1]),
               (int(point['properties']['ID']), point['properties']['Val_id']),
               (point['properties']['X'], point['properties']['Y'])] for point in shapefile]

# Maps a descriptive point key -> {'Band_1': value, 'Band_2': value, ...}
# NOTE: the key does not include the image name, so with more than one image
# the values from a later image overwrite those from an earlier one.
point_pixel_values = {}

# Iterate over each raster image
for imagepath in imagelist:
    with rasterio.open(imagepath) as src:

        # Image name without its extension
        imagename = os.path.splitext(os.path.basename(imagepath))[0]

        for point in points:
            # Locate the pixel containing the point
            row, col = src.index(point[0][0], point[0][1])

            # One windowed read returns every band for that pixel: shape (band, 1, 1).
            # This replaces a separate read per band per point.
            values = src.read(window=((row, row + 1), (col, col + 1)))
            if values.size == 0:
                continue  # nothing was read for this point; skip it

            # The key format below is parsed again by the DataFrame cell; keep it unchanged.
            point_key = str("Point_id: {}, X: {}, Y: {}, Class_ID: {}".format(point[1][1], point[2][0], point[2][1], point[1][0] ))
            band_pixel_values = {f'Band_{band}': values[band - 1, 0, 0]
                                 for band in range(1, src.count + 1)}
            point_pixel_values.setdefault(point_key, {}).update(band_pixel_values)


# Print point_pixel_values
for point_key, pixel_values in point_pixel_values.items():
    print(f"Point: {point_key}, Pixel Values: {pixel_values}, Image name: {imagename}")


Point: Point_id: VAL_133, X: 705035.035, Y: 6576834.7805, Class_ID: 6, Pixel Values: {'Band_1': 0.020563921, 'Band_2': 0.021649232, 'Band_3': 0.022738708, 'Band_4': 0.023824712, 'Band_5': 0.025081547, 'Band_6': 0.026577502, 'Band_7': 0.028180273, 'Band_8': 0.02984218, 'Band_9': 0.03135361, 'Band_10': 0.03286194, 'Band_11': 0.034292076, 'Band_12': 0.035576165, 'Band_13': 0.036712516, 'Band_14': 0.037750874, 'Band_15': 0.03821367, 'Band_16': 0.03900308, 'Band_17': 0.04084325, 'Band_18': 0.043579623, 'Band_19': 0.046134464, 'Band_20': 0.04986006, 'Band_21': 0.053639393, 'Band_22': 0.057100605, 'Band_23': 0.059300844, 'Band_24': 0.0613831, 'Band_25': 0.062453497, 'Band_26': 0.06252918, 'Band_27': 0.06106781, 'Band_28': 0.06007484, 'Band_29': 0.059974775, 'Band_30': 0.061060883, 'Band_31': 0.061480664, 'Band_32': 0.06095874, 'Band_33': 0.060574386, 'Band_34': 0.060203962, 'Band_35': 0.06042952, 'Band_36': 0.06086645, 'Band_37': 0.05982674, 'Band_38': 0.059398968, 'Band_39': 0.05805427, 'Ban

### This cell changes point_pixel_value dictionary to a dataframe

In [12]:
# Build one record per point from `point_pixel_values`.
data_list = []

for point_key, pixel_values in point_pixel_values.items():
    # Keys look like "Point_id: <id>, X: <x>, Y: <y>, Class_ID: <class>".
    # Splitting from the right keeps an ID that itself contains ',' or ':' intact.
    id_part, x_part, y_part, class_part = point_key.rsplit(',', 3)

    data_dict = {
        'Point': id_part.split(':', 1)[1].strip(),
        'X': x_part.split(':', 1)[1].strip(),
        'Y': y_part.split(':', 1)[1].strip(),
        'Class_ID': class_part.split(':', 1)[1].strip(),
    }

    # Add band pixel values as columns
    data_dict.update(pixel_values)

    data_list.append(data_dict)

# Create DataFrame from list of dictionaries
pixelvalues_df = pd.DataFrame(data_list)

# The values parsed from the key are strings; store coordinates and the class
# label as numbers so the classifier is trained on numeric labels.
if not pixelvalues_df.empty:
    pixelvalues_df[['X', 'Y']] = pixelvalues_df[['X', 'Y']].apply(pd.to_numeric, errors='coerce')
    pixelvalues_df['Class_ID'] = pixelvalues_df['Class_ID'].astype(int)

# Print the DataFrame
print(pixelvalues_df)

# Save the DataFrame to a CSV file in a 'band_values' sub-folder of `images_dir`.
# `images_dir` is whatever the most recently executed cell assigned to it.
output_path = os.path.join(images_dir, 'band_values')
print(output_path)

# Create the directory if it doesn't exist
os.makedirs(output_path, exist_ok=True)

# Define the CSV file path
csv_file_path = os.path.join(output_path, 'pixel_values.csv')

# Save the DataFrame to a CSV file
pixelvalues_df.to_csv(csv_file_path, index=False)

print(f'DataFrame successfully saved to {csv_file_path}')

       Point            X             Y Class_ID    Band_1    Band_2  \
0    VAL_133   705035.035  6576834.7805        6  0.020564  0.021649   
1    VAL_135   705014.845  6576884.9473        6  0.018863  0.019833   
2    VAL_136  705064.6222   6576855.231        6  0.020564  0.021649   
3    VAL_138  705214.9763  6576974.9364        6  0.020836  0.022315   
4    VAL_140  705374.8567  6576985.2816        6  0.019567  0.020573   
..       ...          ...           ...      ...       ...       ...   
235     None  688485.4775  6592415.2771        5  0.023086  0.024453   
236     None   688388.573  6592367.4476        5  0.024178  0.025717   
237     None  688227.1345  6591957.9707        5  0.016499  0.020532   
238     None  696116.3704  6598463.6223        5  0.017682  0.019098   
239     None  701723.4202  6579427.2763        5  0.019617  0.020679   

       Band_3    Band_4    Band_5    Band_6  ...  Band_283  Band_284  \
0    0.022739  0.023825  0.025082  0.026578  ...  0.064041  0.0

### This cell block trains the random forest classifier 

In [13]:
# Features are every column to the right of 'Class_ID', i.e. the band columns.
last_non_band_index = pixelvalues_df.columns.tolist().index('Class_ID')
features = pixelvalues_df.iloc[:, last_non_band_index + 1:]

# Target is the class label of each training point
target = pixelvalues_df['Class_ID']

# Convert features and target columns to arrays
features_array = features.values
target_array = target.values

# Train a 500-tree random forest. A fixed seed makes the fitted model, and
# therefore the classified rasters, reproducible between runs.
classifier = RandomForestClassifier(n_estimators=500, random_state=42)
classifier.fit(features_array, target_array)

### This cell block classifies each image in the folder using the classifier trained in the previous cell

In [14]:
# Directory for the classified images.
# Note: this rebinds `output_folder`, which the index cell also uses as its target.
output_folder = r'E:\uMzi scene\data\EMIT\Classified'
os.makedirs(output_folder, exist_ok=True)

# Loop through each image in the collection
for image_path in imagelist:

    with rasterio.open(image_path) as src:

        # Read all bands at once and move the band axis last: (rows, cols, bands)
        stacked_bands = np.moveaxis(src.read(), 0, -1)

        # Flatten to one row per pixel for the classifier
        rows, cols, num_bands = stacked_bands.shape
        reshaped_bands = stacked_bands.reshape(rows * cols, num_bands)

        # A pixel is usable only if every band is finite; this excludes NaN and
        # also +/-inf, which the classifier rejects.
        valid_pixels_mask = np.all(np.isfinite(reshaped_bands), axis=1)

        # Start from an all-NaN result; unclassified pixels stay NaN (nodata)
        classification_result = np.full(rows * cols, fill_value=np.nan, dtype=np.float32)

        # Predict only when there is something to predict
        if valid_pixels_mask.any():
            predicted_labels = classifier.predict(reshaped_bands[valid_pixels_mask])
            # Labels may come back as integers or strings depending on how the
            # classifier was trained; store them as float32 alongside NaN.
            classification_result[valid_pixels_mask] = np.asarray(predicted_labels).astype(np.float32)

        # Reshape the classified result back to 2D
        classification_result_2d = classification_result.reshape(rows, cols)

        # '<name>_classified<ext>', whatever the letter case of the extension
        stem, extension = os.path.splitext(os.path.basename(image_path))
        output_filename = f'{stem}_classified{extension}'
        output_path = os.path.join(output_folder, output_filename)

        # Copy the source metadata (CRS and transform included) and override
        # what differs for the single-band output.
        meta = src.meta.copy()
        meta.update({
            'driver': 'GTiff',
            'dtype': 'float32',  # float32 so NaN can mark nodata
            'count': 1,          # single band
            'compress': 'lzw',
            'nodata': np.nan,
        })

        # Write the classified image to a new GeoTIFF file
        with rasterio.open(output_path, 'w', **meta) as dst:
            dst.write(classification_result_2d, 1)

print("Classification completed and classified images saved successfully!")



Classification completed and classified images saved successfully!


### Accuracy Assessment

In [None]:
# Define the paths
classified_image_path = r'F:\Classified'
classified_imagelist = [os.path.join(classified_image_path, file) for file in os.listdir(classified_image_path) if file.endswith('.TIF') or file.endswith('.tif')]


validation_shapefile_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\ValidationData\ValSet_EMIT_PROJECT\ValSet_Projected.shp'

# Paired ground-truth / predicted labels collected over all images
all_ground_truth_labels = []
all_classified_values = []

# Load the validation points once; they do not change between images.
# NOTE(review): assumes Point geometries in the same CRS as the classified rasters.
validation_gdf = gpd.read_file(validation_shapefile_path)

# Loop over each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)

    with rasterio.open(classified_image_file) as classified_image:

        # Read classified image
        classified_image_data = classified_image.read(1)
        height, width = classified_image_data.shape

        # Sample the raster at each point individually so that every prediction
        # stays paired with the label of the point it was read for. (Burning the
        # points into a mask and boolean-indexing returns values in raster order
        # and merges points that share a pixel.)
        for geom, label in zip(validation_gdf.geometry, validation_gdf['ID']):
            if geom is None or geom.is_empty:
                continue
            row, col = classified_image.index(geom.x, geom.y)
            if not (0 <= row < height and 0 <= col < width):
                continue  # point lies outside this raster
            classified_value = classified_image_data[row, col]
            if np.isnan(classified_value):
                continue  # nodata pixel: no prediction to assess
            all_ground_truth_labels.append(int(label))
            all_classified_values.append(int(classified_value))

if not all_ground_truth_labels:
    raise ValueError("No validation point falls on a classified pixel; nothing to assess.")

# Calculate overall accuracy metrics
overall_accuracy = accuracy_score(all_ground_truth_labels, all_classified_values)

# Use the same sorted label set for the matrix and for its row/column headers
class_names = np.unique(np.concatenate((all_ground_truth_labels, all_classified_values)))
conf_matrix = confusion_matrix(all_ground_truth_labels, all_classified_values, labels=class_names)

# Convert the confusion matrix to a pandas DataFrame for formatting
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)

# Add row and column totals
conf_matrix_df['All'] = conf_matrix_df.sum(axis=1)
conf_matrix_df.loc['All'] = conf_matrix_df.sum()

# One-cell table for the overall accuracy sheet
overall_accuracy_df = pd.DataFrame([overall_accuracy], columns=['Overall Accuracy'])

# Specify the save location
output_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\EMIT\AccuracyAss\Overallaccuracy_results.xlsx'

# Write the results to an Excel file
with pd.ExcelWriter(output_path) as writer:
    overall_accuracy_df.to_excel(writer, sheet_name='Overall Accuracy', index=False)
    conf_matrix_df.to_excel(writer, sheet_name='Confusion Matrix')

# Print results
print("Overall Accuracy:", overall_accuracy)
print("Confusion Matrix:")
print(conf_matrix_df)


### Inter accuracy assessment

In [None]:
# Define paths

classified_image_path = r'F:\Classified'
classified_imagelist = [os.path.join(classified_image_path, file) for file in os.listdir(classified_image_path) if file.endswith('.TIF') or file.endswith('.tif')]


validation_shapefile_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\ValidationData\ValSet_EMIT_PROJECT\ValSet_Projected.shp'


# Paired ground-truth / predicted labels collected over all images
all_ground_truth_labels = []
all_classified_values = []

# Mapping function to aggregate classes
def aggregate_class(class_id):
    """Collapse a detailed class ID into the two-group scheme.

    Returns 1 for IDs 1-6 (invasive alien), 2 for IDs 7-15 (non-invasive alien)
    and 0 for anything else (undefined).
    """
    if 1 <= class_id <= 6:
        return 1  # Invasive alien class
    elif 7 <= class_id <= 15:
        return 2  # Non-invasive alien class
    else:
        return 0  # Undefined class (if any other IDs are present)

# Load the validation points once; they do not change between images.
# NOTE(review): assumes Point geometries in the same CRS as the classified rasters.
validation_gdf = gpd.read_file(validation_shapefile_path)

# Loop over each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)

    with rasterio.open(classified_image_file) as classified_image:

        # Read classified image
        classified_image_data = classified_image.read(1)
        height, width = classified_image_data.shape

        # Sample the raster at each point individually so that every prediction
        # stays paired with the label of the point it was read for. (Burning the
        # points into a mask and boolean-indexing returns values in raster order
        # and merges points that share a pixel.)
        for geom, label in zip(validation_gdf.geometry, validation_gdf['ID']):
            if geom is None or geom.is_empty:
                continue
            row, col = classified_image.index(geom.x, geom.y)
            if not (0 <= row < height and 0 <= col < width):
                continue  # point lies outside this raster
            classified_value = classified_image_data[row, col]
            if np.isnan(classified_value):
                continue  # nodata pixel: no prediction to assess

            # Drop points whose ground-truth class is undefined in this scheme
            truth_group = aggregate_class(int(label))
            if truth_group == 0:
                continue

            all_ground_truth_labels.append(truth_group)
            all_classified_values.append(aggregate_class(int(classified_value)))

if not all_ground_truth_labels:
    raise ValueError("No validation point falls on a classified pixel; nothing to assess.")

# Calculate overall accuracy metrics for aggregated classes
overall_accuracy = accuracy_score(all_ground_truth_labels, all_classified_values)
conf_matrix = confusion_matrix(all_ground_truth_labels, all_classified_values, labels=[1, 2])

# Convert the confusion matrix to a pandas DataFrame for formatting
class_names = ['Invasive Alien', 'Non-Invasive Alien']
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)

# Add row and column totals
conf_matrix_df['All'] = conf_matrix_df.sum(axis=1)
conf_matrix_df.loc['All'] = conf_matrix_df.sum()

# One-cell table for the overall accuracy sheet
overall_accuracy_df = pd.DataFrame([overall_accuracy], columns=['Overall Accuracy'])

# Specify the save location
output_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\EMIT\AccuracyAss\Interaccuracy_results.xlsx'

# Write the results to an Excel file
with pd.ExcelWriter(output_path) as writer:
    overall_accuracy_df.to_excel(writer, sheet_name='Overall Accuracy', index=False)
    conf_matrix_df.to_excel(writer, sheet_name='Confusion Matrix')

# Print results
print("Overall Accuracy:", overall_accuracy)
print("Confusion Matrix:")
print(conf_matrix_df)


### Intra accuracy assessment

In [None]:
# Define paths
classified_image_path = r'F:\Classified'
classified_imagelist = [os.path.join(classified_image_path, file) for file in os.listdir(classified_image_path) if file.endswith('.TIF') or file.endswith('.tif')]

validation_shapefile_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\ValidationData\ValSet_EMIT_PROJECT\ValSet_Projected.shp'


# Paired ground-truth / predicted labels collected over all images
all_ground_truth_labels = []
all_classified_values = []

# Mapping function to aggregate classes
def aggregate_class(class_id):
    """Map a detailed class ID onto the seven-class scheme.

    IDs 1-6 are returned unchanged, IDs 7-15 are merged into class 7 and
    anything else becomes 0 (undefined).
    """
    if 1 <= class_id <= 6:
        return class_id  # Keep individual classes 1 to 6
    elif 7 <= class_id <= 15:
        return 7  # Group classes 7 to 15 together as class 7
    else:
        return 0  # Undefined class (if any other IDs are present)

# Load the validation points once; they do not change between images.
# NOTE(review): assumes Point geometries in the same CRS as the classified rasters.
validation_gdf = gpd.read_file(validation_shapefile_path)

# Loop over each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)

    with rasterio.open(classified_image_file) as classified_image:

        # Read classified image
        classified_image_data = classified_image.read(1)
        height, width = classified_image_data.shape

        # Sample the raster at each point individually so that every prediction
        # stays paired with the label of the point it was read for. (Burning the
        # points into a mask and boolean-indexing returns values in raster order
        # and merges points that share a pixel.)
        for geom, label in zip(validation_gdf.geometry, validation_gdf['ID']):
            if geom is None or geom.is_empty:
                continue
            row, col = classified_image.index(geom.x, geom.y)
            if not (0 <= row < height and 0 <= col < width):
                continue  # point lies outside this raster
            classified_value = classified_image_data[row, col]
            if np.isnan(classified_value):
                continue  # nodata pixel: no prediction to assess

            # Drop points whose ground-truth class is undefined in this scheme
            truth_class = aggregate_class(int(label))
            if truth_class == 0:
                continue

            # int(...) keeps predicted class ids as plain integers, not raster floats
            all_ground_truth_labels.append(truth_class)
            all_classified_values.append(aggregate_class(int(classified_value)))

if not all_ground_truth_labels:
    raise ValueError("No validation point falls on a classified pixel; nothing to assess.")

# Calculate overall accuracy metrics for aggregated classes
overall_accuracy = accuracy_score(all_ground_truth_labels, all_classified_values)
conf_matrix = confusion_matrix(all_ground_truth_labels, all_classified_values, labels=[1, 2, 3, 4, 5, 6, 7])

# Convert the confusion matrix to a pandas DataFrame for formatting
class_names = ['Alien_Black Wattle', 'Alien_Gum', 'Alien_Other', 'Alien_Pine', 'Alien Poplar', 'Alien Silver Wattle', 'Non-Invasive Alien']
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)

# Add row and column totals
conf_matrix_df['All'] = conf_matrix_df.sum(axis=1)
conf_matrix_df.loc['All'] = conf_matrix_df.sum()

# Create a Pandas DataFrame for overall accuracy
overall_accuracy_df = pd.DataFrame([overall_accuracy], columns=['Overall Accuracy'])

# Specify the save location
output_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\EMIT\AccuracyAss\Intraaccuracy_results.xlsx'

# Write the results to an Excel file
with pd.ExcelWriter(output_path) as writer:
    overall_accuracy_df.to_excel(writer, sheet_name='Overall Accuracy', index=False)
    conf_matrix_df.to_excel(writer, sheet_name='Confusion Matrix')

# Print results
print("Overall Accuracy:", overall_accuracy)
print("Confusion Matrix:")
print(conf_matrix_df)
