### This code cell extracts pixel values from the images with the added indices 

In [None]:
# Purpose: for every training point in the shapefile, look up the pixel under
# that point in each TIFF image and collect one value per band, keyed by a
# descriptive string built from the point's attributes.

# Define paths
images_path = r'C:\Users\SkosanaT\Documents\RGB_test\npy_tiff'

# Get file path list of all TIFF images in dir
imagelist = [os.path.join(images_path, file) for file in os.listdir(images_path)
             if file.endswith(('.TIF', '.tif'))]
print(imagelist)

# Initialize dictionaries
imgvals = {}

# Open shapefile and extract, per point:
#   [0] = (x, y) geometry coordinates
#   [1] = (class ID as int, Val_id attribute)
#   [2] = (X attribute, Y attribute)
with fiona.open(pntsshp_path, 'r') as shapefile:
    points = [[(point['geometry']['coordinates'][0], point['geometry']['coordinates'][1]),
               (int(point['properties']['ID']), point['properties']['Val_id']),
               (point['properties']['X'], point['properties']['Y'])] for point in shapefile]

# Band pixel values associated with each point: {point_key: {'Band_n': value}}
point_pixel_values = {}

# Names of the images that contributed values to each point, so the report
# below does not rely on a loop variable left over from the last image.
point_image_names = {}

# Iterate over each raster image
for imagepath in imagelist:
    with rasterio.open(imagepath) as src:

        # File name without its extension
        imagename = os.path.splitext(os.path.basename(imagepath))[0]

        # Iterate over each point and extract the pixel values of all bands
        for point in points:
            # Locate the pixel that contains the point
            row, col = src.index(point[0][0], point[0][1])

            # Skip points that fall outside this raster. Without this guard a
            # negative row/col would be read as a window counted from the
            # opposite edge, silently returning the wrong pixel.
            if not (0 <= row < src.height and 0 <= col < src.width):
                continue

            # One read returns every band for this pixel: shape (bands, 1, 1)
            pixel_stack = src.read(window=((row, row + 1), (col, col + 1)))

            point_key = str("Point_id: {}, X: {}, Y: {}, Class_ID: {}".format(point[1][1], point[2][0], point[2][1], point[1][0] ))

            # NOTE(review): when a point lies inside several images, values
            # from later images overwrite earlier ones for the same band key.
            band_pixel_values = point_pixel_values.setdefault(point_key, {})
            for band in range(1, src.count + 1):
                band_pixel_values[f'Band_{band}'] = pixel_stack[band - 1, 0, 0]

            point_image_names.setdefault(point_key, []).append(imagename)


# Print point_pixel_values together with the image(s) each point was read from
for point_key, pixel_values in point_pixel_values.items():
    source_images = ', '.join(point_image_names[point_key])
    print(f"Point: {point_key}, Pixel Values: {pixel_values}, Image name: {source_images}")


### This cell converts the point_pixel_values dictionary to a DataFrame

In [None]:
# Purpose: flatten point_pixel_values (point key -> band values) into a table
# with one row per point, then save it as CSV.
import re

# Mirrors the key layout "Point_id: ..., X: ..., Y: ..., Class_ID: ...".
# Matching the field labels (instead of splitting on ',' and ':') keeps the
# parsing correct when an attribute value itself contains a comma or colon.
point_key_pattern = re.compile(r'^Point_id: (.*), X: (.*), Y: (.*), Class_ID: (.*)$', re.DOTALL)

# Create a list of dictionaries for DataFrame creation
data_list = []

for point_key, pixel_values in point_pixel_values.items():
    key_match = point_key_pattern.match(point_key)
    if key_match is None:
        raise ValueError(f'Unexpected point key format: {point_key!r}')

    # Point ID, X, Y and Class ID are recovered as stripped strings
    point_id, x_coord, y_coord, class_id = (field.strip() for field in key_match.groups())

    # Identification columns first, then one column per band
    data_dict = {
        'Point': point_id,
        'X': x_coord,
        'Y': y_coord,
        'Class_ID': class_id,
    }
    data_dict.update(pixel_values)

    data_list.append(data_dict)

# Create DataFrame from list of dictionaries
pixelvalues_df = pd.DataFrame(data_list)

# Print the DataFrame (optional)
print(pixelvalues_df)

# Save the DataFrame to a CSV file, creating the target folder if needed
csv_file_path = r'C:\Users\SkosanaT\Documents\RGB_test\Band_values\pixel_values.csv'
os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)
pixelvalues_df.to_csv(csv_file_path, index=False)

print(f'DataFrame successfully saved to {csv_file_path}')

In [None]:
# Read the CSV file into a DataFrame
#pixelvalues_df = pd.read_csv(r'E:\SPOT6_7\SPOT6_7\BandValues\pixel_values.csv')

# Display the DataFrame
#print(pixelvalues_df)

### This cell block trains the random forest classifier 

In [None]:
# Purpose: fit a random forest on the sampled band values, using the class ID
# of each training point as the label.

# Feature columns: Band_1 ... Band_9
band_columns = [f'Band_{band_number}' for band_number in range(1, 10)]
features = pixelvalues_df[band_columns]

# Label column (Class ID)
target = pixelvalues_df['Class_ID']

# Array views of the features and labels handed to scikit-learn
features_array = features.values
target_array = target.values

# Random forest with 500 trees; fit() returns the fitted estimator itself
classifier = RandomForestClassifier(n_estimators=500).fit(features_array, target_array)

### This cell block classifies each image in the folder using the classifier trained in the previous cell

In [None]:
# Purpose: apply the trained classifier to every image in imagelist, tile by
# tile, and write the predicted class of each pixel to a single-band GeoTIFF.

# Define the directory where you want to save the classified images
output_folder = r"C:\Users\SkosanaT\Documents\RGB_test\Classified"

# Create the output folder if it does not exist yet
os.makedirs(output_folder, exist_ok=True)

# Tile edge length in pixels (adjust as needed based on memory constraints)
batch_size = 1000

# Loop through each image in the collection
for image_path in imagelist:

    # Open the image using rasterio
    with rasterio.open(image_path) as src:
        # Get the shape of the image
        rows, cols = src.height, src.width

        # Classification result for the entire image; 0 marks "no data"
        classification_result = np.zeros((rows, cols), dtype=np.uint8)

        # Iterate over the image in tiles
        for row_start in range(0, rows, batch_size):
            row_end = min(row_start + batch_size, rows)
            for col_start in range(0, cols, batch_size):
                col_end = min(col_start + batch_size, cols)

                # Read every band of the tile
                bands = [src.read(band_idx, window=((row_start, row_end), (col_start, col_end)))
                         for band_idx in range(1, src.count + 1)]

                # Stack to (tile_rows, tile_cols, num_bands), then flatten to
                # (tile_rows * tile_cols, num_bands) for the classifier
                stacked_bands = np.stack(bands, axis=-1)
                reshaped_bands = stacked_bands.reshape(-1, stacked_bands.shape[-1])

                # A pixel is treated as valid only when all of its bands are non-zero
                valid_pixels_mask = np.all(reshaped_bands != 0, axis=1)

                # A tile with no valid pixels stays 0 in the result. Skipping
                # it also avoids calling predict() on an empty array, which
                # scikit-learn rejects.
                if not valid_pixels_mask.any():
                    continue

                # Predict the valid pixels; the rest keep the nodata value 0
                predicted_labels = np.zeros(reshaped_bands.shape[0], dtype=np.uint8)
                predicted_labels[valid_pixels_mask] = classifier.predict(reshaped_bands[valid_pixels_mask])

                # Place the classified tile back into the full-size result
                classification_result[row_start:row_end, col_start:col_end] = predicted_labels.reshape(
                    row_end - row_start, col_end - col_start)

        # Build the output name from the file stem so that both '.tif' and
        # '.TIF' inputs receive the '_classified' suffix
        image_stem = os.path.splitext(os.path.basename(image_path))[0]
        output_path = os.path.join(output_folder, image_stem + '_classified.tif')

        # Prepare metadata for the classified image
        meta = src.meta.copy()
        meta.update({
            'driver': 'GTiff',
            'dtype': 'uint8',  # Matches the dtype of classification_result
            'count': 1,  # Single band
            'compress': 'lzw',  # Compression method (Lempel-Ziv-Welch)
            'nodata': 0,  # Pixels that were never classified
            'crs': src.crs,  # Use the same CRS as the input raster
            'transform': src.transform,  # Use the same transform as the input raster
        })

        # Write the classified image to a new GeoTIFF file
        with rasterio.open(output_path, 'w', **meta) as dst:
            dst.write(classification_result, 1)

print("Classification completed and classified images saved successfully!")

### Accuracy Assessment

In [None]:
import os
import rasterio
import geopandas as gpd
import numpy as np
import pandas as pd
from rasterio.features import rasterize
from rasterio.transform import from_origin
from sklearn.metrics import accuracy_score, confusion_matrix

# Purpose: compare the class written to each classified raster with the class
# recorded for every validation point, then report the overall accuracy and a
# confusion matrix with row/column totals.

# Define the paths
classified_image_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\SPOT 6_7\Classified_withIndices'
classified_imagelist = [os.path.join(classified_image_path, file) for file in os.listdir(classified_image_path)
                        if file.endswith(('.TIF', '.tif'))]

validation_shapefile_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\ValidationData\Val_projected.shp'
umzim_boundary = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\Fieldwork\uMzimvubuFieldTrip\uMzimvubuBoundary.shp'

# Load the validation shapefile once; it does not change between images
validation_gdf = gpd.read_file(validation_shapefile_path)

# Initialize variables to store overall metrics
all_ground_truth_labels = []
all_classified_values = []

# Loop over each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)

    # Open classified image
    with rasterio.open(classified_image_file) as classified_image:

        # Read classified image
        classified_image_data = classified_image.read(1)
        height, width = classified_image_data.shape

        # Filter validation points falling within the raster boundary
        raster_boundary = classified_image.bounds
        valid_points = validation_gdf.cx[raster_boundary.left:raster_boundary.right,
                                         raster_boundary.bottom:raster_boundary.top]

        # Sample the raster under each point individually so that every
        # classified value stays paired with the label of the same point.
        # (Masking the image with a rasterized layer returns values in raster
        # order and merges points sharing a pixel, which breaks that pairing.)
        samples_before = len(all_ground_truth_labels)
        for geom, label in zip(valid_points.geometry, valid_points['ID']):
            if geom is None or geom.is_empty:
                continue

            # Point geometries are used directly; any other geometry type is
            # reduced to a single representative location
            location = geom if geom.geom_type == 'Point' else geom.representative_point()
            row, col = classified_image.index(location.x, location.y)

            # The bounding-box filter is inclusive, so a point on the right or
            # bottom edge can index one pixel past the array
            if not (0 <= row < height and 0 <= col < width):
                continue

            all_ground_truth_labels.append(label)
            all_classified_values.append(int(classified_image_data[row, col]))

        if len(all_ground_truth_labels) == samples_before:
            print("No valid geometries found for rasterization within the raster boundary.")

# Without any samples there is nothing to score
if not all_ground_truth_labels:
    raise ValueError("No validation points overlapped any classified image; accuracy cannot be computed.")

# Calculate overall accuracy metrics
overall_accuracy = accuracy_score(all_ground_truth_labels, all_classified_values)

# Use one explicit, sorted label list for both the matrix and its row/column names
class_names = np.unique(np.concatenate((all_ground_truth_labels, all_classified_values)))
conf_matrix = confusion_matrix(all_ground_truth_labels, all_classified_values, labels=class_names)

# Convert the confusion matrix to a pandas DataFrame for formatting
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)

# Add an 'All' column (row totals) and an 'All' row (column totals)
conf_matrix_df['All'] = conf_matrix_df.sum(axis=1)
conf_matrix_df.loc['All'] = conf_matrix_df.sum()

# Specify the save location
#output_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\EMIT\AccuracyAss\Overallaccuracy_results.xlsx'

# Write the results to an Excel file
#with pd.ExcelWriter(output_path) as writer:
    #conf_matrix_df.to_excel(writer, sheet_name='Confusion Matrix')

# Print results
print("Overall Accuracy:", overall_accuracy)
print("Confusion Matrix:")
print(conf_matrix_df)


## Inter accuracy assessment

In [None]:
# Inputs for the inter-class assessment: the folder of classified rasters and
# the validation shapefile.
classified_image_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\SPOT 6_7\Classified_withIndices'

# Full path of every entry in that folder whose name ends in '.TIF' or '.tif'
classified_imagelist = [
    os.path.join(classified_image_path, entry)
    for entry in os.listdir(classified_image_path)
    if entry.endswith(('.TIF', '.tif'))
]

validation_shapefile_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\ValidationData\Val_projected.shp'

# Accumulators for the labels and predictions gathered across all images
all_ground_truth_labels, all_classified_values = [], []

# Collapse detailed class IDs into the two-class scheme
def aggregate_class(class_id):
    """Map a detailed class ID onto the aggregated two-class scheme.

    Returns 1 (invasive alien) for IDs 1 to 6, 2 (non-invasive alien) for
    IDs 7 to 15, and 0 (undefined) for any other value.
    """
    if class_id >= 1 and class_id <= 6:
        return 1
    if class_id >= 7 and class_id <= 15:
        return 2
    return 0

# Purpose: score the classification after collapsing classes with
# aggregate_class (1 = invasive alien, 2 = non-invasive alien, 0 = undefined).

# Load the validation shapefile once; it does not change between images
validation_gdf = gpd.read_file(validation_shapefile_path)

# Loop over each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)

    # Open classified image
    with rasterio.open(classified_image_file) as classified_image:

        # Read classified image
        classified_image_data = classified_image.read(1)
        height, width = classified_image_data.shape

        # Filter validation points falling within the raster boundary
        raster_boundary = classified_image.bounds
        valid_points = validation_gdf.cx[raster_boundary.left:raster_boundary.right,
                                         raster_boundary.bottom:raster_boundary.top]

        # Sample the raster under each point individually so that every
        # classified value stays paired with the label of the same point.
        # (Masking the image with a rasterized layer returns values in raster
        # order and merges points sharing a pixel, which breaks that pairing.)
        for geom, label in zip(valid_points.geometry, valid_points['ID']):
            if geom is None or geom.is_empty:
                continue

            # Point geometries are used directly; any other geometry type is
            # reduced to a single representative location
            location = geom if geom.geom_type == 'Point' else geom.representative_point()
            row, col = classified_image.index(location.x, location.y)

            # The bounding-box filter is inclusive, so a point on the right or
            # bottom edge can index one pixel past the array
            if not (0 <= row < height and 0 <= col < width):
                continue

            # Drop points whose ground-truth class is undefined
            ground_truth_class = aggregate_class(label)
            if ground_truth_class == 0:
                continue

            all_ground_truth_labels.append(ground_truth_class)
            all_classified_values.append(aggregate_class(int(classified_image_data[row, col])))

# Calculate overall accuracy metrics for aggregated classes.
# NOTE(review): predictions that aggregate to 0 count as errors in the accuracy
# score but are not shown in the matrix, which is limited to labels 1 and 2.
overall_accuracy = accuracy_score(all_ground_truth_labels, all_classified_values)
conf_matrix = confusion_matrix(all_ground_truth_labels, all_classified_values, labels=[1, 2])

# Convert the confusion matrix to a pandas DataFrame for formatting
class_names = ['Invasive Alien', 'Non-Invasive Alien']
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)

# Add an 'All' column (row totals) and an 'All' row (column totals)
conf_matrix_df['All'] = conf_matrix_df.sum(axis=1)
conf_matrix_df.loc['All'] = conf_matrix_df.sum()

# Specify the save location
#output_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\EMIT\AccuracyAss\Interaccuracy_results.xlsx'

# Write the results to an Excel file
# (overall_accuracy_df is not created in this cell; define it before enabling the first export line)
#with pd.ExcelWriter(output_path) as writer:
    #overall_accuracy_df.to_excel(writer, sheet_name='Overall Accuracy', index=False)
    #conf_matrix_df.to_excel(writer, sheet_name='Confusion Matrix')

# Print results
print("Overall Accuracy:", overall_accuracy)
print("Confusion Matrix:")
print(conf_matrix_df)


### Intra accuracy assessment

In [None]:
# Inputs for the intra-class assessment: the folder of classified rasters and
# the validation shapefile.
classified_image_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\SPOT 6_7\Classified_withIndices'

# Full path of every entry in that folder whose name ends in '.TIF' or '.tif'
classified_imagelist = [
    os.path.join(classified_image_path, entry)
    for entry in os.listdir(classified_image_path)
    if entry.endswith(('.TIF', '.tif'))
]

validation_shapefile_path = r'C:\Users\Thand\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\ValidationData\Val_projected.shp'

# Accumulators for the labels and predictions gathered across all images
all_ground_truth_labels, all_classified_values = [], []

# Keep classes 1-6 as they are and merge classes 7-15 into one class
def aggregate_class(class_id):
    """Map a detailed class ID onto the seven-class scheme.

    IDs 1 to 6 are returned unchanged, IDs 7 to 15 are grouped together as
    class 7, and any other value becomes 0 (undefined).
    """
    if class_id >= 1 and class_id <= 6:
        return class_id
    if class_id >= 7 and class_id <= 15:
        return 7
    return 0

# Purpose: score the classification after grouping classes with
# aggregate_class (classes 1-6 kept, classes 7-15 merged into 7, 0 = undefined).

# Load the validation shapefile once; it does not change between images
validation_gdf = gpd.read_file(validation_shapefile_path)

# Loop over each classified image
for classified_image_file in classified_imagelist:
    print("Processing:", classified_image_file)

    # Open classified image
    with rasterio.open(classified_image_file) as classified_image:

        # Read classified image
        classified_image_data = classified_image.read(1)
        height, width = classified_image_data.shape

        # Filter validation points falling within the raster boundary
        raster_boundary = classified_image.bounds
        valid_points = validation_gdf.cx[raster_boundary.left:raster_boundary.right,
                                         raster_boundary.bottom:raster_boundary.top]

        # Sample the raster under each point individually so that every
        # classified value stays paired with the label of the same point.
        # (Masking the image with a rasterized layer returns values in raster
        # order and merges points sharing a pixel, which breaks that pairing.)
        for geom, label in zip(valid_points.geometry, valid_points['ID']):
            if geom is None or geom.is_empty:
                continue

            # Point geometries are used directly; any other geometry type is
            # reduced to a single representative location
            location = geom if geom.geom_type == 'Point' else geom.representative_point()
            row, col = classified_image.index(location.x, location.y)

            # The bounding-box filter is inclusive, so a point on the right or
            # bottom edge can index one pixel past the array
            if not (0 <= row < height and 0 <= col < width):
                continue

            # Drop points whose ground-truth class is undefined
            ground_truth_class = aggregate_class(label)
            if ground_truth_class == 0:
                continue

            all_ground_truth_labels.append(ground_truth_class)
            all_classified_values.append(aggregate_class(int(classified_image_data[row, col])))

# Calculate overall accuracy metrics for aggregated classes.
# NOTE(review): predictions that aggregate to 0 count as errors in the accuracy
# score but are not shown in the matrix, which is limited to labels 1 to 7.
overall_accuracy = accuracy_score(all_ground_truth_labels, all_classified_values)
conf_matrix = confusion_matrix(all_ground_truth_labels, all_classified_values, labels=[1, 2, 3, 4, 5, 6, 7])

# Convert the confusion matrix to a pandas DataFrame for formatting
class_names = ['Alien_Black Wattle', 'Alien_Gum', 'Alien_Other', 'Alien_Pine', 'Alien Poplar', 'Alien Silver Wattle', 'Non-Invasive Alien']
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)

# Add an 'All' column (row totals) and an 'All' row (column totals)
conf_matrix_df['All'] = conf_matrix_df.sum(axis=1)
conf_matrix_df.loc['All'] = conf_matrix_df.sum()

# Create a Pandas DataFrame for overall accuracy
overall_accuracy_df = pd.DataFrame([overall_accuracy], columns=['Overall Accuracy'])

# Specify the save location
#output_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Results\EMIT\AccuracyAss\Intraaccuracy_results.xlsx'

# Write the results to an Excel file
#with pd.ExcelWriter(output_path) as writer:
    #overall_accuracy_df.to_excel(writer, sheet_name='Overall Accuracy', index=False)
    #conf_matrix_df.to_excel(writer, sheet_name='Confusion Matrix')

# Print results
print("Overall Accuracy:", overall_accuracy)
print("Confusion Matrix:")
print(conf_matrix_df)
