# Import libraries

In [1]:
# Import required libraries
import os
import rasterio
import fiona
import json
import csv
import rasterio
from rasterio.merge import merge
from rasterio.enums import Resampling
from rasterio.features import rasterize
from rasterio.transform import from_origin
from rasterio import Affine
from rasterio.plot import show
from rasterio.mask import mask
from rasterio.plot import show
import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np
import pandas as pd

# Count tiff files
##### This code loops through a folder to count the number of tiff files

In [None]:
# Define the path to the root folder
images_dir = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\Imagery\uMzimAOI\Aerial_photographs'

# Initialize a counter for the number of TIFF files
tif_count = 0

# Iterate through all the directories and files in the root folder
for dirpath, dirnames, filenames in os.walk(root_folder):
    # Iterate through the files in the current directory
    for filename in filenames:
        # Check if the file has a TIFF extension
        if filename.endswith('.tif'):
            # Increment the counter if it's a TIFF file
            tif_count += 1

# Print the total count of TIFF files found
print("Total TIFF files found:", tif_count)

# Calculate indices
##### This code cell calculates and adds indices to each image in the folder 

In [None]:
# Create a folder to save the updated images if it doesn't exist
with_indices = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\Imagery\uMzimAOI\Aerial_photographs\with_indices'
os.makedirs(with_indices, exist_ok=True)

# Define custom band names for the added indices
band_names = ['GI', 'IRG', 'NGRDI', 'VARI', 'VDVI']

# Function to calculate indices
def calculate_indices(red_band, green_band, blue_band):
    # 1) Greenness Index (GI) (Green/Red)
    GI = np.divide(green_band, red_band, out=np.full_like(green_band, np.nan, dtype=float), where=red_band != 0)

    # 2) IRG (Red-Green)
    IRG = red_band - green_band

    # 3) NGRDI (Green-Red)/(Green + Red)
    NGRDI = np.divide((green_band - red_band),
                      (green_band + red_band),
                      where=(green_band + red_band) != 0)

    # 4) VARI (Green-Red)/(Green+Red+Blue)
    VARI = np.divide((green_band - red_band),
                     (green_band + red_band + blue_band),
                     out=np.full_like(green_band, np.nan, dtype=float),
                     where=(green_band + red_band + blue_band) != 0)

    # 5) VDVI (2 * Green - Red - Blue)/(2 * Green + Red + Blue)
    VDVI = np.divide((2 * green_band - red_band - blue_band),
                     (2 * green_band + red_band + blue_band),
                     out=np.full_like(green_band, np.nan, dtype=float),
                     where=(2 * green_band + red_band + blue_band) != 0)

    return [GI, IRG, NGRDI, VARI, VDVI]

# Function to process each image
def process_image(image_path):
    try:
        with rasterio.open(image_path) as img:
            # Read all bands
            full_img = img.read()

            # Mask out NoData values (e.g., pixels with NoData values set in the image metadata)
            full_img = np.where(full_img != img.nodata, full_img, np.nan)

            # Rescale the image bands to reflectance values between 0 and 1 by dividing each band by 255
            rescaled_img = full_img / 255.0

            # Separate RGB bands
            red_band = rescaled_img[0, :, :]
            green_band = rescaled_img[1, :, :]
            blue_band = rescaled_img[2, :, :]

            # Calculate indices
            indices = calculate_indices(red_band, green_band, blue_band)

            # Add the calculated indices as new bands to the image
            updated_img = np.concatenate((rescaled_img, np.stack(indices)), axis=0)

            # Update metadata profile
            profile = img.profile
            if 'descriptions' not in profile:
                profile['descriptions'] = [''] * img.count
            band_names_all = profile['descriptions'] + band_names
            profile.update(count=profile['count'] + len(indices), dtype='float32', descriptions=band_names_all)

            # Output path for the updated image
            output_filename = f'with_indices_{os.path.basename(image_path)}'
            output_path = os.path.join(with_indices, output_filename)

            # Write the modified image array to a new raster file
            with rasterio.open(output_path, 'w', **profile) as dst:
                dst.write(updated_img.astype('float32'))  # Convert to float32 before writing

            print(f"Indices added to {os.path.basename(image_path)} and saved as {output_path}")
    except Exception as e:
        print(f"Error processing '{os.path.basename(image_path)}': {e}")

if __name__ == '__main__':
    # List to store paths of all image files
    image_paths = []

    # Loop through each folder in the directory
    for folder_name in os.listdir(images_dir):
        folder_path = os.path.join(images_dir, folder_name)
        
        # Check if the item is a directory
        if os.path.isdir(folder_path) and folder_name != 'with_indices':  # Skip processing the 'updated_images' folder
            # Loop through each image file in the folder
            for image_file in os.listdir(folder_path):
                # Construct full path to the image file
                image_path = os.path.join(folder_path, image_file)
                
                # Check if it's a file and if it ends with .tif or .TIF
                if os.path.isfile(image_path) and (image_file.endswith('.tif') or image_file.endswith('.TIF')):
                    image_paths.append(image_path)
                else:
                    print(f"Skipping '{image_file}' as it's not a valid TIFF file.")
        else:
            print(f"Skipping '{folder_name}' as it's not a directory or it's the 'updated_images' folder.")

    # Process images one by one
    for image_path in image_paths:
        process_image(image_path)


Skipping '3028DB_15_2020_1377_RGB_RECT.tif.aux.xml' as it's not a valid TIFF file.
Skipping '3028DB_15_2020_1377_RGB_RECT_METADATA.XML' as it's not a valid TIFF file.
Skipping '3028DB_17_2020_1377_RGB_RECT.tfw' as it's not a valid TIFF file.
Skipping '3028DB_17_2020_1377_RGB_RECT.tif.aux.xml' as it's not a valid TIFF file.
Skipping '3028DB_17_2020_1377_RGB_RECT_METADATA.XML' as it's not a valid TIFF file.
Skipping '3028DB_18_2020_1377_RGB_RECT.tfw' as it's not a valid TIFF file.
Skipping '3028DB_18_2020_1377_RGB_RECT.tif.aux.xml' as it's not a valid TIFF file.
Skipping '3028DB_18_2020_1377_RGB_RECT_METADATA.XML' as it's not a valid TIFF file.
Skipping '3028DB_19_2020_1377_RGB_RECT.tfw' as it's not a valid TIFF file.
Skipping '3028DB_19_2020_1377_RGB_RECT.tif.aux.xml' as it's not a valid TIFF file.
Skipping '3028DB_19_2020_1377_RGB_RECT_METADATA.XML' as it's not a valid TIFF file.
Skipping '3028DB_20_2020_1377_RGB_RECT.tfw' as it's not a valid TIFF file.
Skipping '3028DB_20_2020_1377_RG

# Print output
##### This cell prints out band numbers of each image to see if indices were sucessfully added as bands, futhermore it prints out 5 random pixel values of the first image

In [None]:
# Loop through each image in the directory
for image_file in os.listdir(with_indices):
    if image_file.endswith('.TIF') or image_file.endswith('.tif'):  # Check for both upper and lower case extensions
       
        # Open the image
        with rasterio.open(os.path.join(output_folder, image_file)) as img:
            # Print the number of bands
            print(f"Image '{image_file}' has {img.count} bands.")
            
            # If this is the first image, print random pixel values avoiding NaN
            if image_file == os.listdir(with_indices)[0]:  # First image
                # Read the image into a NumPy array
                image_data = img.read()  # shape: (bands, height, width)

                # Loop through random pixels
                height, width = image_data.shape[1:]  # Get dimensions (height, width)
                num_bands = img.count
                
                # Pick 5 random pixel positions
                for _ in range(5):
                    # Random pixel position
                    random_row = np.random.randint(0, height)
                    random_col = np.random.randint(0, width)
                    
                    # Get the pixel values across all bands at this position
                    pixel_values = image_data[:, random_row, random_col]
                    
                    # Check if there are any NaN values
                    if not np.isnan(pixel_values).any():
                        print(f"Pixel at (row={random_row}, col={random_col}): {pixel_values}")
                    else:
                        print(f"Pixel at (row={random_row}, col={random_col}) contains NaN values, skipping.")


# Extract pixel values
###### This code cell extracts pixel values from the images with the added indices using the training data. These pixel values will be used to train the Random Forest classifier.

In [7]:
# Define paths
pntsshp_path = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\Training_Data\RGB\RGB_train.shp'
with_indices = r'path_to_your_tiff_directory'  # Set the path to your directory containing TIFF files

# Get all TIFF files from the directory
tiff_files = [file for file in os.listdir(with_indices) if file.lower().endswith('.tif')]
if not tiff_files:
    raise FileNotFoundError("No TIFF files found in the specified directory.")

# Open shapefile and extract points' coordinates and attributes
with fiona.open(pntsshp_path, 'r') as shapefile:
    points = [[(point['geometry']['coordinates'][0], point['geometry']['coordinates'][1]),
               (int(point['properties']['ID']), point['properties']['Val_id']),
               (point['properties']['X'], point['properties']['Y'])] for point in shapefile]

# Initialize dictionary to store band pixel values associated with each point
point_pixel_values = {}

# Loop through each TIFF file in the directory
for tiff_file in tiff_files:
    # Process the TIFF file
    image_path = os.path.join(with_indices, tiff_file)
    with rasterio.open(image_path) as src:
        imagename = os.path.splitext(os.path.basename(image_path))[0]  # Remove the file extension
        
        # Iterate over each band in the image
        for band in range(1, src.count + 1):
            # Iterate over each point and extract pixel values
            for point in points:
                row, col = src.index(point[0][0], point[0][1])  # Find the point within the raster image
                values = src.read(band, window=((row, row + 1), (col, col + 1)))  # Read the band values

                if values.size != 0:  # Check if bands are valid/not empty
                    # Prepare key for the current point
                    point_key = f"Point_id: {point[1][1]}, X: {point[2][0]}, Y: {point[2][1]}, Class_ID: {point[1][0]}"
                    if point_key not in point_pixel_values:
                        point_pixel_values[point_key] = {}

                    # Add pixel values to the dictionary
                    point_pixel_values[point_key][f'{imagename}_Band_{band}'] = values[0][0]

# Print point_pixel_values
for point_key, pixel_values in point_pixel_values.items():
    print(f"Point: {point_key}, Pixel Values: {pixel_values}")

# Dictionary to dataframe
### This cell changes point_pixel_value dictionary to a dataframe

In [8]:
# Create a list of dictionaries for DataFrame creation
data_list = []

for point_key, pixel_values in point_pixel_values.items():
    data_dict = {}
    # Extracting Point ID, X, Y, and Class ID from point_key
    point_id = point_key.split(',')[0].split(':')[1].strip()  
    x_coord = point_key.split(',')[1].split(':')[1].strip()
    y_coord = point_key.split(',')[2].split(':')[1].strip()
    class_id = point_key.split(',')[3].split(':')[1].strip()
    
    # Add extracted information as separate columns
    data_dict['Point'] = point_id
    data_dict['X'] = x_coord
    data_dict['Y'] = y_coord
    data_dict['Class_ID'] = class_id
    
    # Add band pixel values as columns
    data_dict.update(pixel_values)

    data_list.append(data_dict)

# Create DataFrame from list of dictionaries
pixelvalues_df = pd.DataFrame(data_list)

# Print the DataFrame (optional)
print(pixelvalues_df)

# Save the DataFrame to a CSV file
csv_file_path = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\Imagery\uMzimAOI\Aerial_photographs\pixel_values_rgb.csv'
pixelvalues_df.to_csv(csv_file_path, index=False)

print(f'DataFrame successfully saved to {csv_file_path}')


Empty DataFrame
Columns: []
Index: []
DataFrame successfully saved to C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\Imagery\uMzimAOI\SPOT6\Band_values\pixel_values.csv


# Open .csv file
##### This code cell loads the .csv file as a dataframe (use if classification stopped/crashed)

In [9]:
# Read the CSV file into a DataFrame with the correct delimiter
pixelvalues_df = pd.read_csv(r'H:\Band_values\pixel_values_rgb.csv', delimiter=';')

# Print the column names to verify that they are now correctly separated
print(pixelvalues_df.columns)

# Convert the string representations of lists to numerical values
for column in ['Band_1', 'Band_2', 'Band_3', 'Band_4', 'Band_5', 'Band_6', 'Band_7', 'Band_8']: #replace with actual columns name
    if column in pixelvalues_df.columns:
        pixelvalues_df[column] = pixelvalues_df[column].apply(lambda x: float(x.strip('[]')))

        # Print out the results for the current column
        print(f"Column: {column}")
        print(pixelvalues_df[column].head())  # Display the first few rows of the converted column
        print("\n")  # Add a newline for readability
    else:
        print(f"Column '{column}' not found in the DataFrame.")

# Train classifer

In [None]:
# Define features 
# Find the index of the last non-band column
last_non_band_index = pixelvalues_df.columns.tolist().index('Class_ID')  #

# Select columns starting from the column following the last non-band column
#These columns are the band features used in the classification
features = pixelvalues_df.iloc[:, last_non_band_index + 1:]
#print(features)

# Define target (Class ID column)
target = pixelvalues_df['Class_ID']

# Convert features and target columns to arrays
features_array = features.values
target_array = target.values

# Train the classifier using RandomForest with 500 trees
classifier = RandomForestClassifier(n_estimators=500)
classifier.fit(features_array, target_array)

# Print information about features_array
print("Features Array:")
print(f"Shape: {features_array.shape}")
print(f"Data Type: {features_array.dtype}")
print("First 5 rows:\n", features_array[:5])

# Print information about target_array
print("\nTarget Array:")
print(f"Shape: {target_array.shape}")
print(f"Data Type: {target_array.dtype}")
print("First 5 values:\n", target_array[:5])


# Train the classifier using RandomForest with 500 trees
classifier = RandomForestClassifier(n_estimators=500)
classifier.fit(features_array, target_array)

# Classification
##### This cell block performs the classification on each image in the folder using the trained classifer in the previous cell

In [None]:
# Define the directory where you want to save the classified images
output_folder = r"C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\Data\Imagery\uMzimAOI\Aerial_photographs\CLASSIFIED"

# Check if the output folder exists, if not, create it
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Loop through the folder to find .tif or .TIF files
for filename in os.listdir(with_indices):
    if filename.lower().endswith('.tif'):
        image_path = os.path.join(with_indices, filename)

        # Open the image using rasterio
        with rasterio.open(image_path) as src:
            # Get metadata from the original image
            out_meta = src.meta.copy()

            # Get the shape of the image
            rows, cols = src.height, src.width

            # Define batch size (adjust as needed based on memory constraints)
            # Processes the image in batches (1000x1000 pixels) to avoid memory overload
            batch_size = 1000  

            # Initialize the classification result array for the entire image
            classification_result = np.zeros((rows, cols), dtype=np.uint8)

            # Iterate over the image in batches
            for row_start in range(0, rows, batch_size):
                for col_start in range(0, cols, batch_size):
                    row_end = min(row_start + batch_size, rows)
                    col_end = min(col_start + batch_size, cols)

                    # Read the batch of bands
                    bands = [src.read(band_idx, window=((row_start, row_end), (col_start, col_end)))
                             for band_idx in range(1, src.count + 1)]

                    # Stack the bands into a single array
                    stacked_bands = np.stack(bands, axis=-1)

                    # Reshape the array to 2D (rows, columns) for classification
                    reshaped_bands = stacked_bands.reshape(-1, src.count)

                    # Classify using the trained classifier (replace with your classifier)
                    predicted_labels = classifier.predict(reshaped_bands)

                    # Create a temporary result array for this batch
                    batch_result = predicted_labels.reshape(row_end - row_start, col_end - col_start)

                    # Write the batch result to the corresponding window in the full classification result array
                    classification_result[row_start:row_end, col_start:col_end] = batch_result

            # Construct the output file path
            output_filename = os.path.basename(image_path).replace('.tif', '_classified.tif')
            output_path = os.path.join(output_folder, output_filename)

            # Prepare metadata for the classified image
            meta = src.meta.copy()
            meta.update({
                'driver': 'GTiff',
                'dtype': 'uint8',  # Ensure data type is appropriate for classification results
                'count': 1,  # Single band
                'compress': 'lzw',  # Compression method (Lempel-Ziv-Welch)
            })

            # Write the full classified image to a new GeoTIFF file
            with rasterio.open(output_path, 'w', **meta) as dst:
                dst.write(classification_result.astype('uint8'), 1)

        print(f"Classification completed for {filename} and saved as {output_filename}!")
