### Importing libraries

In [1]:
# Importing required libraries
import os
import rasterio
import fiona
import json
import csv
import rasterio
from rasterio.plot import show
from rasterio.mask import mask
from rasterio.plot import show
import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np
import pandas as pd
from rasterio.windows import Window

### Rescale bands

In [2]:
# Define the folder containing the TIFF file
file_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\SPOT6_PROJECTED\11092024_SPOT6_PROJECTED.TIF'

# Extract the filename from the file_path
filename = os.path.basename(file_path)
filename_without_ext, ext = os.path.splitext(filename)

# Open the TIFF file
with rasterio.open(file_path) as src:
    # Read the data as a NumPy array
    bands = src.read()

    # Rescale the image bands to reflectance values between 0-1 by dividing each band by 10 000
    rescaled_bands = bands / 10000
    
    # Define the output folder
    output_folder = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\12092024_SPOT6\rescaled'

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Prepare the output file path
    filename = os.path.basename(file_path).replace('.tif', '_rescaled.tif')
    output_path = os.path.join(output_folder, filename)
    
    # Save the rescaled data as a new TIFF file
    with rasterio.open(
        output_path,
        'w',
        driver='GTiff',
        height=src.height,
        width=src.width,
        count=src.count,
        dtype=rescaled_bands.dtype,
        crs=src.crs,
        transform=src.transform,
    ) as dst:
        dst.write(rescaled_bands)

print(f"Rescaled and saved: {output_path}")


Rescaled and saved: C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\12092024_SPOT6\rescaled\11092024_SPOT6_PROJECTED.TIF


### This code cell calculates and adds indices to each images in the folder 

In [3]:
# Define the output directory and the file name
output_folder = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\12092024_SPOT6\AddedIndices'
added_indices = os.path.join(output_folder, 'SPOT_added_indices.TIF')
# Create the folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Define custom band names for the added indices
band_names = ['GI', 'IRG', 'NGRDI', 'VARI', 'VDVI']

try:
    # Open the image
    with rasterio.open(output_path) as img:
        # Read all bands
        full_img = img.read()

        # Extract bands
        blue_band = full_img[0, :, :]
        green_band = full_img[1, :, :]
        red_band = full_img[2, :, :]

        # Calculate vegetation indices
        # 1) Greenness Index (GI) (Green/Red)
        GI = np.divide(green_band, red_band, out=np.full_like(green_band, np.nan, dtype=float), where=red_band != 0)

        # 2) IRG (Red-Green)
        IRG = red_band - green_band

        # 3) NGRDI (Green-Red)/(Green + Red)
        NGRDI = np.divide((green_band - red_band),
                          (green_band + red_band),
                          where=(green_band + red_band) != 0)

        # 4) Calculate VARI (Green-Red)/(Green+Red+Blue)
        VARI = np.divide((green_band - red_band),
                         (green_band + red_band + blue_band),
                         out=np.full_like(green_band, np.nan, dtype=float),  # Specify dtype=float
                         where=(green_band + red_band + blue_band) != 0)

        # 5) VDVI (2* Green-Red-Blue)/(2*Green+Red+Blue)
        VDVI = np.divide((2 * green_band - red_band - blue_band),
                         (2 * green_band + red_band + blue_band),
                         out=np.full_like(green_band, np.nan, dtype=float),  # Specify dtype=float
                         where=(2 * green_band + red_band + blue_band) != 0)

        # Add the calculated indices as new bands to the image
        indices = np.stack([GI, IRG, NGRDI, VARI, VDVI], axis=0)
        updated_img = np.concatenate((full_img, indices), axis=0)

        # Update band names in metadata profile
        profile = img.profile.copy()  # Copy the profile to avoid modifying the original
        if 'descriptions' not in profile:
            profile['descriptions'] = [''] * img.count
        band_names_all = profile['descriptions'] + band_names
        profile.update(count=profile['count'] + len(indices), dtype='float32', descriptions=band_names_all)

        # Write the modified image array to a new raster file
        with rasterio.open(added_indices, 'w', **profile) as dst:
            dst.write(updated_img.astype('float32'))  # Convert to float32 before writing

        print(f"Indices added to the image and saved as {added_indices}")

except Exception as e:
    print(f"Error processing the image: {e}")


Indices added to the image and saved as C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\12092024_SPOT6\AddedIndices\SPOT_added_indices.TIF


### This cell prints out band numbers of each image to see if they were added as bands

In [4]:
# Open the image
with rasterio.open(added_indices) as img:
    # Print the number of bands
    print(f"Image '{added_indices}' has {img.count} bands.")
    
    # Retrieve and print the metadata
    metadata = img.meta
    print("Added indices image metadata:")
    for key, value in metadata.items():
        print(f"{key}: {value}")
    
    # Print additional metadata information
    print("\nAdditional Metadata:")
    print(f"CRS (Coordinate Reference System): {img.crs}")
    print(f"Bounds: {img.bounds}")
    print(f"Width: {img.width}")
    print(f"Height: {img.height}")
    print(f"Number of Bands: {img.count}")
    
    # Print pixel values of the first five pixels for each band
    print("\nFirst Five Pixel Values for Each Band:")
    for band in range(1, img.count + 1):
        band_data = img.read(band)
        print(f"Band {band} first five pixels: {band_data.flat[:5]}")


Image 'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\12092024_SPOT6\AddedIndices\SPOT_added_indices.TIF' has 9 bands.
Added indices image metadata:
driver: GTiff
dtype: float32
nodata: None
width: 6719
height: 7191
count: 9
crs: EPSG:32735
transform: | 5.82, 0.00, 671653.64|
| 0.00,-5.82, 6605127.46|
| 0.00, 0.00, 1.00|

Additional Metadata:
CRS (Coordinate Reference System): EPSG:32735
Bounds: BoundingBox(left=671653.6388319659, bottom=6563255.327503694, right=710777.3893165004, top=6605127.464686949)
Width: 6719
Height: 7191
Number of Bands: 9

First Five Pixel Values for Each Band:
Band 1 first five pixels: [6.5535 6.5535 6.5535 6.5535 6.5535]
Band 2 first five pixels: [6.5535 6.5535 6.5535 6.5535 6.5535]
Band 3 first five pixels: [6.5535 6.5535 6.5535 6.5535 6.5535]
Band 4 first five pixels: [6.5535 6.5535 6.5535 6.5535 6.5535]
Band 5 first five pixels: [1. 1. 1. 1. 1.]
Band 6 first five pixels: [0. 0. 0. 0. 0.]
Band 7 first five pix

### This code cell extracts pixel values from the images with the added indices

In [None]:
# Define paths
pntsshp_path = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\Train_Val\Training_data\train_shapefile.shp'

# Open shapefile and extract points' coordinates [0]= x coordinate, [1]= y coordinate and attributes
with fiona.open(pntsshp_path, 'r') as shapefile:
    points = [[(point['geometry']['coordinates'][0], point['geometry']['coordinates'][1]),
               (int(point['properties']['ID']), point['properties']['Val_id']),
               (point['properties']['X'], point['properties']['Y'])] for point in shapefile]

# Initialize dictionary to store band pixel values associated with each point
point_pixel_values = {}

# Open the raster image
with rasterio.open(added_indices) as src:
    imagename = os.path.splitext(os.path.basename(added_indices))[0]  # Remove the file extension
    
    # Iterate over each band in the image
    for band in range(1, src.count + 1):
        
        # Iterate over each point and extract pixel values
        for point in points:
            row, col = src.index(point[0][0], point[0][1])  # Find the point within the raster image
            values = src.read(band, window=((row, row+1), (col, col+1)))  # Read the band values

            if values.size != 0:  # Check if bands are valid/not empty
                # Prepare key for the current point
                point_key = f"Point_id: {point[1][1]}, X: {point[2][0]}, Y: {point[2][1]}, Class_ID: {point[1][0]}"
                if point_key not in point_pixel_values:
                    point_pixel_values[point_key] = {}

                # Add pixel values to the dictionary
                point_pixel_values[point_key][f'{imagename}_Band_{band}'] = values[0][0]

# Print point_pixel_values
for point_key, pixel_values in point_pixel_values.items():
    print(f"Point: {point_key}, Pixel Values: {pixel_values}")



### This cell changes point_pixel_value dictionary to a dataframe

In [None]:
# Create a list of dictionaries for DataFrame creation
data_list = []

for point_key, pixel_values in point_pixel_values.items():
    data_dict = {}
    # Extracting Point ID, X, Y, and Class ID from point_key
    point_id = point_key.split(',')[0].split(':')[1].strip()  
    x_coord = point_key.split(',')[1].split(':')[1].strip()
    y_coord = point_key.split(',')[2].split(':')[1].strip()
    class_id = point_key.split(',')[3].split(':')[1].strip()
    
    # Add extracted information as separate columns
    data_dict['Point'] = point_id
    data_dict['X'] = x_coord
    data_dict['Y'] = y_coord
    data_dict['Class_ID'] = class_id
    
    # Add band pixel values as columns
    data_dict.update(pixel_values)

    data_list.append(data_dict)

# Create DataFrame from list of dictionaries
pixelvalues_df = pd.DataFrame(data_list)

# Print the DataFrame (optional)
print(pixelvalues_df)

# Save the DataFrame to a CSV file
# Define the CSV file folder
csv_file_folder = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\12092024_SPOT6\band_values'

# Create the folder if it doesn't exist
os.makedirs(csv_file_folder, exist_ok=True)

# Define the CSV file path
csv_file_path = os.path.join(csv_file_folder, 'rgb_pixel_values.csv')

print(f'DataFrame successfully saved to {csv_file_path}')


### Load CSV
##### Only use this block of code to load the .csv file if the code stopped/crash after extacting pixel values

In [None]:
# Read the CSV file into a DataFrame
#pixelvalues_df = pd.read_csv(r'E:\SPOT6_7\SPOT6_7\BandValues\pixel_values.csv')

# Display the DataFrame
#print(pixelvalues_df)

### This cell block trains the random forest classifier 

In [None]:
# Define features 
# Find the index of the last non-band column
last_non_band_index = pixelvalues_df.columns.tolist().index('Class_ID')  #

# Select columns starting from the column following the last non-band column
#These columns are the band features used in the classification
features = pixelvalues_df.iloc[:, last_non_band_index + 1:]
#print(features)

# Define target (Class ID column)
target = pixelvalues_df['Class_ID']

# Convert features and target columns to arrays
features_array = features.values
target_array = target.values

# Train the classifier using RandomForest with 500 trees
classifier = RandomForestClassifier(n_estimators=500)
classifier.fit(features_array, target_array)

### This cell block performs the classification on each image in the folder using the trained classifer in the previous cell

In [9]:
# Define the directory where you want to save the classified images
output_folder = r'C:\Users\SkosanaT\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\data\SPOT_6\12092024_SPOT6\SPOT_Classified'

# Check if the output folder exists, if not, create it
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Open the image using rasterio
with rasterio.open(added_indices) as src:
    # Get the shape of the image
    rows, cols = src.height, src.width

    # Define batch size (adjust as needed based on memory constraints)
    batch_size = 1000  # You can adjust this value

    # Initialize the classification result array for the entire image
    classification_result = np.zeros((rows, cols), dtype=np.uint8)

    # Iterate over the image in batches
    for row_start in range(0, rows, batch_size):
        for col_start in range(0, cols, batch_size):
            row_end = min(row_start + batch_size, rows)
            col_end = min(col_start + batch_size, cols)

            # Read the batch of bands
            bands = [src.read(band_idx, window=((row_start, row_end), (col_start, col_end))) 
                     for band_idx in range(1, src.count + 1)]

            # Stack the bands into a single array
            stacked_bands = np.stack(bands, axis=-1)

            # Reshape the array to 2D (rows, columns) for classification
            reshaped_bands = stacked_bands.reshape(-1, src.count)

            # Mask out no data values (pixels with a value of 0)
            valid_pixels_mask = np.all(reshaped_bands != 0, axis=1)
            valid_pixels = reshaped_bands[valid_pixels_mask]

            if valid_pixels.size > 0:
                # Predict using the trained classifier (replace this with your classifier)
                predicted_labels = classifier.predict(valid_pixels)

                # Create a temporary result array for this batch
                batch_result = np.zeros((row_end - row_start, col_end - col_start), dtype=np.uint8)

                # Fill in the valid pixels with the predicted labels
                batch_result.reshape(-1)[valid_pixels_mask] = predicted_labels

                # Write the batch result to the corresponding window in the full classification result array
                classification_result[row_start:row_end, col_start:col_end] = batch_result

    # Construct the output file path
    output_filename = os.path.basename(added_indices).replace('.tif', '_classified.tif')
    output_path = os.path.join(output_folder, output_filename)

    # Prepare metadata for the classified image
    meta = src.meta.copy()
    meta.update({
        'driver': 'GTiff',
        'dtype': 'uint8',  # Ensure data type is appropriate for classification results
        'count': 1,  # Single band
        'compress': 'lzw',  # Compression method (Lempel-Ziv-Welch)
        'nodata': 0,  # Optional: Set nodata value
        'crs': src.crs,  # Use the same CRS as the input raster
        'transform': src.transform,  # Use the same transform as the input raster
    })

    # Write the full classified image to a new GeoTIFF file
    with rasterio.open(output_path, 'w', **meta) as dst:
        dst.write(classification_result.astype('uint8'), 1)

print("Classification completed and classified image saved successfully!")


Classification completed and classified image saved successfully!
