## Import packages

In [1]:
# Import packages
import argparse
import os
#import geoviews
import warnings
import rasterio as rio
import rasterio
import pandas as pd
import fiona
import matplotlib.pyplot as plt
import numpy.ma as ma
import numpy as np
import xarray as xr
import rioxarray as rxr
from shapely.geometry import mapping, box
import geopandas as gpd

warnings.simplefilter('ignore')

### Code loops through the folder and prints out metadata

In [2]:
# Define the path to the specific TIFF file
image_file_path = r'C:\Users\User\Documents\DataFusion\DataFusion\SimpleFusion\FusedImage\04102014fused_image.tif'

print(f"\nProcessing file: {mosaic_file_path}")

# Open the image
with rasterio.open(image_file_path) as img:
    # Print the original image metadata
    metadata = img.meta
    print("Metadata:", metadata)
    
    # Explicitly set NoData value to 0 (if it's not already set)
    nodata = 0
    
    # Loop through each band and print its statistics
    for band in range(1, img.count + 1):
        band_data = img.read(band)
        
        # Mask NoData values (0 in this case)
        band_data = np.ma.masked_equal(band_data, nodata)
        
        # Calculate statistics ignoring masked values (NoData)
        valid_data = band_data[~band_data.mask]
        
        if valid_data.size > 0:
            print(f"\nBand {band} Statistics:")
            print(f"Min: {valid_data.min()}")
            print(f"Max: {valid_data.max()}")
            print(f"Mean: {valid_data.mean()}")
            print(f"Standard Deviation: {valid_data.std()}")
            print(f"First 5 valid pixel values in Band {band}: {valid_data.flatten()[:5]}")
        else:
            print(f"\nBand {band} contains only NaN values.")



Processing file: C:\Users\User\Documents\DataFusion\DataFusion\SimpleFusion\FusedImage\04102014fused_image.tif
Metadata: {'driver': 'GTiff', 'dtype': 'float32', 'nodata': None, 'width': 3890, 'height': 4159, 'count': 28, 'crs': CRS.from_epsg(32735), 'transform': Affine(10.0, 0.0, 671770.0,
       0.0, -10.0, 6604990.0)}

Band 1 Statistics:
Min: 0.00019999999494757503
Max: 1.8167999982833862
Mean: 0.040264517995412356
Standard Deviation: 0.018357353086230356
First 5 valid pixel values in Band 1: [0.049400001764297485 0.04960000142455101 0.04619999974966049
 0.04830000177025795 0.05009999871253967]

Band 2 Statistics:
Min: 0.00019999999494757503
Max: 1.7128000259399414
Mean: 0.0718706353937605
Standard Deviation: 0.02367691043929165
First 5 valid pixel values in Band 2: [0.08479999750852585 0.08789999783039093 0.08160000294446945
 0.07829999923706055 0.08699999749660492]

Band 3 Statistics:
Min: 9.999999747378752e-05
Max: 1.6535999774932861
Mean: 0.0662405328309843
Standard Deviation: 0

### Normalize bands to from 0-1

In [3]:
# Function to normalize bands
def min_max_normalize_with_nodata(bands, nodata_value=0, new_nodata_value=-9999):
    normalized_bands = np.full_like(bands, new_nodata_value, dtype=np.float32)

    for i in range(bands.shape[0]):  # Loop over each band
        band = bands[i, :, :]
        
        # Create a mask for NoData values
        valid_mask = band != nodata_value
        
        # Calculate min and max values only from valid data
        if valid_mask.any():
            min_val = band[valid_mask].min()
            max_val = band[valid_mask].max()
            
            # Avoid division by zero
            if min_val != max_val:
                normalized_band = np.where(valid_mask, (band - min_val) / (max_val - min_val), new_nodata_value)
            else:
                normalized_band = np.where(valid_mask, 1.0, new_nodata_value)
        else:
            normalized_band = np.full(band.shape, new_nodata_value, dtype=np.float32)
        
        normalized_bands[i, :, :] = normalized_band
    
    return normalized_bands

# Path to your TIFF file
tif_path = r'C:\Users\User\Documents\DataFusion\DataFusion\SimpleFusion\FusedImage\04102014fused_image.tif'
output_tif_path = r'C:\Users\User\Documents\DataFusion\DataFusion\Normalized_Image\Normalized_FusedImage.tif'

with rasterio.open(tif_path) as src:
    # Read the bands from the TIFF file
    bands = src.read()  # Read all bands
    meta = src.meta.copy()  # Copy metadata

# Normalize the bands while handling NoData values
fused_bands_normalized = min_max_normalize_with_nodata(bands, nodata_value=0, new_nodata_value=-9999)

# Update metadata for the output TIFF
meta.update({
    'dtype': 'float32',  # Update data type
    'count': fused_bands_normalized.shape[0],  # Number of bands
})

# Save the normalized data to a new TIFF file
with rasterio.open(output_tif_path, 'w', **meta) as dst:
    dst.write(fused_bands_normalized)

print(f'Normalized TIFF saved to {output_tif_path}')


Normalized TIFF saved to C:\Users\User\Documents\DataFusion\DataFusion\Normalized_Image\Normalized_FusedImage.tif


### This code cell extracts pixel values

In [4]:
# Define paths
pntsshp_path = r'C:\Users\User\OneDrive - Stellenbosch University\MAPWAPS\DataChapter1\uMzi_ROI\Train_Val\Training_data\train_shapefile.shp'

# Open shapefile and extract points' coordinates [0]= x coordinate, [1]= y coordinate and attributes
with fiona.open(pntsshp_path, 'r') as shapefile:
    points = [[(point['geometry']['coordinates'][0], point['geometry']['coordinates'][1]),
               (int(point['properties']['ID']), point['properties']['Val_id']),
               (point['properties']['X'], point['properties']['Y'])] for point in shapefile]

# Initialize dictionary to store band pixel values associated with each point
point_pixel_values = {}

# Open the raster image
with rasterio.open(output_tif_path) as src:
    imagename = os.path.splitext(os.path.basename(output_tif_path))[0]  # Remove the file extension
    
    # Iterate over each band in the image
    for band in range(1, src.count + 1):
        
        # Iterate over each point and extract pixel values
        for point in points:
            row, col = src.index(point[0][0], point[0][1])  # Find the point within the raster image
            values = src.read(band, window=((row, row+1), (col, col+1)))  # Read the band values

            if values.size != 0:  # Check if bands are valid/not empty
                # Prepare key for the current point
                point_key = f"Point_id: {point[1][1]}, X: {point[2][0]}, Y: {point[2][1]}, Class_ID: {point[1][0]}"
                if point_key not in point_pixel_values:
                    point_pixel_values[point_key] = {}

                # Add pixel values to the dictionary
                point_pixel_values[point_key][f'{imagename}_Band_{band}'] = values[0][0]

# Print point_pixel_values
for point_key, pixel_values in point_pixel_values.items():
    print(f"Point: {point_key}, Pixel Values: {pixel_values}")


Point: Point_id: VAL_2887, X: 29.04363001, Y: -31.00724936, Class_ID: 2, Pixel Values: {'Normalized_FusedImage_Band_1': np.float32(0.0072663217), 'Normalized_FusedImage_Band_2': np.float32(0.016991708), 'Normalized_FusedImage_Band_3': np.float32(0.013244633), 'Normalized_FusedImage_Band_4': np.float32(0.07099656), 'Normalized_FusedImage_Band_5': np.float32(0.20262377), 'Normalized_FusedImage_Band_6': np.float32(0.24329078), 'Normalized_FusedImage_Band_7': np.float32(0.13759135), 'Normalized_FusedImage_Band_8': np.float32(0.31130534), 'Normalized_FusedImage_Band_9': np.float32(0.4460419), 'Normalized_FusedImage_Band_10': np.float32(0.12175069), 'Normalized_FusedImage_Band_11': np.float32(0.056024376), 'Normalized_FusedImage_Band_12': np.float32(nan), 'Normalized_FusedImage_Band_13': np.float32(0.34217125), 'Normalized_FusedImage_Band_14': np.float32(nan), 'Normalized_FusedImage_Band_15': np.float32(nan), 'Normalized_FusedImage_Band_16': np.float32(nan), 'Normalized_FusedImage_Band_17': 

### This cell changes point_pixel_value dictionary to a dataframe

In [5]:
# Create a list of dictionaries for DataFrame creation
data_list = []

for point_key, pixel_values in point_pixel_values.items():
    data_dict = {}
    # Extracting Point ID, X, Y, and Class ID from point_key
    point_id = point_key.split(',')[0].split(':')[1].strip()  
    x_coord = point_key.split(',')[1].split(':')[1].strip()
    y_coord = point_key.split(',')[2].split(':')[1].strip()
    class_id = point_key.split(',')[3].split(':')[1].strip()
    
    # Add extracted information as separate columns
    data_dict['Point'] = point_id
    data_dict['X'] = x_coord
    data_dict['Y'] = y_coord
    data_dict['Class_ID'] = class_id
    
    # Add band pixel values as columns
    data_dict.update(pixel_values)

    data_list.append(data_dict)

# Create DataFrame from list of dictionaries
pixelvalues_df = pd.DataFrame(data_list)

# Print the DataFrame
print(pixelvalues_df)

# Save the DataFrame to a CSV file
# Define the CSV file path
csv_file_path = r'C:\Users\User\Documents\DataFusion\DataFusion\bandvalues\04102024pixel_values.csv'

# Save the DataFrame to a CSV file
pixelvalues_df.to_csv(csv_file_path, index=False)

print(f'DataFrame successfully saved to {csv_file_path}')

        Point            X             Y Class_ID  \
0    VAL_2887  29.04363001  -31.00724936        2   
1     NVAL254  29.02381883  -31.00773088        2   
2        None   28.8769605  -30.69760225        7   
3     NVAL279  29.11025967  -30.90116542        5   
4    VAL_2214   29.1825931  -30.81179621        7   
..        ...          ...           ...      ...   
163  VAL_2224   29.1769613  -30.80323135        3   
164      None  29.04422791  -31.01097117        4   
165  VAL_1342  28.87787196  -30.69598924        7   
166      None  28.99859574  -30.89497315        8   
167  VAL_3303  28.99487763  -30.90048499        8   

     Normalized_FusedImage_Band_1  Normalized_FusedImage_Band_2  \
0                        0.007266                      0.016992   
1                        0.005450                      0.013780   
2                        0.020478                      0.046888   
3                        0.017175                      0.047413   
4                        0.0

### This cell block trains the random forest classifier 

In [6]:
# Define features 
# Find the index of the last non-band column
last_non_band_index = pixelvalues_df.columns.tolist().index('Class_ID')  #

# Select columns starting from the column following the last non-band column
#These columns are the band features used in the classification
features = pixelvalues_df.iloc[:, last_non_band_index + 1:]
#print(features)

# Define target (Class ID column)
target = pixelvalues_df['Class_ID']

# Convert features and target columns to arrays
features_array = features.values
target_array = target.values

# Train the classifier using RandomForest with 500 trees
classifier = RandomForestClassifier(n_estimators=500)
classifier.fit(features_array, target_array)

### This cell block performs the classification on each image in the folder using the trained classifer in the previous cell

In [7]:
# Define the directory where you want to save the classified images
output_folder_classified = r'C:\Users\User\Documents\DataFusion\DataFusion\Classified'
output_folder = r'C:\Users\User\Documents\DataFusion\DataFusion\AddedIndices'
os.makedirs(output_folder, exist_ok=True)
        
# Open the image using rasterio
with rasterio.open(output_tif_path) as src:
    # Get the shape of the image
    rows, cols = src.height, src.width

    # Define batch size (adjust as needed based on memory constraints)
    batch_size = 1000  # You can adjust this value

    # Initialize the classification result array for the entire image
    classification_result = np.zeros((rows, cols), dtype=np.uint8)

    # Iterate over the image in batches
    for row_start in range(0, rows, batch_size):
        for col_start in range(0, cols, batch_size):
            row_end = min(row_start + batch_size, rows)
            col_end = min(col_start + batch_size, cols)

            # Read the batch of bands
            bands = [src.read(band_idx, window=((row_start, row_end), (col_start, col_end))) 
                     for band_idx in range(1, src.count + 1)]

            # Stack the bands into a single array
            stacked_bands = np.stack(bands, axis=-1)

            # Reshape the array to 2D (rows, columns) for classification
            reshaped_bands = stacked_bands.reshape(-1, src.count)

            # Predict using the trained classifier (replace this with your classifier)
            predicted_labels = classifier.predict(reshaped_bands)

            # Create a temporary result array for this batch
            batch_result = predicted_labels.reshape(row_end - row_start, col_end - col_start)

            # Write the batch result to the corresponding window in the full classification result array
            classification_result[row_start:row_end, col_start:col_end] = batch_result

    # Construct the output file path
    output_filename = 'datafusion_classified10042014.tif'
    output_path = os.path.join(output_folder_classified, output_filename)

    # Prepare metadata for the classified image
    meta = src.meta.copy()
    meta.update({
        'driver': 'GTiff',
        'dtype': 'uint8',  # Ensure data type is appropriate for classification results
        'count': 1,  # Single band
        'compress': 'lzw',  # Compression method (Lempel-Ziv-Welch)
        'crs': src.crs,  # Use the same CRS as the input raster
        'transform': src.transform,  # Use the same transform as the input raster
    })

    # Remove the 'nodata' entry from the metadata if it exists
    meta.pop('nodata', None)

    # Write the full classified image to a new GeoTIFF file
    with rasterio.open(output_path, 'w', **meta) as dst:
        dst.write(classification_result.astype('uint8'), 1)

print(f"Classification completed and classified image '{output_filename}' saved successfully!")


Classification completed and classified image 'datafusion_classified10042014.tif' saved successfully!
