### Sk-learn Classification Script

In [8]:
#Import necessary libraries
import os
import geopandas as gpd
import rasterio
import numpy as np
from rasterio.features import geometry_mask
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import autosklearn.classification
import matplotlib.pyplot as plt

### Load the Image

In [9]:
# Input locations (absolute paths on the author's machine; adjust when running elsewhere).
# Vector file holding the labelled training geometries:
shapefile_path = "/mnt/c/Users/User/Desktop/Master_Thesis_Christobal/Shapefiles/training_data_kaza.shp"
# Multi-band raster that is sampled for training and later classified:
file_path = "/mnt/c/Users/User/Desktop/Master_Thesis_Christobal/Sentinel_Images/Wet_RGBNIR/Kaza_NDVI_Wet_season_Image.tif"

In [10]:
# Fail fast when the raster is missing: every later cell needs `image`,
# `profile`, `transform` and `crs`, so merely printing a message would only
# postpone the failure to a confusing NameError further down the notebook.
if not os.path.exists(file_path):
    raise FileNotFoundError(f'File does not exist: {file_path}')

with rasterio.open(file_path) as src:
    image = src.read()  # all bands as one numpy array, shape (bands, height, width)
    profile = src.profile  # raster metadata; its crs/transform are reused when writing the result
    transform = src.transform  # affine transform of the raster grid (pixel -> map coordinates)
    crs = src.crs  # coordinate reference system (crs) of the raster
    print('Image dimensions (Bands, Height, Width):', image.shape)

Image dimensions (Bands, Height, Width): (9, 1678, 2202)


### Load the Training Data

In [11]:
# Read the labelled training geometries from disk.
shapefile = gpd.read_file(shapefile_path)

# Reproject only when necessary so the vector data and the raster share one CRS;
# the masks built later rely on both being in the same coordinate system.
needs_reprojection = shapefile.crs != crs
if needs_reprojection:
    shapefile = shapefile.to_crs(crs)

##### Extract the labels and their corresponding geometries

In [12]:
# Geometry of every training feature, and the class value stored alongside it.
# Both come from the same GeoDataFrame, so they stay aligned row by row.
geometries = shapefile.geometry
labels = shapefile['Class']  # attribute column holding the class of each feature

##### Extract training data from raster using shapefile

In [13]:
# Accumulators: one (num_pixels, num_bands) array per geometry, plus a flat
# list that holds one label for every extracted pixel.
training_data = []
training_labels = []

# Every mask has the raster's (height, width); compute that shape once.
raster_shape = image.shape[1:3]

for geometry, label in zip(geometries, labels):
    # Boolean mask that is True for pixels inside the geometry (invert=True)
    mask = geometry_mask([geometry], out_shape=raster_shape, transform=transform, invert=True)

    # Select the masked pixels across all bands, then put pixels on the rows
    pixels = image[:, mask].T  # Shape: (num_pixels, num_bands)
    training_data.append(pixels)

    # Repeat the feature's label once per extracted pixel
    training_labels += [label] * pixels.shape[0]

In [14]:
# Flatten training data into a single array:
# stack the per-geometry (num_pixels, num_bands) blocks row-wise into one
# feature matrix, and turn the per-pixel label list into a matching 1-D array.
# Both names are rebound from Python lists to numpy arrays here.
training_data = np.vstack(training_data)
training_labels = np.array(training_labels)

##### Prepare the data for classification

In [15]:
# Hold out 30% of the labelled pixels for evaluation; the fixed random_state
# makes the shuffle (and therefore the split) repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    training_data,
    training_labels,
    test_size=0.3,
    random_state=42,
)

print(f'Training samples: {len(X_train)}, Testing samples: {len(X_test)}')

Training samples: 113, Testing samples: 49


##### Perform Classification using auto-sklearn

In [None]:
# Configure the auto-sklearn search.
# Reproducibility is set through the constructor's `seed`; `fit()` does not
# accept a `random_state` keyword and raises a TypeError when given one.
# NOTE(review): recent auto-sklearn releases deprecate `ensemble_size` in favour
# of `ensemble_kwargs={'ensemble_size': ...}` — confirm against the installed version.
automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=3600,  # 1 hour time limit for the whole search (seconds)
    per_run_time_limit=300,  # 5 minutes per model (seconds)
    ensemble_size=50,
    seed=42,
)

print(' Training AutoSklearn Classifier...')
# Fit the AutoSklearn Classifier on the training split only
automl.fit(X_train, y_train)

  automl = autosklearn.classification.AutoSklearnClassifier(


 Training AutoSklearn Classifier...


In [None]:
# Report that the search finished and show what auto-sklearn selected.
print('Auto-sklearn training complete.')
selected_models = automl.show_models()
print('Best model:', selected_models)

##### Evaluate the Model

In [None]:
# Predict classes for the held-out test samples; y_pred is compared against
# y_test when the classification report is generated.
print('Evaluating the Model...')
y_pred = automl.predict(X_test)

In [None]:
# Build the text report comparing the held-out labels with the predictions,
# then print it.
report = classification_report(y_test, y_pred)
print(report)

##### Apply the Model to the entire image

In [None]:
# Turn the (bands, height, width) cube into a 2-D sample matrix:
# one row per pixel, one column per band. height and width are kept so the
# predictions can be folded back into an image afterwards.
bands, height, width = image.shape
data = image.reshape(bands, -1).transpose()

In [None]:
# Predict the class for every pixel of the flattened image.
# NOTE(review): no nodata/invalid-pixel mask is applied before this call, so
# background pixels are classified as well — confirm this is intended.
predictions = automl.predict(data)

In [None]:
# Fold the flat per-pixel predictions back into the raster's (height, width) grid
classified_image = np.reshape(predictions, (height, width))

##### Visualize the results

In [None]:
# Show the classified raster with a qualitative colour map; 'nearest'
# interpolation keeps class boundaries crisp instead of blending neighbours.
plt.figure(figsize=(10, 10))
class_map = plt.imshow(classified_image, cmap='tab20', interpolation='nearest')
plt.title('Classified Image')
plt.colorbar(class_map, label='Class Labels')
plt.show()

##### Save the classified image as a GeoTiff file

In [None]:
output_path = "/mnt/c/Users/User/Desktop/Master_Thesis_Christobal/Results/classified_image.tif"

# rasterio does not create missing folders, so make sure the target directory
# exists before opening the file for writing.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# 64-bit integer GeoTIFFs are only supported by recent GDAL builds, so store
# such predictions as int32 instead.
# NOTE(review): assumes class labels are small integers that fit in int32 — confirm.
output_image = classified_image
if output_image.dtype in (np.int64, np.uint64):
    output_image = output_image.astype(np.int32)

# Write a single-band GeoTIFF on the same grid (CRS and affine transform) as
# the input raster so the result overlays the source imagery.
with rasterio.open(
    output_path,
    'w',
    driver='GTiff',
    height=height,
    width=width,
    count=1,
    dtype=output_image.dtype,
    crs=profile['crs'],
    transform=profile['transform'],
) as dst:
    dst.write(output_image, 1)

print(f"Classified image saved to: {output_path}")