In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ee
import joblib

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau

from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

In [None]:
# Authenticate and initialize the Earth Engine API.
# Passing `project` opens the already existing GEE project; it does not
# create a new one.
ee.Authenticate()
ee.Initialize(project='svevazanettieo1')

In [None]:
# Convert an Earth Engine FeatureCollection into a flat pandas DataFrame.
def from_FeatureCollection_to_df(path):
    """Fetch the FeatureCollection stored at `path` and return it as a DataFrame.

    The collection is pulled client-side with `getInfo()`, its 'features' list
    is flattened with `pd.json_normalize` (nested keys become dotted column
    names such as 'properties.<name>'), and the 'properties.random' column is
    dropped when it is present.

    Parameters
    ----------
    path : asset identifier passed unchanged to `ee.FeatureCollection`.

    Returns
    -------
    pandas.DataFrame with one row per feature.
    """
    features = ee.FeatureCollection(path).getInfo()['features']
    flat = pd.json_normalize(features)
    # errors='ignore' makes the drop a no-op for collections without the column
    return flat.drop(columns=['properties.random'], errors='ignore')

# Download the training and validation point collections as DataFrames.
training_points_df, validation_points_df = map(
    from_FeatureCollection_to_df,
    (
        'projects/svevazanettieo1/assets/Dubai/Dubai_training_indexes',
        'projects/svevazanettieo1/assets/Dubai/Dubai_validation_indexes',
    ),
)

# Report the (rows, columns) shape of each split.
print('Dataset size: \n')
print('Training set:', training_points_df.shape)
print('Validation set:', validation_points_df.shape)

In [None]:
# Select the feature columns: every column whose name starts with
# 'properties.B' or 'properties.n'.
# NOTE(review): presumably spectral bands and normalized indexes — confirm
# against the asset schema.
feature_columns = [
    col for col in training_points_df.columns
    if col.startswith(('properties.B', 'properties.n'))
]
# Jupyter only displays a bare expression when it is the last statement of a
# cell, so the selected columns are printed explicitly here.
print(feature_columns)

# Features and labels for training ('properties.LC' holds the class label)
X_train = training_points_df[feature_columns]
y_train = training_points_df['properties.LC']

# Features and labels for validation, using the same columns as training
X_val = validation_points_df[feature_columns]
y_val = validation_points_df['properties.LC']

In [None]:
# Min-max normalization: the scaler is fitted on the training features only,
# and that same fitted mapping is applied to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train, X_val = scaler.transform(X_train), scaler.transform(X_val)

# NOTE(review): this cell overwrites X_train / X_val, so re-running it fits the
# scaler on already-normalized data; run it once per kernel session.
# Serialize the fitted scaler to 'scaler.save'.
joblib.dump(scaler, "scaler.save")

In [None]:
# One-hot encode the class labels for the softmax / categorical cross-entropy
# models defined below.
# NOTE(review): len(unique) matches the width `to_categorical` needs only if
# the labels are exactly 0..N_CLASSES-1 — confirm against 'properties.LC'.
N_CLASSES = len(y_train.unique())
y_train = to_categorical(y_train, num_classes=N_CLASSES)
y_val = to_categorical(y_val, num_classes=N_CLASSES)

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))

# The sample order has to be mixed: per the original author's note the rows
# come grouped by class (the first ones are all of the same type, and so on).
# tf.data only shuffles inside its buffer, so a small buffer would leave
# batches dominated by a single class; a buffer as large as the training set
# gives a uniform shuffle.
train_dataset_batch = train_dataset.shuffle(buffer_size=len(X_train)).batch(8)
# Validation data is only batched; its order does not affect the metrics.
val_dataset_batch = val_dataset.batch(8)

# ANN1

In [None]:
# ANN1: a single hidden layer (64 ReLU units) with dropout, followed by a
# softmax layer with one output per class.
model1 = tf.keras.models.Sequential()
model1.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model1.add(tf.keras.layers.Dropout(0.2))
model1.add(tf.keras.layers.Dense(N_CLASSES, activation='softmax'))

# Categorical cross-entropy matches the one-hot encoded labels.
model1.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

# Train for a fixed 100 epochs, evaluating on the validation batches each epoch.
history1 = model1.fit(
    train_dataset_batch,
    epochs=100,
    validation_data=val_dataset_batch,
)

# Show the architecture and write the trained model to disk.
model1.summary()
model1.save('model1.keras')

# ANN2

In [None]:
# Stop training once the validation loss has not improved for 15 epochs and
# roll back to the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Multiply the learning rate by 0.1 after 5 epochs without validation-loss
# improvement, never going below 1e-5; verbose=1 logs each reduction.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-5, verbose=1)

# ANN2: three hidden ReLU layers (64 -> 32 -> 16), each followed by dropout,
# then a softmax layer with one output per class.
model2 = tf.keras.models.Sequential()
model2.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model2.add(tf.keras.layers.Dropout(0.2))
model2.add(tf.keras.layers.Dense(32, activation='relu'))
model2.add(tf.keras.layers.Dropout(0.2))
model2.add(tf.keras.layers.Dense(16, activation='relu'))
model2.add(tf.keras.layers.Dropout(0.2))
model2.add(tf.keras.layers.Dense(N_CLASSES, activation='softmax'))

# Categorical cross-entropy matches the one-hot encoded labels.
model2.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

# Up to 250 epochs; the callbacks may end training earlier.
history2 = model2.fit(
    train_dataset_batch,
    epochs=250,
    validation_data=val_dataset_batch,
    callbacks=[early_stopping, reduce_lr],
)

# Show the architecture and write the trained model to disk.
model2.summary()
model2.save('model2_indexes.keras')

In [None]:
def loss_accuracy_graph(train_loss, val_loss, train_acc, val_acc):
    """Plot loss and accuracy curves in two vertically stacked panels.

    Parameters
    ----------
    train_loss, val_loss : sequences plotted together in the top ('Loss') panel.
    train_acc, val_acc : sequences plotted together in the bottom ('Accuracy') panel.

    The figure is displayed with `plt.show()`; nothing is returned.
    """
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(15, 6), sharex=True)

    # (panel title, training curve, training label, validation curve, validation label)
    panels = (
        ('Loss', train_loss, 'Training loss', val_loss, 'Validation loss'),
        ('Accuracy', train_acc, 'Training accuracy', val_acc, 'Validation accuracy'),
    )
    for ax, (title, train_curve, train_label, val_curve, val_label) in zip(axes, panels):
        ax.plot(train_curve, label=train_label, alpha=.8)
        ax.plot(val_curve, label=val_label, alpha=.8)
        ax.set_title(title)
        ax.legend()
        ax.grid(alpha=.3)

    # Tighten the layout, then leave a margin on the right-hand side
    plt.tight_layout()
    plt.subplots_adjust(right=0.85)
    plt.show()

In [None]:
# Pull the per-epoch loss/accuracy series recorded while fitting ANN1
train_loss_1, val_loss_1, train_acc_1, val_acc_1 = (
    history1.history[key]
    for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
)

loss_accuracy_graph(train_loss_1, val_loss_1, train_acc_1, val_acc_1)

In [None]:
# Pull the per-epoch loss/accuracy series recorded while fitting ANN2
train_loss_2, val_loss_2, train_acc_2, val_acc_2 = (
    history2.history[key]
    for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
)

loss_accuracy_graph(train_loss_2, val_loss_2, train_acc_2, val_acc_2)


In [None]:
# Turn the one-hot validation labels back into integer class indices
y_val_int = y_val.argmax(axis=1)

# ANN1: predicted class = index of the highest softmax output
y_pred_ANN1 = model1.predict(X_val).argmax(axis=1)
val_accuracy_ANN1 = accuracy_score(y_val_int, y_pred_ANN1)
cm_ANN1 = confusion_matrix(y_val_int, y_pred_ANN1)

# ANN2: same evaluation
y_pred_ANN2 = model2.predict(X_val).argmax(axis=1)
val_accuracy_ANN2 = accuracy_score(y_val_int, y_pred_ANN2)
cm_ANN2 = confusion_matrix(y_val_int, y_pred_ANN2)

In [None]:
# Drop the references to both networks and their training histories; the
# metrics and confusion matrices needed later are already stored separately.
del model1, history1, model2, history2

# SVM

In [None]:
# SVM
# scikit-learn classifiers need integer class labels, so the one-hot labels are
# converted back with argmax. The shuffled arrays are built in this cell so it
# does not depend on names created by a later cell and the notebook can run
# from top to bottom on a fresh kernel.
X_shuffled, y_shuffled = shuffle(X_train, y_train, random_state=42)
X_shuffled_val, y_shuffled_val = shuffle(X_val, y_val, random_state=42)
y_shuffled = np.argmax(y_shuffled, axis=1)
y_shuffled_val = np.argmax(y_shuffled_val, axis=1)

# Initialize an SVM with RBF kernel
svm = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)

# Fit model
svm.fit(X_shuffled, y_shuffled)

# Predictions and metrics on the validation split
y_pred_SVM = svm.predict(X_shuffled_val)
cm_SVM = confusion_matrix(y_shuffled_val, y_pred_SVM)
val_accuracy_SVM = accuracy_score(y_shuffled_val, y_pred_SVM)

# Accuracy
print("Validation Accuracy:", val_accuracy_SVM)

# RF

In [None]:
# Random Forest
# Hyperparameters shared by both forests
n_estimators = 100
max_depth = 10

# Plain forest. random_state is fixed so that re-running the cell reproduces
# the same trees and therefore the same reported accuracy.
clf = RandomForestClassifier(
    n_estimators=n_estimators,
    max_depth=max_depth,
    oob_score=True,
    random_state=42,
)

# Same hyperparameters, but class_weight='balanced' weights each class
# inversely to its frequency in the training labels.
clf_balanced = RandomForestClassifier(
    n_estimators=n_estimators,
    max_depth=max_depth,
    class_weight='balanced',
    random_state=42,
)

# Shuffle both splits reproducibly
X_shuffled, y_shuffled = shuffle(X_train, y_train, random_state=42)
X_shuffled_val, y_shuffled_val = shuffle(X_val, y_val, random_state=42)

# Convert the one-hot labels back to integer class indices
y_shuffled = np.argmax(y_shuffled, axis=1)
y_shuffled_val = np.argmax(y_shuffled_val, axis=1)

clf.fit(X_shuffled, y_shuffled)
clf_balanced.fit(X_shuffled, y_shuffled)

y_pred_RF = clf.predict(X_shuffled_val)
y_pred_RF_balanced = clf_balanced.predict(X_shuffled_val)

val_accuracy_RF = accuracy_score(y_shuffled_val, y_pred_RF)
cm_RF = confusion_matrix(y_shuffled_val, y_pred_RF)

val_accuracy_RF_balanced = accuracy_score(y_shuffled_val, y_pred_RF_balanced)
cm_RF_balanced = confusion_matrix(y_shuffled_val, y_pred_RF_balanced)

print("Validation Accuracy RF:", val_accuracy_RF)
print("Validation Accuracy RF (Balanced):", val_accuracy_RF_balanced)

In [None]:
# Confusion matrices of the plain and the class-balanced Random Forest,
# drawn one above the other (2 rows x 1 column).
cm1, cm2 = cm_RF, cm_RF_balanced

cms = [cm1, cm2]
titles = ['Random Forest', 'Random Forest (Balanced)']
fig, axes = plt.subplots(2, 1, figsize=(12, 10))

for ax, cm, title in zip(axes.flatten(), cms, titles):
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['vegetation','water','soil','urban'])
    # Draw into the prepared subplot instead of letting sklearn open a new figure
    disp.plot(cmap="Blues", values_format="d", ax=ax, colorbar=False)
    ax.set_title(title)

plt.tight_layout()
plt.show()

# Comparison

In [None]:
# Confusion matrices of the four models, side by side in a 2x2 grid
cm1, cm2, cm3, cm4 = cm_ANN1, cm_ANN2, cm_SVM, cm_RF

cms = [cm1, cm2, cm3, cm4]
titles = ['ANN1', 'ANN2', 'SVM', 'Random Forest']
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

for ax, cm, title in zip(axes.flatten(), cms, titles):
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['vegetation','water','soil','urban'])
    # Draw into the prepared subplot instead of letting sklearn open a new figure
    disp.plot(cmap="Blues", values_format="d", ax=ax, colorbar=False)
    ax.set_title(title)
plt.tight_layout()
plt.show()

In [None]:
# Validation accuracy of every model, one per line
print("Accuracy")
for label, value in (
    ("ANN1:", val_accuracy_ANN1),
    ("ANN2:", val_accuracy_ANN2),
    ("SVM:", val_accuracy_SVM),
    ("Random Forest:", val_accuracy_RF),
):
    print(label, value)

# Tuning Final Model

In [None]:
# Merge the training and validation splits into one pool for cross-validation.
# The one-hot labels are first converted back to integer class indices.
y_train_cls = np.argmax(y_train, axis=1)
y_val_cls = np.argmax(y_val, axis=1)

X = np.vstack((X_train, X_val))
y = np.hstack((y_train_cls, y_val_cls))

X_shuffled, y_shuffled = shuffle(X, y, random_state=42)

# With shuffle=True and no random_state, every cross_val_score call would draw
# different folds, so the candidates below would be scored on different splits.
# A fixed seed gives every candidate exactly the same 30 folds.
crossvalidation = StratifiedKFold(n_splits=30, shuffle=True, random_state=42)

# Sweep the number of trees at a fixed max_depth of 10
for model_number, n_trees in enumerate((50, 100, 200), start=1):
    # Seeded forest so the comparison is reproducible run to run
    clf_1 = RandomForestClassifier(n_estimators=n_trees, max_depth=10, oob_score=True, random_state=42)

    # Evaluate the model using cross-validation
    xval_score = cross_val_score(clf_1, X_shuffled, y_shuffled, cv=crossvalidation)

    # Mean and standard deviation of the per-fold accuracy
    accuracy_mean = np.average(xval_score)
    accuracy_std = np.std(xval_score)

    print(f'Random Forest {model_number}')  # Print method name
    print("========================================")
    print(f"Accuracy (CV): {accuracy_mean:.3f} +/- {accuracy_std:.3f}")

In [None]:
# Sweep the maximum tree depth with the number of trees fixed at 200.
# Models 1, 2 and 3 use max_depth 5, 10 and 15 respectively.
for model_number, depth in enumerate((5, 10, 15), start=1):
    clf_1 = RandomForestClassifier(n_estimators=200, max_depth=depth, oob_score=True)

    # Per-fold accuracy under the shared cross-validation splitter
    xval_score = cross_val_score(clf_1, X_shuffled, y_shuffled, cv=crossvalidation)

    # Summary statistics over the folds
    accuracy_mean = np.average(xval_score)
    accuracy_std = np.std(xval_score)

    print(f'Random Forest {model_number}')  # Print method name
    print("========================================")
    print(f"Accuracy (CV): {accuracy_mean:.3f} +/- {accuracy_std:.3f}")

In [None]:
# Hyperparameters chosen for the final model
n_estimators, max_depth = 200, 10

# Build the final Random Forest and fit it on X_shuffled / y_shuffled
clf = RandomForestClassifier(
    max_depth=max_depth,
    n_estimators=n_estimators,
    oob_score=True,
)
clf.fit(X_shuffled, y_shuffled)

In [None]:
# Save model: serialize the fitted Random Forest (`clf`) to disk with joblib.
# The file name suggests the model was trained on training + validation data.
joblib.dump(clf, "random_forest_model_train+val.pkl")