In [11]:
import tensorflow as tf
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import shap  # SHAP library
from sklearn.model_selection import train_test_split

In [12]:
# Hyperparameter and path configuration shared by the cells below.
SEED = 123                            # reused for the dataset split, the val/test split and the SVM
DATASET_DIR = "arcgis-survey-images"  # directory the image dataset is loaded from
BATCH_SIZE = 32                       # forwarded as batch_size when loading the dataset
IMAGE_SIZE = (128, 128)               # forwarded as image_size when loading the dataset

In [13]:
# Load the dataset
def load_dataset(dataset_dir, image_size, batch_size, validation_split, subset, seed):
    """Load one split of an image dataset from a directory.

    Thin wrapper around ``tf.keras.preprocessing.image_dataset_from_directory``
    that forwards every argument unchanged and always requests
    ``labels='inferred'``.

    Args:
        dataset_dir: Directory to read the images from.
        image_size: Forwarded as ``image_size``.
        batch_size: Forwarded as ``batch_size``.
        validation_split: Forwarded as ``validation_split``.
        subset: Forwarded as ``subset``; this notebook passes ``'training'``
            and ``'validation'``.
        seed: Forwarded as ``seed``.

    Returns:
        Whatever ``image_dataset_from_directory`` returns for these options.
    """
    loader_options = {
        "labels": "inferred",
        "image_size": image_size,
        "validation_split": validation_split,
        "subset": subset,
        "seed": seed,
        "batch_size": batch_size,
    }
    return tf.keras.preprocessing.image_dataset_from_directory(dataset_dir, **loader_options)

# Build the training split first, then the validation split. Both calls use the
# same seed and split fraction; only `subset` differs.
train_ds, validation_ds = (
    load_dataset(DATASET_DIR, IMAGE_SIZE, BATCH_SIZE, validation_split=0.2, subset=split_name, seed=SEED)
    for split_name in ("training", "validation")
)

Found 2521 files belonging to 5 classes.
Using 2017 files for training.
Found 2521 files belonging to 5 classes.
Using 504 files for validation.


In [14]:
# Preprocess the data
def preprocess_dataset(dataset):
    """Materialise a batched dataset into two in-memory NumPy arrays.

    Args:
        dataset: Iterable yielding ``(images, labels)`` batches in which both
            elements expose a ``.numpy()`` method.

    Returns:
        Tuple ``(X, y)``: every image batch concatenated along axis 0, and
        every label batch concatenated along axis 0, in iteration order.

    Raises:
        ValueError: If ``dataset`` yields no batches at all.
    """
    image_batches, label_batches = [], []
    for images, labels in dataset:
        image_batches.append(images.numpy())
        label_batches.append(labels.numpy())

    # np.concatenate([]) also raises ValueError, but with a message that does
    # not point at the real cause; fail early with an explicit one instead.
    if not image_batches:
        raise ValueError("preprocess_dataset received an empty dataset: no batches to concatenate")

    return np.concatenate(image_batches), np.concatenate(label_batches)

# Convert the training split, then the validation split, to NumPy arrays.
(X_train, y_train), (X_val, y_val) = map(preprocess_dataset, (train_ds, validation_ds))

In [15]:
# Flatten each image into a single feature vector so it can be fed to the SVM.
# NOTE(review): values are used exactly as loaded, with no rescaling — confirm
# that is intended before comparing against other models.
X_train_flat = np.reshape(X_train, (len(X_train), -1))
X_val_flat = np.reshape(X_val, (len(X_val), -1))

In [16]:
# Split the validation data once more: half stays as validation, the other
# half becomes the test set.
# NOTE(review): this rebinds X_val_flat and y_val, so re-running this cell
# without re-running the earlier ones halves the validation set again.
X_val_flat, X_test_flat, y_val, y_test = train_test_split(
    X_val_flat,
    y_val,
    test_size=0.5,
    random_state=SEED,
)

In [17]:
# Train an SVM with a linear kernel on the flattened training images.
# probability=True is required because predict_proba is used by the SHAP cell.
svm_model = SVC(
    random_state=SEED,
    probability=True,
    kernel='linear',
)
svm_model.fit(X=X_train_flat, y=y_train)

In [18]:
# Evaluate the model on the validation set: predict, then print the header
# line followed by the per-class report.
y_pred_svm_val = svm_model.predict(X_val_flat)
print(
    "Reporte de clasificación para el conjunto de validación:",
    classification_report(y_val, y_pred_svm_val),
    sep="\n",
)

Reporte de clasificación para el conjunto de validación:
              precision    recall  f1-score   support

           0       0.61      0.65      0.63        52
           1       0.50      0.60      0.55        50
           2       0.59      0.43      0.50        51
           3       0.60      0.56      0.58        55
           4       0.64      0.68      0.66        44

    accuracy                           0.58       252
   macro avg       0.59      0.59      0.58       252
weighted avg       0.59      0.58      0.58       252



In [19]:
# Evaluate the model on the held-out test set: predict, then print the header
# line followed by the per-class report.
y_pred_svm_test = svm_model.predict(X_test_flat)
print(
    "Reporte de clasificación para el conjunto de prueba:",
    classification_report(y_test, y_pred_svm_test),
    sep="\n",
)

Reporte de clasificación para el conjunto de prueba:
              precision    recall  f1-score   support

           0       0.75      0.72      0.73        57
           1       0.54      0.62      0.58        42
           2       0.56      0.42      0.48        52
           3       0.60      0.73      0.66        49
           4       0.64      0.62      0.63        52

    accuracy                           0.62       252
   macro avg       0.62      0.62      0.62       252
weighted avg       0.62      0.62      0.62       252



In [None]:
# SHAP for the SVM
# KernelExplainer calls the model on every combination of perturbation sample
# and background row for each explained row. With 100 background rows and the
# default sample budget this cell did not finish on the flattened images, so
# the background size, the sample budget and the number of selected features
# are all bounded explicitly here.
# NOTE(review): the three values below trade explanation quality for run time;
# they are starting points, not tuned settings.
SHAP_BACKGROUND_SIZE = 10   # training rows kept as the background set
SHAP_NSAMPLES = 500         # model-evaluation budget per explained row
SHAP_TOP_FEATURES = 20      # features kept by the L1 feature-selection step

# Seeded subsample so the background set is the same on every run.
shap_background = shap.sample(X_train_flat, SHAP_BACKGROUND_SIZE, random_state=SEED)
explainer = shap.KernelExplainer(svm_model.predict_proba, shap_background)
shap_values = explainer.shap_values(
    X_val_flat[:5],
    nsamples=SHAP_NSAMPLES,
    # Fixed feature count instead of the default regularisation, which depends
    # on the installed shap version.
    l1_reg=f"num_features({SHAP_TOP_FEATURES})",
)

  0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
# Plot the SHAP values for the explained validation rows.
shap.summary_plot(shap_values, features=X_val_flat[:5])

In [None]:
# Plot feature importance as a bar chart.
# The 10x8 figure size goes through summary_plot's own `plot_size` argument
# instead of plt.figure(...)/plt.show(): `plt` is not imported in the visible
# cells (so those calls raise NameError), and summary_plot shows the figure
# itself by default.
shap.summary_plot(shap_values, X_val_flat[:5], plot_type="bar", plot_size=(10, 8))
