In [None]:
!pip install transformers

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import tensorflow as tf
import tqdm as tqdm
from transformers import AutoImageProcessor, TFViTModel, ViTForImageClassification, TFSegformerForSemanticSegmentation
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Input, concatenate, Flatten, Conv2D, Dropout,GlobalAveragePooling2D
from sklearn.metrics import accuracy_score
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
from tensorflow.keras.applications import ResNet50
from sklearn.metrics import classification_report

In [None]:
# Directory path on the mounted Drive. Not referenced by the other cells visible
# in this notebook; presumably it holds the 128-px DeepGlobe tiles (TODO confirm).
folder = '/content/drive/MyDrive/Proyecto Cultivos/deepglobe/tiles-128/'

In [None]:
# Class name -> RGB triple. Insertion order is kept identical to the original
# literal so any code iterating over the mapping sees the same sequence.
class_to_rgb = dict(
    urban_land=(0, 255, 255),
    agriculture_land=(255, 255, 0),
    rangeland=(255, 0, 255),
    forest_land=(0, 255, 0),
    water=(0, 0, 255),
    barren_land=(255, 255, 255),
    unknown=(0, 0, 0),
)

In [None]:
# Load the compressed array archive and its companion CSV.
loaded_data = np.load('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/datacompressed-128_small.npz')
df_small = pd.read_csv('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/df-128_small.csv')
# Only the 'images' array is read from the archive here (no masks are accessed).
# NOTE(review): rows of df_small are assumed to be aligned one-to-one with
# loaded_images, since both are split together further down — confirm.
loaded_images = loaded_data['images']

In [None]:
# Raw labels taken from the CSV's 'label' column, as an array.
y = df_small['label'].values

In [None]:
# Convert the labels to one-hot vectors so they can be fed to the model.

# LabelEncoder assigns integer ids following the sorted order of the distinct labels.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
# The width is fixed at 7 columns, matching the 7-unit softmax heads built later.
y_one_hot = to_categorical(y_encoded, num_classes=7)

In [None]:
# Two-stage split with a fixed seed: first hold out 15% for test, then hold out
# 15% of the remainder for validation (about 72.25% / 12.75% / 15% overall).
X_train, X_test, y_train, y_test = train_test_split(loaded_images, y_one_hot, test_size=0.15, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)

Extraemos los embeddings. Utilizaremos ViT de Google para ello:

In [None]:
# Load the image preprocessor and the TensorFlow ViT backbone from the same
# pretrained checkpoint, so preprocessing matches what the model expects.
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
modelViTClasif = TFViTModel.from_pretrained("google/vit-base-patch16-224-in21k")

All model checkpoint layers were used when initializing TFViTModel.

All the layers of TFViTModel were initialized from the model checkpoint at google/vit-base-patch16-224-in21k.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTModel for predictions without further training.


In [None]:
# During embedding extraction, any sample that yields null, NaN or Inf values
# must be dropped (small tiles with very little information can cause this),
# and its label must be dropped with it so features and labels stay aligned.
from tqdm import tqdm

embeddings_train = []
filtered_labels_train = []
for image, label in tqdm(zip(X_train, y_train), desc="Processing..."):
    # Reject inputs that are not entirely finite before running the model.
    if not np.isfinite(image).all():
        print("Skipping image due to NaN or Infinity values.")
        continue
    model_inputs = image_processor(image, return_tensors='tf', do_rescale=False)
    model_outputs = modelViTClasif(**model_inputs)
    # Average the last hidden state over axis 1 to get one vector per image.
    pooled = tf.reduce_mean(model_outputs.last_hidden_state, axis=1).numpy()
    # The forward pass itself can still produce non-finite values.
    if not np.isfinite(pooled).all():
        print("Skipping image due to NaN or Infinity values in embedding.")
        continue
    embeddings_train.append(pooled)
    filtered_labels_train.append(label)

# Stack the per-image rows into a single 2-D array for training.
X_emb_img_train = np.vstack(embeddings_train)
y_train_filtered = np.array(filtered_labels_train)

  image = (image - mean) / std
Processing...: 19748it [2:21:14,  2.19it/s]

Skipping image due to NaN or Infinity values in embedding.


Processing...: 31070it [3:42:52,  2.32it/s]


In [None]:
# Persist the train embeddings and their filtered labels so they can be reused
# later without repeating the extraction.
np.save('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/embeddings_train_small.npy', X_emb_img_train)
np.save('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/y_train_filtered.npy', y_train_filtered)

In [None]:
# Same extraction procedure, applied to the validation samples.
from tqdm import tqdm

embeddings_val = []
filtered_labels_val = []
# zip() has no length, so pass the total explicitly to get a percentage and ETA.
for image, label in tqdm(zip(X_val, y_val), total=len(X_val), desc="Processing..."):
    if np.isnan(image).any() or np.isinf(image).any():
        print("Skipping image due to NaN or Infinity values.")
        continue
    inputs = image_processor(image, return_tensors='tf', do_rescale=False)
    outputs = modelViTClasif(**inputs)
    last_hidden_states = outputs.last_hidden_state
    # Average the last hidden state over axis 1 to get one vector per image.
    embedding = tf.reduce_mean(last_hidden_states, axis=1).numpy()
    # Same filter as the training extraction: a finite image can still yield a
    # non-finite embedding, which must not end up in the saved arrays.
    if np.isnan(embedding).any() or np.isinf(embedding).any():
        print("Skipping image due to NaN or Infinity values in embedding.")
        continue
    embeddings_val.append(embedding)
    filtered_labels_val.append(label)

# Stack the per-image rows into the 2-D layout used for training/evaluation.
X_emb_img_val = np.vstack(embeddings_val)
y_val_filtered = np.array(filtered_labels_val)

np.save('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/embeddings_val_small.npy', X_emb_img_val)
np.save('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/y_val_filtered.npy', y_val_filtered)

Processing...: 5484it [39:24,  2.32it/s]


In [None]:
# Same extraction procedure, applied to the test samples.
from tqdm import tqdm

embeddings_test = []
filtered_labels_test = []
# zip() has no length, so pass the total explicitly to get a percentage and ETA.
for image, label in tqdm(zip(X_test, y_test), total=len(X_test), desc="Processing..."):
    if np.isnan(image).any() or np.isinf(image).any():
        print("Skipping image due to NaN or Infinity values.")
        continue
    inputs = image_processor(image, return_tensors='tf', do_rescale=False)
    outputs = modelViTClasif(**inputs)
    last_hidden_states = outputs.last_hidden_state
    # Average the last hidden state over axis 1 to get one vector per image.
    embedding = tf.reduce_mean(last_hidden_states, axis=1).numpy()
    # Same filter as the training extraction: a finite image can still yield a
    # non-finite embedding, which must not end up in the saved arrays.
    if np.isnan(embedding).any() or np.isinf(embedding).any():
        print("Skipping image due to NaN or Infinity values in embedding.")
        continue
    embeddings_test.append(embedding)
    filtered_labels_test.append(label)

# Stack the per-image rows into the 2-D layout used for training/evaluation.
X_emb_img_test = np.vstack(embeddings_test)
y_test_filtered = np.array(filtered_labels_test)

np.save('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/embeddings_test_small.npy', X_emb_img_test)
np.save('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/y_test_filtered.npy', y_test_filtered)

Processing...: 6451it [44:46,  2.40it/s]


In [None]:
# Reload the cached embeddings and labels, one (features, labels) pair per split.

embeddings_train, y_train_filtered = (
    np.load('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/embeddings_train_small.npy'),
    np.load('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/y_train_filtered.npy'),
)
embeddings_val, y_val_filtered = (
    np.load('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/embeddings_val_small.npy'),
    np.load('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/y_val_filtered.npy'),
)
embeddings_test, y_test_filtered = (
    np.load('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/embeddings_test_small.npy'),
    np.load('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/y_test_filtered.npy'),
)

In [None]:
# Sanity check: report the shape of every reloaded array, in a fixed order.
for array_name, array in (
    ("embeddings_train", embeddings_train),
    ("y_train_filtered", y_train_filtered),
    ("embeddings_val", embeddings_val),
    ("y_val_filtered", y_val_filtered),
    ("embeddings_test", embeddings_test),
    ("y_test_filtered", y_test_filtered),
):
    print(f"{array_name}: {array.shape}")

embeddings_train: (31069, 768)
y_train_filtered: (31069, 7)
embeddings_val: (5484, 768)
y_val_filtered: (5484, 7)
embeddings_test: (6451, 768)
y_test_filtered: (6451, 7)


In [None]:
# Random search (not an exhaustive grid) over the classifier-head hyperparameters.
#
# A fresh model is built for every trial. Reusing a single model would (a) carry
# trained weights over from one trial to the next, so later trials would not
# start from scratch, and (b) silently ignore the sampled hidden size and
# dropout rate.

# Seeded generator so the candidate values and the sampled trials are reproducible.
rng = np.random.default_rng(42)

param_dist = {
    'learning_rate': rng.uniform(0.0001, 0.1, size=10),
    'batch_size': [16, 32, 64],
    'num_hidden_units': [64, 128, 256],
    'dropout_rate': rng.uniform(0.2, 0.5, size=10)
}

num_trials = 20
epochs = 100
best_hyperparameters = None
best_validation_accuracy = 0.0

# Integer class ids for the validation labels; identical for every trial.
val_labels_true = np.argmax(y_val_filtered, axis=1)

for _ in range(num_trials):
    # Cast to plain Python numbers so the dict prints cleanly and Keras receives
    # native types.
    sampled_params = {
        'learning_rate': float(rng.choice(param_dist['learning_rate'])),
        'batch_size': int(rng.choice(param_dist['batch_size'])),
        'num_hidden_units': int(rng.choice(param_dist['num_hidden_units'])),
        'dropout_rate': float(rng.choice(param_dist['dropout_rate']))
    }

    # Same Dense -> Dropout -> softmax layout as the final model built afterwards.
    inputs = Input(shape=(embeddings_train.shape[1],))
    x = Dense(sampled_params['num_hidden_units'], activation='relu')(inputs)
    x = Dropout(rate=sampled_params['dropout_rate'])(x)
    predictions = Dense(7, activation='softmax')(x)
    modelViT = Model(inputs, predictions)

    modelViT.compile(
        optimizer=Adam(learning_rate=sampled_params['learning_rate']),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    H = modelViT.fit(
        embeddings_train,
        y_train_filtered,
        validation_data=(embeddings_val, y_val_filtered),
        epochs=epochs,
        batch_size=sampled_params['batch_size'],
    )

    # Score the trial on the validation split.
    val_probs = modelViT.predict(embeddings_val)
    val_labels_pred = np.argmax(val_probs, axis=1)
    val_accuracy = accuracy_score(val_labels_true, val_labels_pred)

    if val_accuracy > best_validation_accuracy:
        best_validation_accuracy = val_accuracy
        best_hyperparameters = sampled_params

print("Best Hyperparameters:", best_hyperparameters)
print("Best Validation Accuracy:", best_validation_accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Hyperparameters chosen for the final classifier head.
best_num_hidden_units = 128
best_dropout_rate = 0.36513715711429373

# Functional-API head on top of the embeddings:
# input -> Dense(relu) -> Dropout -> Dense(7, softmax).
inputs = Input(shape=(embeddings_train.shape[1],))
hidden = Dense(best_num_hidden_units, activation='relu')(inputs)
regularized = Dropout(rate=best_dropout_rate)(hidden)
predictions = Dense(7, activation='softmax')(regularized)

modelViT = Model(inputs, predictions)

In [None]:
epochs = 10000
batch_size = 64

# Step-decay settings: every `epochs_drop` epochs the learning rate is
# multiplied by `drop_rate`.
initial_learning_rate = 0.0669615757398038
epochs_drop = 150
drop_rate = 0.95


def learning_rate_scheduler(epoch, learning_rate):
    """Return the learning rate to use for `epoch`.

    The rate is multiplied by `drop_rate` on every epoch index that is a
    non-zero multiple of `epochs_drop`, and returned unchanged otherwise.
    """
    if epoch % epochs_drop == 0 and epoch != 0:
        return learning_rate * drop_rate
    return learning_rate


lr_scheduler = LearningRateScheduler(learning_rate_scheduler)
optimizer = Adam(learning_rate=initial_learning_rate)
# Stop once val_loss has not improved for 400 epochs and restore the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=400, restore_best_weights=True)

modelViT.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
# The scheduler must be passed to fit(); without it the step decay never runs.
H = modelViT.fit(
    embeddings_train,
    y_train_filtered,
    validation_data=(embeddings_val, y_val_filtered),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[lr_scheduler, early_stopping],
)


Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 41/10000
Epoch 42/10000
Epoch 43/10000
Epoch 44/10000
Epoch 45/10000
Epoch 46/10000
Epoch 47/10000
Epoch 48/10000
Epoch 49/10000
Epoch 50/10000
Epoch 51/10000
Epoch 52/10000
Epoch 53/10000
Epoch 54/10000
Epoch 55/10000
Epoch 56/10000
Epoch 57/10000
Epoch 58/10000
Epoch 59/10000
Epoch 60/10000
Epoch 61/10000
Epoch 62/10000
Epoch 63/10000
Epoch 64/10000
Epoch 65/10000
Epoch 66/10000
Epoch 67/10000
Epoc

In [None]:
print("Evaluación del modelo...")
# Evaluate on the embeddings reloaded from disk — the same arrays the model was
# trained and validated on. The X_emb_img_* variables exist only if the
# multi-hour extraction cells were run in this kernel session.
y_pred_modelViT_val = modelViT.predict(embeddings_val, batch_size=64)
print(classification_report(y_val_filtered.argmax(axis=1), y_pred_modelViT_val.argmax(axis=1)))

y_pred_modelViT_test = modelViT.predict(embeddings_test, batch_size=64)
print(classification_report(y_test_filtered.argmax(axis=1), y_pred_modelViT_test.argmax(axis=1)))


Evaluación del modelo...
              precision    recall  f1-score   support

           0       0.93      0.34      0.49       898
           1       0.89      0.32      0.47       917
           2       0.95      0.61      0.74       909
           3       0.31      0.94      0.46       921
           4       1.00      0.59      0.74        22
           5       0.95      0.70      0.81       922
           6       0.98      0.82      0.90       895

    accuracy                           0.62      5484
   macro avg       0.86      0.62      0.66      5484
weighted avg       0.83      0.62      0.65      5484

              precision    recall  f1-score   support

           0       0.91      0.31      0.46      1065
           1       0.90      0.35      0.51      1044
           2       0.94      0.61      0.74      1040
           3       0.31      0.92      0.46      1093
           4       1.00      0.59      0.74        27
           5       0.93      0.71      0.80      1089

Guardamos los valores obtenidos para los hiperparámetros, para futuras iteraciones.
Los valores obtenidos en este modelo multiclase no son muy buenos, pero nos penaliza la clase unknown, que sería conveniente eliminar.

In [None]:
# Best hyperparameters reported by the search, recorded for future iterations.
# Kept as a real assignment: the pasted console line was not valid Python and
# made this cell raise a SyntaxError.
recorded_best_hyperparameters = {
    'learning_rate': 0.013535321123055095,
    'batch_size': 32,
    'num_hidden_units': 128,
    'dropout_rate': 0.22925637991902226,
}

In [None]:
# Save the trained head twice, under a .keras and a .h5 file name.
# NOTE(review): the file names say "6classes", but the model defined in this
# notebook ends in a 7-unit softmax — confirm the intended name.
modelViT.save('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/model_vit_128_6classes.keras')
modelViT.save('/content/drive/MyDrive/Proyecto Cultivos/deepglobe/model_vit_128_6classes.h5')