In [54]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing import image_dataset_from_directory
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
from keras.saving import register_keras_serializable

import numpy as np
from collections import Counter

In [58]:
# Input resolution and batch size shared by every split.
img_size = (384, 384)
batch_size = 16

# Root folder of the dataset. Each split ('training', 'validation', 'testing')
# is a sub-folder containing one directory per class. Change this single
# constant to point the notebook at a different copy of the data.
DATA_DIR = 'C:/Users/Natalia/Desktop/PADIA/SIRIA_copernicus/dataset_soil'

# Fixed seed so the shuffling of the training split is repeatable between runs.
SEED = 42


def _load_split(split, shuffle, seed=None):
    """Load one dataset split as a batched image dataset.

    Args:
        split: Name of the sub-folder of DATA_DIR to read.
        shuffle: Whether the samples are shuffled.
        seed: Optional seed forwarded to the loader's shuffling.

    Returns:
        The dataset produced by `image_dataset_from_directory`, using the
        notebook-wide `img_size` and `batch_size`.
    """
    return image_dataset_from_directory(
        f'{DATA_DIR}/{split}',
        image_size=img_size,
        batch_size=batch_size,
        shuffle=shuffle,
        seed=seed,
    )


# Only the training split is shuffled; validation and test keep a stable order
# so that labels and predictions collected later line up deterministically.
train_ds = _load_split('training', shuffle=True, seed=SEED)
val_ds = _load_split('validation', shuffle=False)
test_ds = _load_split('testing', shuffle=False)

# Compute the class distribution (used afterwards for the class weights)
def get_class_distribution(dataset):
    """Count the samples of each class in a batched, labelled dataset.

    The dataset is iterated once; every integer label is translated to a
    class name through `dataset.class_names`. A summary with absolute counts
    and percentages is printed.

    Args:
        dataset: Iterable of `(images, labels)` batches whose labels expose
            `.numpy()`, and which carries a `class_names` sequence.

    Returns:
        A tuple `(class_counts, total_samples)` where `class_counts` maps
        class name -> number of samples (in order of first appearance) and
        `total_samples` is the overall number of labels seen.
    """
    tally = {}

    for _images, batch_labels in dataset:
        for class_idx in batch_labels.numpy():
            name = dataset.class_names[class_idx]
            if name in tally:
                tally[name] += 1
            else:
                tally[name] = 1

    # Every label seen contributed exactly one increment to the tally.
    seen = sum(tally.values())

    print("Distribución de clases:")
    for name in tally:
        share = (tally[name] / seen) * 100
        print(f"  {name}: {tally[name]} samples ({share:.1f}%)")

    return tally, seen

# Walk the training split once to print and keep its per-class sample counts.
class_counts, total_samples = get_class_distribution(train_ds)

def calculate_moderate_class_weights(class_counts, class_names):
    """Compute softened ("moderate") class weights from per-class sample counts.

    The weights follow the usual "balanced" heuristic
    `n_samples / (n_classes * count)`, are then square-rooted to dampen the
    correction, and finally divided by the smallest weight so that the most
    frequent class gets a weight of exactly 1.0.

    The computation relies only on the arguments (no extra pass over the image
    dataset is needed), and classes without any sample are skipped instead of
    raising.

    Args:
        class_counts: Mapping of class name -> number of training samples.
        class_names: Class names ordered by their integer label; the position
            in this sequence is the key used in the returned dictionary.

    Returns:
        Dict mapping integer class index -> float weight, containing only the
        classes that have at least one sample.

    Raises:
        ValueError: If none of `class_names` has samples in `class_counts`.
    """
    # Keep only the classes that are actually present, remembering their index.
    present = [
        (index, class_counts.get(name, 0))
        for index, name in enumerate(class_names)
    ]
    present = [(index, count) for index, count in present if count > 0]
    if not present:
        raise ValueError("class_counts has no samples for any of class_names")

    indices = [index for index, _ in present]
    counts = np.array([count for _, count in present], dtype=np.float64)

    # "Balanced" weights: inversely proportional to the class frequency.
    full_weights = counts.sum() / (len(counts) * counts)

    # The square root softens the correction applied to rare classes.
    moderate_weights = np.sqrt(full_weights)

    # Normalise so the smallest weight (most frequent class) equals 1.
    moderate_weights = moderate_weights / np.min(moderate_weights)

    class_weight_dict = {
        index: float(weight) for index, weight in zip(indices, moderate_weights)
    }

    print("\nClass weights moderados calculados:")
    for i, class_name in enumerate(class_names):
        if i in class_weight_dict:
            print(f"  {class_name}: {class_weight_dict[i]:.3f}")
        else:
            # Class listed but absent from the counts: no weight is emitted.
            print(f"  {class_name}: n/a (0 samples)")

    return class_weight_dict

# Softened class weights for the training split; handed to model.fit(class_weight=...) further down.
class_weights = calculate_moderate_class_weights(class_counts, train_ds.class_names)

@register_keras_serializable()
class RandomSaturation(layers.Layer):
    """Augmentation layer that randomly rescales colour saturation while training.

    On every training call one factor is drawn uniformly between `factor_min`
    and `factor_max` and applied to the whole input tensor. When `training`
    is falsy (including the default `None`) the input is returned unchanged.
    """

    def __init__(self, factor_min=0.8, factor_max=1.2, **kwargs):
        super().__init__(**kwargs)
        self.factor_min, self.factor_max = factor_min, factor_max

    def call(self, inputs, training=None):
        # Outside of training the layer is the identity.
        if not training:
            return inputs
        scale = tf.random.uniform([], self.factor_min, self.factor_max)
        return tf.image.adjust_saturation(inputs, scale)

    def get_config(self):
        # Base-layer config plus the two bounds, so the layer can be re-created.
        return {
            **super().get_config(),
            'factor_min': self.factor_min,
            'factor_max': self.factor_max,
        }

@register_keras_serializable()
class RandomHue(layers.Layer):
    """Augmentation layer that randomly shifts the hue while training.

    On every training call one delta is drawn uniformly between `delta_min`
    and `delta_max` and applied to the whole input tensor. When `training`
    is falsy (including the default `None`) the input is returned unchanged.
    """

    def __init__(self, delta_min=-0.05, delta_max=0.05, **kwargs):
        super().__init__(**kwargs)
        self.delta_min, self.delta_max = delta_min, delta_max

    def call(self, inputs, training=None):
        # Outside of training the layer is the identity.
        if not training:
            return inputs
        shift = tf.random.uniform([], self.delta_min, self.delta_max)
        return tf.image.adjust_hue(inputs, shift)

    def get_config(self):
        # Base-layer config plus the two bounds, so the layer can be re-created.
        return {
            **super().get_config(),
            'delta_min': self.delta_min,
            'delta_max': self.delta_max,
        }


# Augmentation pipeline placed at the front of the model.
# NOTE(review): the images reach these layers as raw 0-255 pixel values (Keras
# documents efficientnet.preprocess_input as a pass-through), so any layer
# configured with an absolute magnitude must be expressed on that scale.
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.3),  # raised from 0.2 to 0.3
    layers.RandomZoom(0.2),      # raised from 0.15 to 0.2
    layers.RandomContrast(0.2),  # raised from 0.15 to 0.2
    layers.RandomBrightness(0.15), # raised from 0.1 to 0.15
    layers.RandomTranslation(0.15, 0.15), # raised from 0.1 to 0.15
    RandomSaturation(0.7, 1.3),  # more aggressive: from (0.8,1.2) to (0.7,1.3)
    RandomHue(-0.08, 0.08),      # more aggressive: from (-0.05,0.05) to (-0.08,0.08)
    # NEW: light Gaussian noise. The stddev is absolute, so it is written as
    # 1% of the 0-255 pixel range; a bare 0.01 would be imperceptible there.
    layers.GaussianNoise(0.01 * 255.0),
])

def preprocess(image, label):
    """Run the EfficientNet input-preprocessing hook on the image; the label passes through.

    NOTE(review): Keras documents this hook as a pass-through for EfficientNet
    (the scaling lives inside the model), so pixels presumably stay in the
    0-255 range after this call. Confirm for the installed version.
    """
    return tf.keras.applications.efficientnet.preprocess_input(image), label


# Grab the label names from the freshly loaded training split before the
# dataset variables are rebound to their mapped versions.
class_names = train_ds.class_names
train_ds, val_ds, test_ds = (
    split.map(preprocess) for split in (train_ds, val_ds, test_ds)
)


# Import from the public applications namespace rather than the private
# `keras.src` package, whose layout is not a stable API.
from tensorflow.keras.applications import EfficientNetB0

# ImageNet-pretrained backbone without its classification head. The input
# shape is derived from `img_size` so it cannot drift from the data loaders.
base_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=img_size + (3,)
)

# Freeze the backbone: only the layers added on top of it are trained.
base_model.trainable = False



# Classifier: in-model augmentation -> frozen EfficientNetB0 backbone -> dense head.
model = models.Sequential([
    data_augmentation,
    # The datasets are already loaded at img_size = (384, 384), so for the
    # pipelines in this notebook this resize keeps the spatial size as is.
    layers.Resizing(384, 384),
    base_model,
    layers.GlobalAveragePooling2D(),

    # Head block 1: L2-regularised dense layer, then dropout and batch norm.
    layers.Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    layers.Dropout(0.6),
    layers.BatchNormalization(),

    # Head block 2: same pattern, narrower and with a lower dropout rate.
    layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    layers.Dropout(0.5),
    layers.BatchNormalization(),

    # IMPROVEMENT 4: additional, smaller layer
    layers.Dense(64, activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    layers.Dropout(0.3),

    # Output layer: one softmax unit per class name.
    layers.Dense(len(class_names), activation='softmax')
])

# The labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005, weight_decay=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Early stopping on the validation loss: an improvement must be at least
# 0.001, patience is 10 epochs, and the best weights are restored at the end.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Learning-rate schedule driven by the same metric: multiply the rate by 0.3
# after 5 epochs without improvement, never going below 1e-9.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=5,
    min_lr=1e-9,
    verbose=1,
)

print("\ntraining con class weights")

# Train with the per-class weights so under-represented classes count more.
history = model.fit(
    train_ds,
    epochs=20,
    validation_data=val_ds,
    class_weight=class_weights,
    callbacks=[early_stopping, reduce_lr],
    verbose=1,
)

def calculate_detailed_metrics(model, dataset, dataset_name, class_names):
    """Print accuracy, a per-class report and the mean confidence per true class.

    Args:
        model: Trained classifier whose output is one probability per class.
        dataset: Iterable of `(images, labels)` batches with integer labels.
        dataset_name: Heading printed before the metrics.
        class_names: Class names ordered by integer label.

    Returns:
        None. All results are printed.
    """
    print(f"\n{dataset_name}")

    label_batches = []
    prob_batches = []

    for images, labels in dataset:
        # One forward pass per batch. `predict_on_batch` is used because
        # `predict` is documented as not intended for calls inside a loop.
        prob_batches.append(np.asarray(model.predict_on_batch(images)))
        label_batches.append(labels.numpy())

    if not label_batches:
        # Nothing to score: avoid nan accuracy and a failing report.
        print("Sin muestras para evaluar.")
        return

    y_true = np.concatenate(label_batches)
    y_prob = np.concatenate(prob_batches)
    y_pred = np.argmax(y_prob, axis=1)

    accuracy = np.mean(y_true == y_pred)
    print(f"Accuracy: {accuracy:.4f} ({accuracy:.2%})")

    print("\nClassification report:")
    # Passing `labels` explicitly keeps the report aligned with `class_names`
    # even when a class is missing from both the labels and the predictions.
    report = classification_report(y_true, y_pred,
                                 labels=np.arange(len(class_names)),
                                 target_names=class_names,
                                 digits=4,
                                 zero_division=0)
    print(report)

    print(f"\nConfianza promedi:")
    for i, class_name in enumerate(class_names):
        class_mask = y_true == i
        # Classes with no sample in this dataset are skipped.
        if np.sum(class_mask) > 0:
            # Mean of the top probability over the samples of this true class.
            avg_confidence = np.mean(np.max(y_prob[class_mask], axis=1))
            print(f"  {class_name}: {avg_confidence:.4f}")

print("despues de entrenamiento balanceado")

# evaluate() returns the loss followed by the compiled metric (accuracy).
val_loss, val_acc = model.evaluate(val_ds)
print(f"Precisión en validación: {val_acc:.2%}")

test_loss, test_acc = model.evaluate(test_ds)
print(f"Precisión en test: {test_acc:.2%}")

# Detailed per-class metrics, validation first and then test.
for split_label, split_ds in (("Validación", val_ds), ("Test", test_ds)):
    calculate_detailed_metrics(model, split_ds, split_label, class_names)


Found 1131 files belonging to 4 classes.
Found 169 files belonging to 4 classes.
Found 169 files belonging to 4 classes.
Distribución de clases:
  Alluvial soil: 495 samples (43.8%)
  Black Soil: 228 samples (20.2%)
  Clay soil: 169 samples (14.9%)
  Red soil: 239 samples (21.1%)

Class weights moderados calculados:
  Alluvial soil: 1.000
  Black Soil: 1.473
  Clay soil: 1.711
  Red soil: 1.439

training con class weights
Epoch 1/20
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 667ms/step - accuracy: 0.3239 - loss: 8.8269 - val_accuracy: 0.7101 - val_loss: 7.4976 - learning_rate: 5.0000e-04
Epoch 2/20
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 643ms/step - accuracy: 0.4433 - loss: 8.0053 - val_accuracy: 0.7751 - val_loss: 6.8889 - learning_rate: 5.0000e-04
Epoch 3/20
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 665ms/step - accuracy: 0.5509 - loss: 7.3661 - val_accuracy: 0.8402 - val_loss: 6.3306 - learning_rate: 5.0000e-04
E

In [60]:
# Self-assignment: rebinds `class_names` to the value it already holds, so this cell has no effect.
class_names = class_names

In [61]:
# Quick look at the label vocabulary: the names and how many there are.
for caption, value in (("Class names:", class_names),
                       ("Number of classes:", len(class_names))):
    print(caption, value)

Class names: ['Alluvial soil', 'Black Soil', 'Clay soil', 'Red soil']
Number of classes: 4


In [62]:
import json

# Map every class name to its integer label (its position in class_names).
class_indices = dict(zip(class_names, range(len(class_names))))

# Persist the mapping as JSON.
with open("class_indices.json", "w") as out_file:
    json.dump(class_indices, out_file)

print("class_indices.json")

class_indices.json


In [63]:
# Save the trained model to soil_classifier.keras in the working directory.
model.save("soil_classifier.keras")

In [64]:

# Rebind `model` to the copy read back from disk. NOTE(review): RandomSaturation and
# RandomHue are decorated with @register_keras_serializable(), presumably so that this
# call can rebuild them without a custom_objects argument. Confirm.
model = tf.keras.models.load_model("soil_classifier.keras")

In [65]:
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.applications.efficientnet import preprocess_input
import numpy as np
import tensorflow as tf

img_path = "C:/Users/Natalia/Desktop/PADIA/SIRIA_copernicus/dataset_soil/testing/Clay soil/Clay_13.jpg"  # Change this to the path of your image
img_size = (384,384)

# Load the image and resize it to the resolution used for the datasets.
img = load_img(img_path, target_size=img_size)

# Convert to an array and apply the same preprocessing hook the datasets went
# through. NOTE(review): Keras documents this hook as a pass-through for
# EfficientNet, so no rescaling is expected to happen here. Confirm.
img_array = img_to_array(img)
img_array = preprocess_input(img_array)

# Add a leading axis so the single image forms a batch of size 1.
img_array = np.expand_dims(img_array, axis=0)

predictions = model.predict(img_array)

# `class_names` comes from the kernel state; make sure it matches the model
# before using it to decode the prediction.
if predictions.shape[-1] != len(class_names):
    raise ValueError(
        f"Model outputs {predictions.shape[-1]} classes but class_names has {len(class_names)} entries"
    )

# Most likely class and its probability expressed as a percentage.
probabilities = predictions[0]
predicted_class = np.argmax(probabilities)
confidence = 100 * np.max(probabilities)

print(f"La imagen probablemente pertenece a '{class_names[predicted_class]}' con una confianza de {confidence:.2f}%")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 845ms/step
La imagen probablemente pertenece a 'Clay soil' con una confianza de 96.85%
