# EuroSAT CNN optimization


In [None]:
# IPython autoreload extension in mode 2: imported modules (such as the local
# eurosat_model helpers used below) are re-imported before each cell executes,
# so edits to them take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# GPU configuration: pin TensorFlow to the first GPU and enable on-demand
# memory allocation, then print a few diagnostics.
import tensorflow as tf

# Physical GPUs known to TensorFlow (also displayed in the next cell)
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU devices found. Ensure your system recognizes the GPU.")
else:
    try:
        # Limit TensorFlow to use only the first GPU
        tf.config.set_visible_devices(gpus[0], 'GPU')
        print(f"Configured TensorFlow to use GPU: {gpus[0].name}")

        # Grow GPU memory usage as needed instead of reserving it all up front
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("Memory growth enabled for the first GPU.")

        # Listing logical devices initializes the runtime, so it must come
        # after the device configuration calls above.
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(f"Physical GPUs: {len(gpus)}, Logical GPUs: {len(logical_gpus)}")

    except RuntimeError as e:
        # Device settings cannot be changed once the runtime is initialized
        print(f"RuntimeError during GPU setup: {e}")
    except Exception as e:
        # Notebook-level boundary: report and carry on with the default setup
        print(f"An unexpected error occurred: {e}")

# Further GPU diagnostics
print("TensorFlow version:", tf.__version__)
# is_built_with_cuda() reports how TensorFlow was compiled, not whether a
# device is present, hence the label.
print("Built with CUDA:", tf.test.is_built_with_cuda())
# tf.test.is_gpu_available() is deprecated; the physical device list is the
# supported way to check for a GPU.
print("GPU availability:", bool(gpus))


In [None]:
# Display the physical GPU list collected by the GPU configuration cell
gpus

# Download the data

In [None]:
import os

# Set your condition
download_condition = False  # Replace this with your actual condition

if download_condition:
    # Check if the 'data' directory exists, if not create it
    if not os.path.exists('./data'):
        os.makedirs('./data')

    # Download the file and unzip it
    !wget https://madm.dfki.de/files/sentinel/EuroSAT.zip -P /tmp/ --no-check-certificate
    !unzip -qq /tmp/EuroSAT.zip -d ./data/

else:
    print("Download condition not met. Skipping download.")


In [None]:
from eurosat_model import count_images_in_subdirectories
# Dataset root and the class folders to inspect beneath it
parent_directory = './data/2750/'  # Change this to your directory path
subdirectories = [
    'AnnualCrop',
    'Forest',
    'HerbaceousVegetation',
    'Highway',
    'Industrial',
    'Pasture',
    'PermanentCrop',
    'Residential',
    'River',
    'SeaLake',
]

# Delegate the counting to the eurosat_model helper and report its result
total_images = count_images_in_subdirectories(parent_directory, subdirectories)
print(f"Total number of images across all subdirectories: {total_images}")


# Load the data for exploration

In [None]:
import numpy as np
import os
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Root of the extracted EuroSAT dataset: one sub-directory per class
data_dir = './data/2750/'

# Define image size
img_size = (64, 64)  # EuroSAT images are 64x64

# Accumulators for the decoded images, their integer labels and class names
images = []
labels = []
class_names = []

# Visit class folders in sorted order so class indices are stable across runs
for class_name in sorted(os.listdir(data_dir)):
    class_path = os.path.join(data_dir, class_name)

    # Ignore stray files next to the class folders
    if not os.path.isdir(class_path):
        continue

    # Derive the label from class_names rather than from the listing index:
    # a skipped non-directory entry would otherwise leave a gap and the labels
    # would no longer index into class_names.
    label = len(class_names)
    class_names.append(class_name)

    # Sorted file order keeps the seeded train/validation split reproducible
    # (os.listdir returns entries in arbitrary order).
    for img_file in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_file)

        # Decode inside a context manager so the file handle is released
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB').resize(img_size)

        # Scale to [0, 1]; float32 halves memory compared with the float64
        # that plain division of a uint8 array would produce.
        images.append(np.asarray(img, dtype=np.float32) / 255.0)
        labels.append(label)

# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)

# Stratified 80/20 split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(images, labels, test_size=0.2, random_state=42, stratify=labels)

print(f"Loaded {len(X_train)} training images and {len(X_val)} validation images.")
print(f"Class Names: {class_names}")

In [None]:
y_train[0]  # integer class label of the first training sample

In [None]:
plt.imshow(X_train[0]);  # first training image; the trailing ';' hides the cell's return value

In [None]:
from eurosat_model import plot_images

# Plot a few images from the training dataset using the eurosat_model helper;
# class_names is passed alongside the integer labels in y_train.
plot_images(X_train, y_train, class_names, num_images=10)

# 1. Build a simple CNN

In [None]:
# Import necessary functions from the eurosat_model script
from eurosat_model import (
    create_data_generators, 
    build_cnn_model,
    build_cnn_model_with_regularization, 
    train_model, 
    plot_training_history
    )

In [None]:
# Input shape (height, width, channels) and number of classes for EuroSAT
input_shape = (64, 64, 3)
num_classes = 10  # classes in EuroSAT

# Build the baseline CNN with the eurosat_model helper and print its layer summary
model_A = build_cnn_model(input_shape, num_classes)
model_A.summary()

# 2. Data Augmentation

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Set the path to the EuroSAT dataset directory after unzipping
data_dir = './data/2750/'

# Define image size and batch size for preprocessing
img_size = (64, 64)  # EuroSAT images are 64x64
batch_size = 32

# Fraction of each class held out for validation; shared by both generators
# below so that they partition the files identically.
aug_validation_split = 0.2

# Create an image data generator with data augmentation (not rescaling!)
datagen_aug = ImageDataGenerator(
    rotation_range=40,        # Rotate the image up to 40 degrees
    width_shift_range=0.2,    # Shift the image horizontally by 20% of the width
    height_shift_range=0.2,   # Shift the image vertically by 20% of the height
    shear_range=0.2,          # Apply shear transformations
    zoom_range=0.2,           # Zoom in/out by 20%
    horizontal_flip=True,     # Flip images horizontally
    fill_mode='nearest',      # Fill pixels after transformation
    validation_split=aug_validation_split  # Reserve part of the data for validation
)

# Validation images must not be augmented, otherwise validation metrics are
# measured on randomly distorted inputs. This plain generator applies no
# transformation and, like datagen_aug, no rescaling.
datagen_val = ImageDataGenerator(validation_split=aug_validation_split)

# Load the (augmented) training set
train_generator_aug = datagen_aug.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'  # Use subset='training' to load the training set
)

# Load the (unaugmented) validation set
val_generator = datagen_val.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',  # Use subset='validation' to load the validation set
    shuffle=False         # Fixed order: evaluation does not benefit from shuffling
)

# Get the class names (directories inside the EuroSAT dataset folder)
class_names = list(train_generator_aug.class_indices.keys())
print("Class Names:", class_names)

In [None]:
from eurosat_model import visualize_augmented_images

In [None]:
# Extract one batch of images and labels for visualization.
# NOTE(review): the batch is drawn from train_generator_aug, so the "original"
# image handed to the helper has already been through datagen_aug once.
X_train_sample, y_train_sample = next(train_generator_aug)

# Show the image at position 2 of the batch together with 9 further
# augmentations produced by datagen_aug (rendering is done by the eurosat_model helper)
visualize_augmented_images(X_train_sample, y_train_sample, index=2, datagen_aug=datagen_aug, class_names=class_names, num_augments=9)

## 3.2 Train & evaluate the CNN with data augmentation

In [None]:
# Train the model with data augmentation.
# NOTE(review): model_A is not compiled in this notebook; presumably
# build_cnn_model returns a compiled model — confirm in eurosat_model.
history = model_A.fit(
    train_generator_aug,  # Training data with augmentation
    validation_data=val_generator,  # Validation data
    epochs=10,  # the number of epochs
)

In [None]:
# Draw accuracy and loss curves side by side from the Keras History object.
# Each entry: (training key, validation key, training label, validation label, title)
history_panels = [
    ('accuracy', 'val_accuracy',
     'Training Accuracy', 'Validation Accuracy',
     'Training and Validation Accuracy'),
    ('loss', 'val_loss',
     'Training Loss', 'Validation Loss',
     'Training and Validation Loss'),
]

plt.figure(figsize=(12, 4))

for panel_position, (train_key, val_key, train_label, val_label, panel_title) in enumerate(history_panels, start=1):
    # One subplot per metric on a 1x2 grid
    plt.subplot(1, 2, panel_position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(panel_title)
    plt.legend()

plt.show()

In [None]:
# Persist the trained baseline model to disk in the native .keras format
model_A.save('eurosat_conv_model_A.keras')

# 3 & 4. Preventing Overfitting: Regularization

**Techniques to Prevent Overfitting**

1. **Dropout:**

Dropout is a regularization technique that randomly sets a fraction of input units to zero during each forward and backward pass in training. This forces the network to not rely too heavily on any particular neuron, making the model more robust.
You can add Dropout layers after some of the dense or convolutional layers.

2. **L1/L2 Regularization:**

L1 (Lasso) and L2 (Ridge) regularization add a penalty to the loss function based on the size of the weights, discouraging the model from fitting the training data too closely.
L2 is more common and can be added to layers by setting the kernel_regularizer parameter.

3. **Batch Normalization:**

Batch Normalization normalizes the input of each layer, stabilizing the learning process and often improving both training speed and model performance.
It is usually added after convolutional or dense layers.

### Adjusted CNN setup

In [None]:
# Set the directory of your dataset
data_dir = './data/2750/'

# Create data generators with the eurosat_model helper; it returns the training
# generator, the validation generator and the class names. This rebinds
# val_generator and class_names from the augmentation section.
train_generator, val_generator, class_names = create_data_generators(data_dir)

# Define input shape and number of classes (taken from the discovered classes)
input_shape = (64, 64, 3)
num_classes = len(class_names)

# Build the regularized model
model_B = build_cnn_model_with_regularization(input_shape, num_classes)

# Train the model for up to 10 epochs.
# NOTE(review): n_patience is presumably the early-stopping patience — confirm in train_model.
history = train_model(
    model=model_B, n_epochs=10, n_patience=5, 
    train_generator=train_generator, val_generator=val_generator
)

# Plot training history
plot_training_history(history)


### Why Regularization Can Cause Less Smooth Metric Curves

#### Regularization Adds Noise to Training:
- **Dropout**: Randomly drops units in the network during training, introducing noise and variability, which can cause training loss and accuracy to fluctuate from one batch to another.
- **L1/L2 Regularization**: Penalizes large weights, forcing the model to learn simpler representations. This can slow down learning or cause the model to oscillate as it adjusts weights under these constraints.

#### Regularization Prevents Overfitting, Not Noise:
- Regularization techniques are designed to reduce overfitting by making the model less sensitive to noise in the training data. However, during the training process, these techniques can introduce their own form of noise, leading to fluctuating metrics.
- The primary goal of regularization is to improve generalization, which might come at the cost of stability in training curves.

#### Immediate Impact vs. Long-Term Stability:
- Initially, regularization might make training and validation curves look more erratic because the model is learning under constraints.
- Over time, if regularization is effective, you should observe that the validation metrics (especially loss) improve or stabilize, even if the training metrics fluctuate.

#### Examples of Regularization Impact:
- **Dropout**: Causes each mini-batch to behave differently because different neurons are dropped randomly, which can lead to jumps in accuracy and loss.
- **L2 Regularization (Weight Decay)**: Shrinks weights towards zero, which can slow learning, causing small oscillations as the model gradually finds a balance between minimizing loss and keeping weights small.

#### Strategies to Mitigate Fluctuations While Using Regularization:
- **Gradual Learning Rate Decay**: Use learning rate schedules (like Exponential Decay) to gradually reduce the learning rate over time, helping to stabilize training as the model converges.
- **Increase Batch Size**: Larger batch sizes reduce the variance in gradient updates, leading to smoother training curves even with regularization.
- **Adjust Regularization Strength**: Fine-tune the dropout rate, L1/L2 penalties to find a balance where regularization helps without introducing excessive fluctuations.
- **Use Smoothing for Visualization**: Apply exponential smoothing when plotting metrics to better visualize the overall trends despite inherent fluctuations.

#### Key Takeaways:
- Regularization introduces controlled instability during training but helps in achieving better generalization.
- Fluctuations are normal and expected when regularization is applied; the goal is to see improved stability in validation metrics over the course of training.
- Focus on long-term trends rather than short-term noise when evaluating the effect of regularization on your model.

#### Visualize the weights

In [None]:
from eurosat_model import visualize_weights

# Visualize the weights of the 4th convolutional layer (to check whether clear patterns emerge).
# NOTE(review): layer_index=4 is passed as-is; confirm that this index maps to
# the intended convolutional layer of model_B.
visualize_weights(model_B, layer_index=4)

### Cross validation

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, StratifiedKFold

# Initial configuration
data_dir = './data/2750/'  # Dataset directory
input_shape = (64, 64, 3)
batch_size = 32
num_folds = 3

# Collect image file paths and integer class labels
all_images = []
all_labels = []

# Only directories are classes; sorting gives a stable class -> index mapping
# (os.listdir order is arbitrary and may include stray files).
cv_class_dirs = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)

for class_index, class_name in enumerate(cv_class_dirs):
    class_dir = os.path.join(data_dir, class_name)
    # Sorted file order keeps the seeded fold assignment reproducible
    for image_name in sorted(os.listdir(class_dir)):
        all_images.append(os.path.join(class_dir, image_name))
        all_labels.append(class_index)

# Convert lists to numpy arrays so they can be indexed with fold indices
all_images = np.array(all_images)
all_labels = np.array(all_labels)

# Loop-invariant values: number of classes and the label strings handed to
# flow_from_dataframe. Passing the same explicit `classes` list to both
# generators pins an identical label -> output-index mapping in every fold.
num_cv_classes = len(cv_class_dirs)
cv_class_labels = [str(index) for index in range(num_cv_classes)]

# Per-fold metrics
accuracy_per_fold = []
loss_per_fold = []

# Stratified folds keep the class proportions of the full dataset in every fold
kf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

# Only rescaling is applied, so one generator factory serves all folds
datagen = ImageDataGenerator(rescale=1.0/255)

# Cross-validation loop.
# The per-fold objects use fold_* names on purpose: reusing train_generator /
# val_generator / history here would overwrite the objects that belong to
# model_B, which later cells still evaluate and plot.
for fold, (train_indices, val_indices) in enumerate(kf.split(all_images, all_labels)):
    print(f'Training fold {fold+1}/{num_folds}...')

    # Split paths and labels for the current fold
    train_images, val_images = all_images[train_indices], all_images[val_indices]
    train_labels, val_labels = all_labels[train_indices], all_labels[val_indices]

    # flow_from_dataframe expects string class labels in categorical mode
    train_df = pd.DataFrame({'filename': train_images, 'class': train_labels.astype(str)})
    val_df = pd.DataFrame({'filename': val_images, 'class': val_labels.astype(str)})

    # Data generators for this fold
    fold_train_generator = datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col='filename',
        y_col='class',
        target_size=input_shape[:2],
        class_mode='categorical',
        classes=cv_class_labels,
        batch_size=batch_size,
        shuffle=True
    )

    fold_val_generator = datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col='filename',
        y_col='class',
        target_size=input_shape[:2],
        class_mode='categorical',
        classes=cv_class_labels,
        batch_size=batch_size,
        shuffle=False
    )

    # Build a fresh model for the current fold
    fold_model = build_cnn_model_with_regularization(input_shape, num_cv_classes)

    # Train on the current fold
    fold_history = train_model(
        model=fold_model, n_epochs=20, n_patience=5,
        train_generator=fold_train_generator, val_generator=fold_val_generator
    )

    # Evaluate on this fold's validation split; scores is [loss, accuracy]
    scores = fold_model.evaluate(fold_val_generator, verbose=0)
    print(f'Fold {fold+1} - Loss: {scores[0]} - Accuracy: {scores[1]}')

    # Record the metrics of the current fold
    accuracy_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])

# Average accuracy and loss over all folds
average_accuracy = np.mean(accuracy_per_fold)
average_loss = np.mean(loss_per_fold)
print(f'Average Loss: {average_loss}')
print(f'Average Accuracy: {average_accuracy}')

In [None]:
# Model evaluation: loss and accuracy of model_B on `val_generator`.
# NOTE(review): `val_generator` is whichever generator was bound most recently
# in the kernel; make sure it is the validation split model_B was trained
# against, not a split that overlaps its training data.
loss, accuracy = model_B.evaluate(val_generator)
print(f"Loss: {loss}")
print(f"Accuracy: {accuracy}")

In [None]:
# Re-plot the training curves with smoothing enabled (factor 0.8).
# NOTE(review): `history` is the most recently assigned History object; verify
# it belongs to the run you intend to plot.
plot_training_history(history, smooth=True, smoothing_factor=0.8)

# 5. Fine tuning exploration

In [None]:
# Print the installed TensorFlow version before setting up the tuner
import tensorflow as tf 
print(tf.__version__)  # 2.17.0 — presumably the version observed when this was written; confirm

In [None]:
#!pip install keras-tuner --upgrade

In [None]:
# `kerastuner` is the legacy module name of KerasTuner; the package is now
# imported as `keras_tuner`, which is also what the RandomSearch import in the
# next cell uses.
import keras_tuner as kt
print("KerasTuner version:", kt.__version__)

In [None]:
from eurosat_model import build_model_with_hp
from keras_tuner.tuners import RandomSearch
import os
import shutil

# Locations of artefacts left behind by earlier tuning runs
tuning_results_dir = 'tuner_results/eurosat_tuning'
temp_val_dir = './temp_val_test_split'

# True: wipe previous artefacts and start over; False: keep them to resume
start_fresh_tuning_session = True

if start_fresh_tuning_session:
    # Each entry pairs a directory with the message printed after removing it
    stale_dirs = (
        (tuning_results_dir, "Restarted tuning session: Tuning results directory cleared."),
        (temp_val_dir, "Temporary validation/test split directory cleared."),
    )
    for stale_path, cleared_message in stale_dirs:
        if not os.path.exists(stale_path):
            continue
        # ignore_errors=True: removal is best-effort and never raises
        shutil.rmtree(stale_path, ignore_errors=True)
        print(cleared_message)

print("Session setup completed.")

In [None]:
from eurosat_model import create_extended_generators

# Reset validation generator before tuning to avoid state issues
# (left disabled: val_generator is re-created just below)
#val_generator.reset()

# Create train / validation / test generators with the eurosat_model helper.
# validation_split and test_split are passed as fractions (0.2 and 0.1);
# NOTE(review): whether the test generator shuffles is decided inside
# create_extended_generators — the prediction cells further down rely on a
# fixed order, so confirm it does not.
train_generator, val_generator, test_generator = create_extended_generators(
    data_dir=data_dir,
    img_size=(64, 64),
    batch_size=32,
    validation_split=0.2,
    test_split=0.1
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint

# Number of batches per epoch. len() of a Keras generator is the batch count
# rounded up, so the final partial batch is included. The previous floor
# division (samples // batch_size) silently skipped the last validation batch.
steps_per_epoch = len(train_generator)
validation_steps = len(val_generator)

# Early stopping on validation loss
early_stopping = EarlyStopping(
    monitor='val_loss',        # Monitor validation loss
    patience=3,                # Epochs without improvement before stopping
    restore_best_weights=True, # Restore best weights based on validation loss
    min_delta=0.001,           # Minimum change to be considered as an improvement
    verbose=1                  # Report when training is stopped
)

# Model checkpoint that keeps only the best model seen during training
model_checkpoint = ModelCheckpoint(
    filepath='best_model.keras',  # Path to save the best model
    monitor='val_loss',        # Monitor the validation loss
    save_best_only=True,       # Save only the best model during training
    verbose=1                  # Show save messages
)

# Learning rate schedule callback (optional during tuning)
def lr_scheduler(epoch, lr):
    """Return the learning rate for the coming epoch.

    The current rate ``lr`` is multiplied by a constant factor of 0.9 on every
    call; ``epoch`` is accepted to match the callback signature but not used.
    """
    decay_factor = 0.9
    return lr * decay_factor

# Wrap the schedule function in a Keras callback
lr_callback = LearningRateScheduler(lr_scheduler)

# All three callbacks (early stopping, LR schedule, checkpoint) are used during training
callbacks = [early_stopping, lr_callback, model_checkpoint]

In [None]:
# Perform the hyperparameter search
print("Starting hyperparameter search...")

# Random search over the hyperparameters declared in build_model_with_hp
tuner = RandomSearch(
    build_model_with_hp,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=1,
    directory='tuner_results',
    project_name='eurosat_tuning',
    # Honor the session flag from the setup cell: a hard-coded True would
    # discard earlier trials even when the intent was to resume them.
    overwrite=start_fresh_tuning_session
)

# Run the hyperparameter search
tuner.search(
    train_generator,
    validation_data=val_generator,
    epochs=8,  # Define your desired number of epochs per trial
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=callbacks  # Early stopping, LR schedule and checkpointing per trial
)

# Retrieve the best model and the hyperparameters that produced it
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best Hyperparameters: ", best_hyperparameters.values)

# Re-compile with the tuned optimizer so that the loss and the single
# 'accuracy' metric are fixed; evaluate() below unpacks exactly two values.
best_model.compile(
    optimizer=best_model.optimizer,  # Use the optimizer configuration from tuning
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Continue training the best model. Only train_generator is fitted;
# val_generator is used for validation, not as additional training data.
print("Continuing training of the best model on the training data (validating on the validation split)...")
best_model.fit(
    train_generator,
    epochs=8,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator,
    validation_steps=validation_steps,
    callbacks=callbacks,
    verbose=2
)

# Evaluate the best model on the held-out test set
test_loss, test_accuracy = best_model.evaluate(test_generator)
print(f"Best Model Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

## 5.1 Simpler model with train, val, test data generators

In [None]:
# Build the CNN model
# Input shape and number of classes for EuroSAT
input_shape = (64, 64, 3)
num_classes = 10  # classes in EuroSAT

# Build and summarize the CNN model (same builder as model_A)
model_C = build_cnn_model(input_shape, num_classes)

model_C.summary()

In [None]:
# Compile the model
model_C.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model with early stopping on validation loss.
# restore_best_weights=True rolls the model back to its best epoch when
# training stops; without it the model that gets saved and tested afterwards
# would hold the weights of the last (worse) epoch. This matches the
# early_stopping callback configured for the tuning run.
history = model_C.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    callbacks=[EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)]
)

In [None]:
# Plot the training curves of the model_C run stored in `history`
plot_training_history(history)

In [None]:
# Persist model_C to disk; the test section reloads it from this file
model_C.save('model_C.keras')

# 6. Test best model

In [None]:
from tensorflow.keras.models import load_model

# Load the best model: model_C is rebound to the copy restored from disk
model_C = load_model('model_C.keras')


In [None]:
# Generate predictions for every batch of the test generator.
# NOTE(review): the following cells compare these predictions with
# test_generator.classes, which only lines up if the generator does not shuffle.
predictions = model_C.predict(test_generator)


In [None]:
import warnings

# Convert predicted class scores to class indices
predicted_classes = np.argmax(predictions, axis=1)

# Actual class labels as stored on the generator
actual_classes = test_generator.classes

# The stored labels and the predictions are compared position by position, so
# they only line up when the generator yields samples in a fixed order.
# Warn instead of silently reporting meaningless accuracy.
if getattr(test_generator, 'shuffle', False):
    warnings.warn(
        "test_generator shuffles its samples; predictions will not align "
        "with test_generator.classes. Re-create it with shuffle=False."
    )
if len(predicted_classes) != len(actual_classes):
    warnings.warn(
        f"Got {len(predicted_classes)} predictions for "
        f"{len(actual_classes)} labelled test samples."
    )

# Class names ordered by their index, so class_labels[i] is the name of class i
# regardless of the insertion order of the class_indices dict.
class_labels = sorted(test_generator.class_indices, key=test_generator.class_indices.get)

# Convert predicted indices to class labels
predicted_labels = [class_labels[i] for i in predicted_classes]


In [None]:
# Map the true class indices to names, mirroring predicted_labels
actual_labels = [class_labels[class_idx] for class_idx in actual_classes]

# Side-by-side table of true and predicted class names, one row per test image
comparison_df = pd.DataFrame(
    {
        'Actual Label': actual_labels,
        'Predicted Label': predicted_labels,
    }
)

# Show ten randomly drawn rows of the table
sample_comparisons = comparison_df.sample(10)
print(sample_comparisons)

In [None]:
import matplotlib.pyplot as plt

# Image paths as reported by the test generator
file_paths = test_generator.filepaths

# Draw a handful of distinct test images at random
num_samples = 5
indices = np.random.choice(len(file_paths), num_samples, replace=False)

# One row of images, each titled with its true and predicted class
plt.figure(figsize=(12, 8))
for panel, sample_idx in enumerate(indices, start=1):
    sample_row = comparison_df.iloc[sample_idx]
    actual_name = sample_row['Actual Label']
    predicted_name = sample_row['Predicted Label']

    plt.subplot(1, num_samples, panel)
    plt.imshow(plt.imread(file_paths[sample_idx]))
    plt.axis('off')
    plt.title(f"Actual: {actual_name}\nPredicted: {predicted_name}")
plt.show()


In [None]:
from sklearn.metrics import accuracy_score

# Calculate accuracy: fraction of test samples whose predicted class index
# equals the index stored on the generator. This rebinds `accuracy`.
accuracy = accuracy_score(actual_classes, predicted_classes)
print(f"Test Accuracy: {accuracy}")