In [None]:
# Imports
import os
import random

import numpy as np
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image


import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

import kagglehub
from google.colab import files

In [None]:
# Download the latest version of the EuroSAT dataset through kagglehub;
# the call returns the local directory that holds the dataset files.
path = kagglehub.dataset_download("apollo2506/eurosat-dataset")

print(f"Path to dataset files: {path}")

Downloading from https://www.kaggle.com/api/v1/datasets/download/apollo2506/eurosat-dataset?dataset_version_number=6...


100%|██████████| 2.04G/2.04G [00:18<00:00, 119MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/apollo2506/eurosat-dataset/versions/6


In [None]:
# 1. Root folder of the downloaded dataset.
# Reuse the directory returned by kagglehub.dataset_download() instead of
# hard-coding the cache location: the absolute path contains the dataset
# version and the runtime's home directory, so a literal string breaks as soon
# as either of them changes.
DATASET_PATH = path
print("Dataset obsahuje:", os.listdir(DATASET_PATH))

Dataset obsahuje: ['EuroSATallBands', 'EuroSAT']


In [None]:
# 2. Path to image folders: the "EuroSAT" subfolder of the downloaded dataset,
#    which is the directory the generators read the class folders from
data_dir = os.path.join(DATASET_PATH, "EuroSAT")

# 3. Dataset parameters
IMG_SIZE = (64, 64)   # passed to the generators as target_size
BATCH_SIZE = 32       # batch size used by both generators
NUM_CLASSES = 10      # number of units in the softmax output layer

In [None]:
# 4. Loading the dataset and preparing data using ImageDataGenerator
VALIDATION_SPLIT = 0.2  # fraction of every class folder held out for validation

# Training images are rescaled to [0, 1] and randomly augmented on the fly.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,         # slight rotation – satellite angles may vary
    zoom_range=0.1,            # slight zoom in/out
    horizontal_flip=True,      # satellite images are symmetrical – horizontal flip makes sense
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT)

# Validation images are ONLY rescaled. Random rotation/zoom/flip on the
# validation subset would make val_accuracy, the saved "best" checkpoint and
# the confusion matrix noisy and different on every pass. The same
# validation_split value is required so that subset='validation' selects the
# complement of the files used for training.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT)

train_generator = train_datagen.flow_from_directory(
    data_dir,
    shuffle=True,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

val_generator = val_datagen.flow_from_directory(
    data_dir,
    shuffle=False,            # keep file order so predictions line up with val_generator.classes
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

Found 21600 images belonging to 10 classes.
Found 5400 images belonging to 10 classes.


In [None]:
# 5. Load pre-trained VGG16 model without classification layers.
#    The input shape is derived from IMG_SIZE so it always matches the
#    target_size used by the data generators (3 = RGB channels).
base_model = VGG16(weights="imagenet", include_top=False, input_shape=IMG_SIZE + (3,))

# 6. Fine-tune the backbone: every VGG16 layer is left trainable.
#    (Set `layer.trainable = False` here instead to use VGG16 as a frozen
#    feature extractor and train only the classification head.)
for layer in base_model.layers:
    layer.trainable = True

# 7. Add custom classification head
x = Flatten()(base_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)  # one probability per class

# 8. Model assembly: VGG16 input -> convolutional backbone -> custom head
model = Model(inputs=base_model.input, outputs=outputs)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# 9. Model compilation: Adam with a small learning rate, categorical
#    cross-entropy loss (the generators use class_mode='categorical') and
#    accuracy as the reported metric
model.compile(
    loss="categorical_crossentropy",
    optimizer=Adam(learning_rate=5e-5),
    metrics=["accuracy"],
)
model.summary()

In [None]:
# 11. EarlyStopping callback: watches validation accuracy, counts an epoch as
#     an improvement only if it gains at least `min_delta`, and gives up after
#     `patience` epochs without one
early_stopping = EarlyStopping(
    monitor="val_accuracy",
    min_delta=0.003,
    patience=10,
    restore_best_weights=True,  # Restores the best model
)

In [None]:
# 12. ModelCheckpoint callback: stores the complete model (not just the
#     weights) in "FINAL_best.keras", keeping only the best one as judged by
#     validation accuracy
checkpoint = ModelCheckpoint(
    filepath="FINAL_best.keras",
    monitor="val_accuracy",
    verbose=1,
    save_weights_only=False,
    save_best_only=True,
)

In [None]:
# 13. Model training
EPOCHS = 100  # maximum number of epochs requested from fit()
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    callbacks=[early_stopping, checkpoint],
    verbose=1,
)

  self._warn_if_super_not_called()


Epoch 1/100
[1m675/675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.6347 - loss: 1.0642
Epoch 1: val_accuracy improved from -inf to 0.83741, saving model to FINAL_best.keras
[1m675/675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 95ms/step - accuracy: 0.6349 - loss: 1.0636 - val_accuracy: 0.8374 - val_loss: 0.5180
Epoch 2/100
[1m675/675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.9119 - loss: 0.3044
Epoch 2: val_accuracy improved from 0.83741 to 0.92778, saving model to FINAL_best.keras
[1m675/675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 94ms/step - accuracy: 0.9119 - loss: 0.3043 - val_accuracy: 0.9278 - val_loss: 0.2439
Epoch 3/100
[1m675/675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - accuracy: 0.9370 - loss: 0.2213
Epoch 3: val_accuracy improved from 0.92778 to 0.94185, saving model to FINAL_best.keras
[1m675/675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[

In [None]:
# Persist the training metrics. Only the plain `history.history` dictionary is
# written, because the history object itself is not serializable.
import pickle

with open('training_history.pkl', 'wb') as history_file:
    pickle.dump(history.history, history_file)

In [None]:
# 14. Evaluate the trained model on the validation generator; evaluate()
#     returns the loss followed by the compiled metric (accuracy)
val_loss, val_acc = model.evaluate(val_generator)
print("Validation accuracy: {:.4f}".format(val_acc))

[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 57ms/step - accuracy: 0.9743 - loss: 0.0909
Validační přesnost: 0.9752


In [None]:
# 16. Get the class names: the training generator's mapping of
#     class (folder) name -> label index, used later to decode predictions
class_indices = train_generator.class_indices

In [None]:
# 18. Training visualization – loss and accuracy curves
def plot_training_history(history):
    """Plot training/validation loss and accuracy per epoch, side by side.

    Args:
        history: Either the object returned by ``model.fit`` (its ``history``
            attribute is read) or the plain metrics dictionary itself, e.g.
            the one pickled to ``training_history.pkl``. The mapping must
            provide the keys 'loss', 'val_loss', 'accuracy' and
            'val_accuracy', each holding one value per epoch.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Accept both the fit() result and the raw dict, so a reloaded history
    # can be plotted without retraining.
    metrics = getattr(history, "history", history)

    fig, axs = plt.subplots(1, 2, figsize=(12, 5))

    # (metric key, training label, validation label, title, y-axis label)
    panels = [
        ('loss', 'Training loss', 'Validation loss', 'Loss Over Epochs', 'Loss'),
        ('accuracy', 'Training Accuracy', 'Validation Accuracy', 'Accuracy Over Epochs', 'Accuracy'),
    ]
    for ax, (key, train_label, val_label, title, y_label) in zip(axs, panels):
        ax.plot(metrics[key], label=train_label)
        ax.plot(metrics['val_' + key], label=val_label)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()

    plt.show()

In [None]:
# Ground-truth labels of the validation images and the model's predictions
# for them (the validation generator is not shuffled, so both are in file order)
y_true = val_generator.classes
y_pred_probs = model.predict(val_generator)
y_pred = np.argmax(y_pred_probs, axis=1)  # index of the most probable class

# Confusion matrix with the class names as tick labels
labels = list(val_generator.class_indices)
cm = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)
ax.set_xlabel("Predicted Classes")
ax.set_ylabel("True Classes")
ax.set_title("Confusion Matrix")
plt.show()

# Per-class precision / recall / F1 summary
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=labels))

In [None]:
# 19. Function to plot the confusion matrix
def plot_confusion_matrix(model, val_generator):
    """Draw the confusion-matrix heatmap and print the classification report.

    Args:
        model: Object whose ``predict(val_generator)`` returns one row of
            class scores per sample.
        val_generator: Data source exposing ``classes`` (true label indices)
            and ``class_indices`` (class name -> label index mapping).

    Returns:
        None. The heatmap is shown and the report is printed.
    """
    class_labels = list(val_generator.class_indices.keys())

    # True labels and the most probable predicted class for every sample
    truth = val_generator.classes
    probabilities = model.predict(val_generator)
    predicted = probabilities.argmax(axis=1)

    matrix = confusion_matrix(truth, predicted)

    plt.figure(figsize=(8, 6))
    heatmap_options = dict(
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_labels,
        yticklabels=class_labels,
    )
    sns.heatmap(matrix, **heatmap_options)
    plt.xlabel("Predikované třídy")
    plt.ylabel("Skutečné třídy")
    plt.title("Matice záměn")
    plt.show()

    # Textual per-class summary below the figure
    print("\nClassification report:")
    print(classification_report(truth, predicted, target_names=class_labels))

In [None]:
# Download the saved checkpoint "FINAL_best.keras" (the file written by the
# ModelCheckpoint callback) from the Colab runtime to the local computer
files.download("FINAL_best.keras")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# 20. Launching visualizations
plot_training_history(history)  # Accuracy and loss graph
# plot_confusion_matrix(model, val_generator)  # Confusion matrix (disabled; an earlier cell already draws it)

In [None]:
# Loading the best saved model – I uploaded the file to Colab
# model = load_model('/content/FINAL_best.keras')
# model.summary()

In [None]:
# Class names in label-index order, taken from the training generator so that
# position i always corresponds to output unit i of the model (a hand-written
# list can silently drift out of sync with the folder names).
class_names = sorted(train_generator.class_indices, key=train_generator.class_indices.get)

IMAGE_EXTENSIONS = ('.jpg', '.png')

# Select one random image from each class, remembering its true label
# (= name of the folder it was taken from)
selected_images = []
for class_name in class_names:
    class_folder = os.path.join(data_dir, class_name)
    images = [
        os.path.join(class_folder, f)
        for f in os.listdir(class_folder)
        if f.lower().endswith(IMAGE_EXTENSIONS)  # also accept upper-case extensions
    ]
    if images:
        selected_images.append((random.choice(images), class_name))

# Preprocess exactly like the generators (resize to IMG_SIZE, scale by 1/255)
# and classify all selected images with a single batched predict() call
# instead of one call per image.
loaded_images = [image.load_img(img_path, target_size=IMG_SIZE) for img_path, _ in selected_images]
batch = np.stack([image.img_to_array(img) for img in loaded_images]) / 255.0
predicted_indices = model.predict(batch).argmax(axis=1)

# Display images with both prediction and true label
N_COLS = 5
n_rows = -(-len(selected_images) // N_COLS)  # ceiling division: enough rows for every image

plt.figure(figsize=(15, 6))
for i, ((_, true_label), img, predicted_idx) in enumerate(
        zip(selected_images, loaded_images, predicted_indices)):
    predicted_class = class_names[predicted_idx]

    plt.subplot(n_rows, N_COLS, i + 1)
    plt.imshow(img)
    plt.axis('off')
    plt.title(f'True: {true_label}\nPredicted: {predicted_class}')

plt.tight_layout()
plt.show()

## Visualization of the most frequent misclassifications

In [None]:
# Most frequent misclassification – find and print the (true class, predicted
# class) pair with the highest count in the confusion matrix

# Work on a copy with the diagonal zeroed, so that correct predictions
# cannot win the argmax
cm_copy = cm.copy()
np.fill_diagonal(cm_copy, 0)

# Flat argmax -> (row, column) = (true class index, predicted class index)
max_confusion = np.unravel_index(cm_copy.argmax(), cm_copy.shape)
true_class_idx, pred_class_idx = max_confusion

true_class, pred_class = labels[true_class_idx], labels[pred_class_idx]

print(f"Most frequent misclassification: True class = '{true_class}', Predicted = '{pred_class}'")

Nejčastější záměna: Skutečná třída = 'PermanentCrop', Predikovaná = 'AnnualCrop'


In [None]:
# Find the specific validation images on which this misclassification occurred

# Build one row per validation image: file path, true label index and
# predicted label index (if you're using flow_from_dataframe, you can use
# val_df directly)
import pandas as pd

val_filenames = val_generator.filenames  # paths to images
df_val = pd.DataFrame(
    dict(filename=val_filenames, true_label=y_true, pred_label=y_pred)
)

# Keep only the rows that show the most frequent confusion
is_target_confusion = (
    df_val["true_label"].eq(true_class_idx)
    & df_val["pred_label"].eq(pred_class_idx)
)
mistakes = df_val.loc[is_target_confusion]
mistakes

Unnamed: 0,filename,true_label,pred_label
3203,PermanentCrop/PermanentCrop_1000.jpg,6,0
3217,PermanentCrop/PermanentCrop_1013.jpg,6,0
3256,PermanentCrop/PermanentCrop_1049.jpg,6,0
3283,PermanentCrop/PermanentCrop_1073.jpg,6,0
3292,PermanentCrop/PermanentCrop_1081.jpg,6,0
3305,PermanentCrop/PermanentCrop_1093.jpg,6,0
3312,PermanentCrop/PermanentCrop_11.jpg,6,0
3359,PermanentCrop/PermanentCrop_1141.jpg,6,0
3391,PermanentCrop/PermanentCrop_1170.jpg,6,0
3474,PermanentCrop/PermanentCrop_1245.jpg,6,0


In [None]:
# Show up to 5 randomly sampled examples of the most frequent
# misclassification (the images are read with Keras' load_img helper)

from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Sample at most 5 rows; fewer if there are not that many mistakes
sample_mistakes = mistakes.sample(n=min(5, len(mistakes)))

plt.figure(figsize=(15, 5))
for position, filename in enumerate(sample_mistakes["filename"], start=1):
    # Filenames are relative to the directory the generator was built from
    img_path = os.path.join(val_generator.directory, filename)
    img = load_img(img_path, target_size=(128, 128))  # resized to 128x128 for display

    plt.subplot(1, 5, position)
    plt.imshow(img)
    plt.axis("off")
    plt.title(f"Skutečné: {true_class}\nPredikce: {pred_class}")

plt.suptitle(f"Most frequent misclassification of the best model: {true_class} ➝ {pred_class}", fontsize=16)
plt.tight_layout()
plt.show()

KeyboardInterrupt: 

In [None]:
# 15. Function predicting the TOP-2 classes for a new image
def predict_top_2(image_path, model, class_indices):
    """Return the two most probable classes for a single image file.

    The image is preprocessed the same way as the generator input: resized to
    ``IMG_SIZE`` and scaled by 1/255.

    Args:
        image_path: Path of the image file to classify.
        model: Object whose ``predict(batch)`` returns one row of class
            probabilities per image.
        class_indices: Mapping of class name -> label index (e.g.
            ``train_generator.class_indices``).

    Returns:
        A list of ``(class_name, probability)`` tuples, most probable first.
        Probabilities are plain Python floats.
    """
    img = image.load_img(image_path, target_size=IMG_SIZE)
    img_array = image.img_to_array(img) / 255.0
    batch = np.expand_dims(img_array, axis=0)  # the model expects a batch dimension

    predictions = model.predict(batch)[0]

    # Invert the mapping once instead of rebuilding key/value lists per lookup
    index_to_class = {index: name for name, index in class_indices.items()}

    # argsort is ascending: take the last two indices and reverse them
    top_2_indices = np.argsort(predictions)[-2:][::-1]

    return [(index_to_class[int(i)], float(predictions[i])) for i in top_2_indices]

In [None]:
# 17. Prediction test on a random image from the dataset.
#     NOTE: the file is drawn from the training generator's file list, so this
#     is a quick smoke test of predict_top_2 rather than a check on unseen data.
random_image = random.choice(train_generator.filepaths)

# Decode the two highest-probability classes using the name -> index mapping
top_2_predictions = predict_top_2(random_image, model, class_indices)
print(f"Top-2 predicted classes for the image: {top_2_predictions}")

In [None]:
# After training is complete, you can load the model anytime as follows:

# from tensorflow.keras.models import load_model

# Load the best saved model (the checkpoint callback writes it to "FINAL_best.keras")
# model = load_model("FINAL_best.keras")

# Load the best saved model – I uploaded the file to Colab
# model = load_model('/content/FINAL_best.keras')

# Verify the model structure
# model.summary()