## Step 2: Data Exploration

In [1]:
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import seaborn as sns
import cv2
from collections import Counter

In [2]:
# Define dataset path (a relative path, so it is resolved against the
# notebook's current working directory).
# The entries directly under this directory are listed as the land type
# categories in the next cell.
dataset_path = "EuroSAT/2750/"

In [3]:
# List all land type categories (one sub-directory per category).
# os.listdir() returns entries in arbitrary order and also returns plain
# files (e.g. hidden OS metadata files), so keep only directories and sort
# them to get a stable, reproducible category list.
categories = sorted(
    entry
    for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)

## Step 3: Data Preprocessing

In [4]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [5]:
# Resize, normalize, and convert to RGB
def load_and_preprocess_images(path, size=(224, 224)):
    """Load every readable image under ``path`` as a normalized RGB array.

    Iterates over the module-level ``categories`` list and treats each entry
    as a sub-directory of ``path`` whose files are images of that category.

    Args:
        path: Dataset root directory containing one sub-directory per category.
        size: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        A tuple ``(data, labels)``. ``data`` is a float32 array of the resized
        RGB images scaled to [0, 1]; ``labels`` is an array with the category
        name of each image, in the same order as ``data``.

    Warns:
        UserWarning: if one or more files could not be decoded as images and
            were therefore skipped.
    """
    import warnings  # local import: only used to report skipped files

    data, labels = [], []
    skipped = []
    for category in categories:
        category_path = os.path.join(path, category)
        if not os.path.isdir(category_path):
            # A stray file listed next to the category folders is not a category.
            continue
        # Sorted so the sample order does not depend on the filesystem's
        # listing order (keeps the later seeded split reproducible).
        for img_name in sorted(os.listdir(category_path)):
            img_path = os.path.join(category_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals an unreadable/non-image file by returning
                # None rather than raising; cv2.resize would crash on it.
                skipped.append(img_path)
                continue
            img = cv2.resize(img, size)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
            # Normalize to [0,1]; cast right away so the list holds float32
            # images instead of float64 ones (same values, half the memory).
            img = (img / 255.0).astype("float32")
            data.append(img)
            labels.append(category)
    if skipped:
        warnings.warn(
            f"Skipped {len(skipped)} unreadable file(s), first: {skipped[0]}"
        )
    return np.array(data, dtype="float32"), np.array(labels)

In [6]:
# Read the whole dataset into memory: the image array and, for each image,
# the name of the category folder it came from.
data, labels = load_and_preprocess_images(path=dataset_path)

In [7]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [8]:
# Encode labels into integers: learn the set of category names first,
# then map every label to its integer class id.
label_encoder = LabelEncoder().fit(labels)
labels_encoded = label_encoder.transform(labels)

In [9]:
# Convert to one-hot encoding: each integer label becomes a row with a single 1.
# num_classes is not passed, so to_categorical infers the row width from the
# largest label value present in labels_encoded.
labels_categorical = to_categorical(labels_encoded)

In [19]:
# Split dataset into train (70%), validation (20%), and test (10%).
# Both splits are stratified on the class index (argmax of the one-hot rows)
# so every subset keeps the class proportions of the data it was drawn from;
# a plain random split can leave classes unevenly represented in the smaller
# validation and test sets.
X_train, X_temp, y_train, y_temp = train_test_split(
    data,
    labels_categorical,
    test_size=0.3,
    random_state=42,
    stratify=labels_categorical.argmax(axis=1),
)
# The held-out 30% is divided 2:1, i.e. 20% validation and 10% test overall.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=1/3,
    random_state=42,
    stratify=y_temp.argmax(axis=1),
)

In [20]:
# Data augmentation: random rotations (rotation_range=20), horizontal and
# vertical shifts and zoom (each with range 0.2), and random horizontal flips.
# Two alternative configurations used to sit here as commented-out code:
# the same settings plus shear_range=0.15, and rotation_range=25 with
# shear_range=0.1. Re-add shear_range below to revisit them.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
)

In [21]:
# Generator with no augmentation options set, used for validation and test data.
val_test_datagen = ImageDataGenerator()

# Final datasets
# The training iterator is seeded so its shuffle order and random
# augmentations are repeatable between runs, in line with the fixed
# random_state used for the train/validation/test split.
train_generator = datagen.flow(X_train, y_train, batch_size=32, shuffle=True, seed=42)
# Validation and test iterators keep the sample order (shuffle=False), so
# predictions line up with y_val / y_test.
val_generator = val_test_datagen.flow(X_val, y_val, batch_size=32, shuffle=False)
test_generator = val_test_datagen.flow(X_test, y_test, batch_size=32, shuffle=False)