In [1]:
# Cell 1: Colab Setup (Delete)
# (Empty Cell)

# Cell 2: Library Install (Delete)
# (Empty Cell - Remember to install locally: pip install tensorflow keras matplotlib scikit-learn albumentations rasterio pandas opencv-python)

# Cell 3: Kaggle Config (Local Check)
import os
import shutil

# --- LOCAL PATH FOR KAGGLE AUTHENTICATION ---
# Point the KAGGLE_CONFIG_DIR environment variable at the project-local
# .secrets/ folder so the credentials file is looked up there.
os.environ.update({'KAGGLE_CONFIG_DIR': './.secrets'})
print("✅ Kaggle config pointing to ./.secrets/. Ensure your key is in this local folder.")

# Cell 4: Data Acquisition (Checks for Manual Download)
import os
import zipfile

# --- LOCAL PATH FOR RAW DATA ---
dataset_extract_path = './datasets/eurosat/'

# The presence of the EuroSATallBands sub-folder is used as the signal that
# the raw data has been placed where the rest of the notebook expects it.
all_bands_dir = os.path.join(dataset_extract_path, 'EuroSATallBands')
if os.path.exists(all_bands_dir):
    print("✅ Raw dataset folder found locally and ready for preprocessing.")
else:
    print("❌ Raw data folder NOT found. Please ensure 'EuroSATallBands' is inside './datasets/eurosat/'.")
    # You can add the !kaggle download command back here if needed.


# Cell 5: Verify CSV Files (Path Change)
# This confirms the overall folder structure is correct.
datasets_path = './datasets/eurosat'
# Walk the whole dataset tree, printing every directory and, beneath it,
# only the CSV files it contains.
for folder, _subfolders, entries in os.walk(datasets_path):
    print("Directory:", folder)
    for csv_name in (entry for entry in entries if entry.endswith('.csv')):
        print(" - CSV file:", csv_name)

✅ Kaggle config pointing to ./.secrets/. Ensure your key is in this local folder.
✅ Raw dataset folder found locally and ready for preprocessing.
Directory: ./datasets/eurosat
Directory: ./datasets/eurosat\EuroSAT
 - CSV file: test.csv
 - CSV file: train.csv
 - CSV file: validation.csv
Directory: ./datasets/eurosat\EuroSAT\AnnualCrop
Directory: ./datasets/eurosat\EuroSAT\Forest
Directory: ./datasets/eurosat\EuroSAT\HerbaceousVegetation
Directory: ./datasets/eurosat\EuroSAT\Highway
Directory: ./datasets/eurosat\EuroSAT\Industrial
Directory: ./datasets/eurosat\EuroSAT\Pasture
Directory: ./datasets/eurosat\EuroSAT\PermanentCrop
Directory: ./datasets/eurosat\EuroSAT\Residential
Directory: ./datasets/eurosat\EuroSAT\River
Directory: ./datasets/eurosat\EuroSAT\SeaLake
Directory: ./datasets/eurosat\EuroSATallBands
 - CSV file: test.csv
 - CSV file: train.csv
 - CSV file: validation.csv
Directory: ./datasets/eurosat\EuroSATallBands\AnnualCrop
Directory: ./datasets/eurosat\EuroSATallBands\Fores

In [3]:
# Cell 6 & 8: Inspect CSV (Path Change)
import pandas as pd
# --- LOCAL PATH ---
csv_path = './datasets/eurosat/EuroSATallBands/train.csv'
df = pd.read_csv(csv_path)
# Show the column names, then the first five rows, as plain text.
print("CSV Columns:", list(df.columns))
print(df.head(5))


# Cell 7: Load TIF Paths (Corrected Local Path)
import pandas as pd
# --- LOCAL PATH ---
base_dir = './datasets/eurosat/EuroSATallBands'

# Read the three split definition files (train / validation / test) in order.
train_df, val_df, test_df = (
    pd.read_csv(f'{base_dir}/{split_file}')
    for split_file in ('train.csv', 'validation.csv', 'test.csv')
)

def make_absolute(p, base=None):
    """Prefix a CSV-relative image path with the dataset base directory.

    Args:
        p: Path string as found in the ``Filename`` column.
        base: Directory to prepend. Defaults to the notebook-level
            ``base_dir`` when omitted, which is the original behaviour.

    Returns:
        ``p`` unchanged when it is already absolute, otherwise
        ``"{base}/{p}"``.
    """
    if base is None:
        base = base_dir
    # A leading '/' is kept as an explicit check so previous results are
    # unchanged on every platform; os.path.isabs additionally recognises
    # platform-specific absolute forms (e.g. Windows drive-letter paths),
    # which the original would have wrongly prefixed with the base.
    if p.startswith('/') or os.path.isabs(p):
        return p
    return f"{base}/{p}"

# Resolve every CSV filename to a path under the dataset directory.
train_img_paths = list(map(make_absolute, train_df['Filename']))
val_img_paths = list(map(make_absolute, val_df['Filename']))
test_img_paths = list(map(make_absolute, test_df['Filename']))

# Labels are kept as plain Python lists, in the same row order as the paths.
train_labels = train_df['Label'].tolist()
val_labels = val_df['Label'].tolist()
test_labels = test_df['Label'].tolist()

print("✅ Original TIF paths and labels loaded.")


# Cell 9: Preprocessing (Path and FIX Confirmed)
import os
import numpy as np
import rasterio
import cv2

def preprocess_and_save(image_path, save_path, target_size=(64, 64), scale=10000.0):
    """Read a raster image, resize and rescale it, and save it as a NumPy file.

    Processing is best-effort: any exception is reported on stdout and turned
    into a ``False`` return value instead of propagating, so one bad file does
    not abort a whole batch while the caller can still detect the failure.

    Args:
        image_path: Path of the raster file opened with ``rasterio``.
        save_path: Destination passed to ``np.save``.
        target_size: ``(width, height)`` handed to ``cv2.resize``.
            Defaults to ``(64, 64)``.
        scale: Divisor applied after conversion to ``float32``.
            Defaults to ``10000.0``.

    Returns:
        ``True`` if the array was written, ``False`` if any step failed.
    """
    try:
        with rasterio.open(image_path) as src:
            image = src.read()
        # Change from (bands, H, W) to (H, W, bands)
        image = np.transpose(image, (1, 2, 0))
        # Resize to the target size (64x64 by default)
        image = cv2.resize(image, tuple(target_size), interpolation=cv2.INTER_AREA)

        # --- CRITICAL FIX: Correct Normalization (10000.0) ---
        image = image.astype(np.float32) / scale

        np.save(save_path, image)
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        # Signal the failure so callers can keep images and labels in sync.
        return False
    return True

def preprocess_dataset(img_paths, save_dir, process_fn=None):
    """Preprocess every image in ``img_paths`` into ``save_dir`` as ``.npy`` files.

    The output name is the input's base name with its extension (whatever its
    spelling or case) replaced by ``.npy``. Only the final extension is
    touched, so a ``.tif`` occurring earlier in the name is left intact.

    Args:
        img_paths: Sequence of source image paths.
        save_dir: Output directory; created if it does not exist.
        process_fn: Callable ``(image_path, save_path)`` applied to each image.
            Defaults to the notebook's ``preprocess_and_save``.

    Returns:
        Number of images for which ``process_fn`` returned ``False``. Any
        other return value (including ``None``) is treated as success.
    """
    if process_fn is None:
        process_fn = preprocess_and_save
    os.makedirs(save_dir, exist_ok=True)
    print(f"Starting preprocessing for {len(img_paths)} images...")
    failures = 0
    for img_path in img_paths:
        # splitext strips exactly one trailing extension, unlike str.replace
        # which rewrites every '.tif' substring and mangles '.tiff'.
        stem, _ext = os.path.splitext(os.path.basename(img_path))
        save_path = os.path.join(save_dir, stem + '.npy')
        if process_fn(img_path, save_path) is False:
            failures += 1
    if failures:
        # Missing arrays mean the saved label arrays no longer line up 1:1
        # with the files on disk, so make the gap visible.
        print(f"WARNING: {failures} of {len(img_paths)} images failed to preprocess.")
    print("Preprocessing complete.")
    return failures

# --- LOCAL OUTPUT PATH ---
base_preprocessed_dir = './preprocessed_eurosat/'  # <--- YOUR DESIRED NAME

# Run the preprocessing for each split, in train / val / test order.
for split_name, split_paths in (
    ('train', train_img_paths),
    ('val', val_img_paths),
    ('test', test_img_paths),
):
    preprocess_dataset(split_paths, os.path.join(base_preprocessed_dir, split_name))

# Afterwards, store each split's labels next to the image folders.
for labels_file, split_labels in (
    ('train_labels.npy', train_labels),
    ('val_labels.npy', val_labels),
    ('test_labels.npy', test_labels),
):
    np.save(os.path.join(base_preprocessed_dir, labels_file), np.array(split_labels))

print("✅ Corrected preprocessing complete. NPY files saved locally to:", base_preprocessed_dir)

CSV Columns: ['Filename', 'Label', 'ClassName']
                                            Filename  Label  \
0               PermanentCrop/PermanentCrop_2401.tif      6   
1               PermanentCrop/PermanentCrop_1006.tif      6   
2  HerbaceousVegetation/HerbaceousVegetation_1025...      2   
3                           SeaLake/SeaLake_1439.tif      9   
4                               River/River_1052.tif      8   

              ClassName  
0         PermanentCrop  
1         PermanentCrop  
2  HerbaceousVegetation  
3               SeaLake  
4                 River  
✅ Original TIF paths and labels loaded.
Starting preprocessing for 19317 images...
Preprocessing complete.
Starting preprocessing for 5519 images...
Preprocessing complete.
Starting preprocessing for 2759 images...
Preprocessing complete.
✅ Corrected preprocessing complete. NPY files saved locally to: ./preprocessed_eurosat/
