### Importing Necessary Libraries

In [1]:
# Core Python Libraries
import os  # Operating system interactions, such as reading and writing files.
import shutil  # High-level file operations like copying and moving files.
import random  # Random number generation for various tasks.
import textwrap  # Formatting text into paragraphs of a specified width.
import warnings  # Warning control context manager.
import zipfile  # Work with ZIP archives.
import platform  # Access to underlying platform’s identifying data.
import itertools  # Functions creating iterators for efficient looping.
from dataclasses import dataclass  # Class decorator for adding special methods to classes.

# PyTorch-related Libraries (Deep Learning)
import torch  # Core PyTorch library for tensor computations.
import torch.nn as nn  # Neural network module for defining layers and architectures.
import torch.optim as optim  # Optimizer module for training models (SGD, Adam, etc.).
from torch.utils.data import Dataset, DataLoader, Subset, random_split  # Dataset and DataLoader for managing and batching data.
import torchvision # PyTorch's computer vision library.
from torchvision import datasets, transforms  # Datasets and transformations for image processing.
import torchvision.datasets as datasets  # Datasets for computer vision tasks.
import torchvision.transforms as transforms  # Transformations for image preprocessing.
from torchvision.utils import make_grid  # Make grid for displaying images.
import torchvision.models as models  # Pretrained models for transfer learning.
import torchvision.transforms.functional as TF  # Functional transformations for image preprocessing.
from torchsummary import summary # PyTorch model summary for Keras-like model summary.
from torchviz import make_dot  # PyTorch model visualization.
from torchvision.ops import sigmoid_focal_loss  # Focal loss for handling class imbalance in object detection.
from torchmetrics import MeanMetric  # Intersection over Union (IoU) metric for object detection.
from torchmetrics.classification import MultilabelF1Score, MultilabelRecall, MultilabelPrecision, MultilabelAccuracy  # Multilabel classification metrics.

import pytorch_lightning as pl  # PyTorch Lightning for high-level training loops.
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor # Callbacks for model checkpointing and learning rate monitoring.

# Geospatial Data Processing Libraries
import rasterio  # Library for reading and writing geospatial raster data.
from rasterio.warp import calculate_default_transform, reproject  # Reprojection and transformation functions.
from rasterio.enums import Resampling  # Resampling methods used for resizing raster data.
from rasterio.plot import show  # Visualization of raster data.

# Data Manipulation and Analysis Libraries
import pandas as pd  # Data analysis and manipulation library for DataFrames and CSVs.
import numpy as np  # Numpy for array operations and numerical computations.
from sklearn.metrics import confusion_matrix, accuracy_score  # Evaluation metrics for classification models.

# Visualization Libraries
import matplotlib.pyplot as plt  # Plotting library for creating static and interactive visualizations.
import seaborn as sns  # High-level interface for drawing attractive statistical graphics.

# Utilities
from tqdm import tqdm  # Progress bar for loops and processes.
from PIL import Image  # Image handling, opening, manipulating, and saving.
import ast  # Abstract Syntax Trees for parsing Python code.
import requests  # HTTP library for sending requests.
import zstandard as zstd  # Zstandard compression for fast compression and decompression.
from collections import Counter # Counter for counting hashable objects.
import certifi  # Certificates for verifying HTTPS requests.
import ssl  # Secure Sockets Layer for secure connections.
import urllib.request  # URL handling for requests.
import kaggle # Kaggle API for downloading datasets.
import zipfile # Work with ZIP archives.

In [2]:
pl.seed_everything(42)  # Set seed for reproducibility.

dataset_path = r'D:\Datasets\eurosat\2750' # Path to the EuroSAT dataset.

torch.set_float32_matmul_precision('medium')  # Set the default FP32 matmul precision to 'medium'.

Seed set to 42


In [3]:
@dataclass
class Config:
    img_size: int = 64  
    input_size: int = 224
    img_mean = [0.485, 0.456, 0.406]
    img_std = [0.229, 0.224, 0.225]
    num_classes: int = 10
    num_epochs: int = 10
    num_workers: int = 1
    
    batch_size: int = 64
    train_size: float = 0.8
    learning_rate: float = 0.001
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [4]:
class EuroSATDataset(Dataset):
    # Initialize the EuroSAT dataset class
    def __init__(self, dataset, transform=None):
        self.dataset = dataset  # Store the dataset
        self.transform = transform  # Store the optional transformation function

    def __getitem__(self, index):
        image, label = self.dataset[index]  # Get the image and label from the dataset at the specified index

        if self.transform:
            image = self.transform(image)  # Apply the transformation if provided

        return image, label  # Return the image and label

    def __len__(self):
        return len(self.dataset)  # Return the length of the dataset

In [5]:
# Create an ImageFolder dataset instance for the specified directory
dataset = datasets.ImageFolder(dataset_path)
print('Dataset length: ', len(dataset))

class_names = dataset.classes  # Get the class names from the dataset
num_classes = len(class_names)  # Get the number of classes

print(f'Class names: {class_names} and number of classes: {num_classes}')

Dataset length:  27000
Class names: ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake'] and number of classes: 10


In [6]:
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(Config.input_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(Config.img_mean, Config.img_std)
])

test_transform = transforms.Compose([
    transforms.Resize(Config.img_size),
    transforms.CenterCrop(Config.img_size),
    transforms.ToTensor(),
    transforms.Normalize(Config.img_mean, Config.img_std)
])

dataset = datasets.ImageFolder(dataset_path, transform=train_transform)

In [7]:
class EuroSATDataModule(pl.LightningDataModule):
    def __init__(self, dataset_path, train_size=0.8, batch_size=16, num_workers=2):
        super().__init__()
        self.dataset_path = dataset_path
        self.train_size = train_size
        self.batch_size = batch_size
        self.num_workers = num_workers

        # Define transformations
        self.train_transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        self.test_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def prepare_data(self):
        if not os.path.exists(self.dataset_path):
            kaggle.api.dataset_download_files('apollo2506/eurosat-dataset', path=self.dataset_path, unzip=True)

    def setup(self, stage=None):
        # Initialize the dataset without transformations
        self.dataset = datasets.ImageFolder(self.dataset_path)

        # Generate indices
        indices = list(range(len(self.dataset)))
        split = int(np.floor(self.train_size * len(self.dataset)))
        np.random.shuffle(indices)

        train_indices, test_indices = indices[:split], indices[split:]

        # Create subsets and apply transformations
        self.train_dataset = EuroSATDataset(Subset(self.dataset, train_indices), transform=self.train_transform)
        self.test_dataset = EuroSATDataset(Subset(self.dataset, test_indices), transform=self.test_transform)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)
    

In [8]:
class EuroSATModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        self.model.fc = torch.nn.Linear(self.model.fc.in_features, len(class_names))
        self.model = self.model.to(Config.device)

    def forward(self, x):
        return self.model(x)

    def training_step(self, train_batch, batch_idx):
        x, y = train_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, val_batch, batch_idx):
        x, y = val_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        self.log('val_loss', loss)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=Config.learning_rate)
        return optimizer

    def cross_entropy_loss(self, logits, labels):
        return TF.cross_entropy(logits, labels)

In [None]:
data_module = EuroSATDataModule()

data_module.prepare_data()
data_module.setup(stage='fit')

valid_loader = data_module.val_dataloader()

# Fetch a batch of data from the validation DataLoader with a progress bar
print("Fetching a batch of data from the validation DataLoader...")
for images, labels in tqdm(valid_loader, desc="Loading batches"):
    print(f'Batch shape: {images.shape}, Labels shape: {labels.shape}')
    break  # Just to test one batch
print("Batch fetched successfully.")

Fetching a batch of data from the validation DataLoader...


Loading batches:   0%|          | 0/85 [00:00<?, ?it/s]

In [None]:
model = EuroSATModel()

trainer = pl.Trainer(max_epochs=Config.num_epochs)  # Use gpus=1 if you have a GPU
trainer.fit(model, data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params | Mode 
-----------------------------------------
0 | model | ResNet | 11.2 M | train
-----------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)
68        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

C:\Users\isaac\anaconda3\envs\Fyp311\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:419: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
