In [138]:
# Core Python Libraries
import os  # Operating system interactions, such as reading and writing files.
import shutil  # High-level file operations like copying and moving files.
import random  # Random number generation for various tasks.
import textwrap  # Formatting text into paragraphs of a specified width.
import warnings  # Warning control context manager.
import zipfile  # Work with ZIP archives.
import platform  # Access to underlying platform’s identifying data.
import itertools  # Functions creating iterators for efficient looping.
from dataclasses import dataclass  # Class decorator for adding special methods to classes.

# PyTorch-related Libraries (Deep Learning)
import torch  # Core PyTorch library for tensor computations.
import torch.nn as nn  # Neural network module for defining layers and architectures.
from torch.nn import functional as F  # Functional module for defining functions and loss functions.
import torch.optim as optim  # Optimizer module for training models (SGD, Adam, etc.).
from torch.utils.data import Dataset, DataLoader, Subset, random_split  # Dataset and DataLoader for managing and batching data.
import torchvision # PyTorch's computer vision library.
from torchvision import datasets, transforms  # Datasets and transformations for image processing.
import torchvision.datasets as datasets  # Datasets for computer vision tasks.
import torchvision.transforms as transforms  # Transformations for image preprocessing.
from torchvision.utils import make_grid  # Make grid for displaying images.
import torchvision.models as models  # Pretrained models for transfer learning.
import torchvision.transforms.functional as TF  # Functional transformations for image preprocessing.
from torchsummary import summary # PyTorch model summary for Keras-like model summary.
import torchsummary
import torchmetrics
from torchviz import make_dot  # PyTorch model visualization.
from torchvision.ops import sigmoid_focal_loss  # Focal loss for handling class imbalance in object detection.
from torchmetrics import MeanMetric, Accuracy  # Intersection over Union (IoU) metric for object detection.
from torchmetrics.classification import MultilabelF1Score, MultilabelRecall, MultilabelPrecision, MultilabelAccuracy  # Multilabel classification metrics.

import pytorch_lightning as pl  # PyTorch Lightning for high-level training loops.
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor # Callbacks for model checkpointing and learning rate monitoring.
from pytorch_lightning.loggers import TensorBoardLogger  # Logger for TensorBoard visualization.

# Geospatial Data Processing Libraries
import rasterio  # Library for reading and writing geospatial raster data.
from rasterio.warp import calculate_default_transform, reproject  # Reprojection and transformation functions.
from rasterio.enums import Resampling  # Resampling methods used for resizing raster data.
from rasterio.plot import show  # Visualization of raster data.

# Data Manipulation and Analysis Libraries
import pandas as pd  # Data analysis and manipulation library for DataFrames and CSVs.
import numpy as np  # Numpy for array operations and numerical computations.
from sklearn.metrics import confusion_matrix, accuracy_score  # Evaluation metrics for classification models.

# Visualization Libraries
import matplotlib.pyplot as plt  # Plotting library for creating static and interactive visualizations.
import seaborn as sns  # High-level interface for drawing attractive statistical graphics.

# Utilities
from tqdm import tqdm  # Progress bar for loops and processes.
from PIL import Image  # Image handling, opening, manipulating, and saving.
import ast  # Abstract Syntax Trees for parsing Python code.
import requests  # HTTP library for sending requests.
import zstandard as zstd  # Zstandard compression for fast compression and decompression.
from collections import Counter # Counter for counting hashable objects.
import certifi  # Certificates for verifying HTTPS requests.
import ssl  # Secure Sockets Layer for secure connections.
import urllib.request  # URL handling for requests.
import kaggle # Kaggle API for downloading datasets.
import zipfile # Work with ZIP archives.

from torchvision.datasets import MNIST, EuroSAT

In [139]:
# Set seed for reproducibility
# Setting a seed ensures that the results are consistent and reproducible each time the code is run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Check if GPU is enabled
# PyTorch allows for the use of GPU to speed up training. Here we check if a GPU is available and set the device accordingly.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
if device.type == 'cuda':
    # If a GPU is available, print the name of the GPU
    print(f"GPU: {torch.cuda.get_device_name(0)}")


Device: cuda:0
GPU: NVIDIA GeForce RTX 3050


In [140]:
@dataclass
class Config:
    dataset_path = r'C:\Users\isaac\eurosat_small'
    imagenet_mean, imagenet_std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]


In [141]:
class EuroSATDataset(Dataset):
    # Initialize the EuroSAT dataset class
    def __init__(self, dataset, transform=None):
        self.dataset = dataset  # Store the dataset
        self.transform = transform  # Store the optional transformation function

    def __getitem__(self, index):
        image, label = self.dataset[index]  # Get the image and label from the dataset at the specified index

        if self.transform:
            image = self.transform(image)  # Apply the transformation if provided

        return image, label  # Return the image and label

    def __len__(self):
        return len(self.dataset)  # Return the length of the dataset


In [142]:
class EuroSATDataModule1(pl.LightningDataModule):

  def setup(self, stage=None):
    # transforms for images
    transform=transforms.Compose([transforms.ToTensor(), 
                                   transforms.Normalize(Config.imagenet_mean, Config.imagenet_std)])
    
    ssl._create_default_https_context = ssl._create_unverified_context

    # load the full dataset
    full_dataset = EuroSAT(os.getcwd(), download=True, transform=transform)
    
    print(full_dataset)

    class_names = full_dataset.classes
    print(class_names)
  
    train_size = int(0.8 * len(full_dataset))
    test_size = len(full_dataset) - train_size
    self.eurosat_train, self.eurosat_test = random_split(full_dataset, [train_size, test_size])

    print(f"Train size: {len(self.eurosat_train)}")
    print(f"Test size: {len(self.eurosat_test)}")
    

  def train_dataloader(self):
    return DataLoader(self.eurosat_train, batch_size=64, num_workers=4, pin_memory=True, shuffle=True)

  def val_dataloader(self):
    return DataLoader(self.eurosat_test, batch_size=64, num_workers=4, pin_memory=True)

In [143]:
class EuroSATDataModule2(pl.LightningDataModule):

  def setup(self, stage=None):
    transform=transforms.Compose([transforms.ToTensor(), 
                                   transforms.Normalize(Config.imagenet_mean, Config.imagenet_std)])
      
    full_dataset = datasets.ImageFolder(Config.dataset_path, transform=transform) 
    print(full_dataset)


    train_size = int(0.8 * len(full_dataset))
    test_size = len(full_dataset) - train_size
    self.eurosat_train, self.eurosat_test = random_split(full_dataset, [train_size, test_size])


  def train_dataloader(self):
    return DataLoader(self.eurosat_train, batch_size=64, num_workers=4, pin_memory=True, shuffle=True)

  def val_dataloader(self):
    return DataLoader(self.eurosat_test, batch_size=64, num_workers=4, pin_memory=True)
  
  def test_dataloader(self):
    return DataLoader(self.eurosat_test, batch_size=64, num_workers=4, pin_memory=True)
  

In [144]:
class EuroSATDataModule3(pl.LightningDataModule):

  def setup(self, stage=None):
    
    train_transform = transforms.Compose([
      transforms.RandomResizedCrop(224),
      transforms.RandomHorizontalFlip(),
      transforms.ToTensor(),
      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    test_transform = transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    
    
    full_dataset = datasets.ImageFolder(Config.dataset_path) 
    print(full_dataset)

    # Extract and print the class names from the dataset
    self.class_labels = full_dataset.classes

    # Split the dataset
    train_size = int(0.8 * len(full_dataset))
    test_size = len(full_dataset) - train_size
    train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

    # Apply transforms to the specific splits
    train_dataset.dataset.transform = train_transform
    test_dataset.dataset.transform = test_transform

    # Assign datasets to module attributes
    self.eurosat_train = train_dataset
    self.eurosat_test = test_dataset


  def train_dataloader(self):
    return DataLoader(self.eurosat_train, batch_size=64, num_workers=4, pin_memory=True, shuffle=True)

  def val_dataloader(self):
    return DataLoader(self.eurosat_test, batch_size=64, num_workers=4, pin_memory=True)
  
  def test_dataloader(self):
    return DataLoader(self.eurosat_test, batch_size=64, num_workers=4, pin_memory=True)
  

In [145]:
class EuroSATModel(pl.LightningModule):
    def __init__(self):
        super().__init__()

        # EuroSAT images are (3, 64, 64) (channels, width, height) 
        # Flattened image size for fully connected layer input = 3 * 64 * 64 = 12288
        self.layer_1 = nn.Linear(3 * 64 * 64, 128)
        self.layer_2 = nn.Linear(128, 256)
        self.layer_3 = nn.Linear(256, 10)  # EuroSAT has 10 classes

    def forward(self, x):
        batch_size, channels, width, height = x.size()

        # Flatten the image tensor (b, 3, 64, 64) -> (b, 3*64*64)
        x = x.view(batch_size, -1)

        # Apply layers
        x = self.layer_1(x)
        x = torch.relu(x)
        x = self.layer_2(x)
        x = torch.relu(x)
        x = self.layer_3(x)
        
        # Output as log softmax for stable training
        x = torch.log_softmax(x, dim=1)
        return x

    def cross_entropy_loss(self, logits, labels):
        return F.nll_loss(logits, labels)

    def training_step(self, train_batch, batch_idx):
        x, y = train_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, val_batch, batch_idx):
        x, y = val_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        self.log('val_loss', loss)

    def test_step(self, test_batch, batch_idx):
        x, y = test_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        self.log('test_loss', loss)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        
        return optimizer

In [147]:
# Pretrained ResNet18 model
class EuroSATModel2(pl.LightningModule):
    def __init__(self):
        super(EuroSATModel2, self).__init__()
        self.model = models.resnet18(pretrained=True)
        # Modify the final layer to match the 10 classes in EuroSAT
        self.model.fc = nn.Linear(self.model.fc.in_features, 10)
        self.model.to(device)

        self.train_acc = torchmetrics.Accuracy(task='multiclass', num_classes=10)
        self.val_acc = torchmetrics.Accuracy(task='multiclass', num_classes=10)
        self.test_acc = torchmetrics.Accuracy(task='multiclass', num_classes=10)

        torchsummary.summary(self.model, (3, 224, 224))

    def forward(self, x):
        return self.model(x)

    def cross_entropy_loss(self, logits, labels):
        return F.cross_entropy(logits, labels)

    def training_step(self, train_batch, batch_idx):
        x, y = train_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        acc = self.train_acc(logits, y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, val_batch, batch_idx):
        x, y = val_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        acc = self.val_acc(logits, y)
        self.log('val_loss', loss, on_epoch=True, prog_bar=True)
        self.log('val_acc', acc, on_epoch=True, prog_bar=True)
        return loss

    def test_step(self, test_batch, batch_idx):
        x, y = test_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        acc = self.test_acc(logits, y)
        self.log('test_loss', loss, on_epoch=True, prog_bar=True)
        self.log('test_acc', acc, on_epoch=True, prog_bar=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer


In [152]:
# Create an instance of the data module
data_module = EuroSATDataModule3()

# Call setup to initialize the datasets
data_module.setup()

# Access class_labels and create a dictionary
class_dict = {i: label for i, label in enumerate(data_module.class_labels)}

# Print the dictionary
print(class_dict)


Dataset ImageFolder
    Number of datapoints: 1000
    Root location: C:\Users\isaac\eurosat_small
{0: 'AnnualCrop', 1: 'Forest', 2: 'HerbaceousVegetation', 3: 'Highway', 4: 'Industrial', 5: 'Pasture', 6: 'PermanentCrop', 7: 'Residential', 8: 'River', 9: 'SeaLake'}


In [149]:
data_module = EuroSATDataModule3()
data_module.setup()
model = EuroSATModel2()

# Initialize the logger
logger = TensorBoardLogger("tb_logs", name="my_model")

trainer = pl.Trainer(
    default_root_dir=r'C:\Users\isaac\FYPCodeLatest\model_checkpoints',
    max_epochs=10,
    logger=logger,
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    devices=1 if torch.cuda.is_available() else None,
)

trainer.fit(model, data_module)

Dataset ImageFolder
    Number of datapoints: 1000
    Root location: C:\Users\isaac\eurosat_small
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
    

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | model     | ResNet             | 11.2 M | train
1 | train_acc | MulticlassAccuracy | 0      | train
2 | val_acc   | MulticlassAccuracy | 0      | train
3 | test_acc  | MulticlassAccuracy | 0      | train
---------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Dataset ImageFolder
    Number of datapoints: 1000
    Root location: C:\Users\isaac\eurosat_small


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

C:\Users\isaac\anaconda3\envs\Fyp311\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:419: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
C:\Users\isaac\anaconda3\envs\Fyp311\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:419: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
C:\Users\isaac\anaconda3\envs\Fyp311\Lib\site-packages\pytorch_lightning\loops\fit_loop.py:298: The number of training batches (13) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [150]:
trainer.test(model, datamodule=data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Dataset ImageFolder
    Number of datapoints: 1000
    Root location: C:\Users\isaac\eurosat_small


C:\Users\isaac\anaconda3\envs\Fyp311\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:419: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.9750000238418579
        test_loss           0.09977135807275772
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.09977135807275772, 'test_acc': 0.9750000238418579}]

In [154]:
def predict_image(model, image_path, transform):
    # Load the image
    image = Image.open(image_path).convert("RGB")
    
    # Apply the test transformation
    image = transform(image).unsqueeze(0)  # Add batch dimension

    # Send image to the correct device (GPU if available)
    image = image.to(model.device)
    
    # Set the model to evaluation mode and disable gradient calculations
    model.eval()
    with torch.no_grad():
        logits = model(image)
        # Get the predicted class (highest logit)
        _, predicted_class = torch.max(logits, 1)
        # Calculate probabilities
        probabilities = F.softmax(logits, dim=1)
    
    return predicted_class.item(), probabilities.squeeze().cpu().numpy()  # Return the predicted class and probabilities

# Define the test transformation used during validation and testing
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Create an instance of the data module
data_module = EuroSATDataModule3()

# Call setup to initialize the datasets
data_module.setup()

# Access class_labels and create a dictionary
class_dict = {i: label for i, label in enumerate(data_module.class_labels)}

# Test the model with an image
image_path = r"C:\Users\isaac\eurosat_small\SeaLake\SeaLake_10.jpg"  # Replace with the path to your image
predicted_class, probabilities = predict_image(model, image_path, test_transform)
print(f"Predicted Class: {predicted_class}")

# Create a DataFrame with the class labels and probabilities
df = pd.DataFrame(list(class_dict.items()), columns=['Class Index', 'Class Label'])
df['Probability'] = probabilities

# Highlight the predicted class
def highlight_predicted(s):
    return ['background-color: yellow' if s['Class Index'] == predicted_class else '' for _ in s]

# Apply the highlight function
df = df.style.apply(highlight_predicted, axis=1)

# Display the DataFrame
display(df)

Dataset ImageFolder
    Number of datapoints: 1000
    Root location: C:\Users\isaac\eurosat_small
Predicted Class: 9


Unnamed: 0,Class Index,Class Label,Probability
0,0,AnnualCrop,0.000926
1,1,Forest,0.000164
2,2,HerbaceousVegetation,0.000109
3,3,Highway,1.9e-05
4,4,Industrial,2.5e-05
5,5,Pasture,0.000259
6,6,PermanentCrop,3.2e-05
7,7,Residential,1.3e-05
8,8,River,0.001932
9,9,SeaLake,0.99652
