<a href="https://colab.research.google.com/github/Judyxyang/judyxyang/blob/master/HSi_UP_AB_VIM_V3_6_0330.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# HyperMamba Model

In [None]:
pip install spectral mat73  einops

Collecting spectral
  Downloading spectral-0.23.1-py3-none-any.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.9/212.9 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mat73
  Downloading mat73-0.62-py3-none-any.whl (19 kB)
Collecting einops
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: spectral, einops, mat73
Successfully installed einops-0.7.0 mat73-0.62 spectral-0.23.1


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
import os
import math

from einops import rearrange
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from scipy import io
import torch.utils.data
import scipy.io as sio
import mat73
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 0 Upload Data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
! ls '/content/drive/MyDrive/A02_RemoteSensingData/PaviaU/'

PaviaU_gt.mat  PaviaU_model_state_dict.pth	 PaviaU_p7_model_state_dict.pth
PaviaU.mat     PaviaU_p7ab_model_state_dict.pth


In [None]:
# # Define the path
path='/content/drive/MyDrive/A02_RemoteSensingData/PaviaU/'

In [None]:
PaviaU_hsi=sio.loadmat(path+'PaviaU.mat')['paviaU']
print('PaviaU_hsi shape:', PaviaU_hsi.shape)

#Load ground truth labels
PaviaU_gt=sio.loadmat(path+'PaviaU_gt.mat')['paviaU_gt']
print('PaviaU_gt.shape:', PaviaU_gt.shape)

PaviaU_hsi shape: (610, 340, 103)
PaviaU_gt.shape: (610, 340)


In [None]:
# Per-class sample budget, one row per class:
# (class number, class name, training samples, test samples, total samples)
class_info = [
    (1, "Asphalt", 548, 6304, 6852),
    (2, "Meadows", 540, 18146, 18686),
    (3, "Gravel", 392, 1815, 2207),
    (4, "Trees", 524, 2912, 3436),
    (5, "Metal Sheets", 265, 1113, 1378),
    (6, "Bare Soil", 532, 4572, 5104),
    (7, "Bitumen", 375, 981, 1356),
    (8, "Bricks", 514, 3364, 3878),
    (9, "Shadows", 231, 795, 1026),
]

# Look-up table keyed by class number; each value holds the class name and
# the three sample counts from the matching row above.
class_dict = {
    row[0]: dict(zip(("class_name", "training_samples", "test_samples", "total_samples"), row[1:]))
    for row in class_info
}


### Supervised Data

In [None]:
# 2.2 Samples Extraction
#
# Collect patch_size x patch_size patches centred on labelled pixels until every
# class holds exactly class_dict[label]["total_samples"] patches. Pixels whose
# patch would cross the image border are skipped.
#
# NOTE: when a class has fewer in-bounds pixels than its requested total, the
# outer loop makes another pass over the (re-shuffled) pixels of that class, so
# the same pixel can be sampled more than once.

# Define patch size and stride
patch_size = 7
stride = 1

# Create an empty list to store patches and labels
hsi_samples = []
labels = []

# Initialize a dictionary to store class count
class_count = {i: 0 for i in class_dict.keys()}

def all_classes_completed(class_count, class_dict):
    """Return True when every class count equals that class's ``total_samples``."""
    return all(class_count[class_num] == class_dict[class_num]["total_samples"] for class_num in class_dict.keys())

half_patch = patch_size // 2

while not all_classes_completed(class_count, class_dict):
    # Track progress so a pass that adds nothing cannot repeat forever.
    patches_added_this_pass = 0

    for label in class_dict.keys():
        target = class_dict[label]["total_samples"]
        if class_count[label] >= target:
            continue  # this class is already full; nothing to scan

        # Get the coordinates of the ground truth pixels, in random order
        coords = np.argwhere((PaviaU_gt == label))
        np.random.shuffle(coords)

        for i, j in coords:
            # Calculate the patch indices
            i_start, i_end = i - half_patch, i + half_patch + 1
            j_start, j_end = j - half_patch, j + half_patch + 1

            # Keep only patches that lie fully inside the HSI cube
            if i_start >= 0 and i_end <= PaviaU_hsi.shape[0] and j_start >= 0 and j_end <= PaviaU_hsi.shape[1]:
                hsi_samples.append(PaviaU_hsi[i_start:i_end, j_start:j_end, :])
                labels.append(label)
                class_count[label] += 1
                patches_added_this_pass += 1

                if class_count[label] >= target:
                    break  # quota for this class reached

    if patches_added_this_pass == 0:
        # No unfinished class has a usable pixel: looping again cannot help.
        unfinished = [c for c in class_dict if class_count[c] != class_dict[c]["total_samples"]]
        print('Warning: no in-bounds pixels available for classes', unfinished, '- stopping sample extraction early.')
        break

# Convert the list of patches and labels into arrays
hsi_samples = np.array(hsi_samples)
labels = np.array(labels)
print('hsi_samples shape:', hsi_samples.shape)
print('labels shape:', labels.shape)

hsi_samples shape: (43923, 7, 7, 103)
labels shape: (43923,)


In [None]:
import numpy as np

# Side length of the square spatial window extracted around each labelled pixel
patch_size = 7
half_patch = patch_size // 2

# Reset class count trackers for a fresh start
class_count_training = {i: 0 for i in class_dict.keys()}
class_count_test = {i: 0 for i in class_dict.keys()}

hsi_samples_training = []
labels_training = []
hsi_samples_test = []
labels_test = []

# For every class, walk its labelled pixels in random order. The training quota
# is filled first, then the test quota. Pixels whose patch would cross the image
# border are skipped, so a class can end up with fewer patches than requested.
for label in class_dict.keys():
    train_quota = class_dict[label]["training_samples"]
    test_quota = class_dict[label]["test_samples"]

    coords = np.argwhere((PaviaU_gt == label))
    np.random.shuffle(coords)

    for coord in coords:
        if class_count_training[label] >= train_quota and class_count_test[label] >= test_quota:
            break  # both quotas met: move on to the next class

        i, j = coord
        i_start, i_end = i - half_patch, i + half_patch + 1
        j_start, j_end = j - half_patch, j + half_patch + 1

        if i_start >= 0 and i_end <= PaviaU_hsi.shape[0] and j_start >= 0 and j_end <= PaviaU_hsi.shape[1]:
            hsi_patch = PaviaU_hsi[i_start:i_end, j_start:j_end, :]

            # Decide whether to append to training or test
            if class_count_training[label] < train_quota:
                hsi_samples_training.append(hsi_patch)
                labels_training.append(label)
                class_count_training[label] += 1
            elif class_count_test[label] < test_quota:
                hsi_samples_test.append(hsi_patch)
                labels_test.append(label)
                class_count_test[label] += 1

    # Make quota shortfalls visible instead of silently returning fewer samples.
    if class_count_training[label] < train_quota or class_count_test[label] < test_quota:
        print(f'Note: class {label} has {class_count_training[label]}/{train_quota} training '
              f'and {class_count_test[label]}/{test_quota} test patches (not enough in-bounds pixels).')

# Convert lists to arrays
training_hsi_samples = np.array(hsi_samples_training)
training_labels = np.array(labels_training)
test_hsi_samples = np.array(hsi_samples_test)
test_labels = np.array(labels_test)

print('training_hsi_samples shape:', training_hsi_samples.shape)
print('training_labels shape:', training_labels.shape)
print('test_hsi_samples shape:', test_hsi_samples.shape)
print('test_labels shape:', test_labels.shape)


training_hsi_samples shape: (3921, 7, 7, 103)
training_labels shape: (3921,)
test_hsi_samples shape: (37451, 7, 7, 103)
test_labels shape: (37451,)


In [None]:
# Short aliases for the extracted training and test patches and their labels.
hsi_train, y_train = training_hsi_samples, training_labels
hsi_test, y_test = test_hsi_samples, test_labels

In [None]:
import numpy as np
from scipy.ndimage import rotate

def augment_training_data(hsi_training_data,  training_labels, rotations=(45, 90, 135), flip_up_down=True, flip_left_right=True):
    """Expand a set of patches with rotated and flipped copies.

    For every input patch the output contains, in this order: the original
    patch, one copy rotated in the plane of axes (0, 1) for each angle in
    ``rotations`` (same shape as the input, edges filled with the nearest
    value), an up-down flip if ``flip_up_down`` and a left-right flip if
    ``flip_left_right``. Each copy keeps the label of its source patch.

    Args:
        hsi_training_data: sequence/array of patches; iterated along axis 0.
        training_labels: one label per patch, same length as the patches.
        rotations: iterable of rotation angles in degrees.
        flip_up_down: add ``np.flipud`` of each patch.
        flip_left_right: add ``np.fliplr`` of each patch.

    Returns:
        Tuple ``(augmented_patches, augmented_labels)`` of NumPy arrays.

    Raises:
        ValueError: if the number of patches and labels differ. Without this
            check ``zip`` would silently drop the surplus and pair patches
            with the wrong labels.
    """
    if len(hsi_training_data) != len(training_labels):
        raise ValueError(
            f"Number of patches ({len(hsi_training_data)}) does not match "
            f"number of labels ({len(training_labels)})"
        )

    augmented_hsi = []
    augmented_labels = []

    for hsi, label in zip(hsi_training_data, training_labels):
        # Original first, then rotations, then the optional flips.
        variants = [hsi]
        variants.extend(
            rotate(hsi, angle, axes=(0, 1), reshape=False, mode='nearest')
            for angle in rotations
        )
        if flip_up_down:
            variants.append(np.flipud(hsi))
        if flip_left_right:
            variants.append(np.fliplr(hsi))

        augmented_hsi.extend(variants)
        augmented_labels.extend([label] * len(variants))

    return np.array(augmented_hsi), np.array(augmented_labels)

# Augmenting the training samples
augmented_hsi_training_samples,  augmented_training_labels = augment_training_data(hsi_train, y_train)

# Print shapes to verify the augmented training data
print('Augmented HSI training samples shape:', augmented_hsi_training_samples.shape)
print('Augmented training labels shape:', augmented_training_labels.shape)

Augmented HSI training samples shape: (23526, 7, 7, 103)
Augmented training labels shape: (23526,)


# 0.0 YAML
An overall architecture description, in YAML format, for a model adapted to hyperspectral image classification. The architecture includes patch embedding, spectral band processing, a bidirectional state space model block, and spatial feature processing.

# 1.0 Model Building

In [None]:
# Configuration class
class Config:
    """Plain container for model hyperparameters.

    Each constructor argument is stored unchanged as an instance attribute
    of the same name. All sixteen arguments are required; none has a default.
    """

    def __init__(self, in_channels, num_patches, kernel_size, patch_size, emb_size, dim, depth, heads,
                 dim_head, mlp_ratio, num_classes, dropout, pos_emb_size, class_emb_size, stride, output_dim):
        # Store the settings in signature order so that vars(config) lists
        # them the same way they are declared.
        vars(self).update(
            in_channels=in_channels,
            num_patches=num_patches,
            kernel_size=kernel_size,
            patch_size=patch_size,
            emb_size=emb_size,
            dim=dim,
            depth=depth,
            heads=heads,
            dim_head=dim_head,
            mlp_ratio=mlp_ratio,
            num_classes=num_classes,
            dropout=dropout,
            pos_emb_size=pos_emb_size,
            class_emb_size=class_emb_size,
            stride=stride,
            output_dim=output_dim,
        )

To incorporate the improvement points into the SpectralBandProcessing class for enhanced spectral band processing, including the use of attention mechanisms and other suggested improvements, the class can be extended with a spectral attention layer. This will allow the model to focus on the most informative spectral bands dynamically. Here is how we can integrate these improvements into the existing architecture:

In [None]:
# Version 2.0 This involves reversing the input tensor for the backward path before applying the backward_conv1d operation
import torch
import torch.nn as nn
import torch.nn.functional as F

class HSIVimBlock(nn.Module):
    """Two-branch ("forward"/"backward") feature block for one HSI patch.

    The whole patch is flattened, layer-normalised and projected by two
    separate linear layers to ``hidden_dim`` features. Each projection goes
    through its own Conv1d, is offset by a learned matrix (``A`` or ``B``)
    scaled by ``delta_param``, squashed with ``tanh``, averaged, and mapped
    to ``output_dim`` by a linear layer. The two branch outputs are summed.

    NOTE(review): each projection is viewed as ``(batch, hidden_dim, 1)``, so
    the Conv1d layers see a sequence of length 1 and the ``torch.flip`` on
    the backward branch does not change the data. No recurrence over a
    sequence is computed here.

    Args:
        spatial_dim: side length of the square patch (H == W).
        num_bands: number of spectral bands per pixel.
        hidden_dim: width of the projected features / Conv1d channels.
        output_dim: size of the returned feature vector.
        delta_param_init: initial value for every entry of ``delta_param``.
    """

    def __init__(self, spatial_dim, num_bands, hidden_dim, output_dim, delta_param_init):
        super(HSIVimBlock, self).__init__()
        self.spatial_dim = spatial_dim
        self.num_bands = num_bands
        self.hidden_dim = hidden_dim

        # Number of values in one flattened patch (H * W * Bands).
        flat_features = num_bands * spatial_dim * spatial_dim

        # LayerNorm and both projections act on the fully flattened patch.
        self.norm = nn.LayerNorm(flat_features)
        self.linear_x = nn.Linear(flat_features, hidden_dim)
        self.linear_z = nn.Linear(flat_features, hidden_dim)

        self.forward_conv1d = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
        self.backward_conv1d = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)

        # Learned additive offsets for the forward (A) and backward (B) branches.
        self.A = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
        self.B = nn.Parameter(torch.randn(hidden_dim, hidden_dim))
        #self.C = nn.Parameter(torch.randn(output_dim, hidden_dim))
        self.delta_param = nn.Parameter(torch.full((hidden_dim,), delta_param_init))

        self.linear_forward = nn.Linear(hidden_dim, output_dim)
        self.linear_backward = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a batch of patches to feature vectors.

        Args:
            x: 4-D tensor unpacked as ``[Batch, Height, Width, Bands]``; the
               last three dimensions must multiply to the size the layers
               were built for.

        Returns:
            Tensor of shape ``[Batch, output_dim]``.
        """
        # Unpacking four values enforces a 4-D input; only the batch size is needed.
        batch, _, _, _ = x.shape

        # Flatten all spatial and spectral values, then normalise: [Batch, H*W*Bands]
        x = self.norm(x.reshape(batch, -1))

        # Project to hidden_dim and add a length-1 sequence axis for Conv1d:
        # [Batch, hidden_dim] -> [Batch, hidden_dim, 1]
        x_proj = self.linear_x(x).view(batch, self.hidden_dim, -1)
        z_proj = self.linear_z(x).view(batch, self.hidden_dim, -1)

        # Reverse the sequence axis for the backward branch.
        z_proj_reversed = torch.flip(z_proj, dims=[-1])

        # delta_param: [hidden_dim] -> [1, hidden_dim, 1] for broadcasting.
        delta_expanded = self.delta_param.unsqueeze(0).unsqueeze(2)

        # A * delta broadcasts to [1, hidden_dim, hidden_dim]; adding it to the
        # [Batch, hidden_dim, 1] conv output gives [Batch, hidden_dim, hidden_dim].
        # Each Conv1d is evaluated exactly once per branch.
        forward_ssm_output = torch.tanh(self.forward_conv1d(x_proj) + self.A * delta_expanded)
        backward_ssm_output = torch.tanh(self.backward_conv1d(z_proj_reversed) + self.B * delta_expanded)

        # Average over the last axis: [Batch, hidden_dim]
        forward_reduced = forward_ssm_output.mean(dim=2)
        backward_reduced = backward_ssm_output.mean(dim=2)

        # Map each branch to output_dim and combine by element-wise sum.
        return self.linear_forward(forward_reduced) + self.linear_backward(backward_reduced)

In [None]:
# New version
import torch
import torch.nn as nn
import torch.nn.functional as F

class SpatialFeatureProcessing(nn.Module):
    """Two 3x3 convolution stages followed by global average pooling.

    Stage 1 is a standard convolution (dilation 1) to 256 channels; stage 2
    uses dilation 2 to 512 channels, with padding 2 so the spatial size is
    kept. Each convolution is followed by ReLU and BatchNorm2d. The result is
    pooled to 1x1 and flattened, giving 512 features per sample.
    """

    def __init__(self, input_channels):
        super().__init__()
        stages = [
            # Stage 1: plain 3x3 convolution
            nn.Conv2d(in_channels=input_channels, out_channels=256, kernel_size=(3, 3), padding=1, dilation=1),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            # Stage 2: dilated 3x3 convolution for a larger receptive field
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), padding=2, dilation=2),
            nn.ReLU(),
            nn.BatchNorm2d(512),
        ]
        self.conv_layers = nn.Sequential(*stages)
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        """Return a ``[batch, 512]`` feature tensor for a ``[batch, C, H, W]`` input."""
        pooled = self.global_avg_pool(self.conv_layers(x))
        # Drop the two singleton spatial axes left by the pooling.
        return torch.flatten(pooled, start_dim=1)


In [None]:
class Classifier(nn.Module):
    """Fully connected head: Linear(in_features, 1024) -> ReLU -> Dropout(0.5) -> Linear(1024, num_classes)."""

    def __init__(self, in_features, num_classes):
        super().__init__()
        hidden_units = 1024
        self.fc_layers = nn.Sequential(
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Return raw class scores (logits) for ``x``."""
        # No softmax is applied here; losses such as nn.CrossEntropyLoss
        # already include it.
        return self.fc_layers(x)


### 1.4 Integration into the Main Model

In [None]:
class HSIClassificationMambaModel(nn.Module):
    """End-to-end classifier: HSIVimBlock -> SpatialFeatureProcessing -> Classifier.

    The VIM block turns each patch into an ``output_dim`` vector. That vector
    is reshaped to an ``output_dim``-channel 1x1 feature map for the
    convolutional stage, whose 512 pooled features feed the classifier head.
    """

    def __init__(self, spatial_dim, num_bands, hidden_dim, output_dim, delta_param_init, num_classes):
        super().__init__()
        self.vim_block = HSIVimBlock(spatial_dim, num_bands, hidden_dim, output_dim, delta_param_init)
        # Kept so forward() can rebuild the channel axis of the VIM output.
        self.output_dim = output_dim

        # The VIM output width becomes the channel count of the conv stage.
        self.spatial_processing = SpatialFeatureProcessing(input_channels=output_dim)
        # 512 is the channel count of the last convolution in SpatialFeatureProcessing.
        self.classifier = Classifier(in_features=512, num_classes=num_classes)

    def forward(self, x):
        """Return class logits for a batch of patches."""
        vim_features = self.vim_block(x)
        # [batch, output_dim] -> [batch, output_dim, 1, 1] so Conv2d can consume it.
        feature_map = vim_features.view(-1, self.output_dim, 1, 1)
        spatial_features = self.spatial_processing(feature_map)
        # Flatten in case the spatial stage returned extra axes.
        spatial_features = torch.flatten(spatial_features, start_dim=1)
        return self.classifier(spatial_features)


# Instantiate the Model

In [None]:

# Instantiate the model
model = HSIClassificationMambaModel(
    spatial_dim=7,
    num_bands=103,
    hidden_dim=256,
    output_dim=128,  # Make sure this matches the actual output of your HSIVimBlock
    delta_param_init=0.1,
    num_classes=9
)

# Print the model architecture
print(model)


HSIClassificationMambaModel(
  (vim_block): HSIVimBlock(
    (norm): LayerNorm((5047,), eps=1e-05, elementwise_affine=True)
    (linear_x): Linear(in_features=5047, out_features=256, bias=True)
    (linear_z): Linear(in_features=5047, out_features=256, bias=True)
    (forward_conv1d): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
    (backward_conv1d): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
    (linear_forward): Linear(in_features=256, out_features=128, bias=True)
    (linear_backward): Linear(in_features=256, out_features=128, bias=True)
  )
  (spatial_processing): SpatialFeatureProcessing(
    (conv_layers): Sequential(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2))
      (4): ReLU()
      (5): BatchNorm2d(512, 

### Training Data Preparation

In [None]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

# Split the ORIGINAL (un-augmented) training patches into training and
# validation sets first, and augment only the training part afterwards.
# Splitting after augmentation would put rotated/flipped copies of the same
# patch on both sides of the split, making the validation loss (used for
# early stopping and model selection) optimistically biased.
X_train_raw, X_val, y_train_raw, y_val = train_test_split(
    training_hsi_samples, training_labels, test_size=0.1, random_state=42, stratify=training_labels
)
X_train, y_train = augment_training_data(X_train_raw, y_train_raw)

# The test patches and labels are used as extracted (no augmentation).
X_test = hsi_test

print('X_train shape:', X_train.shape)
print('X_train_val shape:', X_val.shape)
print('y_train shape:', y_train.shape)

print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)


# Wrap the splits as tensor datasets: float32 patches, int64 labels.
# Labels keep their original 1-based class numbers here.
train_dataset = TensorDataset(torch.tensor(X_train.astype(np.float32)), torch.tensor(y_train).long())
val_dataset = TensorDataset(torch.tensor(X_val.astype(np.float32)), torch.tensor(y_val).long())
test_dataset = TensorDataset(torch.tensor(X_test.astype(np.float32)), torch.tensor(y_test).long())

# Create DataLoader instances for training, validation, and testing.
# Only the training loader is shuffled.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)


X_train shape: (21173, 7, 7, 103)
X_train_val shape: (2353, 7, 7, 103)
y_train shape: (21173,)
X_test shape: (37451, 7, 7, 103)
y_test shape: (37451,)




In [None]:
# import torch
# from torch.utils.data import Dataset, DataLoader
# from torch.utils.data import DataLoader, random_split
# import numpy as np

# batch_size = 32

# # Training DataLoader
# train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# # Validation DataLoader
# val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# # Testing DataLoader
# test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# 5.0 Training Model: Memory and Time Calculation

In [None]:
import psutil
import os

# Function to get current process memory usage
def get_memory_usage():
    """Return the resident set size (RSS) of the current process in MB (bytes / 1024**2)."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    bytes_per_mb = 1024 * 1024
    return rss_bytes / bytes_per_mb

initial_memory = get_memory_usage()
print(f"Initial Memory Usage: {initial_memory:.2f} MB")


In [None]:
# # Before the training loop, to record the initial memory usage (GPU)
# if torch.cuda.is_available():
#     torch.cuda.reset_peak_memory_stats()  # Reset peak memory stats at the start
#     initial_memory = torch.cuda.memory_allocated()
#     print(f"Initial Memory Allocated: {initial_memory / 1e6} MB")

### 5.1 Training Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import copy
import time

model = HSIClassificationMambaModel(
    spatial_dim=7, num_bands=103, hidden_dim=256, output_dim=128, delta_param_init=0.01, num_classes=9
)
# Train on the selected device (GPU when available); batches are moved below.
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)

epochs = 50
best_val_loss = float('inf')
best_model_wts = copy.deepcopy(model.state_dict())
patience = 10
# Must exist before the loop: if the first validation loss is not an
# improvement (e.g. NaN), the else-branch below increments it.
no_improve_epochs = 0

start_time = time.time()

for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    running_train_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        # Stored labels are 1-based class numbers; CrossEntropyLoss needs 0-based.
        labels = labels.to(device) - 1

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # criterion returns the batch MEAN; weight by batch size so that
        # dividing by the dataset size below yields a true per-sample mean.
        running_train_loss += loss.item() * inputs.size(0)

    epoch_train_loss = running_train_loss / len(train_loader.dataset)

    # ---- validation pass ----
    model.eval()
    val_running_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device) - 1
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item() * inputs.size(0)

        epoch_val_loss = val_running_loss / len(val_loader.dataset)

    print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {epoch_train_loss:.4f}, Val Loss: {epoch_val_loss:.4f}')

    # ---- checkpointing / early stopping ----
    if epoch_val_loss < best_val_loss:
        print(f'Validation Loss Decreased({best_val_loss:.6f}--->{epoch_val_loss:.6f}) \t Saving The Model')
        best_val_loss = epoch_val_loss
        best_model_wts = copy.deepcopy(model.state_dict())
        no_improve_epochs = 0
    else:
        no_improve_epochs += 1

    if no_improve_epochs > patience:
        print('Early stopping!')
        break

end_time = time.time()
total_time = end_time - start_time

# Always finish with the best validation weights, whether training stopped
# early or ran for all epochs.
model.load_state_dict(best_model_wts)

print(f'Finished training. Total training time: {total_time:.2f} seconds')


Epoch [1/50], Train Loss: 0.0310, Val Loss: 0.0176
Validation Loss Decreased(inf--->0.017630) 	 Saving The Model
Epoch [2/50], Train Loss: 0.0151, Val Loss: 0.0116
Validation Loss Decreased(0.017630--->0.011589) 	 Saving The Model
Epoch [3/50], Train Loss: 0.0101, Val Loss: 0.0094
Validation Loss Decreased(0.011589--->0.009418) 	 Saving The Model
Epoch [4/50], Train Loss: 0.0075, Val Loss: 0.0074
Validation Loss Decreased(0.009418--->0.007401) 	 Saving The Model
Epoch [5/50], Train Loss: 0.0060, Val Loss: 0.0076
Epoch [6/50], Train Loss: 0.0049, Val Loss: 0.0046
Validation Loss Decreased(0.007401--->0.004635) 	 Saving The Model
Epoch [7/50], Train Loss: 0.0043, Val Loss: 0.0053
Epoch [8/50], Train Loss: 0.0037, Val Loss: 0.0036
Validation Loss Decreased(0.004635--->0.003622) 	 Saving The Model
Epoch [9/50], Train Loss: 0.0033, Val Loss: 0.0031
Validation Loss Decreased(0.003622--->0.003075) 	 Saving The Model
Epoch [10/50], Train Loss: 0.0029, Val Loss: 0.0059
Epoch [11/50], Train Loss

### We know we use the GPU

In [None]:
# # Memory usage clacualtion
# final_memory = get_memory_usage()
# print(f"Final Memory Usage: {final_memory:.2f} MB")

# memory_used = final_memory - initial_memory
# print(f"Memory Used: {memory_used:.2f} MB")

In [None]:
## If GPU USed
# if torch.cuda.is_available():
#     final_memory = torch.cuda.memory_allocated()
#     peak_memory = torch.cuda.max_memory_allocated()
#     print(f"Final Memory Allocated: {final_memory / 1e6} MB")
#     print(f"Peak Memory Allocated During Training: {peak_memory / 1e6} MB")
#     memory_used = final_memory - initial_memory
#     print(f"Memory Used: {memory_used / 1e6} MB")


### Save the model

In [None]:
# Assuming 'model' is your trained instance of HSIClassificationMambaModel
# (or any other model)
torch.save(model.state_dict(),'PaviaU_P7GPU_ab_model_state_dict.pth')


### Calculate the test time

In [None]:
import torch
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, cohen_kappa_score
import time  # Import the time module for timing the test phase

# Assuming 'model' is the trained HSIClassificationMambaModel instance.

# Save the model
model_save_path =  'PaviaU_P7GPU_ab_model_state_dict.pth'
torch.save(model.state_dict(), model_save_path)
print(f'Model saved to {model_save_path}')

# Reload the weights onto the active device. map_location lets a checkpoint
# written from a GPU model be loaded on a CPU-only machine as well.
model.load_state_dict(torch.load(model_save_path, map_location=device))
model.to(device)

# Ensure the model is in evaluation mode
model.eval()

# Store predictions and actual labels (both 0-based)
predictions = []
actual_labels = []

start_time = time.time()  # Start timing

with torch.no_grad():
    for hsi_patches, labels in test_loader:
        hsi_patches = hsi_patches.to(device)
        # Stored labels are 1-based class numbers; model outputs are 0-based.
        labels = labels - 1

        outputs = model(hsi_patches)

        # Predicted class = index of the largest logit
        predicted = outputs.argmax(dim=1)
        predictions.extend(predicted.cpu().numpy())
        actual_labels.extend(labels.cpu().numpy())

end_time = time.time()  # End timing
test_time = end_time - start_time  # Calculate the test time

# Convert lists to NumPy arrays for easier manipulation
predictions_array = np.array(predictions)
actual_labels_array = np.array(actual_labels)

# Overall Accuracy
oa = accuracy_score(actual_labels_array, predictions_array)

# Confusion Matrix with one row/column per class, in class order. Passing
# `labels` keeps row k tied to class k even if some class never appears in
# the test labels or predictions.
class_indices = np.arange(len(class_dict))
cm = confusion_matrix(actual_labels_array, predictions_array, labels=class_indices)

# Per-class accuracy = correct / true samples of that class. Classes with no
# test samples get NaN instead of triggering a division by zero.
samples_per_class = cm.sum(axis=1)
class_accuracy = np.divide(
    cm.diagonal(),
    samples_per_class,
    out=np.full(len(samples_per_class), np.nan),
    where=samples_per_class > 0,
)
# Average Accuracy over the classes that are present in the test set
aa = np.nanmean(class_accuracy)

# Kappa Coefficient
kappa = cohen_kappa_score(actual_labels_array, predictions_array)

print(f'Overall Accuracy (OA): {oa:.4f}')
print(f'Average Accuracy (AA): {aa:.4f}')
print(f'Kappa Coefficient: {kappa:.4f}')
print(f'Test time: {test_time:.2f} seconds')  # Print the test time


Model saved to PaviaU_P7GPU_ab_model_state_dict.pth
Overall Accuracy (OA): 0.9675
Average Accuracy (AA): 0.9701
Kappa Coefficient: 0.9563
Test time: 3.43 seconds


In [None]:
# Report accuracy per class; class numbers are 1-based, array positions 0-based.
for class_number, acc in enumerate(class_accuracy, start=1):
    print(f'Class {class_number} Accuracy: {acc:.4f}')


Class 1 Accuracy: 0.9702
Class 2 Accuracy: 0.9686
Class 3 Accuracy: 0.9477
Class 4 Accuracy: 0.9908
Class 5 Accuracy: 1.0000
Class 6 Accuracy: 0.9789
Class 7 Accuracy: 0.9801
Class 8 Accuracy: 0.9141
Class 9 Accuracy: 0.9804
