In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader
from PIL import Image
import os
from sklearn.preprocessing import LabelEncoder

# Custom Dataset for loading and transforming images
class CustomImageDataset(torch.utils.data.Dataset):
    def __init__(self, base_dir, subfolders, transform=None):
        self.base_dir = base_dir
        self.subfolders = subfolders
        self.transform = transform
        self.image_paths = []
        self.labels = []
        
        for subfolder in subfolders:
            folder_path = os.path.join(base_dir, subfolder)
            label = subfolder

            for img_name in os.listdir(folder_path):
                if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img_path = os.path.join(folder_path, img_name)
                    self.image_paths.append(img_path)
                    self.labels.append(label)
        
        self.label_encoder = LabelEncoder()
        self.labels = self.label_encoder.fit_transform(self.labels)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        label = self.labels[idx]
        return image, label

# Define the LSTM model with a pretrained CNN for feature extraction
class PretrainedCNN_LSTM(nn.Module):
    def __init__(self, num_classes):
        super(PretrainedCNN_LSTM, self).__init__()
        
        # Pretrained CNN (ResNet18)
        self.cnn = models.resnet18(pretrained=True)
        self.cnn.fc = nn.Identity()  # Remove the fully connected layer
        
        # LSTM
        self.lstm = nn.LSTM(input_size=512, hidden_size=256, num_layers=2, batch_first=True)
        
        # Classification layer
        self.fc = nn.Linear(256, num_classes)
    
    def forward(self, x):
        # Feature extraction with CNN
        batch_size, time_steps, C, H, W = x.size()
        c_in = x.view(batch_size * time_steps, C, H, W)
        
        # Apply CNN to each image in the sequence
        cnn_features = self.cnn(c_in)
        cnn_features = cnn_features.view(batch_size, time_steps, -1)
        
        # Pass features through LSTM
        lstm_out, (hn, cn) = self.lstm(cnn_features)
        lstm_out = lstm_out[:, -1, :]  # Take the output from the last time step
        
        # Classification
        out = self.fc(lstm_out)
        return out

# Define transformations
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Define dataset and dataloader
base_dir = r'DIAT-uSAT_dataset'
subfolders = [
    r"3_long_blade_rotor", 
    r"3_short_blade_rotor", 
    r"Bird", 
    r"Bird+mini-helicopter", 
    r"drone", 
    r"rc_plane"
]

# Initialize dataset and dataloader
dataset = CustomImageDataset(base_dir=base_dir, subfolders=subfolders, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Initialize the model, loss, and optimizer
model = PretrainedCNN_LSTM(num_classes=6)  # 6 classes in the dataset
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0002)

# Variables to track the best validation performance
best_loss = float('inf')
best_model_path = "best_model_CustomVGGWithAttentionattention.pt"

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for images, labels in dataloader:
        # Reshape the images to [batch_size, time_steps, C, H, W]
        images = images.unsqueeze(1)  # Add time_steps dimension if needed

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # For demonstration, we assume `loss.item()` is the validation loss
    # Normally, you would compute this on a validation set

    # Check if current loss is the best so far
    if loss.item() < best_loss:
        best_loss = loss.item()
        torch.save(model.state_dict(), best_model_path)  # Save the model
        print(f"Model saved at epoch {epoch+1} with validation loss: {best_loss:.4f}")

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')




Model saved at epoch 1 with validation loss: 0.4525
Epoch [1/10], Loss: 0.4525
Model saved at epoch 2 with validation loss: 0.3909
Epoch [2/10], Loss: 0.3909
Model saved at epoch 3 with validation loss: 0.1003
Epoch [3/10], Loss: 0.1003
Epoch [4/10], Loss: 0.1022
Model saved at epoch 5 with validation loss: 0.0183
Epoch [5/10], Loss: 0.0183
Epoch [6/10], Loss: 0.7165
Epoch [7/10], Loss: 0.0261
Epoch [8/10], Loss: 0.3348
Epoch [9/10], Loss: 0.0270
Epoch [10/10], Loss: 0.0190


In [3]:
import torch
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader
from torchvision import transforms

# Function to evaluate the model on a test dataset
def evaluate_model(model, dataloader, device):
    model.eval()  # Set the model to evaluation mode
    true_labels = []
    pred_labels = []

    with torch.no_grad():  # Disable gradient calculation
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            # Add time_steps dimension if needed
            images = images.unsqueeze(1)  # Shape: [batch_size, time_steps, C, H, W]

            # Forward pass
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)

            # Collect true and predicted labels
            true_labels.extend(labels.cpu().numpy())
            pred_labels.extend(predicted.cpu().numpy())

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, pred_labels)
    return accuracy

# Set up for testing
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the saved model
model = PretrainedCNN_LSTM(num_classes=6)
model.load_state_dict(torch.load("best_model_CustomVGGWithAttentionattention.pt"))
model.to(device)

# Define the test dataset and DataLoader (similar to the training set)
test_dataset = CustomImageDataset(base_dir=base_dir, subfolders=subfolders, transform=transform)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Evaluate the model and print accuracy
accuracy = evaluate_model(model, test_dataloader, device)
print(f'Test Accuracy: {accuracy * 100:.2f}%')


  model.load_state_dict(torch.load("best_model_CustomVGGWithAttentionattention.pt"))


Test Accuracy: 98.02%
