In [74]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from PIL import Image


In [75]:
class CustomImageDataset(Dataset):
    def __init__(self, base_dir, subfolders, transform=None):
        self.base_dir = base_dir
        self.subfolders = subfolders
        self.transform = transform
        self.image_paths = []
        self.labels = []
        
        for subfolder in subfolders:
            folder_path = os.path.join(base_dir, subfolder)
            label = subfolder

            for img_name in os.listdir(folder_path):
                if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img_path = os.path.join(folder_path, img_name)
                    self.image_paths.append(img_path)
                    self.labels.append(label)
        
        self.label_encoder = LabelEncoder()
        self.labels = self.label_encoder.fit_transform(self.labels)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        label = self.labels[idx]
        return image, label


In [76]:
# Define transformations for data augmentation and normalization
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),  
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet stats
])

# Define base directory and subfolders for different classes
base_dir = r'DIAT-uSAT_dataset'
subfolders = [
    r"3_long_blade_rotor", 
    r"3_short_blade_rotor", 
    r"Bird", 
    r"Bird+mini-helicopter", 
    r"drone", 
    r"rc_plane", 
]

# Hyperparameters
input_size = 3 * 224 * 224  # 3 channels, image size 224x224
hidden_size = 128
num_layers = 2
num_classes = len(subfolders)
learning_rate = 0.001
batch_size = 32
num_epochs = 50
best_val_accuracy = 0
best_model_path = 'best_model_CustomVGGWithAttentionattention.pt'


In [77]:
# Create the dataset and dataloaders
dataset = CustomImageDataset(base_dir, subfolders, transform=transform)

# Split dataset into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# DataLoader for batching
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [78]:
class LSTMImageClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMImageClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # LSTM layer (input_size will be the flattened size of image)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        
        # Fully connected layer
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        # Initialize hidden and cell state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate the LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Take the output of the last time step
        out = out[:, -1, :]
        
        # Fully connected layer
        out = self.fc(out)
        return out


In [79]:
# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model
model = LSTMImageClassifier(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes).to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [80]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        
        # Flatten the image from (batch_size, channels, height, width) to (batch_size, 1, input_size)
        imgs = imgs.view(imgs.size(0), 1, -1)
        
        # Forward pass
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

    # Validation after each epoch
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            imgs = imgs.view(imgs.size(0), 1, -1)  # Flatten for validation
            
            outputs = model(imgs)
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_accuracy = 100 * val_correct / val_total
    print(f'Validation Accuracy after Epoch {epoch+1}: {val_accuracy:.2f}%')

    # Save the best model if validation accuracy improves
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), best_model_path)


Epoch [1/50], Loss: 1.7197
Validation Accuracy after Epoch 1: 29.59%
Epoch [2/50], Loss: 1.5413
Validation Accuracy after Epoch 2: 33.81%
Epoch [3/50], Loss: 1.5228
Validation Accuracy after Epoch 3: 34.74%
Epoch [4/50], Loss: 1.4690
Validation Accuracy after Epoch 4: 31.86%
Epoch [5/50], Loss: 1.4741
Validation Accuracy after Epoch 5: 31.55%
Epoch [6/50], Loss: 1.4963
Validation Accuracy after Epoch 6: 28.97%
Epoch [7/50], Loss: 1.5950
Validation Accuracy after Epoch 7: 25.15%
Epoch [8/50], Loss: 1.5778
Validation Accuracy after Epoch 8: 30.82%
Epoch [9/50], Loss: 1.5524
Validation Accuracy after Epoch 9: 31.44%
Epoch [10/50], Loss: 1.5683
Validation Accuracy after Epoch 10: 30.52%
Epoch [11/50], Loss: 1.5321
Validation Accuracy after Epoch 11: 26.91%
Epoch [12/50], Loss: 1.5207
Validation Accuracy after Epoch 12: 30.62%
Epoch [13/50], Loss: 1.5247
Validation Accuracy after Epoch 13: 31.96%
Epoch [14/50], Loss: 1.4937
Validation Accuracy after Epoch 14: 33.40%
Epoch [15/50], Loss: 1.5

In [None]:
# Load the best saved model
model = LSTMImageClassifier(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes).to(device)
model.load_state_dict(torch.load(best_model_path))
model.eval()  # Set model to evaluation mode


In [None]:
def test_model(test_loader, model):
    model.eval()  # Set model to evaluation mode
    test_correct = 0
    test_total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            
            # Flatten the image from (batch_size, channels, height, width) to (batch_size, 1, input_size)
            imgs = imgs.view(imgs.size(0), 1, -1)
            
            # Forward pass through the model
            outputs = model(imgs)
            _, predicted = torch.max(outputs.data, 1)
            
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    test_accuracy = 100 * test_correct / test_total
    print(f'Test Accuracy: {test_accuracy:.2f}%')
    
    return all_preds, all_labels


In [None]:
# Assuming you have a test_loader similar to train_loader
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Test the model
all_preds, all_labels = test_model(test_loader, model)


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Print classification report
print(classification_report(all_labels, all_preds, target_names=subfolders))

# Confusion matrix
conf_matrix = confusion_matrix(all_labels, all_preds)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=subfolders, yticklabels=subfolders)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()


In [None]:
import torch
from sklearn.metrics import accuracy_score

# Function to test the model and display accuracy
def test_model(test_loader, model):
    model.eval()  # Set the model to evaluation mode
    all_preds = []
    all_labels = []

    with torch.no_grad():  # Disable gradient tracking
        for imgs, labels in test_loader:
            imgs, labels = imgs.to(device), labels.to(device)

            # Forward pass through the model
            outputs = model(imgs)
            _, predicted = torch.max(outputs.data, 1)

            # Store predictions and labels
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate accuracy
    accuracy = accuracy_score(all_labels, all_preds) * 100
    print(f'Test Accuracy: {accuracy:.2f}%')

# Load the best saved model
model = LSTMImageClassifier(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes).to(device)
model.load_state_dict(torch.load(best_model_path))
model.eval()  # Set model to evaluation mode

# Assuming you have a test_loader defined
test_model(test_loader, model)
