<a href="https://colab.research.google.com/github/Shilpa2190/Aerial_Object_Classification/blob/main/Aerial_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive so the dataset stored under MyDrive is reachable from this runtime.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
from torchvision import transforms, datasets
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import DataLoader

In [None]:
# Project folder on the mounted Drive; the three splits live under classification_dataset/.
data_dir = r"/content/drive/MyDrive/Aerial_Classification"
_dataset_root = f"{data_dir}/classification_dataset"
train_dir = f"{_dataset_root}/train"
test_dir = f"{_dataset_root}/test"
valid_dir = f"{_dataset_root}/valid"

In [None]:
# Input resolution and batch size; 224 is standard for ResNet/MobileNet
image_size = 224
batch_size = 32   # reduce to 16 or 8 if you OOM

# Per-channel mean / std applied by both preprocessing pipelines.
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random resized crop + horizontal flip as augmentation.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Evaluation pipeline: deterministic resize to ~1.14x the target, then centre crop.
_resize_to = int(image_size*1.14)
val_test_transforms = transforms.Compose([
    transforms.Resize(_resize_to),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# One ImageFolder per split; class labels come from the sub-directory names.
train_ds = datasets.ImageFolder(root=train_dir, transform=train_transforms)
test_ds = datasets.ImageFolder(root=test_dir, transform=val_test_transforms)
valid_ds = datasets.ImageFolder(root=valid_dir, transform=val_test_transforms)

# Only the training loader shuffles; every loader shares the remaining settings.
_loader_kwargs = dict(batch_size=batch_size, num_workers=2, pin_memory=True)
train_loader = DataLoader(train_ds, shuffle=True, **_loader_kwargs)
valid_loader = DataLoader(valid_ds, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_ds, shuffle=False, **_loader_kwargs)

# Quick sanity report of what was found on disk.
print("Classes:", train_ds.classes)
for _split_name, _split_ds in (("Train", train_ds), ("Valid", valid_ds), ("Test", test_ds)):
    print(f"{_split_name} images:", len(_split_ds))

Classes: ['bird', 'drone']
Train images: 2675
Valid images: 442
Test images: 215


In [None]:
import torch.nn as nn
import torch.nn.functional as F

class CustomCNN(nn.Module):
  """Small three-stage convolutional image classifier.

  Each stage applies Conv(3x3, padding=1) -> BatchNorm -> ReLU -> 2x2 max-pool,
  growing the channels 3 -> 32 -> 64 -> 128 while halving the spatial size each
  time. The resulting feature map is brought to 28x28, flattened, and classified
  by a two-layer fully connected head with dropout.

  Args:
    num_classes: number of output logits (default 2).

  Notes:
    ``fc1`` expects exactly 128*28*28 features, which is what a 224x224 input
    produces after three poolings. ``adapt_pool`` resizes any other feature-map
    size to 28x28 so the network also accepts other input resolutions; for
    224x224 inputs it is an identity. It has no parameters, so the
    ``state_dict`` layout (and existing checkpoints) are unchanged.
  """

  def __init__(self, num_classes=2):
    super().__init__()

    # Stage 1: 3 -> 32 channels
    self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
    self.bn1 = nn.BatchNorm2d(32)
    # The same 2x2 max-pool module is reused after every stage.
    self.pool = nn.MaxPool2d(2, 2)

    # Stage 2: 32 -> 64 channels
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(64)

    # Stage 3: 64 -> 128 channels
    self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
    self.bn3 = nn.BatchNorm2d(128)

    # Guarantees the 28x28 grid that fc1 was sized for, whatever the input size.
    self.adapt_pool = nn.AdaptiveAvgPool2d((28, 28))

    # Classification head
    self.fc1 = nn.Linear(128 * 28 * 28, 256)
    self.dropout = nn.Dropout(0.5)
    self.fc2 = nn.Linear(256, num_classes)

  def forward(self, x):
    """Return class logits of shape (batch, num_classes) for an image batch ``x``."""
    x = self.pool(F.relu(self.bn1(self.conv1(x))))
    x = self.pool(F.relu(self.bn2(self.conv2(x))))
    x = self.pool(F.relu(self.bn3(self.conv3(x))))

    # Identity when the map is already 28x28; otherwise resizes it to match fc1.
    x = self.adapt_pool(x)
    x = x.flatten(1)
    x = F.relu(self.fc1(x))
    x = self.dropout(x)
    x = self.fc2(x)
    return x

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# One output logit per class folder found in the training split.
num_classes = len(train_ds.classes)
model = CustomCNN(num_classes=num_classes)
model = model.to(device)

print(model)

CustomCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=100352, out_features=256, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=256, out_features=2, bias=True)
)


In [None]:
import torch.optim as optim

# Loss, optimiser and learning-rate schedule for the custom CNN.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Halve the learning rate every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)

In [None]:
import time
from tqdm import tqdm

def train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler, device, num_epochs=10):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free pass over ``valid_loader``. Whenever validation accuracy
    strictly improves, the weights are snapshotted and written to
    ``best_customcnn.pth``. ``scheduler.step()`` is called once per epoch.

    Args:
        model: network to optimise (modified in place).
        train_loader: loader yielding ``(inputs, labels)`` training batches.
        valid_loader: loader yielding ``(inputs, labels)`` validation batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        device: device the batches are moved to.
        num_epochs: number of epochs to run (default 10).

    Returns:
        The same ``model`` object, with the best-validation weights loaded.
    """
    from copy import deepcopy  # local import keeps this cell self-contained

    best_acc = 0.0
    # state_dict() hands back references to the live parameter tensors, so the
    # snapshot must be deep-copied; otherwise it silently follows later epochs
    # and the final load_state_dict() restores last-epoch weights instead.
    best_model_wts = deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        start_time = time.time()

        # Train phase
        model.train()
        running_loss, running_corrects = 0.0, 0

        for inputs, labels in tqdm(train_loader, desc="Train"):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            preds = outputs.argmax(dim=1)
            # The loss is a batch mean; weight it by batch size for a per-sample average.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels).sum().item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects / len(train_loader.dataset)

        # Validation phase
        model.eval()
        val_loss, val_corrects = 0.0, 0

        with torch.no_grad():
            for inputs, labels in tqdm(valid_loader, desc="Valid"):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = outputs.argmax(dim=1)
                val_loss += loss.item() * inputs.size(0)
                val_corrects += (preds == labels).sum().item()

        val_loss /= len(valid_loader.dataset)
        val_acc = val_corrects / len(valid_loader.dataset)

        print(f"Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} | Val Loss: {val_loss:.4f} Acc: {val_acc:.4f} | Time: {time.time()-start_time:.1f}s")

        # Keep (and persist) a frozen copy of the weights on every strict improvement.
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = deepcopy(model.state_dict())
            torch.save(best_model_wts, "best_customcnn.pth")
            print("Saved best model.")

        scheduler.step()

    # Restore the best snapshot rather than whatever the last epoch produced.
    model.load_state_dict(best_model_wts)
    print(f"Best Validation Accuracy: {best_acc:.4f}")
    return model


In [None]:
# Train the custom CNN for 10 epochs; the returned model carries the weights train_model restored.
model = train_model(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    device=device,
    num_epochs=10,
)


Epoch 1/10


Train: 100%|██████████| 84/84 [03:50<00:00,  2.74s/it]
Valid: 100%|██████████| 14/14 [01:17<00:00,  5.57s/it]


Train Loss: 2.8731 Acc: 0.6404 | Val Loss: 0.5768 Acc: 0.7104 | Time: 308.5s
Saved best model.

Epoch 2/10


Train: 100%|██████████| 84/84 [00:17<00:00,  4.81it/s]
Valid: 100%|██████████| 14/14 [00:03<00:00,  3.84it/s]


Train Loss: 0.5894 Acc: 0.6890 | Val Loss: 0.5486 Acc: 0.6968 | Time: 21.1s

Epoch 3/10


Train: 100%|██████████| 84/84 [00:16<00:00,  4.98it/s]
Valid: 100%|██████████| 14/14 [00:02<00:00,  5.04it/s]


Train Loss: 0.5413 Acc: 0.7110 | Val Loss: 0.5102 Acc: 0.7443 | Time: 19.7s
Saved best model.

Epoch 4/10


Train: 100%|██████████| 84/84 [00:17<00:00,  4.73it/s]
Valid: 100%|██████████| 14/14 [00:03<00:00,  3.72it/s]


Train Loss: 0.5551 Acc: 0.7204 | Val Loss: 0.5203 Acc: 0.7262 | Time: 21.5s

Epoch 5/10


Train: 100%|██████████| 84/84 [00:17<00:00,  4.93it/s]
Valid: 100%|██████████| 14/14 [00:02<00:00,  5.22it/s]


Train Loss: 0.5564 Acc: 0.7204 | Val Loss: 0.5060 Acc: 0.7398 | Time: 19.7s

Epoch 6/10


Train: 100%|██████████| 84/84 [00:17<00:00,  4.94it/s]
Valid: 100%|██████████| 14/14 [00:03<00:00,  3.61it/s]


Train Loss: 0.5010 Acc: 0.7548 | Val Loss: 0.4945 Acc: 0.7534 | Time: 20.9s
Saved best model.

Epoch 7/10


Train: 100%|██████████| 84/84 [00:17<00:00,  4.80it/s]
Valid: 100%|██████████| 14/14 [00:02<00:00,  5.34it/s]


Train Loss: 0.4944 Acc: 0.7649 | Val Loss: 0.6440 Acc: 0.6968 | Time: 20.1s

Epoch 8/10


Train: 100%|██████████| 84/84 [00:16<00:00,  4.99it/s]
Valid: 100%|██████████| 14/14 [00:03<00:00,  4.01it/s]


Train Loss: 0.5027 Acc: 0.7551 | Val Loss: 0.4452 Acc: 0.8100 | Time: 20.3s
Saved best model.

Epoch 9/10


Train: 100%|██████████| 84/84 [00:17<00:00,  4.79it/s]
Valid: 100%|██████████| 14/14 [00:02<00:00,  5.27it/s]


Train Loss: 0.4784 Acc: 0.7634 | Val Loss: 0.5310 Acc: 0.7783 | Time: 20.2s

Epoch 10/10


Train: 100%|██████████| 84/84 [00:17<00:00,  4.93it/s]
Valid: 100%|██████████| 14/14 [00:03<00:00,  4.17it/s]

Train Loss: 0.4824 Acc: 0.7705 | Val Loss: 0.5128 Acc: 0.7828 | Time: 20.4s
Best Validation Accuracy: 0.8100





In [None]:
# Test Accuracy
# Score the current model on the test split without tracking gradients.

model.eval()
test_corrects = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest logit.
        preds = outputs.argmax(dim=1)
        test_corrects += (preds == labels).sum()
        total += labels.size(0)

test_accuracy = test_corrects.double() / total
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.8233


In [None]:
# Model Setup (Transfer Learning)
# ResNet

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = len(train_ds.classes)  # should be 2
print("num_classes:", num_classes)

# Load an ImageNet-pretrained ResNet-18 (fast and small).
# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` enum; fall back to the legacy flag on older releases.
_resnet18_weights = getattr(models, "ResNet18_Weights", None)
if _resnet18_weights is not None:
    model = models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Freeze the pretrained backbone so that only the new head is trained.
# (Remove this loop to fine-tune the whole network instead.)
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with a fresh head for our classes.
# It is created after the freeze, so its parameters keep requires_grad=True.
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)

# Only the parameters of the new final layer will be updated.
model = model.to(device)
print(model)

num_classes: 2
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 172MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
import torch.optim as optim

# Cross-entropy over the class logits.
criterion = nn.CrossEntropyLoss()
# Hand the optimiser only the parameters that still require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-4)
# Multiply the learning rate by 0.1 every 7 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
from copy import deepcopy

def train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler, device, num_epochs=10):
    """Fit ``model`` for ``num_epochs`` epochs and return it with its best weights loaded.

    Each epoch performs one optimisation pass over ``train_loader`` and one
    gradient-free pass over ``valid_loader``. When validation accuracy strictly
    improves, a deep copy of the weights is kept and written to
    ``best_model.pth``. ``scheduler.step()`` runs once per epoch.

    Args:
        model: network to optimise (modified in place).
        train_loader: loader yielding ``(inputs, labels)`` training batches.
        valid_loader: loader yielding ``(inputs, labels)`` validation batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        device: device the batches are moved to.
        num_epochs: number of epochs to run (default 10).

    Returns:
        The same ``model`` object, with the best-validation weights loaded.
    """

    def _run_pass(loader, desc, training):
        """Iterate once over ``loader``; return (mean loss, accuracy) over ``loader.dataset``."""
        loss_sum = 0.0
        n_correct = 0
        for batch_x, batch_y in tqdm(loader, desc=desc, leave=False):
            batch_x = batch_x.to(device, non_blocking=True)
            batch_y = batch_y.to(device, non_blocking=True)

            if training:
                optimizer.zero_grad()
            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)
            if training:
                batch_loss.backward()
                optimizer.step()

            # The loss is a batch mean; weight it by batch size before summing.
            loss_sum += batch_loss.item() * batch_x.size(0)
            n_correct += torch.sum(logits.argmax(dim=1) == batch_y.data).item()

        n_samples = len(loader.dataset)
        return loss_sum / n_samples, n_correct / n_samples

    top_acc = 0.0
    top_weights = deepcopy(model.state_dict())

    for epoch_idx in range(num_epochs):
        print(f"Epoch {epoch_idx+1}/{num_epochs}")
        epoch_start = time.time()

        # Training phase
        model.train()
        epoch_loss, epoch_acc = _run_pass(train_loader, "Train", training=True)

        # Validation phase
        model.eval()
        with torch.no_grad():
            val_loss, val_acc = _run_pass(valid_loader, "Valid", training=False)

        print(f"train_loss: {epoch_loss:.4f} train_acc: {epoch_acc:.4f} | val_loss: {val_loss:.4f} val_acc: {val_acc:.4f} | time: {time.time()-epoch_start:.1f}s")

        # Snapshot and persist the weights on every strict improvement.
        if val_acc > top_acc:
            top_acc = val_acc
            top_weights = deepcopy(model.state_dict())
            torch.save(top_weights, "best_model.pth")
            print("Saved best_model.pth")

        scheduler.step()

    print(f"Best val acc: {top_acc:.4f}")
    model.load_state_dict(top_weights)
    return model

In [None]:
# STEP 4: train the new classification head before scoring it.
# The freshly attached `model.fc` layer starts from random weights; without this
# call the evaluation below scores an untrained head (chance-level accuracy).
model = train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler, device, num_epochs=10)

# STEP 5: evaluate on test set
import numpy as np
model.eval()
correct = 0
total = 0
# Collected over the whole test split for the sklearn report below.
all_preds, all_labels = [], []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        outputs = model(inputs)
        # Predicted class = index of the largest logit.
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        all_preds.extend(preds.cpu().numpy().tolist())
        all_labels.extend(labels.cpu().numpy().tolist())

print("Test accuracy:", correct / total)

# Optional: per-class report and confusion matrix using sklearn.
# Best-effort on purpose: a missing sklearn (or a reporting error) is printed, not raised.
try:
    from sklearn.metrics import classification_report, confusion_matrix
    print(classification_report(all_labels, all_preds, target_names=train_ds.classes))
    print("Confusion matrix:")
    print(confusion_matrix(all_labels, all_preds))
except Exception as e:
    print("sklearn not installed or error:", e)

Test accuracy: 0.44651162790697674
              precision    recall  f1-score   support

        bird       0.51      0.41      0.46       121
       drone       0.39      0.49      0.44        94

    accuracy                           0.45       215
   macro avg       0.45      0.45      0.45       215
weighted avg       0.46      0.45      0.45       215

Confusion matrix:
[[50 71]
 [48 46]]
