In [None]:
import torch
print(torch.__file__)
print(torch.__version__)

c:\Users\HAI\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\__init__.py
2.8.0+cu128


In [1]:
import torch
print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA version (PyTorch build):", torch.version.cuda)
    print("GPU name:", torch.cuda.get_device_name(0))


Torch version: 2.8.0+cu128
CUDA available: True
CUDA version (PyTorch build): 12.8
GPU name: Quadro P2000


# **Count of images per class**

In [4]:
import os

data_dir = r"C:\Users\HAI\GUVI\Aerial_object\classification_dataset"  # change path if needed
for split in ["train", "valid", "test"]:
    path = os.path.join(data_dir, split)
    print(f"\n{split.upper()} set:")
    for cls in os.listdir(path):
        cls_path = os.path.join(path, cls)
        if os.path.isdir(cls_path):
            print(f" - {cls}: {len(os.listdir(cls_path))} images")



TRAIN set:
 - bird: 1414 images
 - drone: 1248 images

VALID set:
 - bird: 217 images
 - drone: 225 images

TEST set:
 - bird: 121 images
 - drone: 94 images


# **Datasets & DataLoaders**

In [6]:
import os
from torchvision import transforms, datasets
import torch
from torch.utils.data import DataLoader

data_dir = r"C:\Users\HAI\GUVI\Aerial_object\classification_dataset"   # change if needed
train_dir = os.path.join(data_dir, "train")
valid_dir = os.path.join(data_dir, "valid")
test_dir  = os.path.join(data_dir, "test")

# Choose image size: 224 is standard for ResNet/MobileNet
image_size = 224
batch_size = 32   # reduce to 16 or 8 if you OOM

train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])

val_test_transforms = transforms.Compose([
    transforms.Resize(int(image_size*1.14)),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])

train_ds = datasets.ImageFolder(train_dir, transform=train_transforms)
valid_ds = datasets.ImageFolder(valid_dir, transform=val_test_transforms)
test_ds  = datasets.ImageFolder(test_dir,  transform=val_test_transforms)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
valid_loader = DataLoader(valid_ds, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

print("Classes:", train_ds.classes)
print("Train images:", len(train_ds))
print("Valid images:", len(valid_ds))
print("Test images:", len(test_ds))


Classes: ['bird', 'drone']
Train images: 2662
Valid images: 442
Test images: 215


# **CNN Architecture**

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CustomCNN(nn.Module):
    def __init__(self, num_classes=2):
        super(CustomCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.bn1   = nn.BatchNorm2d(32)
        self.pool  = nn.MaxPool2d(2, 2)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2   = nn.BatchNorm2d(64)
        
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3   = nn.BatchNorm2d(128)
        
        self.fc1   = nn.Linear(128 * 28 * 28, 256)  # adjust based on input size
        self.dropout = nn.Dropout(0.5)
        self.fc2   = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))  # -> [B, 32, H/2, W/2]
        x = self.pool(F.relu(self.bn2(self.conv2(x))))  # -> [B, 64, H/4, W/4]
        x = self.pool(F.relu(self.bn3(self.conv3(x))))  # -> [B, 128, H/8, W/8]
        
        x = x.view(x.size(0), -1)  # flatten
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


# **Instantiate the Model on GPU**

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = len(train_ds.classes) 
model = CustomCNN(num_classes=num_classes).to(device)

print(model)

CustomCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=100352, out_features=256, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=256, out_features=2, bias=True)
)


In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# **Training Loop (CUDA enabled)**

In [11]:
import time
from tqdm import tqdm

def train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler, device, num_epochs=10):
    best_acc = 0.0
    best_model_wts = model.state_dict()

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        start_time = time.time()

        # Train phase
        model.train()
        running_loss, running_corrects = 0.0, 0

        for inputs, labels in tqdm(train_loader, desc="Train"):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects.double() / len(train_loader.dataset)

        # Validation phase
        model.eval()
        val_loss, val_corrects = 0.0, 0

        with torch.no_grad():
            for inputs, labels in tqdm(valid_loader, desc="Valid"):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs, 1)
                val_loss += loss.item() * inputs.size(0)
                val_corrects += torch.sum(preds == labels.data)

        val_loss /= len(valid_loader.dataset)
        val_acc = val_corrects.double() / len(valid_loader.dataset)

        print(f"Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} | Val Loss: {val_loss:.4f} Acc: {val_acc:.4f} | Time: {time.time()-start_time:.1f}s")

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = model.state_dict()
            torch.save(best_model_wts, "best_customcnn.pth")
            print("Saved best model.")

        scheduler.step()

    model.load_state_dict(best_model_wts)
    print(f"Best Validation Accuracy: {best_acc:.4f}")
    return model


# **Run Training**

In [15]:
model = train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler, device, num_epochs=10)


Epoch 1/10


Train: 100%|██████████| 84/84 [00:26<00:00,  3.17it/s]
Valid: 100%|██████████| 14/14 [00:06<00:00,  2.17it/s]


Train Loss: 0.4099 Acc: 0.8208 | Val Loss: 0.4198 Acc: 0.8145 | Time: 32.9s
Saved best model.

Epoch 2/10


Train: 100%|██████████| 84/84 [00:25<00:00,  3.23it/s]
Valid: 100%|██████████| 14/14 [00:06<00:00,  2.10it/s]


Train Loss: 0.4137 Acc: 0.8163 | Val Loss: 0.4241 Acc: 0.8145 | Time: 32.7s

Epoch 3/10


Train: 100%|██████████| 84/84 [00:26<00:00,  3.17it/s]
Valid: 100%|██████████| 14/14 [00:06<00:00,  2.19it/s]


Train Loss: 0.4130 Acc: 0.8182 | Val Loss: 0.4160 Acc: 0.8077 | Time: 32.9s

Epoch 4/10


Train: 100%|██████████| 84/84 [00:26<00:00,  3.20it/s]
Valid: 100%|██████████| 14/14 [00:06<00:00,  2.08it/s]


Train Loss: 0.4121 Acc: 0.8129 | Val Loss: 0.4250 Acc: 0.8235 | Time: 33.0s
Saved best model.

Epoch 5/10


Train: 100%|██████████| 84/84 [00:26<00:00,  3.20it/s]
Valid: 100%|██████████| 14/14 [00:06<00:00,  2.16it/s]


Train Loss: 0.4032 Acc: 0.8212 | Val Loss: 0.4153 Acc: 0.8167 | Time: 32.8s

Epoch 6/10


Train: 100%|██████████| 84/84 [00:26<00:00,  3.14it/s]
Valid: 100%|██████████| 14/14 [00:06<00:00,  2.10it/s]


Train Loss: 0.3990 Acc: 0.8246 | Val Loss: 0.4346 Acc: 0.8190 | Time: 33.4s

Epoch 7/10


Train: 100%|██████████| 84/84 [00:27<00:00,  3.09it/s]
Valid: 100%|██████████| 14/14 [00:06<00:00,  2.24it/s]


Train Loss: 0.4096 Acc: 0.8204 | Val Loss: 0.4183 Acc: 0.8190 | Time: 33.4s

Epoch 8/10


Train: 100%|██████████| 84/84 [00:26<00:00,  3.22it/s]
Valid: 100%|██████████| 14/14 [00:07<00:00,  1.84it/s]


Train Loss: 0.4059 Acc: 0.8193 | Val Loss: 0.4485 Acc: 0.8190 | Time: 33.7s

Epoch 9/10


Train: 100%|██████████| 84/84 [00:26<00:00,  3.16it/s]
Valid: 100%|██████████| 14/14 [00:06<00:00,  2.17it/s]


Train Loss: 0.4102 Acc: 0.8140 | Val Loss: 0.4227 Acc: 0.8167 | Time: 33.1s

Epoch 10/10


Train: 100%|██████████| 84/84 [00:26<00:00,  3.21it/s]
Valid: 100%|██████████| 14/14 [00:06<00:00,  2.12it/s]

Train Loss: 0.4063 Acc: 0.8118 | Val Loss: 0.4293 Acc: 0.8235 | Time: 32.8s
Best Validation Accuracy: 0.8235





# **Test Accuracy**

In [16]:
model.eval()
test_corrects, total = 0, 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        test_corrects += torch.sum(preds == labels.data)
        total += labels.size(0)

print(f"Test Accuracy: {test_corrects.double() / total:.4f}")


Test Accuracy: 0.8419


# **STEP 3: model setup (transfer learning)**



#### Resnet

In [17]:
import torch.nn as nn
import torchvision.models as models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = len(train_ds.classes)  # should be 2
print("num_classes:", num_classes)

model = models.resnet18(pretrained=True)   # fast and small
# Option A: freeze feature layers for quick training (uncomment if desired)
for param in model.parameters():
    param.requires_grad = False

# Replace final fully connected layer
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)

# Only parameters of final layer will be updated (if above freeze used)
model = model.to(device)
print(model)

num_classes: 2
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\HAI/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:03<00:00, 13.5MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [18]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
# only params with requires_grad=True are updated
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


In [21]:
# STEP 4: training loop
import time
from copy import deepcopy
from tqdm import tqdm


def train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler, device, num_epochs=10):
    best_model_wts = deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        t0 = time.time()

        # Training phase
        model.train()
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in tqdm(train_loader, desc="Train", leave=False):
            inputs = inputs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            preds = outputs.argmax(dim=1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data).item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc  = running_corrects / len(train_loader.dataset)

        # Validation phase
        model.eval()
        val_running_loss = 0.0
        val_running_corrects = 0

        with torch.no_grad():
            for inputs, labels in tqdm(valid_loader, desc="Valid", leave=False):
                inputs = inputs.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = outputs.argmax(dim=1)
                val_running_loss += loss.item() * inputs.size(0)
                val_running_corrects += torch.sum(preds == labels.data).item()

        val_loss = val_running_loss / len(valid_loader.dataset)
        val_acc  = val_running_corrects / len(valid_loader.dataset)

        print(f"train_loss: {epoch_loss:.4f} train_acc: {epoch_acc:.4f} | val_loss: {val_loss:.4f} val_acc: {val_acc:.4f} | time: {time.time()-t0:.1f}s")

        # deep copy best
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = deepcopy(model.state_dict())
            torch.save(best_model_wts, "best_model.pth")
            print("Saved best_model.pth")

        scheduler.step()

    print(f"Best val acc: {best_acc:.4f}")
    model.load_state_dict(best_model_wts)
    return model

# Example: train for 8 epochs (change as needed)
model = train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler, device, num_epochs=8)


Epoch 1/8



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

train_loss: 0.5585 train_acc: 0.7145 | val_loss: 0.4342 val_acc: 0.8326 | time: 21.1s
Saved best_model.pth
Epoch 2/8



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

train_loss: 0.3886 train_acc: 0.8730 | val_loss: 0.3194 val_acc: 0.8959 | time: 21.2s
Saved best_model.pth
Epoch 3/8



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

train_loss: 0.3102 train_acc: 0.8986 | val_loss: 0.2593 val_acc: 0.9276 | time: 21.7s
Saved best_model.pth
Epoch 4/8



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

train_loss: 0.2553 train_acc: 0.9245 | val_loss: 0.2242 val_acc: 0.9344 | time: 21.3s
Saved best_model.pth
Epoch 5/8



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

train_loss: 0.2345 train_acc: 0.9275 | val_loss: 0.2028 val_acc: 0.9412 | time: 21.0s
Saved best_model.pth
Epoch 6/8



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

train_loss: 0.2123 train_acc: 0.9301 | val_loss: 0.1881 val_acc: 0.9434 | time: 21.7s
Saved best_model.pth
Epoch 7/8



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

train_loss: 0.1961 train_acc: 0.9384 | val_loss: 0.1749 val_acc: 0.9502 | time: 21.7s
Saved best_model.pth
Epoch 8/8



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

train_loss: 0.1970 train_acc: 0.9354 | val_loss: 0.1726 val_acc: 0.9412 | time: 21.9s
Best val acc: 0.9502


In [22]:
# STEP 5: evaluate on test set
import numpy as np
model.eval()
correct = 0
total = 0
all_preds, all_labels = [], []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        outputs = model(inputs)
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        all_preds.extend(preds.cpu().numpy().tolist())
        all_labels.extend(labels.cpu().numpy().tolist())

print("Test accuracy:", correct / total)

# Optional: confusion matrix using sklearn
try:
    from sklearn.metrics import classification_report, confusion_matrix
    print(classification_report(all_labels, all_preds, target_names=train_ds.classes))
    print("Confusion matrix:")
    print(confusion_matrix(all_labels, all_preds))
except Exception as e:
    print("sklearn not installed or error:", e)


Test accuracy: 0.9534883720930233


Exception ignored in: <function tqdm.__del__ at 0x000001E513366050>
Traceback (most recent call last):
  File "c:\Users\HAI\AppData\Local\Programs\Python\Python310\lib\site-packages\tqdm\std.py", line 1148, in __del__
    self.close()
  File "c:\Users\HAI\AppData\Local\Programs\Python\Python310\lib\site-packages\tqdm\notebook.py", line 279, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


              precision    recall  f1-score   support

        bird       0.97      0.95      0.96       121
       drone       0.94      0.96      0.95        94

    accuracy                           0.95       215
   macro avg       0.95      0.95      0.95       215
weighted avg       0.95      0.95      0.95       215

Confusion matrix:
[[115   6]
 [  4  90]]
