<a href="https://colab.research.google.com/github/Sithu0077/myproject123/blob/main/YOLOv8_Swin_COCO_DRDO_Project_with_Download_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🔍 DRDO Project: YOLOv8 + Swin Transformer for Detection & Classification

This notebook combines **YOLOv8** for object detection with a **Swin Transformer** for image classification, using the **COCO 2017** dataset, which is downloaded below and loaded through a custom PyTorch `Dataset` class.

In [1]:
# Step 1: Install Dependencies
!pip install -q ultralytics timm torchvision pycocotools

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m39.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m34.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Step 2: Import Libraries
from ultralytics import YOLO
import timm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from pycocotools.coco import COCO

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
# Download COCO 2017 Dataset (Optional: ~20GB total)
!mkdir -p coco && cd coco && \
wget http://images.cocodataset.org/zips/train2017.zip && unzip -q train2017.zip && \
wget http://images.cocodataset.org/zips/val2017.zip && unzip -q val2017.zip && \
wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip && unzip -q annotations_trainval2017.zip


--2025-05-16 09:14:31--  http://images.cocodataset.org/zips/train2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.89.204, 52.216.10.3, 52.217.141.105, ...
Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.89.204|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19336861798 (18G) [application/zip]
Saving to: ‘train2017.zip’


2025-05-16 09:21:10 (46.2 MB/s) - ‘train2017.zip’ saved [19336861798/19336861798]

--2025-05-16 09:24:54--  http://images.cocodataset.org/zips/val2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.49.105, 54.231.131.49, 52.216.28.204, ...
Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.49.105|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 815585330 (778M) [application/zip]
Saving to: ‘val2017.zip’


2025-05-16 09:25:08 (56.3 MB/s) - ‘val2017.zip’ saved [815585330/815585330]

--2025-05-16 09:25:20--  http://images.cocodataset

In [4]:
from pycocotools.coco import COCO
from torch.utils.data import Dataset
import os
from PIL import Image

# Custom COCO Dataset class
class CocoDataset(Dataset):
    """COCO images paired with one integer label each, for classification.

    Every item is ``(image, label)``. ``label`` is the ``category_id`` of the
    first annotation returned for the image, or ``0`` when the image has no
    annotations.

    Args:
        root: Directory that contains the image files.
        annFile: Path of the annotation file handed to ``COCO``.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, root, annFile, transform=None):
        self.root = root
        self.coco = COCO(annFile)
        # One dataset entry per image id known to the annotation index.
        self.ids = list(self.coco.imgs.keys())
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the image at position ``index``."""
        img_id = self.ids[index]
        annotations = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
        file_name = self.coco.loadImgs(img_id)[0]['file_name']

        # Close the underlying file as soon as the pixels have been converted;
        # the converted image is what gets passed on.
        with Image.open(os.path.join(self.root, file_name)) as raw_img:
            img = raw_img.convert('RGB')

        # `is not None` so a valid transform that happens to be falsy
        # (e.g. an empty container-style pipeline) is still applied.
        if self.transform is not None:
            img = self.transform(img)

        # For classification, use the first object's category
        label = annotations[0]['category_id'] if annotations else 0
        return img, label

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.ids)

# Preprocessing: resize each image to 224x224, then convert it to a tensor
_preprocess_steps = [transforms.Resize((224, 224)), transforms.ToTensor()]
transform = transforms.Compose(_preprocess_steps)

# Datasets built from the downloaded COCO 2017 train / val folders
train_dataset = CocoDataset(
    root='coco/train2017',
    annFile='coco/annotations/instances_train2017.json',
    transform=transform,
)
test_dataset = CocoDataset(
    root='coco/val2017',
    annFile='coco/annotations/instances_val2017.json',
    transform=transform,
)

# Batches of 32; only the training split is shuffled
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)


loading annotations into memory...
Done (t=17.63s)
creating index...
index created!
loading annotations into memory...
Done (t=0.48s)
creating index...
index created!


In [5]:
# Step 4: Load YOLOv8 Model
yolo_weights = "yolov8m.pt"  # checkpoint name handed to Ultralytics
yolo_model = YOLO(yolo_weights)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt'...


100%|██████████| 49.7M/49.7M [00:00<00:00, 311MB/s]


In [6]:
# Step 5: Load Pretrained Swin Transformer
# Pick the GPU when one is visible, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pretrained Swin-Tiny with a 91-way classification head, placed on `device`.
swin_model = timm.create_model(
    'swin_tiny_patch4_window7_224',
    pretrained=True,
    num_classes=91,
).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/114M [00:00<?, ?B/s]

In [7]:
# Step 6: Train Swin Transformer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(swin_model.parameters(), lr=0.0001)

epochs = 1
for epoch in range(epochs):
    # Select training mode explicitly instead of relying on the model's
    # current state, so re-running this cell after the model has been put in
    # eval mode still trains with train-only layers enabled.
    swin_model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = swin_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Mean of the per-batch losses over the epoch.
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}")

Epoch 1, Loss: 1.4640


In [8]:
# Step 7: Define Combined Inference
def combined_inference(image_tensor):
    """Run YOLOv8 detection and Swin classification on a single image tensor.

    The tensor is converted to a PIL image and written to ``temp.jpg`` so that
    YOLOv8 can read it. The same tensor is then classified by the Swin model
    and the predicted class index is printed.

    Args:
        image_tensor: One image tensor accepted by ``transforms.ToPILImage``
            (presumably 3xHxW as produced by the dataset transform; confirm
            against the caller).

    Returns:
        None. Results are printed.
    """
    pil_img = transforms.ToPILImage()(image_tensor)
    pil_img.save("temp.jpg")
    print("YOLOv8 Detection Results:")
    yolo_model("temp.jpg", show=True)

    batch = image_tensor.unsqueeze(0).to(device)

    # Classify in eval mode so train-only layers (e.g. dropout) are disabled
    # and the prediction is deterministic. Restore the previous mode afterwards
    # so calling this function never changes how later training behaves.
    was_training = swin_model.training
    swin_model.eval()
    try:
        with torch.no_grad():
            outputs = swin_model(batch)
            _, predicted = torch.max(outputs, 1)
    finally:
        swin_model.train(was_training)

    # Class names are just the stringified indices; size the list from the
    # logits so it cannot drift from the model's head width.
    classes = [str(i) for i in range(outputs.shape[1])]
    print(f"Swin Transformer Classification: {classes[predicted.item()]}")

In [9]:
# Step 8: Run Inference
sample_index = 4  # position in the val2017-backed dataset to run through both models
img, label = test_dataset[sample_index]
combined_inference(img)

YOLOv8 Detection Results:


image 1/1 /content/temp.jpg: 640x640 1 bicycle, 5 cars, 36.5ms
Speed: 45.6ms preprocess, 36.5ms inference, 253.9ms postprocess per image at shape (1, 3, 640, 640)
Swin Transformer Classification: 2


In [10]:
# Step 9: Save Model
# Only the parameter state dict is written, not the model object itself.
swin_weights = swin_model.state_dict()
torch.save(swin_weights, 'swin_coco.pth')