In [2]:
pip install efficientnet-pytorch





In [3]:
import torch
import torchvision
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from efficientnet_pytorch import EfficientNet
from torchvision.models.detection import FasterRCNN
import time
from sklearn.metrics import precision_recall_curve, average_precision_score
import numpy as np


In [14]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms
import pandas as pd

class RoadSignDataset(Dataset):
    def __init__(self, image_dir, annotation_file, transform=None):
        """
        Args:
            image_dir (str): Path to the directory containing the images.
            annotation_file (str): Path to the annotation file (gt.txt).
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.image_dir = image_dir
        self.annotation_file = annotation_file
        self.transform = transform
        
        # Load the annotations from the gt.txt file
        self.annotations = self.load_annotations()
    
    def load_annotations(self):
        """
        Load the annotations from the `gt.txt` file.
        Assumes the format: image_name;xmin;ymin;xmax;ymax;class_label
        """
        annotations = pd.read_csv(self.annotation_file, header=None, delimiter=';',
                                names=['image_name', 'xmin', 'ymin', 'xmax', 'ymax', 'class_label'])
        return annotations
    
    def __len__(self):
        return len(self.annotations)
    
    def __getitem__(self, idx):
        # Get the annotation for this index
        annotation = self.annotations.iloc[idx]
        
        # Load the image
        image_path = os.path.join(self.image_dir, annotation['image_name'])
        image = Image.open(image_path).convert("RGB")  # Open image as RGB
        
        # Get the bounding box coordinates and label
        boxes = torch.tensor([[
            float(annotation['xmin']),
            float(annotation['ymin']),
            float(annotation['xmax']),
            float(annotation['ymax'])
        ]], dtype=torch.float32)
        
        # Calculate area
        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        
        # Prepare the target dictionary
        target = {
            'boxes': boxes,
            'labels': torch.tensor([int(annotation['class_label'])], dtype=torch.int64),
            'image_id': torch.tensor([idx]),
            'area': area,
            'iscrowd': torch.tensor([0], dtype=torch.int64)
        }
        
        if self.transform:
            image = self.transform(image)
        
        return image, target

def collate_fn(batch):
    """
    Custom collate function for the DataLoader to properly batch the images and targets
    """
    images = []
    targets = []
    for img, tgt in batch:
        images.append(img)
        targets.append(tgt)
    return images, targets

# Define the transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create dataset instances
train_dataset = RoadSignDataset(
    image_dir='D:/Work/DL/A3/ppm_images/train/images',
    annotation_file='D:/Work/DL/A3/ppm_images/train/gt_train.txt',
    transform=transform
)

val_dataset = RoadSignDataset(
    image_dir='D:/Work/DL/A3/ppm_images/validate/images',
    annotation_file='D:/Work/DL/A3/ppm_images/validate/gt_validate.txt',
    transform=transform
)

# Create DataLoaders with the custom collate_fn
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    collate_fn=collate_fn  # Add this line
)

val_loader = DataLoader(
    val_dataset,
    batch_size=16,
    shuffle=False,
    collate_fn=collate_fn  # Add this line
)

In [None]:
import torch
import torch.nn as nn
from torchvision.models import mobilenet_v3_small
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from collections import OrderedDict
import gc

class MobileNetBackbone(nn.Module):
    def __init__(self, pretrained=True):
        super().__init__()
        # Load pretrained MobileNetV3 Small (much lighter than EfficientNet)
        mobile_net = mobilenet_v3_small(pretrained=pretrained)
        
        # Use features up to the last layer
        self.features = mobile_net.features
        
        # MobileNetV3 Small output channels
        self.out_channels = 576
        
    def forward(self, x):
        x = self.features(x)
        return OrderedDict([('0', x)])

def create_model(num_classes, pretrained=True):
    # Create backbone
    backbone = MobileNetBackbone(pretrained=pretrained)
    
    # Create anchor generator with reduced anchors
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128),),  # Reduced number of anchor sizes
        aspect_ratios=((0.5, 1.0, 2.0),)
    )
    
    
    from torchvision.ops import MultiScaleRoIAlign
    roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0'],
        output_size=5,  
        sampling_ratio=2
    )
    
    
    model = FasterRCNN(
        backbone=backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler,
        
        min_size=300,  
        max_size=500,
        rpn_pre_nms_top_n_train=500,   
        rpn_pre_nms_top_n_test=250,    
        rpn_post_nms_top_n_train=250,  
        rpn_post_nms_top_n_test=125,   
        rpn_batch_size_per_image=64,   
        batch_size_per_image=32,       
        rpn_score_thresh=0.05
    )
    
    return model

# Memory optimization helper
def clear_memory():
    gc.collect()
    torch.cuda.empty_cache()

# Create transforms with smaller image size
transform = transforms.Compose([
    transforms.Resize((300, 300)),  # Smaller size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 44  # Adjust based on your dataset
model = create_model(num_classes=num_classes, pretrained=True)
model.to(device)

# Enable mixed precision training
scaler = torch.cuda.amp.GradScaler()

# Use a memory-efficient optimizer
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=0.0001,
    weight_decay=0.0001
)

# Learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1
)

# Update DataLoader with smaller batch size
train_loader = DataLoader(
    train_dataset,
    batch_size=4,  # Can try larger batch size due to smaller model
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=0,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=collate_fn,
    num_workers=0,
    pin_memory=True
)

  scaler = torch.cuda.amp.GradScaler()


In [None]:
import torch

def train_one_epoch(model, optimizer, data_loader, device, epoch, scaler):
    """
    Train model for one epoch with mixed precision.
    
    Args:
        model: The neural network model
        optimizer: The optimizer
        data_loader: DataLoader for training data
        device: Device to train on (cuda/cpu)
        epoch: Current epoch number
        scaler: GradScaler for mixed precision training
    
    Returns:
        float: Average loss for this epoch
    """
    model.train()
    total_loss = 0
    
    print(f"Epoch {epoch}")
    
    for i, (images, targets) in enumerate(data_loader):
       
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        
        with torch.amp.autocast('cuda'):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
        
       
        optimizer.zero_grad()
        scaler.scale(losses).backward()
        scaler.step(optimizer)
        scaler.update()
        
        
        total_loss += losses.item()
        
        
        if i % 10 == 0:
            avg_loss = total_loss / (i + 1)
            print(f"Iteration: {i}, Average Loss: {avg_loss:.4f}")
        
       
        del images, targets, losses, loss_dict
        torch.cuda.empty_cache()
    
    
    avg_loss = total_loss / len(data_loader)
    print(f"Epoch {epoch} finished. Average Loss: {avg_loss:.4f}")
    
    return avg_loss

In [None]:

scaler = torch.amp.GradScaler()  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


num_epochs = 10
for epoch in range(1, num_epochs + 1):
    avg_loss = train_one_epoch(
        model=model,
        optimizer=optimizer,
        data_loader=train_loader,
        device=device,
        epoch=epoch,
        scaler=scaler
    )


Epoch 1
Iteration: 0, Average Loss: 31.9767
Iteration: 10, Average Loss: 26.8436
Iteration: 20, Average Loss: 26.5034
Iteration: 30, Average Loss: 24.9370
Iteration: 40, Average Loss: 23.7827
Iteration: 50, Average Loss: 22.7474
Iteration: 60, Average Loss: 21.7532
Iteration: 70, Average Loss: 21.1665
Iteration: 80, Average Loss: 20.5426
Iteration: 90, Average Loss: 19.8081
Iteration: 100, Average Loss: 19.3473
Iteration: 110, Average Loss: 18.8817
Iteration: 120, Average Loss: 18.4342
Iteration: 130, Average Loss: 18.2189
Iteration: 140, Average Loss: 17.9442
Iteration: 150, Average Loss: 17.7413
Iteration: 160, Average Loss: 17.4693
Iteration: 170, Average Loss: 17.2666
Epoch 1 finished. Average Loss: 17.2666
Epoch 2
Iteration: 0, Average Loss: 9.6163
Iteration: 10, Average Loss: 13.2426
Iteration: 20, Average Loss: 13.3852
Iteration: 30, Average Loss: 13.2017
Iteration: 40, Average Loss: 13.1833
Iteration: 50, Average Loss: 13.3159
Iteration: 60, Average Loss: 13.3343
Iteration: 70,

In [None]:

torch.save(model.state_dict(), 'model.pth')


In [None]:

torch.save(model, 'model_full.pth')
