In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import glob
import torchvision.ops as ops

# Set device
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')


# Dataset class
class GunsDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert('RGB')
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

# Load dataset
def load_dataset(data_path):
    image_paths = glob.glob(os.path.join(data_path, 'Images', '*.jpeg'))
    labels = []
    for image_path in image_paths:
        label_path = image_path.replace('Images', 'Labels').replace('.jpeg', '.txt')
        with open(label_path, 'r') as file:
            label_lines = file.readlines()
            num_objects = int(label_lines[0].strip())
            bboxes = []
            for line in label_lines[1:]:
                coords = list(map(int, line.strip().split()))
                bboxes.append(coords)
            # Ensure num_objects is 0 or 1
            num_objects = 1 if num_objects > 0 else 0
            labels.append((num_objects, bboxes))
    return image_paths, labels

# Split dataset
def split_dataset(image_paths, labels, train_ratio=0.7, val_ratio=0.2):
    dataset_size = len(image_paths)
    indices = list(range(dataset_size))
    np.random.shuffle(indices)
    
    train_split = int(np.floor(train_ratio * dataset_size))
    val_split = int(np.floor((train_ratio + val_ratio) * dataset_size))
    
    train_indices = indices[:train_split]
    val_indices = indices[train_split:val_split]
    test_indices = indices[val_split:]
    
    train_paths = [image_paths[i] for i in train_indices]
    val_paths = [image_paths[i] for i in val_indices]
    test_paths = [image_paths[i] for i in test_indices]
    
    train_labels = [labels[i] for i in train_indices]
    val_labels = [labels[i] for i in val_indices]
    test_labels = [labels[i] for i in test_indices]
    
    return train_paths, train_labels, val_paths, val_labels, test_paths, test_labels

# Transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Custom collate function
def collate_fn(batch):
    images, targets = zip(*batch)
    num_objects = torch.tensor([t[0] for t in targets])
    bboxes = [torch.tensor(t[1]) for t in targets]
    images = torch.stack(images, dim=0)
    return images, (num_objects, bboxes)

# Initialize dataset and dataloader
data_path = '.'  # Assuming the current directory
image_paths, labels = load_dataset(data_path)
train_paths, train_labels, val_paths, val_labels, test_paths, test_labels = split_dataset(image_paths, labels)
train_dataset = GunsDataset(train_paths, train_labels, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)  # Smaller batch size
val_dataset = GunsDataset(val_paths, val_labels, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

# Define model (Part 1: Overfeat)
class Overfeat(nn.Module):
    def __init__(self, backbone='alexnet'):
        super(Overfeat, self).__init__()
        if backbone == 'alexnet':
            self.features = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1).features[:5]
            self.fc_input_size = self._get_fc_input_size()
        elif backbone == 'efficientnet':
            self.features = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1).features[:5]
            self.fc_input_size = self._get_fc_input_size()
        self.classifier = nn.Sequential(
            nn.Linear(self.fc_input_size, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 2)  # For binary classification
        )
        self.regressor = nn.Sequential(
            nn.Linear(self.fc_input_size, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4)  # For bounding box coordinates
        )

    def _get_fc_input_size(self):
        # Create a dummy input with the same size as the input images
        dummy_input = torch.zeros(1, 3, 224, 224)
        features = self.features(dummy_input)
        return features.view(features.size(0), -1).size(1)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        cls = self.classifier(x)
        bbox = self.regressor(x)
        return cls, bbox

# NMS function
def nms(boxes, scores, iou_threshold=0.5):
    indices = ops.nms(boxes, scores, iou_threshold)
    return indices

# Loss function
def compute_loss(cls_predictions, cls_targets, bbox_predictions, bbox_targets):
    # Ensure the target values are within range
    print(f"cls_targets: {cls_targets}")  # Debugging target values
    assert cls_targets.min() >= 0 and cls_targets.max() < 2, "Targets out of range for binary classification"
    
    classification_loss = nn.CrossEntropyLoss()(cls_predictions, cls_targets)
    bbox_loss = 0
    if bbox_targets is not None:  # Compute regression loss only if ground truth boxes are available
        for pred, target in zip(bbox_predictions, bbox_targets):
            min_len = min(pred.size(0), target.size(0))
            bbox_loss += nn.MSELoss()(pred[:min_len], target[:min_len])
    total_loss = classification_loss + bbox_loss
    return total_loss

# Training loop
def train_model(model, dataloader, optimizer, num_epochs=25):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, (num_objects, bboxes) in dataloader:
            inputs = inputs.to(device)
            num_objects = num_objects.to(device)
            bboxes = [bbox.to(device) for bbox in bboxes]
            
            optimizer.zero_grad()
            cls_predictions, bbox_predictions = model(inputs)
            
            # Apply NMS to bbox_predictions
            bbox_predictions_nms = []
            for i in range(cls_predictions.size(0)):
                scores = cls_predictions[i].softmax(dim=0)[1]  # Assuming class 1 is the relevant class
                boxes = bbox_predictions[i].view(-1, 4)
                scores = scores.view(-1)  # Ensure scores is 1D tensor
                print(f"boxes shape: {boxes.shape}, scores shape: {scores.shape}")  # Debugging shape before NMS
                if boxes.size(0) == scores.size(0):  # Ensure the number of boxes matches the number of scores
                    indices = nms(boxes, scores)
                    bbox_predictions_nms.append(boxes[indices])
                else:
                    bbox_predictions_nms.append(torch.zeros((0, 4)).to(device))
            
            # Ensure bbox_predictions and bbox_targets are the same size after NMS
            max_boxes = max([bbox.size(0) for bbox in bboxes])
            bbox_predictions_padded = []
            bbox_targets_padded = []
            for pred, target in zip(bbox_predictions_nms, bboxes):
                padded_pred = torch.zeros(max_boxes, 4).to(device)
                padded_target = torch.zeros(max_boxes, 4).to(device)
                padded_pred[:pred.size(0), :] = pred
                padded_target[:target.size(0), :] = target
                bbox_predictions_padded.append(padded_pred)
                bbox_targets_padded.append(padded_target)
                
            bbox_predictions_padded = torch.stack(bbox_predictions_padded)
            bbox_targets_padded = torch.stack(bbox_targets_padded)
            
            loss = compute_loss(cls_predictions, num_objects, bbox_predictions_padded, bbox_targets_padded)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(dataloader.dataset)
        print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss:.4f}')
    return model

# Evaluation and comparison
def evaluate_model(model, dataloader):
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for inputs, (num_objects, bboxes) in dataloader:
            inputs = inputs.to(device)
            num_objects = num_objects.to(device)
            bboxes = [bbox.to(device) for bbox in bboxes]
            
            cls_predictions, bbox_predictions = model(inputs)
            
            # Apply NMS to bbox_predictions
            bbox_predictions_nms = []
            for i in range(cls_predictions.size(0)):
                scores = cls_predictions[i].softmax(dim=0)[1]  # Assuming class 1 is the relevant class
                boxes = bbox_predictions[i].view(-1, 4)
                scores = scores.view(-1)  # Ensure scores is 1D tensor
                if boxes.size(0) == scores.size(0):  # Ensure the number of boxes matches the number of scores
                    indices = nms(boxes, scores)
                    bbox_predictions_nms.append(boxes[indices])
                else:
                    bbox_predictions_nms.append(torch.zeros((0, 4)).to(device))
            
            # Ensure bbox_predictions and bbox_targets are the same size after NMS
            max_boxes = max([bbox.size(0) for bbox in bboxes])
            bbox_predictions_padded = []
            bbox_targets_padded = []
            for pred, target in zip(bbox_predictions_nms, bboxes):
                padded_pred = torch.zeros(max_boxes, 4).to(device)
                padded_target = torch.zeros(max_boxes, 4).to(device)
                padded_pred[:pred.size(0), :] = pred
                padded_target[:target.size(0), :] = target
                bbox_predictions_padded.append(padded_pred)
                bbox_targets_padded.append(padded_target)
                
            bbox_predictions_padded = torch.stack(bbox_predictions_padded)
            bbox_targets_padded = torch.stack(bbox_targets_padded)
            
            _, predicted = torch.max(cls_predictions.data, 1)
            total += num_objects.size(0)
            correct += (predicted == num_objects).sum().item()
    accuracy = correct / total
    return accuracy

# Clear GPU cache
torch.cuda.empty_cache()

# Initialize model, optimizer
model = Overfeat().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Train the model
trained_model = train_model(model, train_loader, optimizer, num_epochs=25)

# Evaluate models
overfeat_accuracy = evaluate_model(trained_model, val_loader)
print(f'Overfeat Model Accuracy: {overfeat_accuracy:.4f}')


boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
cls_targets: tensor([1, 1, 1, 1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
cls_targets: tensor([1, 1, 1, 1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
cls_targets: tensor([1, 1, 1, 1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: torch.Size([1, 4]), scores shape: torch.Size([1])
boxes shape: tor

In [3]:
# Evaluation with Confusion Matrix and Example Predictions
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

def evaluate_model_with_confusion_matrix(model, dataloader):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, (num_objects, bboxes) in dataloader:
            inputs = inputs.to(device)
            num_objects = num_objects.to(device)
            bboxes = [bbox.to(device) for bbox in bboxes]
            
            cls_predictions, bbox_predictions = model(inputs)
            
            # Apply NMS to bbox_predictions
            bbox_predictions_nms = []
            for i in range(cls_predictions.size(0)):
                scores = cls_predictions[i].softmax(dim=1)[:, 1]  # Assuming class 1 is the relevant class
                boxes = bbox_predictions[i].view(-1, 4)
                scores = scores.view(-1)  # Ensure scores is 1D tensor
                print(f"boxes shape: {boxes.shape}, scores shape: {scores.shape}")  # Debugging shape before NMS
                if boxes.size(0) == scores.size(0):  # Ensure the number of boxes matches the number of scores
                    indices = nms(boxes, scores)
                    bbox_predictions_nms.append(boxes[indices])
                else:
                    bbox_predictions_nms.append(torch.zeros((0, 4)).to(device))
            
            # Ensure bbox_predictions and bbox_targets are the same size after NMS
            max_boxes = max([bbox.size(0) for bbox in bboxes])
            bbox_predictions_padded = []
            bbox_targets_padded = []
            for pred, target in zip(bbox_predictions_nms, bboxes):
                padded_pred = torch.zeros(max_boxes, 4).to(device)
                padded_target = torch.zeros(max_boxes, 4).to(device)
                padded_pred[:pred.size(0), :] = pred
                padded_target[:target.size(0), :] = target
                bbox_predictions_padded.append(padded_pred)
                bbox_targets_padded.append(padded_target)
                
            bbox_predictions_padded = torch.stack(bbox_predictions_padded)
            bbox_targets_padded = torch.stack(bbox_targets_padded)
            
            _, predicted = torch.max(cls_predictions.data, 1)
            y_true.extend(num_objects.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    
    accuracy = np.sum(np.array(y_true) == np.array(y_pred)) / len(y_true)
    cm = confusion_matrix(y_true, y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Background', 'Gun'])
    disp.plot()
    plt.show()
    
    return accuracy

overfeat_accuracy = evaluate_model_with_confusion_matrix(trained_model, val_loader)
print(f'Overfeat Model Accuracy with Confusion Matrix: {overfeat_accuracy:.4f}')

def show_predictions(model, dataloader, num_examples=5):
    model.eval()
    with torch.no_grad():
        for i, (inputs, (num_objects, bboxes)) in enumerate(dataloader):
            if i >= num_examples:
                break
            inputs = inputs.to(device)
            outputs = model(inputs)
            
            plt.figure(figsize=(10, 10))
            for j in range(inputs.size(0)):
                img = inputs[j].cpu().permute(1, 2, 0).numpy()
                plt.subplot(1, num_examples, j+1)
                plt.imshow(img)
                plt.axis('off')
                
                for box in outputs[j]['boxes']:
                    x1, y1, x2, y2 = box.cpu().numpy()
                    plt.gca().add_patch(plt.Rectangle((x1, y1), x2-x1, y2-y1, edgecolor='red', facecolor='none', linewidth=2))
                    
            plt.show()

show_predictions(trained_model, val_loader, num_examples=5)

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [6]:
import torch
import torch.optim as optim
from ultralytics import YOLO

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize YOLOv3-tiny model
model = YOLO('yolov3-tiny.yaml').to(device)

# Fine-tune YOLO model
def fine_tune_yolo(model, dataloader, optimizer, num_epochs=25):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, (num_objects, bboxes) in dataloader:
            inputs = inputs.to(device)
            targets = []
            for i in range(len(num_objects)):
                target = {}
                target['boxes'] = bboxes[i].to(device).float()  # Ensure the boxes are float
                target['labels'] = torch.ones(num_objects[i], dtype=torch.int64).to(device)
                targets.append(target)
            
            optimizer.zero_grad()
            loss_dict = model(inputs, targets)
            losses = sum(loss for loss in loss_dict.values())
            losses.backward()
            optimizer.step()
            
            running_loss += losses.item() * inputs.size(0)
        epoch_loss = running_loss / len(dataloader.dataset)
        print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss:.4f}')
    return model

# Define optimizer for YOLO
optimizer_yolo = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Train YOLO model
trained_yolo = fine_tune_yolo(model, train_loader, optimizer_yolo, num_epochs=25)

# Save the model
torch.save(trained_yolo.state_dict(), 'yolo_model.pth')


TypeError: Detect.__init__() takes from 1 to 3 positional arguments but 4 were given

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

def evaluate_yolo(model, dataloader):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, (num_objects, bboxes) in dataloader:
            inputs = inputs.to(device)
            targets = []
            for i in range(len(num_objects)):
                target = {}
                target['boxes'] = bboxes[i].to(device).float()  # Ensure the boxes are float
                target['labels'] = torch.ones(num_objects[i], dtype=torch.int64).to(device)
                targets.append(target)
            
            outputs = model(inputs)
            
            for i in range(len(outputs)):
                pred_boxes = outputs[i]['boxes']
                pred_labels = outputs[i]['labels']
                true_boxes = targets[i]['boxes']
                true_labels = targets[i]['labels']
                
                # Assuming a single class for simplicity
                for true_label in true_labels:
                    y_true.append(true_label.item())
                    if pred_labels.numel() > 0:
                        y_pred.append(pred_labels[0].item())  # Assuming the first predicted label
                    else:
                        y_pred.append(0)  # Assuming 0 as the background class

    # Calculate and display confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Background', 'Gun'])
    disp.plot()
    plt.show()

    # Calculate accuracy
    accuracy = np.trace(cm) / np.sum(cm)
    return accuracy

# Evaluate models
yolo_accuracy = evaluate_yolo(trained_yolo, val_loader)
print(f'YOLO Model Accuracy: {yolo_accuracy:.4f}')


In [None]:
def show_predictions(model, dataloader, num_examples=5):
    model.eval()
    with torch.no_grad():
        for i, (inputs, (num_objects, bboxes)) in enumerate(dataloader):
            if i >= num_examples:
                break
            inputs = inputs.to(device)
            outputs = model(inputs)
            
            plt.figure(figsize=(10, 10))
            for j in range(inputs.size(0)):
                img = inputs[j].cpu().permute(1, 2, 0).numpy()
                plt.subplot(1, num_examples, j+1)
                plt.imshow(img)
                plt.axis('off')
                
                for box in outputs[j]['boxes']:
                    x1, y1, x2, y2 = box.cpu().numpy()
                    plt.gca().add_patch(plt.Rectangle((x1, y1), x2-x1, y2-y1, edgecolor='red', facecolor='none', linewidth=2))
                    
            plt.show()

# Show examples
show_predictions(trained_yolo, val_loader, num_examples=5)
