In [15]:
import os

# Directories
image_dir = r"C:\Users\cheng\Documents\VSC\Higher_level_CV\Examination_project\Advanced_version_for_accuracy_faster_RCNN\annotated_images"
label_dir = r"C:\Users\cheng\Documents\VSC\Higher_level_CV\Examination_project\Advanced_version_for_accuracy_faster_RCNN\labeled_data"

# File types to compare (matched case-insensitively)
image_extensions = ('.jpg', '.png', '.jpeg')
label_extension = '.txt'


def collect_stems(base_dir, extensions):
    """Return the extension-less relative paths of all matching files under ``base_dir``.

    Args:
        base_dir: Directory that is walked recursively.
        extensions: A single suffix or a tuple of suffixes (e.g. ``('.jpg', '.png')``).
            Matching ignores case, so ``photo.JPG`` matches ``'.jpg'``.

    Returns:
        A set of paths relative to ``base_dir`` with the file extension removed,
        e.g. ``{'Apple_Bad/img_001'}``. A missing directory yields an empty set.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    stems = set()
    for root, _, files in os.walk(base_dir):
        for name in files:
            if name.lower().endswith(suffixes):
                # Keep the sub-folder in the key: two class folders may contain files
                # with the same name, and a label only belongs to an image when it
                # sits in the same relative folder.
                rel_path = os.path.relpath(os.path.join(root, name), base_dir)
                stems.add(os.path.splitext(rel_path)[0])
    return stems


# Collect relative filenames without extensions
image_files = collect_stems(image_dir, image_extensions)
label_files = collect_stems(label_dir, label_extension)

# Find matches and mismatches
matched_files = image_files & label_files  # Files present in both
unmatched_images = image_files - label_files  # Images without labels
unmatched_labels = label_files - image_files  # Labels without images

# Output results
print(f"Total images: {len(image_files)}")
print(f"Total annotations: {len(label_files)}")
print(f"Matched files: {len(matched_files)}")
print(f"Unmatched images (no annotations): {len(unmatched_images)}")
print(f"Unmatched labels (no images): {len(unmatched_labels)}")

# List examples of unmatched files (if any); sorted so the output is stable between runs
if unmatched_images:
    print("\nExamples of unmatched images:")
    print("\n".join(sorted(unmatched_images)[:5]))

if unmatched_labels:
    print("\nExamples of unmatched labels:")
    print("\n".join(sorted(unmatched_labels)[:5]))



Total images: 1984
Total annotations: 1984
Matched files: 1984
Unmatched images (no annotations): 0
Unmatched labels (no images): 0


In [2]:
import os
import json
from tqdm import tqdm
from PIL import Image

# Define paths
image_dir = r'C:\Users\cheng\Documents\VSC\Higher_level_CV\Examination_project\Advanced_version_for_accuracy_faster_RCNN\annotated_images'
label_dir = r'C:\Users\cheng\Documents\VSC\Higher_level_CV\Examination_project\Advanced_version_for_accuracy_faster_RCNN\labeled_data'
output_json_file = r'C:\Users\cheng\Documents\VSC\Higher_level_CV\Examination_project\Advanced_version_for_accuracy_faster_RCNN\coco_format_data.json'

# Create COCO format dictionary
coco_data = {
    'images': [],
    'annotations': [],
    'categories': []
}

# Class mapping (each fruit): class folder name -> COCO category id
class_mapping = {
    "Apple_Bad": 1,
    "Apple_Good": 2,
    "Banana_Bad": 3,
    "Banana_Good": 4,
    "Orange_Bad": 5,
    "Orange_Good": 6
}

# The categories above are numbered from 1, whereas YOLO label files conventionally
# number classes from 0.
# NOTE(review): assumes the label files use zero-based ids in the same order as
# class_mapping - confirm against the annotation tool's class list.
YOLO_TO_COCO_ID_OFFSET = 1

IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')

# Running ids (COCO ids start at 1 here)
annotation_id = 1
image_id = 1

# Loop over the class folders and process their images
for class_folder in class_mapping:
    class_folder_path = os.path.join(image_dir, class_folder)

    if not os.path.isdir(class_folder_path):
        continue

    # Sorted so that image/annotation ids are reproducible between runs
    for img_file in tqdm(sorted(os.listdir(class_folder_path))):
        if not img_file.lower().endswith(IMAGE_SUFFIXES):
            continue

        img_path = os.path.join(class_folder_path, img_file)
        stem = os.path.splitext(img_file)[0]

        # Labels are stored under label_dir/<class folder>/; fall back to a .txt next
        # to the image so that layout keeps working as well.
        label_path = os.path.join(label_dir, class_folder, stem + '.txt')
        if not os.path.exists(label_path):
            label_path = os.path.splitext(img_path)[0] + '.txt'

        # Only the size is needed; the context manager closes the file handle
        with Image.open(img_path) as img:
            width, height = img.size

        # Add image information
        coco_data['images'].append({
            'id': image_id,
            'file_name': img_file,
            'width': width,
            'height': height
        })

        # Check if label file exists and read it
        if os.path.exists(label_path):
            with open(label_path, 'r') as label_file:
                for line in label_file:
                    parts = line.strip().split()
                    if len(parts) != 5:
                        continue

                    # YOLO row: class_id x_center y_center width height (normalised to 0..1)
                    yolo_class_id = int(float(parts[0]))
                    x_center, y_center, norm_width, norm_height = map(float, parts[1:])

                    # Convert to COCO bounding box format (x_min, y_min, width, height) in pixels
                    x_min = (x_center - norm_width / 2) * width
                    y_min = (y_center - norm_height / 2) * height
                    box_width = norm_width * width
                    box_height = norm_height * height

                    # Add annotation
                    coco_data['annotations'].append({
                        'id': annotation_id,
                        'image_id': image_id,
                        'category_id': yolo_class_id + YOLO_TO_COCO_ID_OFFSET,
                        'bbox': [x_min, y_min, box_width, box_height],
                        'area': box_width * box_height,
                        'iscrowd': 0
                    })
                    annotation_id += 1

        # Increment image_id
        image_id += 1

# Add category info
coco_data['categories'] = [{'id': class_id, 'name': class_name} for class_name, class_id in class_mapping.items()]

# Save to JSON
with open(output_json_file, 'w') as json_file:
    json.dump(coco_data, json_file)
print(f"COCO formatted data saved to {output_json_file}")



100%|██████████| 441/441 [00:03<00:00, 122.78it/s]
100%|██████████| 368/368 [00:03<00:00, 107.98it/s]
100%|██████████| 349/349 [00:02<00:00, 131.34it/s]
100%|██████████| 38/38 [00:00<00:00, 170.14it/s]
100%|██████████| 449/449 [00:03<00:00, 136.66it/s]
100%|██████████| 339/339 [00:02<00:00, 113.79it/s]

COCO formatted data saved to C:\Users\cheng\Documents\VSC\Higher_level_CV\Examination_project\Advanced_version_for_accuracy_faster_RCNN\coco_format_data.json





In [None]:
import os

import matplotlib.pyplot as plt
import torch
from PIL import Image
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchmetrics.classification import Accuracy
from torchvision import transforms, models
from torchvision.ops import box_iou

# Custom dataset class
class CustomDataset(Dataset):
    """Object-detection dataset that pairs images with YOLO-format label files.

    Expected layout: ``image_dir/<class folder>/<name>.<jpg|jpeg|png>`` with the
    matching annotations in ``label_dir/<class folder>/<name>.txt``. Images without
    a label file are skipped.

    Each item is ``(image, target)`` where ``target`` is a dict with
    ``'boxes'`` (float32 tensor of shape ``(N, 4)``, pixel ``[x_min, y_min, x_max, y_max]``)
    and ``'labels'`` (int64 tensor of shape ``(N,)``).
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, image_dir, label_dir, transform=None, label_offset=1):
        """Index every image that has a label file.

        Args:
            image_dir: Root folder containing one sub-folder per class with images.
            label_dir: Root folder containing one sub-folder per class with ``.txt`` labels.
            transform: Optional callable applied to the PIL image.
            label_offset: Value added to every class id read from the label files.
                The default of 1 keeps label 0 free for the background class used by
                torchvision detection models.
                NOTE(review): assumes the label files use zero-based class ids - confirm.
        """
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        self.label_offset = label_offset
        self.image_paths = []
        self.label_paths = []

        # Walk through subfolders (e.g., Apple_Bad, Apple_Good, etc.); sorted so the
        # sample order does not depend on the file system.
        for class_folder in sorted(os.listdir(image_dir)):
            class_image_folder = os.path.join(image_dir, class_folder)
            class_label_folder = os.path.join(label_dir, class_folder)
            if not (os.path.isdir(class_image_folder) and os.path.isdir(class_label_folder)):
                continue
            for image_file in sorted(os.listdir(class_image_folder)):
                if not image_file.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue
                label_file = os.path.splitext(image_file)[0] + ".txt"
                label_path = os.path.join(class_label_folder, label_file)
                if os.path.exists(label_path):
                    self.image_paths.append(os.path.join(class_image_folder, image_file))
                    self.label_paths.append(label_path)

    def __len__(self):
        """Return the number of image/label pairs found."""
        return len(self.image_paths)

    def _parse_annotations(self, lines, image_width, image_height):
        """Convert YOLO rows into pixel-space corner boxes and shifted labels.

        Args:
            lines: Iterable of text rows ``class_id x_center y_center width height``
                with coordinates normalised to the image size.
            image_width: Width in pixels used to de-normalise x values.
            image_height: Height in pixels used to de-normalise y values.

        Returns:
            ``(boxes, labels)``: a float32 tensor of shape ``(N, 4)`` and an int64
            tensor of shape ``(N,)``. Rows that do not have exactly five fields
            (e.g. blank lines) are ignored.
        """
        boxes = []
        labels = []
        for row in lines:
            fields = row.strip().split()
            if len(fields) != 5:
                continue
            class_id, x_center, y_center, width, height = map(float, fields)
            x_center, width = x_center * image_width, width * image_width
            y_center, height = y_center * image_height, height * image_height
            boxes.append([
                x_center - width / 2,
                y_center - height / 2,
                x_center + width / 2,
                y_center + height / 2,
            ])
            labels.append(int(class_id) + self.label_offset)

        # reshape keeps the (N, 4) layout even when there are no boxes at all
        box_tensor = torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        label_tensor = torch.tensor(labels, dtype=torch.int64)
        return box_tensor, label_tensor

    @staticmethod
    def _image_size(image):
        """Return ``(width, height)`` of a tensor or PIL-like image, or ``None`` if unknown."""
        if torch.is_tensor(image):
            height, width = image.shape[-2:]
            return int(width), int(height)
        if hasattr(image, "width") and hasattr(image, "height"):
            return image.width, image.height
        return None

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)``.

        When the transform changes the image size, the boxes are rescaled by the same
        factors so they keep pointing at the same objects. Other geometric transforms
        (flips, crops, rotations) are not compensated for.
        """
        image_path = self.image_paths[idx]
        label_path = self.label_paths[idx]

        with Image.open(image_path) as handle:
            image = handle.convert("RGB")
        orig_width, orig_height = image.size

        with open(label_path, "r") as file:
            boxes, labels = self._parse_annotations(file, orig_width, orig_height)

        if self.transform:
            image = self.transform(image)
            new_size = self._image_size(image)
            if new_size is not None and new_size != (orig_width, orig_height) and boxes.numel():
                scale_x = new_size[0] / orig_width
                scale_y = new_size[1] / orig_height
                boxes = boxes * torch.tensor([scale_x, scale_y] * 2, dtype=boxes.dtype)

        target = {'boxes': boxes, 'labels': labels}

        return image, target

# Define transformations
# NOTE(review): Resize changes the image size, so the dataset has to scale its pixel
# boxes by the same factors - confirm that CustomDataset does so.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((800, 800)),
])


# Define paths for images and labels
image_dir = r'C:\Users\cheng\Documents\VSC\Higher_level_CV\Examination_project\Advanced_version_for_accuracy_faster_RCNN\annotated_images'  
label_dir = r'C:\Users\cheng\Documents\VSC\Higher_level_CV\Examination_project\Advanced_version_for_accuracy_faster_RCNN\labeled_data'


# Instantiate the dataset
dataset = CustomDataset(image_dir=image_dir, label_dir=label_dir, transform=transform)

# Fail early with a clear message instead of an obscure sampler error later on
if len(dataset) == 0:
    raise RuntimeError(
        f"No image/label pairs found under {image_dir!r} and {label_dir!r}; check the paths."
    )

# Train-Validation Split (80% train, 20% validation).
# A fixed seed keeps the same images in the validation set after a kernel restart,
# so metrics stay comparable between runs.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Paths for images and labels



In [25]:

# DataLoaders
def detection_collate(batch):
    """Group a list of ``(image, target)`` samples into ``(images, targets)`` tuples.

    The default collate function tries to stack the targets into one tensor, which
    fails for detection because every image has a different number of boxes.
    """
    return tuple(zip(*batch))


# Worker processes have to re-import the dataset class; on Windows that is known to
# fail for classes defined inside a notebook, so loading stays in the main process there.
NUM_WORKERS = 0 if os.name == "nt" else 4

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True,
                          num_workers=NUM_WORKERS, collate_fn=detection_collate)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False,
                        num_workers=NUM_WORKERS, collate_fn=detection_collate)

# Load pre-trained Faster R-CNN model
model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Modify classifier head.
# Six fruit categories (Apple/Banana/Orange x Good/Bad) plus the background class.
# The head size must be the number of categories, not the number of label files.
NUM_OBJECT_CLASSES = 6
num_classes = NUM_OBJECT_CLASSES + 1  # Adding 1 for background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

# Set device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Optimizer (the detection model computes its own losses in train mode)
optimizer = optim.Adam(model.parameters(), lr=1e-5)

# Accuracy metrics
accuracy_metric_train = Accuracy(task='multiclass', num_classes=num_classes).to(device)
accuracy_metric_val = Accuracy(task='multiclass', num_classes=num_classes).to(device)

In [26]:
# Epoch budget and the per-epoch history buffers filled by the training/validation loop
num_epochs = 10
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []


In [None]:

# Thresholds for the detection "accuracy" reported below
IOU_MATCH_THRESHOLD = 0.5   # minimum overlap for a detection to count as a hit
SCORE_THRESHOLD = 0.5       # detections below this confidence are ignored


def count_matched_boxes(outputs, targets,
                        iou_threshold=IOU_MATCH_THRESHOLD,
                        score_threshold=SCORE_THRESHOLD):
    """Count ground-truth boxes that are recovered by at least one detection.

    A ground-truth box counts as matched when some detection with
    ``score >= score_threshold`` has the same label and an IoU of at least
    ``iou_threshold`` with it. One detection may match several ground-truth boxes,
    so this is a simple recall-style score rather than a strict one-to-one matching.

    Args:
        outputs: Per-image dicts with ``'boxes'``, ``'labels'`` and ``'scores'``.
        targets: Per-image dicts with ``'boxes'`` and ``'labels'``.

    Returns:
        ``(matched, total)`` summed over all images of the batch.
    """
    matched, total = 0, 0
    for output, target in zip(outputs, targets):
        gt_boxes, gt_labels = target['boxes'], target['labels']
        total += len(gt_labels)

        confident = output['scores'] >= score_threshold
        pred_boxes = output['boxes'][confident]
        pred_labels = output['labels'][confident]
        if len(gt_labels) == 0 or len(pred_labels) == 0:
            continue

        overlaps = box_iou(gt_boxes, pred_boxes)  # (num_gt, num_pred)
        same_label = gt_labels[:, None] == pred_labels[None, :]
        hits = (overlaps >= iou_threshold) & same_label
        matched += int(hits.any(dim=1).sum().item())
    return matched, total


def run_epoch(loader, training):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        loader: DataLoader yielding ``(images, targets)`` batches.
        training: When True the weights are updated; otherwise the pass is
            evaluation only and no gradients are computed.
    """
    loss_total = 0.0
    matched_total, box_total = 0, 0

    for images, targets in loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # torchvision detection models only return the loss dict in train mode, so the
        # loss is always computed in that mode (without gradients when validating).
        # NOTE(review): this relies on the pretrained backbone using frozen batch-norm,
        # so that train mode does not change any statistics during validation - confirm.
        model.train()
        if training:
            optimizer.zero_grad()
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            losses.backward()
            optimizer.step()
        else:
            with torch.no_grad():
                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
        loss_total += losses.item()

        # Detections are only returned in eval mode
        model.eval()
        with torch.no_grad():
            outputs = model(images)
        matched, total = count_matched_boxes(outputs, targets)
        matched_total += matched
        box_total += total

    mean_loss = loss_total / max(len(loader), 1)
    accuracy = matched_total / box_total if box_total else 0.0
    return mean_loss, accuracy


def plot_history(train_values, val_values, metric_name):
    """Plot the per-epoch training and validation curves of one metric."""
    epochs = range(1, len(train_values) + 1)
    plt.figure(figsize=(10, 5))
    plt.plot(epochs, train_values, label=f"Train {metric_name}")
    plt.plot(epochs, val_values, label=f"Val {metric_name}")
    plt.xlabel("Epochs")
    plt.ylabel(metric_name)
    plt.title(f"Training and Validation {metric_name}")
    plt.legend()
    plt.show()


for epoch in range(num_epochs):
    avg_train_loss, train_accuracy = run_epoch(train_loader, training=True)
    train_losses.append(avg_train_loss)
    train_accuracies.append(train_accuracy)

    avg_val_loss, val_accuracy = run_epoch(val_loader, training=False)
    val_losses.append(avg_val_loss)
    val_accuracies.append(val_accuracy)

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, "
          f"Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

# Save the model
torch.save(model.state_dict(), 'faster_rcnn_finetuned.pth')
print("Model fine-tuning complete and saved.")

# Plot the training and validation loss
plot_history(train_losses, val_losses, "Loss")

# Plot the training and validation accuracy
plot_history(train_accuracies, val_accuracies, "Accuracy")

In [None]:
# Write the model's state dict to disk, then confirm on stdout
checkpoint_path = 'faster_rcnn_finetuned.pth'
torch.save(model.state_dict(), checkpoint_path)
print("Model fine-tuning complete and saved.")
