In [1]:
import os
import json
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import ssdlite320_mobilenet_v3_large
from torchvision.transforms import functional as F
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import torch.optim as optim


In [2]:

# Define the custom dataset class
class CustomDataset(Dataset):
    def __init__(self, image_dir, annotation_file):
        self.image_dir = image_dir
        self.annotation_file = annotation_file
        self.images = sorted([f for f in os.listdir(image_dir) if f.endswith('.jpg')])

        # Load annotations
        with open(annotation_file, 'r') as f:
            self.annotations_data = json.load(f)
            self.annotations = self.annotations_data.get('annotations', [])
            self.categories = {cat['id']: cat['name'] for cat in self.annotations_data.get('categories', [])}
            self.image_id_map = {img['id']: img['file_name'] for img in self.annotations_data.get('images', [])}

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        image = Image.open(img_path).convert("RGB")

        # Get image ID
        img_name = self.images[idx]
        image_id = None
        for img_id, file_name in self.image_id_map.items():
            if file_name == img_name:
                image_id = img_id
                break

        if image_id is None:
            print(f"No matching image ID found for image: {img_name}")
            return None, None

        # Load annotations for the current image
        annotations = [ann for ann in self.annotations if ann['image_id'] == image_id]

        # Convert annotations to the format expected by SSD
        boxes = []
        labels = []
        for ann in annotations:
            x_min, y_min, width, height = ann['bbox']
            if width > 0 and height > 0:  # Ensure that width and height are positive
                boxes.append([x_min, y_min, x_min + width, y_min + height])
                labels.append(ann['category_id'])
            else:
                print(f"Skipping invalid box with non-positive dimensions: {ann['bbox']}")

        # Skip images with no valid annotations (no bounding boxes)
        if len(boxes) == 0:
            print(f"No valid bounding boxes found for image: {img_name}. Skipping.")
            return None, None

        # Convert boxes and labels to the format expected by SSD
        target = {}
        target['boxes'] = torch.as_tensor(boxes, dtype=torch.float32)
        target['labels'] = torch.as_tensor(labels, dtype=torch.int64)
        target['image_id'] = torch.tensor([idx])

        # Transform image
        image = self.transform(image)

        # Return image and target as a dictionary
        return image, target

    def transform(self, image):
        # Convert image to tensor
        image = F.to_tensor(image)
        return image





In [None]:
# Custom collate function to filter out None values
def custom_collate_fn(batch):
    # Filter out None values
    batch = list(filter(lambda x: x[0] is not None, batch))
    if len(batch) == 0:
        return torch.tensor([]), torch.tensor([])
    images, targets = zip(*batch)
    return list(images), list(targets)

# Load the pre-trained model
model = ssdlite320_mobilenet_v3_large(weights='DEFAULT')
model.train()


Downloading: "https://download.pytorch.org/models/ssdlite320_mobilenet_v3_large_coco-a79551df.pth" to /root/.cache/torch/hub/checkpoints/ssdlite320_mobilenet_v3_large_coco-a79551df.pth
100%|██████████| 13.4M/13.4M [00:00<00:00, 79.1MB/s]


SSD(
  (backbone): SSDLiteFeatureExtractorMobileNet(
    (features): Sequential(
      (0): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (2): Hardswish()
        )
        (1): InvertedResidual(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
              (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
              (2): ReLU(inplace=True)
            )
            (1): Conv2dNormActivation(
              (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
            )
          )
        )
        (2): Invert

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ls /content/drive/MyDrive/Colab Notebooks/


ls: cannot access '/content/drive/MyDrive/Colab': No such file or directory
ls: cannot access 'Notebooks/': No such file or directory


In [None]:
for image_info in self.annotations_data['images']:
    if image_info['file_name'] == 'katrina-free-lime-008_png.rf.bb64bc3965dce36f4a4d8a69f3e57478 (1).jpg':
        print("Found matching image:", image_info)


NameError: name 'self' is not defined

In [None]:

# Define paths
image_dir = '/content/drive/MyDrive/Colab Notebooks/training'
annotation_file = '/content/drive/MyDrive/Colab Notebooks/_annotations.coco.json'
# Path to your annotations file


#Initialize the dataset and dataloader with custom collate function
train_dataset = CustomDataset(image_dir, annotation_file)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=custom_collate_fn)

# Define the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)


#Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    epoch_losses = []  # Store losses for each batch in the epoch
    for images, targets in train_loader:
        # Skip empty batches
        if len(images) == 0:
            continue

        # Ensure targets is a list of dictionaries
        if not isinstance(targets, list):
            targets = [targets]

        # Perform training step
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Store the loss for this batch
        epoch_losses.append(losses.item())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    # Calculate and print the average loss for the epoch
    avg_loss = sum(epoch_losses) / len(epoch_losses) if epoch_losses else 0.0
    print(f"Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}")

# Save the model
model_save_path = '/content/drive/MyDrive/ssd_model_final.pth'
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")

# Define the transform function for input images
def transform_image(image):
    image = F.to_tensor(image).unsqueeze(0)
    return image

# Define the inference function
def infer(image, model):
    image = transform_image(image)
    with torch.no_grad():
        prediction = model(image)
    return prediction

# Visualize the results
def plot_results(image, prediction):
    # Draw bounding boxes
    draw = ImageDraw.Draw(image)
    boxes = prediction[0]['boxes']
    labels = prediction[0]['labels']
    scores = prediction[0]['scores']
    for i, box in enumerate(boxes):
        if scores[i] > 0.5:  # Filter out low-confidence detections
            draw.rectangle(box.tolist(), outline='red', width=3)
            label_name = train_dataset.categories.get(labels[i].item(), 'Unknown')
            draw.text((box[0], box[1]), f'{label_name}, Score: {scores[i].item():.2f}', fill='red')
    plt.imshow(image)
    plt.show()


No matching image ID found for image: katrina-free-lime-007_png.rf.14388dd0cd45922e4840de5461e9a59c (1).jpg
No matching image ID found for image: Aijia-free-lime-262_png.rf.4ff65350054c03ffea4b207061b046c2 (1).jpg
No matching image ID found for image: katrina-cracks-112_png.rf.439a00a5b89fb8e0fb7c0b5a115987f0 (1).jpg
No matching image ID found for image: katrina-free-lime-005_png.rf.93f0ab4bf056768a64e46345bc1bd3d9 (1).jpg
No matching image ID found for image: Aijia-free-lime-257_png.rf.1817140b7d048fbbb483474475f03995 (1).jpg
No matching image ID found for image: katrina-cracks-107_png.rf.c405a8dd04849ac853464febb7f0492e (1).jpg
No matching image ID found for image: katrina-cracks-105_png.rf.795314d211d18c0fc23017ebc8a6c782 (1).jpg
No matching image ID found for image: katrina-free-lime-008_png.rf.bb64bc3965dce36f4a4d8a69f3e57478 (1).jpg
No matching image ID found for image: Aijia-free-lime-259_png.rf.c919fe0fa913b2ccda01db1a88315472 (1).jpg
No matching image ID found for image: Aijia

KeyboardInterrupt: 

In [None]:
import os
import json
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
import torch.optim as optim
import torch.nn.functional as F
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt

# Define paths
image_dir = '/content/drive/MyDrive/Colab Notebooks/training'
annotation_file = '/content/drive/MyDrive/Colab Notebooks/_annotations.coco.json'

# Custom Dataset class
class CustomDataset(Dataset):
    def __init__(self, image_dir, annotation_file, transform=None):
        self.image_dir = image_dir
        self.transform = transform

        # Load the annotations
        with open(annotation_file, 'r') as f:
            self.coco_data = json.load(f)

        self.images = {img['id']: img for img in self.coco_data['images']}
        self.annotations = self.coco_data['annotations']
        self.image_ids = list(self.images.keys())

        # Create a mapping from category ID to category name
        self.categories = {cat['id']: cat['name'] for cat in self.coco_data['categories']}

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        image_info = self.images[image_id]
        image_path = os.path.join(self.image_dir, image_info['file_name'])

        # Debugging: Print image path to check for issues
        print(f"Loading image: {image_path}")

        image = Image.open(image_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        # Get all annotations for this image
        annotations = [ann for ann in self.annotations if ann['image_id'] == image_id]

        # Prepare targets in the expected format
        boxes = []
        labels = []
        for ann in annotations:
            xmin, ymin, width, height = ann['bbox']
            xmax = xmin + width
            ymax = ymin + height
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(ann['category_id'])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}

        return image, target

# Custom collate function for DataLoader
def custom_collate_fn(batch):
    images, targets = zip(*batch)
    return list(images), list(targets)

# Initialize the dataset and dataloader
transform = T.Compose([T.ToTensor()])
train_dataset = CustomDataset(image_dir, annotation_file, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=custom_collate_fn)

# Define the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    epoch_losses = []
    for images, targets in train_loader:
        if len(images) == 0:
            continue

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        epoch_losses.append(losses.item())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    avg_loss = sum(epoch_losses) / len(epoch_losses) if epoch_losses else 0.0
    print(f"Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}")

# Save the model
model_save_path = '/content/drive/MyDrive/ssd_model_final.pth'
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")

# Define the transform function for input images
def transform_image(image):
    image = F.to_tensor(image).unsqueeze(0)
    return image

# Define the inference function
def infer(image, model):
    image = transform_image(image)
    with torch.no_grad():
        prediction = model(image)
    return prediction

# Visualize the results
def plot_results(image, prediction):
    draw = ImageDraw.Draw(image)
    boxes = prediction[0]['boxes']
    labels = prediction[0]['labels']
    scores = prediction[0]['scores']
    for i, box in enumerate(boxes):
        if scores[i] > 0.5:  # Filter out low-confidence detections
            draw.rectangle(box.tolist(), outline='red', width=3)
            label_name = train_dataset.categories.get(labels[i].item(), 'Unknown')
            draw.text((box[0], box[1]), f'{label_name}, Score: {scores[i].item():.2f}', fill='red')
    plt.imshow(image)
    plt.show()


Loading image: /content/drive/MyDrive/Colab Notebooks/training/katrina-crack-43_png.rf.8f1e8d00f11abc15301b2466c0836c2b.jpg
Loading image: /content/drive/MyDrive/Colab Notebooks/training/Inamura-spalling-088_png.rf.063c702b194be4c563b791b5778cdae9.jpg
Loading image: /content/drive/MyDrive/Colab Notebooks/training/Lin-Spalling-052_png.rf.4f181c4f5f0348540f5175d807fab30b.jpg
Loading image: /content/drive/MyDrive/Colab Notebooks/training/Inamura-free-lime-054_png.rf.7c201c7520ec4b8f443ccb686bc1ecb5.jpg
Loading image: /content/drive/MyDrive/Colab Notebooks/training/Aijia-spalling_02_png.rf.b32293d27e9ea771e189b9630a24aa74.jpg
Loading image: /content/drive/MyDrive/Colab Notebooks/training/lin-spalling-214_png.rf.db3073ea3177c4def8a1151cb94532b1.jpg
Loading image: /content/drive/MyDrive/Colab Notebooks/training/Lin-Spalling-036_png.rf.57c42f4e2de0345f5e8384b0c570ede0.jpg
Loading image: /content/drive/MyDrive/Colab Notebooks/training/Inamura-free-lime-101_png.rf.51e438032015b878a92e9e24de03d6

KeyboardInterrupt: 