In [None]:
from google.colab import drive

# Expose Google Drive under /content/drive; the project paths used by this
# notebook are rooted there.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

# Project root on the mounted Drive.
BASE_DIR = "/content/drive/My Drive/pothole_project/pothole_seg"

# Top-level folders: datasets and saved model weights.
DATA_DIR, MODEL_DIR = (
    os.path.join(BASE_DIR, folder) for folder in ("data", "models")
)

# Raw download vs. processed copy of the data.
RAW_DIR, PROC_DIR = (
    os.path.join(DATA_DIR, folder) for folder in ("raw", "processed")
)

# Raw dataset as exported (train/valid splits, each with images/ and labels/).
RAW_YOLO_DIR = os.path.join(RAW_DIR, "Pothole_Segmentation_YOLOv8")
RAW_TRAIN_IMG, RAW_TRAIN_LAB = (
    os.path.join(RAW_YOLO_DIR, "train", kind) for kind in ("images", "labels")
)
RAW_VAL_IMG, RAW_VAL_LAB = (
    os.path.join(RAW_YOLO_DIR, "valid", kind) for kind in ("images", "labels")
)

# Processed splits consumed by the training code (note: "val", not "valid").
TRAIN_IMG_DIR, TRAIN_LAB_DIR = (
    os.path.join(PROC_DIR, "train", kind) for kind in ("images", "labels")
)
VAL_IMG_DIR, VAL_LAB_DIR = (
    os.path.join(PROC_DIR, "val", kind) for kind in ("images", "labels")
)

In [None]:
import cv2
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [None]:
class PotholeSegDataset(Dataset):
    """Instance-segmentation dataset built from YOLO-style polygon label files.

    Every image in ``img_dir`` is paired with the ``.txt`` file of the same
    stem in ``label_dir``. Each label line is read as
    ``<class> x1 y1 x2 y2 ...`` with coordinates normalised by the image
    width/height. The class token is ignored: every polygon gets label ``1``.

    Args:
        img_dir: Directory holding ``.jpg`` / ``.jpeg`` / ``.png`` images.
        label_dir: Directory holding the matching label files. A missing
            label file yields a sample with zero instances.
        transforms: Optional callable ``(image, target) -> (image, target)``
            applied to the tensors just before they are returned.
    """

    def __init__(self, img_dir, label_dir, transforms=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transforms = transforms
        # Sorted so that an index always maps to the same file.
        self.img_files = sorted([
            f for f in os.listdir(img_dir)
            if f.lower().endswith((".jpg", ".jpeg", ".png"))
        ])

    def __len__(self):
        """Number of images found in ``img_dir``."""
        return len(self.img_files)

    @staticmethod
    def _parse_polygon(line, width, height):
        """Convert one label line into pixel-space polygon points and a box.

        Args:
            line: Raw text line, ``<class> x1 y1 x2 y2 ...`` (normalised).
            width: Image width in pixels.
            height: Image height in pixels.

        Returns:
            ``(pts, box)`` where ``pts`` is an int32 array of shape ``(N, 2)``
            and ``box`` is ``[x_min, y_min, x_max, y_max]`` (floats, clipped
            to the image), or ``None`` when the line must be skipped: fewer
            than three points, an unpaired trailing coordinate, or a box with
            no positive width/height after clipping.

        Raises:
            ValueError: If a coordinate token is not a number.
        """
        parts = line.split()
        # class id + at least three (x, y) pairs
        if len(parts) < 7:
            return None

        coords = np.array(list(map(float, parts[1:])))
        # An odd count means a dangling x without its y; the line is malformed.
        if coords.size % 2:
            return None

        xs = coords[0::2] * width
        ys = coords[1::2] * height
        pts = np.stack([xs, ys], axis=1).astype(np.int32)

        # fillPoly clips the mask to the image, so clip the box the same way.
        x_min, x_max = np.clip([xs.min(), xs.max()], 0, width)
        y_min, y_max = np.clip([ys.min(), ys.max()], 0, height)

        # torchvision detection models reject targets whose boxes do not have
        # strictly positive width and height (also filters NaN coordinates).
        if not (x_max > x_min and y_max > y_min):
            return None

        return pts, [float(x_min), float(y_min), float(x_max), float(y_max)]

    def __getitem__(self, idx):
        """Load the image at ``idx`` and rasterise its polygons.

        Returns:
            ``(image, target)``. Before ``transforms`` is applied, ``image``
            is a float32 tensor of shape ``(3, H, W)`` scaled to ``[0, 1]``
            and ``target`` is a dict with ``boxes`` (float32, ``(N, 4)``,
            pixel ``x_min, y_min, x_max, y_max``), ``labels`` (int64,
            ``(N,)``, all ones), ``masks`` (uint8, ``(N, H, W)``) and
            ``image_id`` (``tensor([idx])``).

        Raises:
            RuntimeError: If OpenCV cannot read the image file.
        """
        img_name = self.img_files[idx]
        img_path = os.path.join(self.img_dir, img_name)
        label_path = os.path.join(
            self.label_dir,
            os.path.splitext(img_name)[0] + ".txt"
        )

        # --- IMAGE ---
        img_bgr = cv2.imread(img_path)
        if img_bgr is None:
            raise RuntimeError(f"Failed to read image: {img_path}")

        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # Guarantee positive, contiguous strides before torch.from_numpy
        img = np.ascontiguousarray(img_rgb)
        h, w = img.shape[:2]

        masks = []
        boxes = []

        # --- LABELS -> MASKS & BOXES ---
        if os.path.exists(label_path):
            with open(label_path) as f:
                for line in f:
                    parsed = self._parse_polygon(line, w, h)
                    if parsed is None:
                        continue
                    pts, box = parsed

                    mask = np.zeros((h, w), dtype=np.uint8)
                    cv2.fillPoly(mask, [pts], 1)
                    masks.append(mask)
                    boxes.append(box)

        if len(masks) == 0:
            # Keep the trailing dimensions so empty targets still have the
            # shapes (0, H, W) / (0, 4) / (0,).
            masks_arr = np.zeros((0, h, w), dtype=np.uint8)
            boxes_arr = np.zeros((0, 4), dtype=np.float32)
            labels_arr = np.zeros((0,), dtype=np.int64)
        else:
            masks_arr = np.stack(masks, axis=0)
            boxes_arr = np.array(boxes, dtype=np.float32)
            labels_arr = np.ones((len(masks_arr),), dtype=np.int64)

        # Make sure the target arrays are contiguous too (just in case)
        masks_arr = np.ascontiguousarray(masks_arr)
        boxes_arr = np.ascontiguousarray(boxes_arr)
        labels_arr = np.ascontiguousarray(labels_arr)

        # --- NUMPY -> TORCH ---
        img_t = torch.from_numpy(img).float().permute(2, 0, 1) / 255.0
        target = {
            "boxes": torch.from_numpy(boxes_arr),
            "labels": torch.from_numpy(labels_arr),
            "masks": torch.from_numpy(masks_arr),
            "image_id": torch.tensor([idx]),
        }

        if self.transforms:
            img_t, target = self.transforms(img_t, target)

        return img_t, target


In [None]:
# Datasets over the processed train / val splits; no transforms are passed.
train_dataset = PotholeSegDataset(img_dir=TRAIN_IMG_DIR, label_dir=TRAIN_LAB_DIR)
val_dataset = PotholeSegDataset(img_dir=VAL_IMG_DIR, label_dir=VAL_LAB_DIR)

def collate_fn(batch):
    """Transpose a list of samples into a tuple of per-field tuples.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``.
    Nothing is stacked, so samples may differ in size. An empty batch yields
    an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Only the training loader shuffles; both use collate_fn to build batches.
train_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    collate_fn=collate_fn,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,
    collate_fn=collate_fn,
)


In [None]:
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = 2  # background + pothole
NUM_EPOCHS = 5  # start small
LEARNING_RATE = 1e-4

# Start from the COCO-pretrained Mask R-CNN and replace both prediction heads
# so they output `num_classes` classes.
model = maskrcnn_resnet50_fpn(weights="COCO_V1")
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, 256, num_classes)

model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Fail early with a clear message instead of a ZeroDivisionError when the
# per-epoch average is computed.
if len(train_loader) == 0:
    raise RuntimeError("train_loader is empty; nothing to train on")

for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    for imgs, targets in train_loader:
        imgs = [img.to(device) for img in imgs]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # With targets supplied in train mode the model returns its losses.
        loss_dict = model(imgs, targets)
        loss = sum(loss_dict.values())

        # Stop before back-propagating NaN/inf: one such step corrupts the
        # weights, and they would later be saved as if training had worked.
        if not torch.isfinite(loss):
            raise RuntimeError(
                f"Non-finite loss in epoch {epoch+1}: "
                f"{ {k: v.item() for k, v in loss_dict.items()} }"
            )

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}, loss: {running_loss/len(train_loader):.4f}")


Epoch 1, loss: 0.6482
Epoch 2, loss: 0.4985
Epoch 3, loss: 0.4266
Epoch 4, loss: 0.3744
Epoch 5, loss: 0.3271


In [None]:
# torch.save does not create missing parent folders; create MODEL_DIR first
# so a finished training run is not lost to a missing directory.
os.makedirs(MODEL_DIR, exist_ok=True)

weights_path = os.path.join(MODEL_DIR, "maskrcnn_pothole.pth")
torch.save(model.state_dict(), weights_path)
weights_path
