In [11]:
import torch   
print(torch.__version__)

2.10.0+cu128


In [12]:
# Vérifie si CUDA est disponible
print(torch.cuda.is_available()) # Retourne True si CUDA est activé


# Affiche la version de CUDA utilisée par PyTorch
print(torch.version.cuda)

True
12.8


# 1. IMPORT DEPENDENCIES

In [21]:
import os
import torch.utils.data
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from pycocotools.coco import COCO
import torchvision.transforms as T
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.transforms import functional as F
import numpy as np
import random

In [14]:
class ClimbingDataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        
        # Initialize COCO api
        ann_file = os.path.join(root, "_annotations.coco.json")
        if not os.path.exists(ann_file):
            raise FileNotFoundError(f"Annotation file not found at: {ann_file}")

        self.coco = COCO(ann_file)
        # Sort initial keys
        all_ids = list(sorted(self.coco.imgs.keys()))

        # Filter out images that don't exist
        self.ids = []
        missing_count = 0
        for img_id in all_ids:
            img_metadata = self.coco.loadImgs(img_id)[0]
            path = img_metadata['file_name']
            img_path = os.path.join(self.root, path)
            if os.path.exists(img_path):
                self.ids.append(img_id)
            else:
                missing_count += 1
        
        print(f"Total potential images: {len(all_ids)}")
        print(f"Missing images filtered out: {missing_count}")
        print(f"Valid images loaded: {len(self.ids)}")

    def __getitem__(self, index):
        # Load Image
        coco = self.coco
        img_id = self.ids[index]
        img_metadata = coco.loadImgs(img_id)[0]
        path = img_metadata['file_name']
        img_path = os.path.join(self.root, path)
        
        # Image is guaranteed to exist now due to init filtering
        img = Image.open(img_path).convert("RGB")

        # Load Annotations
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anns = coco.loadAnns(ann_ids)

        num_objs = len(anns)
        boxes = []
        masks = []
        labels = []

        for ann in anns:
            xmin, ymin, w, h = ann['bbox']
            boxes.append([xmin, ymin, xmin + w, ymin + h])
            labels.append(ann['category_id'])
            masks.append(coco.annToMask(ann))

        # Convert to Tensors
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        if num_objs > 0:
            masks = np.array(masks)
            masks = torch.as_tensor(masks, dtype=torch.uint8)
        else:
            masks = torch.zeros((0, img_metadata['height'], img_metadata['width']), dtype=torch.uint8)
            boxes = torch.zeros((0, 4), dtype=torch.float32)

        image_id = torch.tensor([img_id])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.ids)

# 2. DEFINE DATASET CLASS
We implement a custom Dataset that reads COCO annotations and converts polygons to masks.

In [15]:
# --- Helper Functions ---
def get_transform(train):
    custom_transforms = []
    custom_transforms.append(T.ToTensor())
    return T.Compose(custom_transforms)

def collate_fn(batch):
    return tuple(zip(*batch))


In [16]:
import os
import json

def check_dataset(split='train'):
    base_dir = r"C:\Users\silue\Documents\telecom_clustering\segmentation\MaskRCNN"
    dataset_dir = os.path.join(base_dir, "Hold Detector.v2i.coco-segmentation", split)
    ann_file = os.path.join(dataset_dir, "_annotations.coco.json")
    
    if not os.path.exists(ann_file):
        print(f"Annotation file not found: {ann_file}")
        return

    with open(ann_file, 'r') as f:
        data = json.load(f)
    
    total_images = len(data['images'])
    missing_count = 0
    valid_count = 0
    
    print(f"Checking {split} set...")
    print(f"Total entries in JSON: {total_images}")
    
    for img in data['images']:
        fname = img['file_name']
        img_path = os.path.join(dataset_dir, fname)
        if not os.path.exists(img_path):
            missing_count += 1
            # print(f"Missing: {fname}")
        else:
            valid_count += 1
            
    print(f"Valid images: {valid_count}")
    print(f"Missing images: {missing_count}")

check_dataset('train')
check_dataset('test')

Checking train set...
Total entries in JSON: 117
Valid images: 114
Missing images: 3
Checking test set...
Total entries in JSON: 1
Valid images: 1
Missing images: 0


# 3. CONFIGURE MODEL
We fine-tune a pre-trained Mask R-CNN model.

In [17]:
def get_model_instance_segmentation(num_classes):
    model = maskrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)
    return model

# 4. TRAINING LOOP

In [18]:
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10):
    model.train()
    for i, (images, targets) in enumerate(data_loader):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if i % print_freq == 0:
            print(f"Epoch: {epoch}, Batch: {i}, Loss: {losses.item()}")

# 5. VISUALIZATION
Visualize predictions on a test image.

In [19]:
dataset_path = 'Hold Detector.v2i.coco-segmentation'
train_path = os.path.join(dataset_path, 'train')
test_path = os.path.join(dataset_path, 'test')

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
num_classes = 2 # background + hold

print("Checking the device")
print()
print(f"The device use is {device}.")
print()
print(f"Initializing datasets...")
# Initialize datasets with the Robust ClimbingDataset
dataset_train = ClimbingDataset(train_path, get_transform(train=True))
dataset_test = ClimbingDataset(test_path, get_transform(train=False))

data_loader = torch.utils.data.DataLoader(
    dataset_train, batch_size=2, shuffle=True, collate_fn=collate_fn
)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, collate_fn=collate_fn
)
print()
print("Loading model...")
model = get_model_instance_segmentation(num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Train for 1 Epoch
num_epochs = 5
print()
print("Starting training...")
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    lr_scheduler.step()

print("Training Complete!")
torch.save(model.state_dict(), f"climbing_model_epoch{num_epochs}.pth")

plt.show()

Checking the device

The device use is cuda.

Initializing datasets...
loading annotations into memory...
Done (t=0.14s)
creating index...
index created!
Total potential images: 117
Missing images filtered out: 3
Valid images loaded: 114
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Total potential images: 1
Missing images filtered out: 0
Valid images loaded: 1

Loading model...





Starting training...
Epoch: 0, Batch: 0, Loss: 11.459665298461914
Epoch: 0, Batch: 10, Loss: 2.1420774459838867
Epoch: 0, Batch: 20, Loss: 2.339132785797119
Epoch: 0, Batch: 30, Loss: 2.274705410003662
Epoch: 0, Batch: 40, Loss: 2.3343803882598877
Epoch: 0, Batch: 50, Loss: 1.681612491607666
Epoch: 1, Batch: 0, Loss: 1.3708373308181763
Epoch: 1, Batch: 10, Loss: 1.424757957458496
Epoch: 1, Batch: 20, Loss: 1.425351619720459
Epoch: 1, Batch: 30, Loss: 1.818040370941162
Epoch: 1, Batch: 40, Loss: 1.4922174215316772
Epoch: 1, Batch: 50, Loss: 1.7108650207519531
Epoch: 2, Batch: 0, Loss: 1.5467612743377686
Epoch: 2, Batch: 10, Loss: 1.1366422176361084
Epoch: 2, Batch: 20, Loss: 1.0448704957962036
Epoch: 2, Batch: 30, Loss: 1.365187406539917
Epoch: 2, Batch: 40, Loss: 1.1291300058364868
Epoch: 2, Batch: 50, Loss: 1.6019426584243774
Epoch: 3, Batch: 0, Loss: 1.4601249694824219
Epoch: 3, Batch: 10, Loss: 1.2617735862731934
Epoch: 3, Batch: 20, Loss: 1.4798154830932617
Epoch: 3, Batch: 30, Lo

In [None]:
def main():
    # --- Setup ---
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(f"Using device: {device}")

    # --- Load Model ---
    num_classes = 2 # background + hold
    model = get_model_instance_segmentation(num_classes)

    model_path = 'climbing_model.pth'
    if not os.path.exists(model_path):
        print(f"Error: Model file '{model_path}' not found.")
        return
print(f"Loading model from {model_path}...")
# Load state dict
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

# --- Load Test Image ---
# Path to the specific test image found earlier
dataset_path = 'Hold Detector.v2i.coco-segmentation'
test_dir = os.path.join(dataset_path, 'test')

# Try to find a valid image
img_name = '43d78bf0874b7b6def76e7d846ec6fc4_jpg.rf.8463ddc27821845eeff514149e782f7b.jpg'
img_path = os.path.join(test_dir, img_name)

if not os.path.exists(img_path):
    print(f"Specific image not found at {img_path}, looking for any jpg in {test_dir}...")
    if os.path.exists(test_dir):
        for f in os.listdir(test_dir):
            if f.lower().endswith(('.jpg', '.jpeg', '.png')):
                img_path = os.path.join(test_dir, f)
                break

if not os.path.exists(img_path):
    print("No test image found.")
    return

print(f"Using image: {img_path}")

image = Image.open(img_path).convert("RGB")
image_tensor = F.to_tensor(image).unsqueeze(0).to(device)

# --- Inference ---
print("Running inference...")
with torch.no_grad():
    prediction = model(image_tensor)

# --- Visualization ---
print("Visualizing results...")

img_np = np.array(image)

# Create figure and axes
fig, ax = plt.subplots(1, figsize=(12, 9))
ax.imshow(img_np)

pred_score_threshold = 0.5
pred_boxes = prediction[0]['boxes'].cpu().numpy()
pred_masks = prediction[0]['masks'].cpu().numpy()
pred_scores = prediction[0]['scores'].cpu().numpy()

# Helper function for random colors
def random_color():
    return (random.random(), random.random(), random.random())

num_instances = 0
for i in range(len(pred_masks)):
    if pred_scores[i] > pred_score_threshold:
        num_instances += 1
        mask = pred_masks[i, 0]
        mask = (mask > 0.5) # Boolean mask
        
        if mask.sum() > 0:
            color = random_color()
            
            # Apply mask overlay
            # Create an RGBA image for the mask
            mask_rgba = np.zeros((mask.shape[0], mask.shape[1], 4))
            mask_rgba[mask, 0:3] = color
            mask_rgba[mask, 3] = 0.5 # Alpha
            
            ax.imshow(mask_rgba)
            
            # Draw bounding box
            box = pred_boxes[i]
            x_min, y_min, x_max, y_max = box
            rect = plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min, 
                                    fill=False, edgecolor=color, linewidth=2)
            ax.add_patch(rect)
            ax.text(x_min, y_min - 5, f'{pred_scores[i]:.2f}', color=color, fontsize=10, weight='bold')

plt.axis('off')
plt.title(f"Detected {num_instances} holds (Threshold: {pred_score_threshold})")

output_file = 'prediction_result_epoch5.png'
plt.savefig(output_file, bbox_inches='tight')
print(f"Result saved to {output_file}")
plt.show()

plt.close()

IndentationError: unexpected indent (1841942721.py, line 2)

#  ****