### Housepital Object Classification Model

In [1]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
import torchvision.transforms as T
from torch.utils.data import DataLoader
from pycocotools.coco import COCO
import os
from PIL import Image, ImageOps

def get_transform():
    return T.Compose([
        T.ToTensor(),
        T.Resize((640, 640)),  # Stretch to 640x640
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Standard normalization
    ])

def auto_orient(image):
    # Auto-orient the image based on EXIF data
    try:
        image = Image.open(image)
        image = ImageOps.exif_transpose(image)
    except Exception as e:
        print(f"Error in auto-orienting image: {e}")
    return image

def validate_and_fix_boxes(target):
    boxes = target["boxes"]
    
    # Ensure boxes is 2D with shape [num_boxes, 4]
    if boxes.dim() == 1:
        boxes = boxes.view(-1, 4)  # Reshape to 2D if it's 1D
    
    # Ensure width and height are >= 1
    x_min, y_min, x_max, y_max = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    x_max = torch.max(x_min + 1, x_max)  # Ensure width is >= 1
    y_max = torch.max(y_min + 1, y_max)  # Ensure height is >= 1
    
    # Fix boxes and return them
    target["boxes"] = boxes
    return target


# Define a custom dataset class for COCO
class COCODataset(torch.utils.data.Dataset):
    def __init__(self, img_dir, annotation_file, transforms=None):
        self.img_dir = img_dir
        self.coco = COCO(annotation_file)
        self.transforms = transforms
        self.image_ids = list(self.coco.imgs.keys())

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        image_info = self.coco.loadImgs(image_id)[0]
        image_path = os.path.join(self.img_dir, image_info['file_name'])
        
        # Apply Auto-Orientation
        image = auto_orient(image_path)
        
        # Load annotations
        ann_ids = self.coco.getAnnIds(imgIds=image_id)
        annotations = self.coco.loadAnns(ann_ids)
        
        # Prepare targets
        boxes = []
        labels = []
        for ann in annotations:
            bbox = ann['bbox']
            # COCO uses [x, y, width, height], but PyTorch uses [x_min, y_min, x_max, y_max]
            boxes.append([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
            labels.append(ann['category_id'])  # Assuming category_id represents bed sore grades (1-4)
        
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        target = {"boxes": boxes, "labels": labels, "image_id": torch.tensor([image_id])}

        # Apply transforms if provided
        if self.transforms:
            image = self.transforms(image)
        target = validate_and_fix_boxes(target)

        return image, target


# Paths for train, valid, and test directories from both versions
train_dir_v1 = "dataset/pressureV1/train"
train_annotations_v1 = "dataset/pressureV1/train/_annotations.coco.json"
valid_dir_v1 = "dataset/pressureV1/valid"
valid_annotations_v1 = "dataset/pressureV1/valid/_annotations.coco.json"
test_dir_v1 = "dataset/pressureV1/test"
test_annotations_v1 = "dataset/pressureV1/test/_annotations.coco.json"

train_dir_v2 = "dataset/pressureV2/train"
train_annotations_v2 = "dataset/pressureV2/train/_annotations.coco.json"
valid_dir_v2 = "dataset/pressureV2/valid"
valid_annotations_v2 = "dataset/pressureV2/valid/_annotations.coco.json"
test_dir_v2 = "dataset/pressureV2/test"
test_annotations_v2 = "dataset/pressureV2/test/_annotations.coco.json"

# Datasets and loaders
batch_size = 2

# Combine the datasets into single dataset loaders
train_dataset = torch.utils.data.ConcatDataset([
    COCODataset(train_dir_v1, train_annotations_v1, transforms=get_transform()),
    COCODataset(train_dir_v2, train_annotations_v2, transforms=get_transform())
])

valid_dataset = torch.utils.data.ConcatDataset([
    COCODataset(valid_dir_v1, valid_annotations_v1, transforms=get_transform()),
    COCODataset(valid_dir_v2, valid_annotations_v2, transforms=get_transform())
])

test_dataset = torch.utils.data.ConcatDataset([
    COCODataset(test_dir_v1, test_annotations_v1, transforms=get_transform()),
    COCODataset(test_dir_v2, test_annotations_v2, transforms=get_transform())
])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!


In [2]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.faster_rcnn import GeneralizedRCNNTransform

# Number of classes: 1 background + 4 bed sore grades
NUM_CLASSES = 5  # Including the background class

# Pre-trained Faster R-CNN with ResNet50 backbone
def get_faster_rcnn_model():
    # Load pre-trained Faster R-CNN model with a ResNet50 backbone
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    
    # Get the input features from the pre-trained model's classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    
    # Replace the classifier with a new one that outputs NUM_CLASSES (4 classes + 1 background)
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES)
    
    # Modify the image transform for your custom dataset (e.g., using 640x640 images)
    model.transform = GeneralizedRCNNTransform(min_size=640, max_size=640, image_mean=[0.485, 0.456, 0.406], image_std=[0.229, 0.224, 0.225])
    
    return model


# Set custom directory for model downloads
os.environ["TORCH_HOME"] = "D:/Practice Housepital Back/model/pretrain"

# Create the model
model = get_faster_rcnn_model()

# Move model to the appropriate device (GPU or CPU)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(640,), max_size=640, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(i

In [None]:
import torch
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from tqdm import tqdm
import time

# Define the training loop
def train(model, train_loader, valid_loader, num_epochs=10, lr=1e-4, device=None):
    model.to(device)
    
    # Define optimizer (Adam for Faster R-CNN)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001)
    
    # Define the learning rate scheduler (optional)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
    
    # Set model to training mode
    model.train()
    
    # Train the model
    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        start_time = time.time()
        
        # Training phase
        model.train()
        train_loss = 0
        count = 0
        for images, targets in tqdm(train_loader, desc="Training"):
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Zero gradients
            optimizer.zero_grad()

            # Forward pass
            loss_dict = model(images, targets)
            
            # Sum up the losses
            losses = sum(loss for loss in loss_dict.values())
            
            # Backward pass and optimization
            losses.backward()
            optimizer.step()

            # Track training loss
            train_loss += losses.item()

            # count += 1
            # if count == 2:
            #     break

        # Average training loss
        train_loss /= len(train_loader)
        
        # Validation phase
        valid_loss = 0
        model.eval()  # Set model to evaluation mode
        count = 0
        with torch.no_grad():
            valid_loss = 0
            for images, targets in tqdm(valid_loader, desc="Validating"):
                images = [image.to(device) for image in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                # Forward pass
                loss_dict = model(images, targets)

                # Process losses if it's a dictionary
                try:
                    losses = sum(loss.item() for loss in loss_dict.values())
                    valid_loss += losses
                except:
                    pass

                # count += 1
                # if count == 2:
                #     break

        # Average validation loss
        valid_loss /= len(valid_loader)

        # Print epoch statistics
        print(f"Epoch {epoch + 1} finished in {time.time() - start_time:.2f} seconds")
        print(f"Training Loss: {train_loss:.4f}, Validation Loss: {valid_loss:.4f}")

        # Step the learning rate scheduler
        lr_scheduler.step()

    print("Training complete!")

# Train the model
train(model, train_loader, valid_loader, num_epochs=10, lr=1e-4, device=device)

In [92]:
import torch
import torch.optim as optim
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.optim.lr_scheduler import StepLR

# Load the model
model = get_faster_rcnn_model()
model.to(device)

# Optimizer
params = [p for p in model.parameters() if p.requires_grad]
# optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.0001)

# Scheduler
lr_scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

# Training loop with validation
num_epochs = 10
for epoch in range(num_epochs):
    # Training Phase
    model.train()  # Set the model to training mode
    running_loss = 0.0

    for images, targets in train_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        # Zero gradients
        optimizer.zero_grad()
        
        # Forward pass
        loss_dict = model(images, targets)
        
        # Total loss
        losses = sum(loss for loss in loss_dict.values())
        
        # Backward pass
        losses.backward()
        
        # Update weights
        optimizer.step()
        
        # Keep track of the loss
        running_loss += losses.item()
        break
        

    # Print average loss for the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {running_loss/len(train_loader)}")
    
    # Step the learning rate scheduler
    lr_scheduler.step()

    # Save the model after each epoch
    # torch.save(model.state_dict(), f"fasterrcnn_epoch_{epoch+1}.pth")

    # Validation Phase
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # No gradients needed for inference
        validation_loss = 0.0
        for images, targets in valid_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            loss_dict = model(images, targets)
            
            # Total loss for validation
            try:
                losses = sum(loss for loss in loss_dict.values())
                validation_loss += losses.item()
            except:
                pass

            break

        # Print validation loss
        print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {validation_loss/len(valid_loader)}")
    break

    # Optionally, print the validation performance (e.g., mAP or IoU)

Epoch [1/10], Training Loss: 0.0006994074583053589
Epoch [1/10], Validation Loss: 0.0


In [32]:
# Save the model

torch.save(model.state_dict(), "model/v2.pth")
print("Model Saved Successfully")

Model Saved Successfully


In [5]:
# Load the model

# Create the model
model = get_faster_rcnn_model()

# Move model to the appropriate device (GPU or CPU)
model.load_state_dict(torch.load("model/latest_model.pth"))
model.to("cpu")
print("Model Loaded Successfully")

  model.load_state_dict(torch.load("model/latest_model.pth"))


Model Loaded Successfully


In [6]:
import torch
from tqdm import tqdm
import numpy as np
import torch


def coco_to_xyxy(box):
    """Convert COCO bbox format [x, y, w, h] to [x_min, y_min, x_max, y_max]."""
    x_min, y_min, width, height = box
    x_max = x_min + width
    y_max = y_min + height
    return [x_min, y_min, x_max, y_max]

def calculate_iou(box1, box2):
    """Calculate Intersection over Union (IoU) for two boxes in [x_min, y_min, x_max, y_max] format."""
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = box1_area + box2_area - intersection
    
    return intersection / union if union > 0 else 0

def test_model_acc(model, test_loader, device, iou_threshold=0.5):
    """Evaluate model accuracy with a focus on category ID and IoU alignment."""
    model.to(device)
    model.eval()
    
    correct_detections = 0  # Both IoU and category match
    correct_categories = 0  # Category match regardless of IoU
    total_targets = 0       # Total ground truth objects
    total_predictions = 0   # Total predicted objects
    
    with torch.no_grad():
        for images, targets in tqdm(test_loader, desc="Testing"):
            images = [img.to(device) for img in images]
            outputs = model(images)
            
            for i, output in enumerate(outputs):
                pred_boxes = output["boxes"].cpu().numpy()
                pred_labels = output["labels"].cpu().numpy()
                pred_scores = output["scores"].cpu().numpy()
                
                # Convert true boxes to [x_min, y_min, x_max, y_max]
                true_boxes = np.array([coco_to_xyxy(box) for box in targets[i]["boxes"].cpu().numpy()])
                true_labels = targets[i]["labels"].cpu().numpy()
                
                total_targets += len(true_boxes)
                total_predictions += len(pred_boxes)
                
                # Match true boxes with predictions
                for true_box, true_label in zip(true_boxes, true_labels):
                    match_found = False
                    category_correct = False
                    for pred_box, pred_label, pred_score in zip(pred_boxes, pred_labels, pred_scores):
                        iou = calculate_iou(true_box, pred_box)
                        if pred_label == true_label:
                            category_correct = True
                        if iou >= iou_threshold and pred_label == true_label:
                            correct_detections += 1
                            match_found = True
                            break
                    
                    if category_correct:
                        correct_categories += 1  # Category is correct even if IoU is low
                    
                    if not match_found:
                        pass  # No IoU and category match for this true object
    
    # Metrics
    precision = correct_detections / total_predictions if total_predictions > 0 else 0
    recall = correct_detections / total_targets if total_targets > 0 else 0
    classification_accuracy = correct_categories / total_targets if total_targets > 0 else 0
    iou_accuracy = correct_detections / total_targets if total_targets > 0 else 0
    
    print(f"Precision: {precision:.4f}, Recall: {recall:.4f}")
    print(f"Classification Accuracy: {classification_accuracy:.4f}, IoU Accuracy: {iou_accuracy:.4f}")
    
    return precision, recall, classification_accuracy, iou_accuracy


test_model_acc(model, test_loader, device)

Testing:   0%|          | 0/206 [00:00<?, ?it/s]

Testing: 100%|██████████| 206/206 [00:57<00:00,  3.59it/s]

Precision: 0.4471, Recall: 0.9854
Classification Accuracy: 0.9854, IoU Accuracy: 0.9854





(0.44713656387665196,
 0.9854368932038835,
 0.9854368932038835,
 0.9854368932038835)

In [None]:
from tqdm import tqdm

def test_model(model, test_loader, device=None):
    model.to(device)
    model.eval()  # Set the model to evaluation mode
    
    predictions = []
    with torch.no_grad():
        for images, targets in tqdm(test_loader, desc="Testing"):
            # Move images to the device
            images = [image.to(device) for image in images]
            
            # Get predictions from the model
            outputs = model(images)
            
            # Process outputs
            for i, output in enumerate(outputs):
                pred_boxes = output['boxes'].cpu().numpy()
                pred_labels = output['labels'].cpu().numpy()
                pred_scores = output['scores'].cpu().numpy()
                
                # Optionally compare with ground truth targets
                true_boxes = targets[i]['boxes'].cpu().numpy()
                true_labels = targets[i]['labels'].cpu().numpy()
                
                # Store predictions and ground truth
                predictions.append({
                    "image_id": targets[i]["image_id"].item(),
                    "pred_boxes": pred_boxes,
                    "pred_labels": pred_labels,
                    "pred_scores": pred_scores,
                    "true_boxes": true_boxes,
                    "true_labels": true_labels
                })
    
    return predictions


test_model(model, test_loader, device)

In [None]:
import json

train_annotations_v1 = "dataset/pressureV1/train/_annotations.coco.json"
with open(train_annotations_v1, "r") as file:
    data = json.load(file)

print(data.keys())
print(data["categories"])
for val in data.keys():
    if val != "categories":
        try:
            print(val, "=", data[val][0])
        except:
            pass

dict_keys(['info', 'licenses', 'categories', 'images', 'annotations'])
[{'id': 0, 'name': 'stage1-stage2-stage3-stage4', 'supercategory': 'none'}, {'id': 1, 'name': 'stage1', 'supercategory': 'stage1-stage2-stage3-stage4'}, {'id': 2, 'name': 'stage2', 'supercategory': 'stage1-stage2-stage3-stage4'}, {'id': 3, 'name': 'stage3', 'supercategory': 'stage1-stage2-stage3-stage4'}, {'id': 4, 'name': 'stage4', 'supercategory': 'stage1-stage2-stage3-stage4'}]
licenses = {'id': 1, 'url': 'https://creativecommons.org/licenses/by/4.0/', 'name': 'CC BY 4.0'}
images = {'id': 0, 'license': 1, 'file_name': 'undermining0003_jpg.rf.025bab6b80a9c1c801fcba72a2332fb1.jpg', 'height': 640, 'width': 640, 'date_captured': '2024-12-15T07:55:13+00:00'}
annotations = {'id': 0, 'image_id': 0, 'category_id': 3, 'bbox': [3, 3, 637, 637], 'area': 405769, 'segmentation': [], 'iscrowd': 0}


In [63]:
import json

test_annotations_v1 = "dataset/pressureV1/test/_annotations.coco.json"
with open(test_annotations_v1, "r") as file:
    data = json.load(file)

print(data.keys())
print(data["categories"])
for val in data.keys():
    if val != "categories":
        try:
            print(val, "=", data[val][0])
        except:
            pass

dict_keys(['info', 'licenses', 'categories', 'images', 'annotations'])
[{'id': 0, 'name': 'stage1-stage2-stage3-stage4', 'supercategory': 'none'}, {'id': 1, 'name': 'stage1', 'supercategory': 'stage1-stage2-stage3-stage4'}, {'id': 2, 'name': 'stage2', 'supercategory': 'stage1-stage2-stage3-stage4'}, {'id': 3, 'name': 'stage3', 'supercategory': 'stage1-stage2-stage3-stage4'}, {'id': 4, 'name': 'stage4', 'supercategory': 'stage1-stage2-stage3-stage4'}]
licenses = {'id': 1, 'url': 'https://creativecommons.org/licenses/by/4.0/', 'name': 'CC BY 4.0'}
images = {'id': 0, 'license': 1, 'file_name': 'undermining0003_jpg.rf.025bab6b80a9c1c801fcba72a2332fb1.jpg', 'height': 640, 'width': 640, 'date_captured': '2024-12-15T07:55:13+00:00'}
annotations = {'id': 0, 'image_id': 0, 'category_id': 3, 'bbox': [3, 3, 637, 637], 'area': 405769, 'segmentation': [], 'iscrowd': 0}


In [9]:
from sklearn.metrics import classification_report
from tqdm import tqdm
import numpy as np

def coco_to_xyxy(box):
    """Convert COCO bbox format [x, y, w, h] to [x_min, y_min, x_max, y_max]."""
    x_min, y_min, width, height = box
    x_max = x_min + width
    y_max = y_min + height
    return [x_min, y_min, x_max, y_max]

def calculate_iou(box1, box2):
    """Calculate Intersection over Union (IoU) for two boxes in [x_min, y_min, x_max, y_max] format."""
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = box1_area + box2_area - intersection
    
    return intersection / union if union > 0 else 0

def test_model_acc(model, test_loader, device, iou_threshold=0.5):
    """Evaluate model accuracy and generate classification report."""
    model.to(device)
    model.eval()
    
    true_labels_all = []
    pred_labels_all = []
    
    correct_detections = 0  # Both IoU and category match
    total_targets = 0       # Total ground truth objects
    total_predictions = 0   # Total predicted objects
    
    with torch.no_grad():
        for images, targets in tqdm(test_loader, desc="Testing"):
            images = [img.to(device) for img in images]
            outputs = model(images)
            
            for i, output in enumerate(outputs):
                pred_boxes = output["boxes"].cpu().numpy()
                pred_labels = output["labels"].cpu().numpy()
                pred_scores = output["scores"].cpu().numpy()
                
                # Convert true boxes to [x_min, y_min, x_max, y_max]
                true_boxes = np.array([coco_to_xyxy(box) for box in targets[i]["boxes"].cpu().numpy()])
                true_labels = targets[i]["labels"].cpu().numpy()
                
                total_targets += len(true_boxes)
                total_predictions += len(pred_boxes)
                
                # Collect true and predicted labels
                for true_box, true_label in zip(true_boxes, true_labels):
                    match_found = False
                    for pred_box, pred_label, pred_score in zip(pred_boxes, pred_labels, pred_scores):
                        iou = calculate_iou(true_box, pred_box)
                        if iou >= iou_threshold:
                            pred_labels_all.append(pred_label)
                            true_labels_all.append(true_label)
                            if pred_label == true_label:
                                correct_detections += 1
                            match_found = True
                            break
                    
                    if not match_found:
                        true_labels_all.append(true_label)
                        pred_labels_all.append(-1)  # Indicate no prediction

    # Metrics
    precision = correct_detections / total_predictions if total_predictions > 0 else 0
    recall = correct_detections / total_targets if total_targets > 0 else 0
    
    # Generate classification report
    report = classification_report(true_labels_all, pred_labels_all, zero_division=0)
    print(report)
    print(f"Precision: {precision:.4f}, Recall: {recall:.4f}")
    
    return precision, recall, report

test_model_acc(model, test_loader, device)

Testing:   0%|          | 0/206 [00:00<?, ?it/s]

Testing: 100%|██████████| 206/206 [00:58<00:00,  3.49it/s]

              precision    recall  f1-score   support

           1       0.95      0.84      0.89        90
           2       0.72      0.75      0.74       122
           3       0.60      0.55      0.57       110
           4       0.75      0.87      0.80        90

    accuracy                           0.74       412
   macro avg       0.75      0.75      0.75       412
weighted avg       0.74      0.74      0.74       412

Precision: 0.3370, Recall: 0.7427





(0.3370044052863436,
 0.7427184466019418,
 '              precision    recall  f1-score   support\n\n           1       0.95      0.84      0.89        90\n           2       0.72      0.75      0.74       122\n           3       0.60      0.55      0.57       110\n           4       0.75      0.87      0.80        90\n\n    accuracy                           0.74       412\n   macro avg       0.75      0.75      0.75       412\nweighted avg       0.74      0.74      0.74       412\n')

In [4]:
from tqdm import tqdm

def test_model(model, test_loader, device=None):
    model.to(device)
    model.eval()  # Set the model to evaluation mode
    
    predictions = []
    with torch.no_grad():
        for images, targets in tqdm(test_loader, desc="Testing"):
            # Move images to the device
            images = [image.to(device) for image in images]
            
            # Get predictions from the model
            outputs = model(images)
            
            # Process outputs
            for i, output in enumerate(outputs):
                pred_boxes = output['boxes'].cpu().numpy()
                pred_labels = output['labels'].cpu().numpy()
                pred_scores = output['scores'].cpu().numpy()
                
                # Optionally compare with ground truth targets
                true_boxes = targets[i]['boxes'].cpu().numpy()
                true_labels = targets[i]['labels'].cpu().numpy()
                
                # Store predictions and ground truth
                predictions.append({
                    "image_id": targets[i]["image_id"].item(),
                    "pred_boxes": pred_boxes,
                    "pred_labels": pred_labels,
                    "pred_scores": pred_scores,
                    "true_boxes": true_boxes,
                    "true_labels": true_labels
                })
    
    return predictions


test_model(model, test_loader, "cpu")

Testing: 100%|██████████| 206/206 [01:54<00:00,  1.79it/s]


[{'image_id': 0,
  'pred_boxes': array([[4.8718872e+00, 4.2875061e+00, 6.4000000e+02, 6.3991846e+02],
         [6.4107361e+00, 1.3876343e-01, 6.4000000e+02, 6.3850623e+02]],
        dtype=float32),
  'pred_labels': array([3, 2], dtype=int64),
  'pred_scores': array([0.85856915, 0.34615943], dtype=float32),
  'true_boxes': array([[  3.,   3., 640., 640.]], dtype=float32),
  'true_labels': array([3], dtype=int64)},
 {'image_id': 1,
  'pred_boxes': array([[3.8528442e-01, 0.0000000e+00, 6.4000000e+02, 6.3902246e+02],
         [1.1218262e-01, 6.0907288e+00, 6.4000000e+02, 6.3937598e+02]],
        dtype=float32),
  'pred_labels': array([4, 3], dtype=int64),
  'pred_scores': array([0.86041284, 0.3759606 ], dtype=float32),
  'true_boxes': array([[  5.,   0., 640., 640.]], dtype=float32),
  'true_labels': array([3], dtype=int64)},
 {'image_id': 2,
  'pred_boxes': array([[  1.3620911,   1.6354065, 639.78467  , 638.0299   ],
         [  4.9110107,   2.1372986, 638.3259   , 639.21204  ]],
        

In [27]:
import torch
from torchvision.transforms import functional as F
import torchvision.transforms as T
from PIL import Image, ImageOps
import io

def test_model(model, image, device=None):
    model.to(device)
    model.eval()  # Set the model to evaluation mode
    
    with torch.no_grad():
        # Move image to the device
        image = [image.to(device)]
        
        # Get predictions from the model
        outputs = model(image)
        
        # Process outputs
        for output in outputs:
            pred_boxes = output['boxes'].cpu().numpy()
            pred_labels = output['labels'].cpu().numpy()
            pred_scores = output['scores'].cpu().numpy()
            
            # Store predictions and ground truth
            predictions = {
                "pred_boxes": pred_boxes,
                "pred_labels": pred_labels,
                "pred_scores": pred_scores,
            }

    return predictions

def get_transform(image):
    transform = T.Compose([
        T.ToTensor(),
        T.Resize((640, 640)),  # Stretch to 640x640
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Standard normalization
    ])
    return transform(image)

def auto_orient(image):
    # Auto-orient the image based on EXIF data
    try:
        image = Image.open(image)
        image = ImageOps.exif_transpose(image)
    except Exception as e:
        print(f"Error in auto-orienting image: {e}")
    return image

path = "dataset/pressureV1/test/20019_jpg.rf.73b61f3a6e7da15ed828604408a1b8eb.jpg"
img = auto_orient(path)
img = get_transform(img)
# print(img)
pred = test_model(model, img, "cpu")
pred

{'pred_boxes': array([[2.1481934e+00, 3.8293457e-01, 6.3898096e+02, 6.3982013e+02]],
       dtype=float32),
 'pred_labels': array([1], dtype=int64),
 'pred_scores': array([0.99406034], dtype=float32)}