In [1]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.transforms import functional as F
from torchvision.transforms import transforms as T
from torchvision.ops import box_iou
from torch.utils.data import DataLoader, Dataset
import os
import numpy as np
from PIL import Image
import xml.etree.ElementTree as ET
from torch.cuda.amp import GradScaler, autocast
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
class ShipDataset(Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = sorted([img for img in os.listdir(os.path.join(root, "images")) if os.path.isfile(os.path.join(root, "annotations", img.replace('.bmp', '.xml')))])
        self.annotations = sorted([ann for ann in os.listdir(os.path.join(root, "annotations")) if os.path.isfile(os.path.join(root, "images", ann.replace('.xml', '.bmp')))])

        assert len(self.imgs) == len(self.annotations), "Mismatch between number of images and annotations"

        self.wordname_50 = ['Other Ship', 'Other Warship', 'Submarine', 'Other Aircraft Carrier', 'Enterprise', 
                            'Nimitz', 'Midway', 'Ticonderoga', 'Other Destroyer', 'Atago DD', 'Arleigh Burke DD', 
                            'Hatsuyuki DD', 'Hyuga DD', 'Asagiri DD', 'Other Frigate', 'Perry FF', 'Patrol', 
                            'Other Landing', 'YuTing LL', 'YuDeng LL', 'YuDao LL', 'YuZhao LL', 'Austin LL', 
                            'Osumi LL', 'Wasp LL', 'LSD 41 LL', 'LHA LL', 'Commander', 'Other Auxiliary Ship', 
                            'Medical Ship', 'Test Ship', 'Training Ship', 'AOE', 'Masyuu AS', 'Sanantonio AS', 'EPF', 
                            'Other Merchant', 'Container Ship', 'RoRo', 'Cargo', 'Barge', 'Tugboat', 'Ferry', 'Yacht', 
                            'Sailboat', 'Fishing Vessel', 'Oil Tanker', 'Hovercraft', 'Motorboat', 'Dock']

    def __getitem__(self, idx):
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        ann_path = os.path.join(self.root, "annotations", self.annotations[idx])
        
        img = Image.open(img_path).convert("RGB")
        tree = ET.parse(ann_path)
        root = tree.getroot()

        boxes = []
        labels = []
        for obj in root.findall("object"):
            bndbox = obj.find("bndbox")
            xmin = int(bndbox.find("xmin").text)
            ymin = int(bndbox.find("ymin").text)
            xmax = int(bndbox.find("xmax").text)
            ymax = int(bndbox.find("ymax").text)
            boxes.append([xmin, ymin, xmax, ymax])

            label_name = obj.find("name").text
            label = self.wordname_50.index(label_name) + 1  # +1 to make background class 0
            labels.append(label)

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.imgs)

def get_transform(train):
    transforms = [T.ToTensor()]
    if train:
        transforms.append(T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))
    return T.Compose(transforms)

In [3]:
def clear_memory():
    torch.cuda.empty_cache()
    print("Cleared CUDA memory cache")

def calculate_metrics(pred_boxes, pred_labels, pred_scores, true_boxes, true_labels, iou_threshold=0.5):
    true_positives = 0
    false_positives = 0
    false_negatives = 0

    if len(pred_boxes) == 0 and len(true_boxes) == 0:
        return 1, 1, 1  # precision, recall, F1 score
    if len(pred_boxes) == 0:
        return 0, 0, 0
    if len(true_boxes) == 0:
        return 0, 0, 0

    ious = box_iou(torch.tensor(pred_boxes), torch.tensor(true_boxes))
    for i, pred in enumerate(pred_boxes):
        if pred_labels[i] in true_labels:
            max_iou = torch.max(ious[i]).item()
            if max_iou > iou_threshold:
                true_positives += 1
            else:
                false_positives += 1
        else:
            false_positives += 1

    for j, true in enumerate(true_boxes):
        if true_labels[j] in pred_labels:
            max_iou = torch.max(ious[:, j]).item()
            if max_iou <= iou_threshold:
                false_negatives += 1
        else:
            false_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    if precision + recall == 0:
        f1_score = 0
    else:
        f1_score = 2 * (precision * recall) / (precision + recall)

    return precision, recall, f1_score

def train_model(num_epochs, data_loader, model, optimizer, device):
    model.to(device)
    scaler = GradScaler()
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        epoch_precision = 0
        epoch_recall = 0
        epoch_f1 = 0

        for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            optimizer.zero_grad()
            
            with autocast():
                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())

            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()

            epoch_loss += losses.item()

            # Calculate metrics
            with torch.no_grad():
                model.eval()
                outputs = model(images)
                for i, output in enumerate(outputs):
                    pred_boxes = output['boxes'].cpu().numpy()
                    pred_labels = output['labels'].cpu().numpy()
                    pred_scores = output['scores'].cpu().numpy()
                    
                    true_boxes = targets[i]['boxes'].cpu().numpy()
                    true_labels = targets[i]['labels'].cpu().numpy()
                    
                    precision, recall, f1_score = calculate_metrics(pred_boxes, pred_labels, pred_scores, true_boxes, true_labels)
                    
                    epoch_precision += precision
                    epoch_recall += recall
                    epoch_f1 += f1_score

        epoch_loss /= len(data_loader)
        epoch_precision /= len(data_loader)
        epoch_recall /= len(data_loader)
        epoch_f1 /= len(data_loader)

        print(f"Epoch: {epoch+1}, Loss: {epoch_loss:.4f}, Precision: {epoch_precision:.4f}, Recall: {epoch_recall:.4f}, F1 Score: {epoch_f1:.4f}")

if __name__ == "__main__":
    clear_memory()  # Clear memory before starting the training

    dataset = ShipDataset(root="ShipRSImageNet_V1/Dataset/", transforms=get_transform(train=True))
    data_loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4, collate_fn=lambda x: tuple(zip(*x)))

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    weights = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    num_classes = 51  # 50 ship classes + background
    in_features = weights.roi_heads.box_predictor.cls_score.in_features
    weights.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
    
    model = weights.to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

    num_epochs = 10
    train_model(num_epochs, data_loader, model, optimizer, device)

    clear_memory()  # Clear memory after training

Cleared CUDA memory cache




AttributeError: 'list' object has no attribute 'values'

In [None]:
from torchvision.models.detection import ssd300_vgg16, SSD300_VGG16_Weights

In [None]:
def clear_memory():
    torch.cuda.empty_cache()
    print("Cleared CUDA memory cache")

def train_model(num_epochs, data_loader, model, optimizer, device):
    model.to(device)
    scaler = GradScaler()
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        
        for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            optimizer.zero_grad()
            
            with autocast():
                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())

            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()

            epoch_loss += losses.item()

        print(f"Epoch: {epoch+1}, Loss: {epoch_loss/len(data_loader)}")

if __name__ == "__main__":
    clear_memory()  # Clear memory before starting the training

    dataset = ShipDataset(root="ShipRSImageNet_V1/Dataset/", transforms=get_transform(train=True))
    data_loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4, collate_fn=lambda x: tuple(zip(*x)))

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    weights = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    num_classes = 51  # 50 ship classes + background
    in_features = weights.roi_heads.box_predictor.cls_score.in_features
    weights.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
    
    model = weights.to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

    num_epochs = 10
    train_model(num_epochs, data_loader, model, optimizer, device)

    clear_memory()  # Clear memory after training

In [None]:
torch.cuda.empty_cache()

In [None]:
device

In [None]:
torch.save(model.state_dict(), "./checkpoint/ship_detect_rsnn_10epochs_8batch.h5")


In [None]:
dataset = ShipDataset(root="ShipRSImageNet_V1/Dataset/", transforms=get_transform(train=True))
data_loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4, collate_fn=lambda x: tuple(zip(*x)))

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

weights = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 51  # 50 ship classes + background
in_features = weights.roi_heads.box_predictor.cls_score.in_features
weights.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
    
model = weights.to(device)
model.load_state_dict(torch.load("./checkpoint/ship_detect_rsnn_15epochs.h5"))
model.eval()

In [None]:
import torch
from torchvision.transforms import functional as F
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def predict_image(model, image_path, device, confidence_threshold=0.5):
    # Load and preprocess the image
    img = Image.open(image_path).convert("RGB")
    img_tensor = F.to_tensor(img).unsqueeze(0).to(device)

    # Run inference
    model.eval()
    with torch.no_grad():
        predictions = model(img_tensor)

    # Process the predictions
    pred_boxes = predictions[0]['boxes'].cpu().numpy()
    pred_labels = predictions[0]['labels'].cpu().numpy()
    pred_scores = predictions[0]['scores'].cpu().numpy()

    # Filter out low-confidence detections
    high_conf_indices = pred_scores > confidence_threshold
    pred_boxes = pred_boxes[high_conf_indices]
    pred_labels = pred_labels[high_conf_indices]
    pred_scores = pred_scores[high_conf_indices]

    return pred_boxes, pred_labels, pred_scores

def display_predictions(image_path, boxes, labels, scores, class_names):
    img = Image.open(image_path).convert("RGB")
    plt.figure(figsize=(12, 8))
    plt.imshow(img)
    ax = plt.gca()

    for box, label, score in zip(boxes, labels, scores):
        xmin, ymin, xmax, ymax = box
        width, height = xmax - xmin, ymax - ymin
        edgecolor = 'r'
        ax.add_patch(patches.Rectangle((xmin, ymin), width, height, linewidth=2, edgecolor=edgecolor, facecolor='none'))
        ax.text(xmin, ymin, f"{class_names[label-1]}: {score:.2f}", bbox=dict(facecolor='yellow', alpha=0.5))

    plt.axis('off')
    plt.show()

# Example usage
image_path = "test/DIOR/JPEGImages-test_has_ships/12908.jpg"
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Assuming `model`, `device`, and `dataset.wordname_50` are already defined
model.to(device)
pred_boxes, pred_labels, pred_scores = predict_image(model, image_path, device, confidence_threshold=0.3)

img = Image.open(image_path).convert("RGB")
plt.figure(figsize=(12, 8))
plt.imshow(img)

if len(pred_boxes) > 0:
    display_predictions(image_path, pred_boxes, pred_labels, pred_scores, dataset.wordname_50)
else:
    print("No objects detected with the specified confidence threshold.")


In [None]:
import json
import pandas as pd 
import numpy as np

with open('./test/shipsnet.json') as data_file:
    dataset = json.load(data_file)
Shipsnet= pd.DataFrame(dataset)
print(Shipsnet.head())
print('')    
x = np.array(dataset['data']).astype('uint8')
y = np.array(dataset['labels']).astype('uint8')
def describeData(a,b):
    print('Total number of images: {}'.format(len(a)))
    print('Number of NoShip Images: {}'.format(np.sum(b==0)))
    print('Number of Ship Images: {}'.format(np.sum(b==1)))
    print('Percentage of positive images: {:.2f}%'.format(100*np.mean(b)))
    print('Image shape (Width, Height, Channels): {}'.format(a[0].shape))
describeData(x,y)