In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import os
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader
from ultralytics import YOLO


In [2]:
class AortaDataset(Dataset):
    def __init__(self, image_folder, label_folder, sov_folder, transform=None):
        self.image_folder = image_folder
        self.label_folder = label_folder
        self.sov_folder = sov_folder
        self.image_files = sorted(os.listdir(image_folder))
        self.transform = transform
    
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        image_path = os.path.join(self.image_folder, self.image_files[idx])
        label_path = os.path.join(self.label_folder, self.image_files[idx].replace(".jpg", ".txt").replace(".png", ".txt"))
        sov_path = os.path.join(self.sov_folder, self.image_files[idx].replace(".jpg", ".txt").replace(".png", ".txt"))

        # Load image
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Error loading image: {image_path}")
        if len(image.shape) == 2:  # Convert grayscale images to RGB
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        image = cv2.resize(image, (640, 640))  # Resize for YOLO input
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255.0  # Normalize

        # Load bounding box labels
        label = torch.zeros(4)  # Default label (x, y, w, h)
        if os.path.exists(label_path):
            with open(label_path, "r") as f:
                parts = f.readline().strip().split()
                if len(parts) == 5:  # Ensure it's a valid bounding box
                    label = torch.tensor(list(map(float, parts[1:])), dtype=torch.float32)

        # Load SOV coordinates
        sov_coords = torch.zeros(4)  # Default (x, y, w, h)
        if os.path.exists(sov_path):
            with open(sov_path, "r") as f:
                parts = f.readline().strip().split()
                if len(parts) == 5:  # Ensure valid file
                    sov_coords = torch.tensor(list(map(float, parts[1:])), dtype=torch.float32)

        return image, sov_coords, label


In [3]:
import torch
import torch.nn as nn
from ultralytics import YOLO

class YOLOWithMLP(nn.Module):
    def __init__(self, mlp_hidden_dim, mlp_output_dim):
        super(YOLOWithMLP, self).__init__()

        # Load YOLOv8n (pretrained) for feature extraction
        self.yolo = YOLO("yolov8n.pt").to("cuda:6")  
        self.yolo.eval()  # Set YOLO to evaluation mode (no gradient updates)

        # Freeze YOLO parameters (so only the MLP is trained)
        for param in self.yolo.model.parameters():
            param.requires_grad = False  

        # Extract feature size dynamically (No dummy input needed at runtime)
        sample_input = torch.randn(1, 3, 640, 640).to("cuda:6")
        with torch.no_grad():
            features = self.yolo.model.model[:10](sample_input)
            feature_dim = features.shape[1]  # Extract number of channels

        print(f"Detected YOLO feature size from 10th layer: {feature_dim}")

        # Define MLP (Trainable part)
        self.mlp = nn.Sequential(
            nn.Linear(feature_dim + 4, mlp_hidden_dim),  # YOLO features + additional 4D input
            nn.ReLU(),
            nn.Linear(mlp_hidden_dim, mlp_output_dim),  # Output (e.g., x, y, w, h)
            nn.ReLU()
        )

    def forward(self, images, additional_features):
        with torch.no_grad():  # YOLO runs inference only (no gradients)
            features = self.yolo.model.model[:10](images)  # Extract 10th layer features
        
        # Apply Global Average Pooling (GAP) to create a fixed-size vector
        yolo_features = features.mean(dim=[2, 3])  

        # Concatenate YOLO features with additional numerical input
        combined_features = torch.cat((yolo_features, additional_features), dim=1)

        # Pass through the MLP (Trainable)
        output = self.mlp(combined_features)
        return output


In [4]:
sample_input = torch.randn(1, 3, 640, 640).to("cuda:6")  # Dummy image input
sample_sov_coords = torch.randn(1, 4).to("cuda:6")  # Dummy 4-coordinates

yolo_model = YOLOWithMLP(256, 4).to("cuda:6")
sample_output = yolo_model(sample_input, sample_sov_coords)


Detected YOLO feature size from 10th layer: 256


In [6]:
import torch.optim as optim
# Create dataset and dataloader
train_dataset = AortaDataset("/mnt/nvme_disk2/User_data/nb57077k/cardiovision/phase2/Dataset/aorta_data/images/train", "/mnt/nvme_disk2/User_data/nb57077k/cardiovision/phase2/Dataset/aorta_data/labels/train", "/mnt/nvme_disk2/User_data/nb57077k/cardiovision/phase2/Dataset/aorta_data/SOV_labels/train")
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# Define model
mlp_hidden_dim = 128
mlp_output_dim = 4  # Example: bounding box coordinates (x, y, w, h)
model = YOLOWithMLP(mlp_hidden_dim, mlp_output_dim).to("cuda:6")

# Define optimizer and loss function
optimizer = optim.Adam(model.mlp.parameters(), lr=1e-4)  # Only MLP is trainable
criterion = nn.MSELoss()  # Example loss function (modify as needed)

# Example training loop
num_epochs = 100
for epoch in range(num_epochs):
    for images, additional_features, targets in train_loader:  # Replace with real dataloader
        images, additional_features, targets = images.to("cuda:6"), additional_features.to("cuda:6"), targets.to("cuda:6")

        optimizer.zero_grad()  # Reset gradients

        outputs = model(images, additional_features)  # Forward pass
        loss = criterion(outputs, targets)  # Compute loss

        loss.backward()  # Backpropagation
        optimizer.step()  # Update MLP weights

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


Detected YOLO feature size from 10th layer: 256
Epoch [1/100], Loss: 0.0106
Epoch [2/100], Loss: 0.0068
Epoch [3/100], Loss: 0.0037
Epoch [4/100], Loss: 0.0025
Epoch [5/100], Loss: 0.0020
Epoch [6/100], Loss: 0.0060
Epoch [7/100], Loss: 0.0077
Epoch [8/100], Loss: 0.0106
Epoch [9/100], Loss: 0.0032
Epoch [10/100], Loss: 0.0052
Epoch [11/100], Loss: 0.0032
Epoch [12/100], Loss: 0.0051
Epoch [13/100], Loss: 0.0025
Epoch [14/100], Loss: 0.0032
Epoch [15/100], Loss: 0.0027
Epoch [16/100], Loss: 0.0050
Epoch [17/100], Loss: 0.0026
Epoch [18/100], Loss: 0.0028
Epoch [19/100], Loss: 0.0025
Epoch [20/100], Loss: 0.0017
Epoch [21/100], Loss: 0.0037
Epoch [22/100], Loss: 0.0022
Epoch [23/100], Loss: 0.0029
Epoch [24/100], Loss: 0.0032
Epoch [25/100], Loss: 0.0025
Epoch [26/100], Loss: 0.0058
Epoch [27/100], Loss: 0.0018
Epoch [28/100], Loss: 0.0020
Epoch [29/100], Loss: 0.0029
Epoch [30/100], Loss: 0.0024
Epoch [31/100], Loss: 0.0006
Epoch [32/100], Loss: 0.0010
Epoch [33/100], Loss: 0.0015
Epoc

In [7]:
# Save the trained model after training
torch.save(model.state_dict(), "trained_mlp.pth")
print("✅ Model saved successfully as trained_mlp.pth")


✅ Model saved successfully as trained_mlp.pth


In [9]:
import os
import torch
import cv2
import numpy as np
from tqdm import tqdm
from ultralytics import YOLO

# Define paths
VAL_IMAGES_FOLDER = "/mnt/nvme_disk2/User_data/nb57077k/cardiovision/phase2/Dataset/aorta_data/images/val"  # Folder containing validation images
SOV_LABELS_FOLDER = "/mnt/nvme_disk2/User_data/nb57077k/cardiovision/phase2/Dataset/aorta_data/SOV_labels/val"  # Folder containing corresponding SOV bounding boxes
OUTPUT_TXT_FOLDER = "predicted_labels"  # Where predictions will be saved
os.makedirs(OUTPUT_TXT_FOLDER, exist_ok=True)  # Create folder if it doesn't exist

# Load trained model
mlp_hidden_dim = 128
mlp_output_dim = 4  # Predicting (x, y, w, h)
model = YOLOWithMLP(mlp_hidden_dim, mlp_output_dim).to("cuda:6")
model.load_state_dict(torch.load("trained_mlp.pth"))  # Load trained weights
# model.eval()  # Set to evaluation mode

# Get list of images
image_files = [f for f in os.listdir(VAL_IMAGES_FOLDER) if f.endswith((".jpg", ".png"))]

# Run inference on all images
with torch.no_grad():
    for img_name in tqdm(image_files):
        # Load image
        img_path = os.path.join(VAL_IMAGES_FOLDER, img_name)
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
        img = cv2.resize(img, (640, 640))  # Resize to match YOLO input size
        img = torch.tensor(img).float().permute(2, 0, 1).unsqueeze(0).to("cuda:6") / 255.0  # Normalize

        # Load corresponding SOV bounding box
        txt_filename = os.path.splitext(img_name)[0] + ".txt"
        sov_txt_path = os.path.join(SOV_LABELS_FOLDER, txt_filename)

        if os.path.exists(sov_txt_path):
            with open(sov_txt_path, "r") as f:
                lines = f.readlines()
                if len(lines) > 0:
                    sov_bbox = list(map(float, lines[0].strip().split()[1:]))  # Skip class label (0)
                else:
                    sov_bbox = [0, 0, 0, 0]  # Default if file empty
        else:
            sov_bbox = [0, 0, 0, 0]  # Default if no SOV bbox found

        # Convert SOV bbox to tensor
        additional_features = torch.tensor([sov_bbox]).to("cuda:6")

        # Run inference
        outputs = model(img, additional_features)
        pred_bbox = outputs[0].cpu().tolist()  # Get predictions (x, y, w, h)

        # Convert bbox format (normalize x, y, w, h between 0 and 1)
        img_width, img_height = 640, 640  # Since all images are resized to 640x640
        x, y, w, h = pred_bbox       

        # Save to corresponding .txt file
        pred_txt_path = os.path.join(OUTPUT_TXT_FOLDER, txt_filename)

        with open(pred_txt_path, "w") as f:
            f.write(f"0 {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")  # Write in YOLO format

print(f"✅ Predictions saved in {OUTPUT_TXT_FOLDER}")


Detected YOLO feature size from 10th layer: 256


  0%|          | 0/59 [00:00<?, ?it/s]

100%|██████████| 59/59 [00:01<00:00, 46.19it/s]

✅ Predictions saved in predicted_labels



