# Import and Loads

In [1]:
from ultralytics import YOLO
import os 
import sys
sys.path.append("/mnt/RAID/projects/FjordVision")
from models.probability_tree import ProbabilityTree
import torch
from anytree.importer import JsonImporter
from preprocessing.preprocessing import load_ground_truth_mask_xyn, convert_polygon_to_mask, calculate_binary_mask_iou
from utils.metrics import calculate_hierarchical_precision_recall, calculate_weighted_f1_score
torch.cuda.empty_cache()

# Function to divide the data into chunks of size n
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

# Define the file path
weights_path = '/mnt/RAID/projects/FjordVision/runs/segment/Yolov8n-seg-train/weights/best.pt'

# Load the YOLO model weights
model = YOLO(weights_path)

importer = JsonImporter()
with open('data/ontology.json', 'r') as f:
    root = importer.read(f)

classes_file = '/mnt/RAID/datasets/label-studio/fjord/classes.txt'

species_names = []
with open(classes_file, 'r') as file:
    species_names = [line.strip() for line in file]

genus_names, class_names, binary_names = [], [], []
for node in root.descendants:
    if node.rank == 'genus':
        genus_names.append(node.name)
    elif node.rank == 'class':
        class_names.append(node.name)
    elif node.rank == 'binary':
        binary_names.append(node.name)

# Construct Probability Tree

In [2]:
# Usage example
ontology_path = 'data/ontology.json'  # Update this path as necessary
prob_tree = ProbabilityTree(ontology_path)

# Training Loop

In [3]:
# Define the image folder path
image_folder_path = '/mnt/RAID/datasets/The Fjord Dataset/fjord/images/test/'
frames = os.listdir(image_folder_path)
image_files_full_path = [image_folder_path + f for f in frames]

# Define the label folder path
label_folder_path = '/mnt/RAID/datasets/The Fjord Dataset/fjord/labels/test/'

classes = '/mnt/RAID/datasets/The Fjord Dataset/fjord/classes.txt'

class_index = []
with open(classes, 'r') as file:
    for line_number, line in enumerate(file, start=1):
        class_name = line.strip()
        class_index.append(class_name)

Y = []
Yhat = []
confidences = []
batch_size = 50

# Loop through batches of images
for image_batch in chunks(image_files_full_path, batch_size):

    with torch.no_grad():
        predictions = model(image_batch, stream=True)

    # Loop through the files in the image folder
    for file_name, prediction in zip(image_batch, predictions):
        # Check if the file is an image file
        if file_name.endswith('.jpg') or file_name.endswith('.png'):
            # Construct the corresponding label file name
            shape = prediction.orig_img.shape[:2]
            base_file_name = file_name.split('/')[-1].replace('.jpg', '.txt')
            label_file_path = label_folder_path + base_file_name
        
            # check if predictions are empty
            if len(prediction.boxes.cls) == 0:
                continue

            GT = load_ground_truth_mask_xyn(label_file_path)
            visited = len(GT)*[None]

            for cls, mask, conf in zip(prediction.boxes.cls, prediction.masks.xyn, prediction.boxes.conf):
                confidences.append(conf.item())
                m = convert_polygon_to_mask(mask, shape)
                best_iou = 0

                # calculate iou and find the best mask
                for idx, (gcls, gmsk) in enumerate(GT):
                    g = convert_polygon_to_mask(gmsk, shape)
                    iou = calculate_binary_mask_iou(m, g)

                    if iou > best_iou and iou > 0.5:
                        best_iou = iou
                        best_g = g
                        best_gcls = gcls
                        visited[idx] = True
                        best_idx = idx

                if best_idx is not None and best_iou > 0.5:
                    visited[best_idx] = True

                if best_g is None:
                    Y.append(None)
                    Yhat.append(int(cls.item()))
                else:
                    Y.append(best_gcls)
                    Yhat.append(int(cls.item()))

            for vis in visited:
                if vis is None:
                    Y.append(GT[idx][0])
                    Yhat.append(None)

    # After processing each batch, clear unusPolitisk likheited memory from CUDA
    torch.cuda.empty_cache()






































In [4]:
# Calculate weighted precision, recall, and F1
precision, recall = calculate_hierarchical_precision_recall(Y, Yhat, root, species_names)
weighted_f1_score = calculate_weighted_f1_score(precision, recall)

In [5]:
precision

0.9588657803141181

In [6]:
recall

0.8972809667673716

In [7]:
weighted_f1_score

0.9270517169110761

# Update Predictions with uniform probability tree

In [9]:
from anytree.search import find
from anytree.walker import Walker

def hierarchical_similarity(node1, node2):
    walker = Walker()
    # Assuming node1 and node2 are already the correct nodes from the tree
    upwards, _, down = walker.walk(node1, node2)
    distance = len(upwards) + len(down)

    return 1 / (1 + distance)

def calculate_hierarchical_precision_recall(Y, Yhat, tree, confidences, threshold=0.8):
    weighted_true_positives = 0
    weighted_false_positives = 0
    weighted_false_negatives = 0
    taxonomies = [species_names, genus_names, class_names, binary_names]
    
    for true_label, predicted_label, conf in zip(Y, Yhat, confidences):
        if predicted_label is None:  # Handle negative prediction as complete miss
            weighted_false_negatives += 1
            continue
        if true_label is None:  # Handle missing ground truth as complete miss
            weighted_false_positives += 1
            continue

        current_taxonomy = 0
        node = find(tree.root, lambda node: node.name == taxonomies[current_taxonomy][predicted_label])
        while conf < threshold and current_taxonomy < len(taxonomies) - 1:
            # Move up the taxonomy if the confidence is below the threshold
            if node.parent is not None:
                node = node.parent
                current_taxonomy += 1
                # Attempt to find the new predicted_label index in the parent taxonomy
                try:
                    predicted_label = taxonomies[current_taxonomy].index(node.name)
                except ValueError:
                    # If the node's name is not in the taxonomy, break from the loop
                    break
                conf += tree.sum_siblings_probabilities(node.name)
            else:
                break  # If there's no parent, we're at the root and cannot go up further

        # At this point, node represents the current predicted label node
        node1 = node
        node2 = find(tree.root, lambda node: node.name == taxonomies[0][true_label])
        similarity_weight = hierarchical_similarity(node1, node2)

        if true_label == predicted_label:
            weighted_true_positives += similarity_weight
        else:
            weighted_false_positives += (1 - similarity_weight)  # Penalize based on dissimilarity

    precision = weighted_true_positives / (weighted_true_positives + weighted_false_positives) if (weighted_true_positives + weighted_false_positives) > 0 else 0
    recall = weighted_true_positives / (weighted_true_positives + weighted_false_negatives) if (weighted_true_positives + weighted_false_negatives) > 0 else 0
    
    return precision, recall

def calculate_weighted_f1_score(precision, recall):
    if precision + recall == 0:
        return 0
    return 2 * (precision * recall) / (precision + recall)

# Calculate weighted precision, recall, and F1
precision, recall = calculate_hierarchical_precision_recall(Y, Yhat, prob_tree, confidences, threshold=0.2)
weighted_f1_score = calculate_weighted_f1_score(precision, recall)

In [10]:
precision

0.9590961757352848

In [11]:
recall

0.8988274706867672

In [12]:
weighted_f1_score

0.9279843027366996

# Predict Using Probability Tree

In [None]:
label_folder_path = '/mnt/RAID/datasets/The Fjord Dataset/fjord/labels/train/'
frames = os.listdir(label_folder_path)
label_files_full_path = [label_folder_path + f for f in frames]

class_indexes = []

for label_file in label_files_full_path:
    with open(label_file, 'r') as file:
        lines = file.readlines()
        for line in lines:
            if line.strip():
                class_index = int(line.split()[0])
                class_indexes.append(class_index)

class_index_counts = {}
for class_index in class_indexes:
    if class_index in class_index_counts:
        class_index_counts[class_index] += 1
    else:
        class_index_counts[class_index] = 1

sorted_class_index_counts = dict(sorted(class_index_counts.items()))
sorted_class_index_counts

{0: 1321,
 1: 1399,
 2: 2324,
 3: 1210,
 4: 3098,
 5: 1376,
 6: 1519,
 7: 1081,
 8: 1292,
 9: 2113,
 10: 1202,
 11: 3314,
 12: 4837}