# SAM2 Finetuning Training Data Prep

### This notebook walks through converting a folder of images and their corresponding Label Studio ellipse annotations (`.json`) into the format required by the SAM2 training framework.

In [None]:
import os
import json
import cv2
import numpy as np
import matplotlib.pyplot as plt


### Extract image name and create an image containing all labelled masks

In [None]:
# Root folder of the dataset; the raw input images are read from its "Raw" subfolder.
data_dir = "../data/Train20"
img_dir = os.path.join(data_dir, "Raw")

# File name of the annotation export (JSON); it is joined onto data_dir when loaded.
labels = "annotations.json"

def construct_masks_from_json(path):
    """
    Construct one combined mask per labelled image from the annotation JSON file.

    The file is expected to hold a list of tasks. For every task, only the
    first entry of ``task["annotations"]`` is used, and each element of its
    ``"result"`` list is drawn onto a single canvas via ``draw_mask``.

    Args:
        path: Path to the annotation JSON file.

    Returns:
        A list of dicts, one per task that has at least one result, with keys
        ``"image"`` (the ``file_upload`` name with everything up to and
        including the first ``"-"`` removed) and ``"annotation"`` (a 2-D
        ``uint8`` array sized ``original_height`` x ``original_width``).
        Tasks without annotations or with an empty result list are skipped,
        because the canvas size is only available from a result entry.
    """
    # JSON exports are UTF-8; do not depend on the platform default encoding.
    with open(path, 'r', encoding="utf-8") as f:
        tasks = json.load(f)

    data = []
    for task in tasks:
        # presumably the leading "<prefix>-" is an upload id added by the labelling tool — verify
        img_name = "-".join(task["file_upload"].split("-")[1:])

        annotations = task.get("annotations") or []
        results = annotations[0].get("result") if annotations else None
        if not results:
            # Previously this case raised IndexError on annotations[0] / result[0].
            print(f"Skipping '{img_name}': no annotation results found.")
            continue

        # The canvas dimensions are taken from the first result entry.
        canvas = np.zeros(
            (results[0]["original_height"], results[0]["original_width"]),
            dtype=np.uint8,
        )
        for ellipse in results:
            draw_mask(canvas, ellipse["original_height"], ellipse["original_width"],
                      ellipse["value"], ellipse["image_rotation"])

        data.append({
            "image": img_name,
            "annotation": canvas
        })
    return data
                
def draw_mask(canvas, height, width, ellipse, image_rotation):
    """
    Draw one filled ellipse (value 255) onto ``canvas`` and return the canvas.

    Args:
        canvas: Image array that is drawn on in place.
        height: Image height used to scale the percentage-based y values.
        width: Image width used to scale the percentage-based x values.
        ellipse: Dict with keys ``"x"``, ``"y"``, ``"radiusX"``, ``"radiusY"``
            (all divided by 100 and scaled to pixels) and ``"rotation"``.
        image_rotation: Extra rotation added to the ellipse's own rotation.

    Returns:
        The same ``canvas`` object that was passed in.
    """
    # Percentages -> pixel coordinates, truncated to integers.
    center = (
        int(ellipse["x"] / 100 * width),
        int(ellipse["y"] / 100 * height),
    )
    axes = (
        int(ellipse["radiusX"] / 100 * width),
        int(ellipse["radiusY"] / 100 * height),
    )
    angle = ellipse["rotation"] + image_rotation

    # Full 0-360 degree arc, colour 255, thickness -1 (filled).
    cv2.ellipse(canvas, center, axes, angle, 0, 360, 255, -1)
    return canvas

### Pair up images and masks and create dir structure for SAM2 Finetuning

In [None]:
def remove_commas_and_decimals(path, name):
    """
    Find the ``.jpg`` file in ``path`` whose normalised name equals ``name``.

    A file name is normalised by removing every comma and every dot from its
    stem (the part before the final ``.jpg``) and re-appending ``.jpg``.

    Args:
        path: Directory to search.
        name: Normalised file name to look for (including ``.jpg``).

    Returns:
        The original (un-normalised) file name of the first match, or ``None``
        if no ``.jpg`` file in ``path`` normalises to ``name``.
    """
    suffix = ".jpg"
    for file in os.listdir(path):
        if not file.endswith(suffix):
            continue
        # Slice the extension off instead of using str.rstrip(".jpg"): rstrip
        # strips a *set of characters*, so it would also eat trailing 'j', 'p',
        # 'g' or '.' from the stem (e.g. "map.jpg" -> "ma").
        stem = file[:-len(suffix)]
        new_name = stem.replace(",", "").replace(".", "") + suffix
        if new_name == name:
            return file
    return None

def create_sam2_training_data(path, data):
    """
    Write image/mask pairs into the ``Annotations`` / ``JPEGImages`` layout.

    For every item in ``data`` the matching raw image is looked up in
    ``<path>/Raw`` via ``remove_commas_and_decimals``, and both the image and
    its mask are written under the same zero-padded index name
    (``00.jpg``, ``01.jpg``, ...) into
    ``<path>/JPEGImages/placeholder-vid`` and
    ``<path>/Annotations/placeholder-vid`` respectively.

    Args:
        path: Dataset root; must contain a ``Raw`` folder with the source images.
        data: Iterable of dicts with keys ``"image"`` (normalised file name)
            and ``"annotation"`` (mask array to write).

    Raises:
        FileNotFoundError: If no raw image matches an item's ``"image"`` name.
        ValueError: If a matched raw image cannot be decoded.
    """
    ann_path = os.path.join(path, "Annotations", "placeholder-vid")
    img_path = os.path.join(path, "JPEGImages", "placeholder-vid")
    raw_path = os.path.join(path, "Raw")
    os.makedirs(ann_path, exist_ok=True)
    os.makedirs(img_path, exist_ok=True)

    for counter, item in enumerate(data):
        file = remove_commas_and_decimals(raw_path, item["image"])
        if file is None:
            # Without this check os.path.join(raw_path, None) fails with an
            # opaque TypeError that does not say which image is missing.
            raise FileNotFoundError(
                f"No raw image matching '{item['image']}' found in '{raw_path}'."
            )

        image = cv2.imread(os.path.join(raw_path, file))
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError(f"Could not read image '{os.path.join(raw_path, file)}'.")

        # Two-digit zero-padded index, e.g. 00.jpg, 01.jpg, ..., 10.jpg
        name = f"{counter:02d}.jpg"

        # NOTE(review): the mask is stored as JPEG, which is lossy, so mask
        # pixels may not stay exactly 0/255 — confirm the training loader
        # accepts this or consider switching the mask format to PNG.
        cv2.imwrite(os.path.join(ann_path, name), item["annotation"])
        cv2.imwrite(os.path.join(img_path, name), image)
    

In [None]:
# Build the masks from the annotation export.
data = construct_masks_from_json(os.path.join(data_dir, labels))

# Manual name override: the on-disk lookup compares against names whose stem
# has all dots removed, so an annotation name that still contains "5.4" can
# never match and is mapped to its dot-free form here.
_renamed_images = {"320-151-1x-3-5.4.jpg": "320-151-1x-3-54.jpg"}
for elem in data:
    elem["image"] = _renamed_images.get(elem["image"], elem["image"])

# Write the paired images and masks into the training folder layout.
create_sam2_training_data(data_dir, data)

### Check that masks were created correctly

In [None]:
# Overlay one generated mask on its image to check that the ellipses line up.
sample_idx = 5  # index into `data`; the matching file is the zero-padded "<idx>.jpg"

image_path = os.path.join(data_dir, "JPEGImages", "placeholder-vid", f"{sample_idx:02d}.jpg")
image = cv2.imread(image_path)
if image is None:
    # cv2.imread returns None instead of raising when the file is missing/unreadable.
    raise FileNotFoundError(f"Could not read '{image_path}'.")
masks = data[sample_idx]["annotation"]

# Create a color mask from the grayscale mask
color_mask = cv2.merge([masks, masks, masks])  # Create a 3-channel mask

# Define transparency factor (adjust as needed)
alpha = 0.4

# Add the colored mask to the original image
result_image = cv2.addWeighted(color_mask, alpha, image, 1 - alpha, 0)

# Display the result inline. cv2.imshow/waitKey opens a native window, which
# blocks the kernel and does not work on headless or remote kernels.
# OpenCV stores channels as BGR, matplotlib expects RGB.
fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))
ax.set_title('Original Image with Masks')
ax.axis("off")
plt.show()

# Experiment with new Data Structure

In [None]:
import os
import shutil

def organize_files_into_folders(path):
    """
    Move every file directly inside ``path`` into its own sub-folder.

    A file ``<stem><ext>`` becomes ``<path>/<stem>/00.jpg``; the destination
    name is always ``"00.jpg"`` regardless of the original extension.
    Existing sub-directories of ``path`` are left untouched, so running the
    function a second time is a no-op.

    A file is skipped (with a printed message) instead of being moved when
    its destination already exists — e.g. two files sharing a stem — or when
    the target folder name is already taken by a non-directory (e.g. a file
    without an extension). This prevents silently overwriting data.

    Args:
        path: Directory whose files should be reorganised.

    Returns:
        None. Progress and errors are reported via ``print``.
    """
    # Ensure the provided path is valid
    if not os.path.isdir(path):
        print(f"Error: {path} is not a valid directory.")
        return

    # Sorted so that the processing order (and the log) is deterministic.
    for entry in sorted(os.listdir(path)):
        full_path = os.path.join(path, entry)

        # Skip if it's a directory
        if os.path.isdir(full_path):
            continue

        # Target directory has the same name as the file (excluding extension)
        filename_without_ext = os.path.splitext(entry)[0]
        new_dir_path = os.path.join(path, filename_without_ext)
        new_file_path = os.path.join(new_dir_path, "00.jpg")

        # An extension-less file would need a folder with its own name.
        if os.path.lexists(new_dir_path) and not os.path.isdir(new_dir_path):
            print(f"Skipped '{entry}': '{new_dir_path}' exists and is not a directory.")
            continue

        # Never overwrite a file that was already moved into place.
        if os.path.exists(new_file_path):
            print(f"Skipped '{entry}': '{new_file_path}' already exists.")
            continue

        os.makedirs(new_dir_path, exist_ok=True)

        # Move the file into the new directory and rename it to "00.jpg"
        shutil.move(full_path, new_file_path)

        print(f"Moved '{entry}' to '{new_file_path}'")

# Example usage: reorganise the frames under JPEGImages/placeholder-vid.
# The folder is addressed relative to data_dir rather than through a
# machine-specific absolute path, so the notebook runs on other checkouts.
organize_files_into_folders(os.path.join(data_dir, "JPEGImages", "placeholder-vid"))

Moved '00.jpg' to 'C:/Users/Micha/Desktop/BachelorProject/AI-Powered-Biosensing/data/Train20/JPEGImages/placeholder-vid\00\00.jpg'
Moved '01.jpg' to 'C:/Users/Micha/Desktop/BachelorProject/AI-Powered-Biosensing/data/Train20/JPEGImages/placeholder-vid\01\00.jpg'
Moved '02.jpg' to 'C:/Users/Micha/Desktop/BachelorProject/AI-Powered-Biosensing/data/Train20/JPEGImages/placeholder-vid\02\00.jpg'
Moved '03.jpg' to 'C:/Users/Micha/Desktop/BachelorProject/AI-Powered-Biosensing/data/Train20/JPEGImages/placeholder-vid\03\00.jpg'
Moved '04.jpg' to 'C:/Users/Micha/Desktop/BachelorProject/AI-Powered-Biosensing/data/Train20/JPEGImages/placeholder-vid\04\00.jpg'
Moved '05.jpg' to 'C:/Users/Micha/Desktop/BachelorProject/AI-Powered-Biosensing/data/Train20/JPEGImages/placeholder-vid\05\00.jpg'
Moved '06.jpg' to 'C:/Users/Micha/Desktop/BachelorProject/AI-Powered-Biosensing/data/Train20/JPEGImages/placeholder-vid\06\00.jpg'
Moved '07.jpg' to 'C:/Users/Micha/Desktop/BachelorProject/AI-Powered-Biosensing/dat

## Finetuning

In [None]:
# Run train.py from the sam2 training folder with the b+ base config.
# --use-cluster 0 / --num-nodes 1: presumably a local single-node run — confirm against train.py.
!cd ../sam2/training && python train.py -c configs/sam2.1_training/train_b+_base.yaml --use-cluster 0 --num-nodes 1

###################### Train App Config ####################
scratch:
  resolution: 1024
  train_batch_size: 1
  num_train_workers: 1
  num_frames: 1
  max_num_objects: 3
  base_lr: 5.0e-06
  vision_lr: 3.0e-06
  phases_per_epoch: 1
  num_epochs: 50
dataset:
  img_folder: C:/Users/Micha/Desktop/BachelorProject/AI-Powered-Biosensing/data/Train20/JPEGImages
  gt_folder: C:/Users/Micha/Desktop/BachelorProject/AI-Powered-Biosensing/data/Train20/Annotations
  file_list_txt: null
  multiplier: 2
vos:
  train_transforms:
  - _target_: training.dataset.transforms.ComposeAPI
    transforms:
    - _target_: training.dataset.transforms.RandomHorizontalFlip
      consistent_transform: true
    - _target_: training.dataset.transforms.RandomAffine
      degrees: 25
      shear: 20
      image_interpolation: bilinear
      consistent_transform: true
    - _target_: training.dataset.transforms.RandomResizeAPI
      sizes: ${scratch.resolution}
      square: true
      consistent_transform: true
    - 

grad.sizes() = [64, 256, 1, 1], strides() = [256, 1, 256, 256]
bucket_view.sizes() = [64, 256, 1, 1], strides() = [256, 1, 1, 1] (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\pytorch\torch\csrc\distributed\c10d\reducer.cpp:342.)
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


In [None]:
# The %tensorboard magic only exists after the extension has been loaded;
# loading it here keeps the cell runnable on a fresh kernel.
%load_ext tensorboard
%tensorboard --logdir ../sam2/training/logs --bind_all