Create a Dataset for @HOME2024 using GroundingDino and SAM

In [31]:
import os
import random
import json
import numpy as np
import cv2
from PIL import Image, ImageDraw, ImageEnhance, ImageFilter, ImageFont
from pycocotools import mask
import json
import yaml
import csv
import torch
import matplotlib.pyplot as plt
from pathlib import Path
import ultralytics
import time
import imutils
import argparse

#grounding imports----------------

import groundingdino.datasets.transforms as T
from groundingdino.models import build_model
from groundingdino.util import box_ops
from groundingdino.util.slconfig import SLConfig
from groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap
from groundingdino.util.vl_utils import create_positive_map_from_span


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#resize images in a folder to a specific size

pathtofiles = "/home/jabv/Desktop/prueba/imgs/"
pathtoimage = [f.path for f in os.scandir(pathtofiles) if f.is_dir()]
size = 720

#pathtoimage = os.listdir(pathtofiles)

for filepath in pathtoimage:
    folder = filepath + "/"
    for filename in os.listdir(folder):
        img = Image.open(folder + filename)
        img = img.resize((size, size))
        img.save(folder + filename)

Auto label con Segment anyting y modelo de YOLOv8

In [3]:
pathtofiles = "/home/jabv/Desktop/prueba/imgs/" #path to images to process
resultspath = "/home/jabv/Desktop/prueba/prueba_precut" #path to save results all ready processed and segmented images
if not os.path.exists(resultspath):
    os.makedirs(resultspath)

def load_image(image_path):

    image_pil = Image.open(image_path).convert("RGB")  # load image

    transform = T.Compose(
        [
            T.RandomResize([800], max_size=1333),
            T.ToTensor(),
            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    image, _ = transform(image_pil, None)  # 3, h, w
    return image_pil, image


def load_model(model_config_path, model_checkpoint_path, cpu_only=False):
    args = SLConfig.fromfile(model_config_path)
    args.device = "cuda" if not cpu_only else "cpu"
    model = build_model(args)
    checkpoint = torch.load(model_checkpoint_path, map_location="cpu")
    load_res = model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False)
    print(load_res)
    _ = model.eval()
    return model


def get_grounding_output(model, image, caption, box_threshold, text_threshold=None, with_logits=True, cpu_only=False, token_spans=None):
    assert text_threshold is not None or token_spans is not None, "text_threshould and token_spans should not be None at the same time!"
    caption = caption.lower()
    caption = caption.strip()
    if not caption.endswith("."):
        caption = caption + "."
    device = "cuda" if not cpu_only else "cpu"
    model = model.to(device)
    image = image.to(device)
    with torch.no_grad():
        print("Running model...")
        outputs = model(image[None], captions=[caption])
    logits = outputs["pred_logits"].sigmoid()[0]  # (nq, 256)
    boxes = outputs["pred_boxes"][0]  # (nq, 4)

    # filter output
    if token_spans is None:
        logits_filt = logits.cpu().clone()
        boxes_filt = boxes.cpu().clone()
        filt_mask = logits_filt.max(dim=1)[0] > box_threshold
        logits_filt = logits_filt[filt_mask]  # num_filt, 256
        boxes_filt = boxes_filt[filt_mask]  # num_filt, 4

        # get phrase
        tokenlizer = model.tokenizer
        tokenized = tokenlizer(caption)
        # build pred
        pred_phrases = []
        for logit, box in zip(logits_filt, boxes_filt):
            pred_phrase = get_phrases_from_posmap(logit > text_threshold, tokenized, tokenlizer)
            if with_logits:
                pred_phrases.append(pred_phrase + f"({str(logit.max().item())[:4]})")
            else:
                pred_phrases.append(pred_phrase)
    else:
        # given-phrase mode
        positive_maps = create_positive_map_from_span(
            model.tokenizer(text_prompt),
            token_span=token_spans
        ).to(image.device) # n_phrase, 256

        logits_for_phrases = positive_maps @ logits.T # n_phrase, nq
        all_logits = []
        all_phrases = []
        all_boxes = []
        for (token_span, logit_phr) in zip(token_spans, logits_for_phrases):
            # get phrase
            phrase = ' '.join([caption[_s:_e] for (_s, _e) in token_span])
            # get mask
            filt_mask = logit_phr > box_threshold
            # filt box
            all_boxes.append(boxes[filt_mask])
            # filt logits
            all_logits.append(logit_phr[filt_mask])
            if with_logits:
                logit_phr_num = logit_phr[filt_mask]
                all_phrases.extend([phrase + f"({str(logit.item())[:4]})" for logit in logit_phr_num])
            else:
                all_phrases.extend([phrase for _ in range(len(filt_mask))])
        boxes_filt = torch.cat(all_boxes, dim=0).cpu()
        pred_phrases = all_phrases


    return boxes_filt, pred_phrases

# cfg
config_file = "/home/jabv/Desktop/home-vision/dataset_generator/groundingdino/config/GroundingDINO_SwinT_OGC.py"  # change the path of the model config file

#wget -q https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth
checkpoint_path = "/home/jabv/Desktop/home-vision/dataset_generator/groundingdino_swint_ogc.pth"  # change the path of the model
text_prompt = "bannana"
output_dir = resultspath
box_threshold = 0.3
text_threshold = 0.25
token_spans = None



import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamPredictor
sam_model = "h"

#use sam model
#wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
#wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth
if sam_model =="h":
  sam_checkpoint = "sam_vit_h_4b8939.pth"
  model_type = "vit_h"
else:
  sam_checkpoint = "sam_vit_l_0b3195.pth"
  model_type = "vit_l"

device = "cuda"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)

images=[]
annotations=[]
categories=[]

img_id=0
anno_id=0

#check if results directory exists, else create it
if not os.path.exists(resultspath):
  os.makedirs(resultspath)

#make a list of all the directories in the path
pathtoimage = [f.path for f in os.scandir(pathtofiles) if f.is_dir()]

#pathtoimage = os.listdir(pathtofiles)

for filepath in pathtoimage:
    imgPaths = os.listdir(filepath)
    print(imgPaths)

    i=0

    for imgPath in imgPaths:
        print(f"Processing image: {imgPath}")
        img = imutils.resize(cv2.imread(f"{filepath}/{imgPath}"))
        if img is None:
            continue

    #------------------------start grounding----------------------------------------------
        #image_path = args.image_path

        # load image
        image_pil, image = load_image(f"{filepath}/{imgPath}")

        # load model
        model = load_model(config_file, checkpoint_path, cpu_only=False)

        # set the text_threshold to None if token_spans is set.
        if token_spans is not None:
            text_threshold = None
            print("Using token_spans. Set the text_threshold to None.")

        # run model
        boxes_filt, pred_phrases = get_grounding_output(
            model, image, text_prompt, box_threshold, text_threshold, cpu_only=False, token_spans=eval(f"{token_spans}")
        )

        #found bb dimensions

        size = image_pil.size
        pred_dict = {
            "boxes": boxes_filt,
            "size": [size[1], size[0]],  # H,W
            "labels": pred_phrases,
        }

        H, W = pred_dict["size"]
        boxes = pred_dict["boxes"]
        labels = pred_dict["labels"]
        assert len(boxes) == len(labels), "boxes and labels must have same length"

        draw = ImageDraw.Draw(image_pil)
        mask = Image.new("L", image_pil.size, 0)
        mask_draw = ImageDraw.Draw(mask)

        #change pil image to cv2 image
        img = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
        img2 = img.copy()
        # draw boxes and masks
        for box, label in zip(boxes, labels):
            # from 0..1 to 0..W, 0..H
            box = box * torch.Tensor([W, H, W, H])
            # from xywh to xyxy
            box[:2] -= box[2:] / 2
            box[2:] += box[:2]
            # random color
            color = tuple(np.random.randint(0, 255, size=1).tolist())
            # draw
            padding = 10
            x0, y0, x1, y1 = box
            x0, y0, x1, y1 = int(x0)-padding, int(y0)-padding, int(x1)+padding, int(y1)+padding

            #validate if the bounding box is inside the image
            if x0 < 0:
                x0 = 0
            if y0 < 0:
                y0 = 0
            if x1 > W:
                x1 = W
            if y1 > H:
                y1 = H
                
            #draw rectangles
            cv2.rectangle(img2, (x0, y0), (x1, y1), color, 2)

            draw.rectangle([x0, y0, x1, y1], outline=color, width=6)
            # draw.text((x0, y0), str(label), fill=color)

            font = ImageFont.load_default()
            if hasattr(font, "getbbox"):
                bbox = draw.textbbox((x0, y0), str(label), font)
            else:
                w, h = draw.textsize(str(label), font)
                bbox = (x0, y0, w + x0, y0 + h)
            # bbox = draw.textbbox((x0, y0), str(label))
            draw.rectangle(bbox, fill=color)
            draw.text((x0, y0), str(label), fill="white")

            mask_draw.rectangle([x0, y0, x1, y1], fill=255, width=6)
        
    # ----------------End grounding ---------------------------------------------------------   
        
    # ----------------Start SAM--------------------------------------------------------------  
            
            class_name = filepath.split("/")[-1]
            #print x0, y0, x1, y1
            print(f"Bounding box: {x0}, {y0}, {x1}, {y1}")

            sam_bounding_box = np.array([x0, y0, x1, y1])
            ran_sam = False
            #run sam
            if ran_sam == False:
                predictor.set_image(img)
                ran_sam = True

            mask, _, _ = predictor.predict(
                point_coords=None,
                point_labels=None,
                box=sam_bounding_box,
                multimask_output=False,
            )

            mask, _, _ = predictor.predict(box=sam_bounding_box, multimask_output=False)

            #Make png mask
            contours, _ = cv2.findContours(mask[0].astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # Your call to find the contours

            # threshold input image using otsu thresholding as mask and refine with morphology
            ret, pngmask = cv2.threshold(mask[0].astype(np.uint8), 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU) 
            kernel = np.ones((9,9), np.uint8)
            pngmask = cv2.morphologyEx(pngmask, cv2.MORPH_CLOSE, kernel)
            pngmask = cv2.morphologyEx(pngmask, cv2.MORPH_OPEN, kernel)
            result = img.copy()
            result = cv2.cvtColor(result, cv2.COLOR_BGR2BGRA)
            result[:, :, 3] = pngmask                           

    # ----------------End SAM-----------------------------------------------------------------  
            #cv2.imwrite(f"{resultspath}/groundingcv2_{imgPath}", img2)

            #image_pil.save(f"{resultspath}/grounding_{imgPath}")

            if not os.path.exists(f"{resultspath}/{class_name}"):
                os.mkdir(f"{resultspath}/{class_name}")

            file_path = f"{resultspath}/{class_name}/{imgPath[:-4]}.png"
            if os.path.exists(file_path):
                if os.path.exists(f"{resultspath}/{class_name}/{imgPath[:-4]}_1.png"):
                    print("File already exists, saving with _2")
                    cv2.imwrite(f"{resultspath}/{class_name}/{imgPath[:-4]}_2.png", result)
                else:
                    print("File already exists, saving with _1")
                    file_path = f"{resultspath}/{class_name}/{imgPath[:-4]}_1.png"

            cv2.imwrite(file_path, result)
            i=i+1
            ran_sam = False

['manzana1.jpeg', 'manzana2.jpeg']
Processing image: manzana1.jpeg


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


final text_encoder_type: bert-base-uncased
_IncompatibleKeys(missing_keys=[], unexpected_keys=['label_enc.weight', 'bert.embeddings.position_ids'])
Running model...




Bounding box: 269, 319, 342, 417
Bounding box: 338, 224, 405, 309
File already exists, saving with _1
Bounding box: 364, 319, 438, 418
File already exists, saving with _2
Processing image: manzana2.jpeg
final text_encoder_type: bert-base-uncased
_IncompatibleKeys(missing_keys=[], unexpected_keys=['label_enc.weight', 'bert.embeddings.position_ids'])
Running model...




Bounding box: 371, 345, 448, 449
Bounding box: 264, 407, 347, 502
File already exists, saving with _1
Bounding box: 292, 290, 367, 380
File already exists, saving with _2
['platano1.jpeg', 'platano2.jpeg', 'platano3.jpeg']
Processing image: platano1.jpeg
final text_encoder_type: bert-base-uncased
_IncompatibleKeys(missing_keys=[], unexpected_keys=['label_enc.weight', 'bert.embeddings.position_ids'])
Running model...
Bounding box: 267, 325, 349, 520
Bounding box: 315, 282, 440, 421
File already exists, saving with _1
Processing image: platano2.jpeg
final text_encoder_type: bert-base-uncased
_IncompatibleKeys(missing_keys=[], unexpected_keys=['label_enc.weight', 'bert.embeddings.position_ids'])
Running model...
Bounding box: 235, 308, 413, 621
Bounding box: 392, 225, 503, 521
File already exists, saving with _1
Processing image: platano3.jpeg
final text_encoder_type: bert-base-uncased
_IncompatibleKeys(missing_keys=[], unexpected_keys=['label_enc.weight', 'bert.embeddings.position_ids'])

In [2]:
from PIL import Image
import os

datasetpath = "/home/jabv/Desktop/prueba/prueba_precut/"
resultspath = "/home/jabv/Desktop/prueba/prueba_fit/"

#create a result folder if it doesn't exist
if not os.path.exists(resultspath):
    os.makedirs(resultspath)
    os.makedirs(resultspath+"gatos/")
    os.makedirs(resultspath+"jarra/")
    os.makedirs(resultspath+"lata/")
    os.makedirs(resultspath+"manzana/")
    os.makedirs(resultspath+"platano/")
    os.makedirs(resultspath+"silla/")


fg_folders = [
    ("gatos/"),
    ("jarra/"),
    ("lata/"),
    ("manzana/"),
    ("platano/"),
    ("silla/")
]

for folder in fg_folders:
    for filename in os.listdir(f"{datasetpath}{folder}"):
        try:
            print(f"{filename} started")
            myImage = Image.open(datasetpath+folder+filename)
            black = Image.new('RGBA', myImage.size)
            myImage = Image.composite(myImage, black, myImage)
            #print("aqui")
            myCroppedImage = myImage.crop(myImage.getbbox())
            myCroppedImage.save(f"{resultspath}{folder}{filename}")
            print(f"{filename} done")
        except:
            print(f"{filename} failed")
            continue
print("all done")

gatos..png started
gatos..png done
gatos._2.png started
gatos._2.png done
gatos._1.png started
gatos._1.png done
jarra3..png started
jarra3..png done
jarra2..png started
jarra2..png done
jarra1..png started
jarra1..png done
lata.png started
lata.png done
manzana1..png started
manzana1..png done
manzana2._1.png started
manzana2._1.png done
manzana1._2.png started
manzana1._2.png done
manzana2._2.png started
manzana2._2.png done
manzana1._1.png started
manzana1._1.png done
manzana2..png started
manzana2..png done
platano3..png started
platano3..png done
platano3._1.png started
platano3._1.png done
platano1._1.png started
platano1._1.png done
platano2..png started
platano2..png done
platano2._1.png started
platano2._1.png done
platano1..png started
platano1..png done
silla.png started
silla.png done
silla_1.png started
silla_1.png done
silla_2.png started
silla_2.png done
all done


In [3]:
# Define the paths to the three folders containing the images
datasetPath = "/home/jabv/Desktop/prueba/prueba_fit"
fg_folders = [
    (f"{datasetPath}/gatos/","gatos" ),
    (f"{datasetPath}/jarra/","jarra" ),
    (f"{datasetPath}/lata/","lata" ),
    (f"{datasetPath}/manzana/","manzana" ),
    (f"{datasetPath}/platano/","platano" ),
    (f"{datasetPath}/silla/","silla" )

]
bg_folder = "/home/jabv/Desktop/prueba/bg/"
output_folder = "/home/jabv/Desktop/prueba/prueba_final/"

In [4]:
objects_list = ["gatos", "jarra", "lata", "manzana", "platano", "silla"]
annotations_ID = {}
categories = []
for i, object in enumerate(objects_list):
    annotations_ID[object] = i
    categories.append({"id": i, "name": object})

print(annotations_ID)
print(categories)

{'gatos': 0, 'jarra': 1, 'lata': 2, 'manzana': 3, 'platano': 4, 'silla': 5}
[{'id': 0, 'name': 'gatos'}, {'id': 1, 'name': 'jarra'}, {'id': 2, 'name': 'lata'}, {'id': 3, 'name': 'manzana'}, {'id': 4, 'name': 'platano'}, {'id': 5, 'name': 'silla'}]


In [5]:
# Load the list of files in each of the three folders
fg_files = {}
for folder, category in fg_folders:
    fg_files[category] = os.listdir(folder)

In [41]:
if not os.path.exists(output_folder):
    os.mkdir(output_folder)
trainfolder = output_folder + "train/"
testfolder = output_folder + "test/"
validfolder = output_folder + "valid/"
os.mkdir(trainfolder)
os.mkdir(testfolder)
os.mkdir(validfolder)
os.mkdir(trainfolder + "images/")
os.mkdir(trainfolder + "labels/")
os.mkdir(testfolder + "images/")
os.mkdir(testfolder + "labels/")
os.mkdir(validfolder + "images/")
os.mkdir(validfolder + "labels/")

## Auto Label

In [42]:
import os
import random
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter
import yaml

images = []
annotations = []
annotations2 = []
annot_csv = []

img_id = int(0)
anno_id = int(0)

rescaling_min = 0.60
rescaling_max = 0.70

for j in range(200):
    # Crear archivo de etiquetas vacío
    with open(f'{trainfolder}labels/{img_id}.txt', 'w') as file:
        pass
    
    # Seleccionar aleatoriamente cuántos objetos habrá en la imagen
    num_objects = random.randint(1, 5)  # Asegúrate de tener al menos un objeto
    
    # Seleccionar imágenes de primer plano aleatoriamente
    fg_categories = random.choices(objects_list, k=num_objects)
    
    fg_files_selected = []
    for category in fg_categories:
        fg_files_selected.append([category, random.choice(fg_files[category])])
    
    # Cargar las imágenes de primer plano seleccionadas
    fg_imgs = []
    for img in fg_files_selected:
        folder = [f[0] for f in fg_folders if f[1] == img[0]][0]
        fg_imgs.append([img[0], Image.open(folder + img[1]), folder + img[1]])
    
    # Cargar la imagen de fondo
    bg_files = os.listdir(bg_folder)
    bg_file = random.choice(bg_files)
    bg_img = Image.open(bg_folder + bg_file)

    occupied = []  # Lista para almacenar posiciones ocupadas

    for img in fg_imgs:
        fg_img = img[1]

        # Redimensionar y rotar la imagen de primer plano
        angle = random.randint(-5, 5)
        scale = random.uniform(rescaling_min, rescaling_max)
        fg_img = fg_img.rotate(angle, resample=Image.BICUBIC, expand=True)
        fg_img = fg_img.resize((int(fg_img.width * scale), int(fg_img.height * scale)))
        
        # Aplicar mejoras a la imagen
        fg_img = ImageEnhance.Brightness(fg_img).enhance(random.uniform(0.7, 1.3))
        fg_img = ImageEnhance.Contrast(fg_img).enhance(random.uniform(0.9, 1.1))
        fg_img = ImageEnhance.Color(fg_img).enhance(random.uniform(0.7, 1.3))
        fg_img = fg_img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.0, 0.5)))
        
        # Comprobar si el objeto cabe en la imagen de fondo
        if fg_img.width > bg_img.width or fg_img.height > bg_img.height:
            print(f"Skipping {img[0]} due to size constraints")
            continue  # Si la imagen de primer plano es más grande, saltar

        max_x = bg_img.width - fg_img.width
        max_y = bg_img.height - fg_img.height

        # Intentar colocar el objeto sin superposición
        for attempt in range(10):
            x = random.randint(0, max_x)
            y = random.randint(0, max_y)

            # Comprobar superposición
            overlap = False
            for occupied_rect in occupied:
                if (x < occupied_rect[0] + occupied_rect[2] and
                    x + fg_img.width > occupied_rect[0] and
                    y < occupied_rect[1] + occupied_rect[3] and
                    y + fg_img.height > occupied_rect[1]):
                    overlap = True
                    break
            
            if not overlap:
                occupied.append([x, y, fg_img.width, fg_img.height])  # Guardar posición ocupada
                break
        else:
            continue  # Si no se encuentra un espacio, continuar

        # Pegar la imagen de primer plano en el fondo
        bg_img.paste(fg_img, (x, y), fg_img)

        # Guardar anotaciones
        x_center_ann = (x + fg_img.width / 2) / bg_img.width
        y_center_ann = (y + fg_img.height / 2) / bg_img.height
        width_ann = fg_img.width / bg_img.width
        height_ann = fg_img.height / bg_img.height
        
        with open(f'{trainfolder}labels/{img_id}.txt', 'a') as f:
            f.write(f"{annotations_ID[img[0]]} {x_center_ann} {y_center_ann} {width_ann} {height_ann}\n")

        annotations2.append({
            "id": anno_id,
            "image_id": img_id,
            "category_id": annotations_ID[img[0]],
            "bbox": [x, y, fg_img.width, fg_img.height],
            "segmentation": [],
            "area": fg_img.height * fg_img.width,
            "iscrowd": 0
        })
        annotations.append({
            "id": anno_id,
            "image_id": img_id,
            "category_id": annotations_ID[img[0]],
            "bbox": [x, y, fg_img.width, fg_img.height],
            "segmentation": [],
            "area": fg_img.height * fg_img.width,
            "iscrowd": 0
        })
        annot_csv.append(["TRAIN", output_folder + str(img_id) + ".jpg", img[0], x / bg_img.width, y / bg_img.height, "", "", (x + fg_img.width) / bg_img.width, (y + fg_img.height) / bg_img.height])
        anno_id += 1

    # Guardar la imagen de fondo con los objetos pegados
    bg_img.save(f"{trainfolder}images/" + str(img_id) + ".jpg", quality=100)
    images.append({
        "id": img_id,
        "file_name": str(img_id) + ".jpg",
        "height": bg_img.height,
        "width": bg_img.width
    })
    img_id += 1

# Crear data.yaml
data = dict(
    train=f"{trainfolder}images",
    val=f"{validfolder}images",
    test=f"{validfolder}images",
    nc=len(annotations_ID),
    names=list(annotations_ID.keys())
)

# Almacenar
with open(f'{output_folder}data.yaml', 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)


Skipping lata due to size constraints
Skipping lata due to size constraints
Skipping platano due to size constraints
Skipping silla due to size constraints
Skipping gatos due to size constraints
Skipping silla due to size constraints
Skipping lata due to size constraints
Skipping gatos due to size constraints
Skipping gatos due to size constraints
Skipping platano due to size constraints
Skipping gatos due to size constraints
Skipping lata due to size constraints
Skipping silla due to size constraints
Skipping lata due to size constraints
Skipping lata due to size constraints
Skipping gatos due to size constraints
Skipping silla due to size constraints
Skipping gatos due to size constraints
Skipping lata due to size constraints
Skipping lata due to size constraints
Skipping lata due to size constraints
Skipping gatos due to size constraints
Skipping platano due to size constraints
Skipping lata due to size constraints
Skipping gatos due to size constraints
Skipping silla due to size co

# Segmentation

In [43]:
final_dataset = "/home/jabv/Desktop/prueba/prueba_final/"

import sys
sys.path.append("..")
import torch
from segment_anything import sam_model_registry, SamPredictor
sam_model = "h"

#use sam model
#wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
#wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth
if sam_model =="h":
  sam_checkpoint = "sam_vit_h_4b8939.pth"
  model_type = "vit_h"
else:
  sam_checkpoint = "sam_vit_l_0b3195.pth"
  model_type = "vit_l"

device = "cuda"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)

images=[]
annotations=[]
categories=[]

img_id=0
anno_id=0

final_images = f"{final_dataset}train/images/"
final_labels = f"{final_dataset}train/labels/"
output_points_path = f"{final_dataset}train/segmentation_points/"

# Crea el directorio de salida si no existe
os.makedirs(output_points_path, exist_ok=True)
torch.cuda.empty_cache()  # Limpia la memoria de la GPU


for imgPath in os.listdir(final_images):
    img_id = imgPath.split("/")[-1].split(".")[0]
    print(f"Processing image: {imgPath}")
    
    image = cv2.imread(os.path.join(final_images, imgPath))
    height, width = image.shape[:2]

    # Cuenta las líneas en el archivo de etiquetas
    with open(f"{final_labels}{img_id}.txt") as f:
        lines = f.readlines()
        num_objects = len(lines)
        print(f"Number of objects: {num_objects}")

    ran_sam = False

    for line in lines:
        values = line.split(" ")
        class_name = values[0]  # Nombre de la clase
        class_index = int(class_name)  # Asegúrate de que sea un número si usas índices

        x_center = float(values[1])
        y_center = float(values[2])
        width_bbox = float(values[3])
        height_bbox = float(values[4])
        
        # Convertir coordenadas del centro a esquinas
        x0 = int((x_center - width_bbox / 2) * width)
        y0 = int((y_center - height_bbox / 2) * height)
        x1 = int((x_center + width_bbox / 2) * width)
        y1 = int((y_center + height_bbox / 2) * height)

        sam_bounding_box = np.array([x0, y0, x1, y1])

        if not ran_sam:
            predictor.set_image(image)
            ran_sam = True

        mask, _, _ = predictor.predict(box=sam_bounding_box, multimask_output=False)

        contours, _ = cv2.findContours(mask[0].astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        points = []
        for contour in contours:
            for point in contour:
                # Normaliza las coordenadas
                norm_x = point[0][0] / width
                norm_y = point[0][1] / height
                points.append((norm_x, norm_y))

        # Guarda los puntos en el formato solicitado
        point_file_path = f"{output_points_path}{img_id}.txt"
        with open(point_file_path, "a") as point_file:  # Cambiado a "a" para añadir al archivo
            point_file.write(f"{class_index} " + " ".join(f"{p[0]:.3f} {p[1]:.3f}" for p in points) + "\n")

#delete labels folder
import shutil
shutil.rmtree(f"{final_dataset}train/labels/")
#change name of segmentation_points to labels
os.rename(f"{final_dataset}train/segmentation_points/", f"{final_dataset}train/labels/")


Processing image: 196.jpg
Number of objects: 0
Processing image: 195.jpg
Number of objects: 1
Processing image: 125.jpg
Number of objects: 1
Processing image: 15.jpg
Number of objects: 1
Processing image: 182.jpg
Number of objects: 2
Processing image: 86.jpg
Number of objects: 5
Processing image: 29.jpg
Number of objects: 2
Processing image: 175.jpg
Number of objects: 0
Processing image: 186.jpg
Number of objects: 2
Processing image: 158.jpg
Number of objects: 2
Processing image: 129.jpg
Number of objects: 2
Processing image: 14.jpg
Number of objects: 0
Processing image: 17.jpg
Number of objects: 2
Processing image: 157.jpg
Number of objects: 1
Processing image: 9.jpg
Number of objects: 2
Processing image: 118.jpg
Number of objects: 1
Processing image: 57.jpg
Number of objects: 2
Processing image: 96.jpg
Number of objects: 4
Processing image: 45.jpg
Number of objects: 1
Processing image: 27.jpg
Number of objects: 5
Processing image: 33.jpg
Number of objects: 1
Processing image: 69.jpg


SplitTrainValidation

In [45]:
import os
import shutil
import random

validation = 0.1
test = 0.1

# Assumes test has 100% of data
output_folder = "/home/jabv/Desktop/prueba/prueba_final/"
trainfolder = output_folder + "train/"
trainfolderimgs = trainfolder + "images/"
trainfolderlabels = trainfolder + "labels/"
testfolder = output_folder + "test/"
testfolderimgs = testfolder + "images/"
testfolderlabels = testfolder + "labels/"
validfolder = output_folder + "valid/"
validfolderimgs = validfolder + "images/"
validfolderlabels = validfolder + "labels/"

# Obtener la lista de archivos en las carpetas de imágenes y etiquetas
img_files = os.listdir(trainfolderimgs)
label_files = os.listdir(trainfolderlabels)

# Filtrar solo los archivos que existen en ambas carpetas
existing_files = [f for f in img_files if f[:-4] + '.txt' in label_files]

fullSize = len(existing_files)
validSize = int(fullSize * validation)
testSize = int(fullSize * test)

# Mezclar los archivos existentes
random.shuffle(existing_files)

# Mover archivos a la carpeta de validación
for i in range(validSize):
    filetomove = existing_files[i]
    filetomovename = filetomove[:-4]

    # Mover imágenes
    shutil.move(f"{trainfolderimgs}{filetomove}", f"{validfolderimgs}{filetomove}")
    # Mover etiquetas
    shutil.move(f"{trainfolderlabels}{filetomovename}.txt", f"{validfolderlabels}{filetomovename}.txt")

# Mover archivos a la carpeta de test
for i in range(testSize):
    filetomove = existing_files[validSize + i]
    filetomovename = filetomove[:-4]

    # Mover imágenes
    shutil.move(f"{trainfolderimgs}{filetomove}", f"{testfolderimgs}{filetomove}")
    # Mover etiquetas
    shutil.move(f"{trainfolderlabels}{filetomovename}.txt", f"{testfolderlabels}{filetomovename}.txt")

# Imprimir el tamaño de cada carpeta
print(f"Train size is now: {len(os.listdir(trainfolderimgs))}")
print(f"Validation size is now: {len(os.listdir(validfolderimgs))}")
print(f"Test size is now: {len(os.listdir(testfolderimgs))}")


Train size is now: 168
Validation size is now: 16
Test size is now: 16
