In [None]:
# Import the required libraries
from lang_sam import LangSAM
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import cv2

In [None]:
# Create the LangSAM instance that the prediction code below drives through
# model.predict(...).
# NOTE(review): construction presumably loads model weights and may be slow — confirm.
model = LangSAM()

In [None]:
# TODO: allow an image upload here and save it as a file instead of using a fixed path,
# e.g. original_image = st.file_uploader("Drop the image", type=['png', 'jpg', 'jpeg'])
# (presumably `st` is Streamlit — confirm).
# Load the image from disk; the labelling cell converts it to RGB before drawing on it.
original_image = Image.open('carousel.jpg')

In [None]:
# Label user-chosen objects in the loaded image.
#
# Relies on `model` and `original_image` from the earlier cells. The model is reused
# rather than constructed a second time here.
# Boxes and labels are drawn onto `image_pil`, the RGB conversion of `original_image`.
image_pil = original_image.convert("RGB")

raw_count = input("How many things in the image would you like to label?").strip()
num_objects = None  # stays None when the answer is blank or not a whole number
if raw_count:
    try:
        num_objects = int(raw_count)  # Convert the input to an integer
    except ValueError:
        print(f"Expected a whole number but got {raw_count!r}; nothing was labelled.")

if num_objects is not None:
    print(f"Number of objects you want to label are confirmed as {num_objects}")

    # Collecting text prompts from the user; blank answers are not stored.
    text_prompts = []
    for i in range(num_objects):
        text_prompt = input(f"Enter the thing you want to label (Label {i+1})")
        if text_prompt:
            text_prompts.append(text_prompt)

    if len(text_prompts) != num_objects:
        # Tell the user why nothing happened instead of skipping silently.
        print(
            f"Expected {num_objects} labels but received {len(text_prompts)}; "
            "nothing was labelled."
        )
    else:
        try:
            # Perform predictions for each text prompt
            predictions = []
            for text_prompt in text_prompts:
                print(f"Predicting for prompt: {text_prompt}")
                results = model.predict([image_pil], [text_prompt])
                print(f"Raw prediction results: {results}")
                for result in results:
                    # Only results exposing masks, boxes and scores are usable below.
                    if "masks" in result and "boxes" in result and "scores" in result:
                        masks = result["masks"]
                        boxes = result["boxes"]
                        phrases = [text_prompt] * len(boxes)  # Using the text prompt as the phrase
                        logits = result["scores"]
                        print(f"Prediction result: masks={masks}, boxes={boxes}, phrases={phrases}, logits={logits}")
                        predictions.append((masks, boxes, phrases, logits))
                    else:
                        # Skip this prediction if the format is unexpected
                        print(f"Unexpected prediction result format: {result}")

            # Draw predictions for each text prompt on the input image
            draw = ImageDraw.Draw(image_pil)
            font = ImageFont.load_default()  # You can choose a custom font if needed

            # Define a list of colors for labeling
            colors = ["red", "green", "blue", "yellow", "purple", "orange", "cyan", "magenta"]

            for i, (masks, boxes, phrases, logits) in enumerate(predictions):
                labels = [f"{phrase} {logit:.2f}" for phrase, logit in zip(phrases, logits)]
                label_color = colors[i % len(colors)]  # Change label color based on the prompt

                for label, box in zip(labels, boxes):
                    x0, y0, x1, y1 = box  # Unpack the coordinates
                    draw.rectangle([x0, y0, x1, y1], outline=label_color)  # Draw the rectangle
                    draw.text((x0, y0), label, fill=label_color, font=font)  # Draw the label
        except TypeError as e:
            print(f"TypeError in model.predict: {e}")
        except Exception as e:
            print(f"An error occurred during prediction: {e}")

In [None]:
# Display the image with predictions
# NOTE(review): depending on which Pillow viewers are available, Image.show() may open
# an external viewer instead of rendering in the cell output — confirm this is intended;
# ending the cell with a bare `image_pil` expression would render it inline.
image_pil.show()


In [None]:
# Path of the video to annotate.
# The `model` built in the earlier model cell is reused by process_frame, so no further
# LangSAM instance is constructed here.
video_path = "range_rover_ad.mp4"

def process_frame(frame, text_prompts):
    """Draw labelled boxes for every text prompt onto a single video frame.

    Uses the module-level `model` for prediction.

    Args:
        frame: Image array; it is converted with cv2.COLOR_BGR2RGB, so BGR channel
            order is assumed.
        text_prompts: Iterable of prompt strings; each is predicted separately.

    Returns:
        The annotated frame as an array, converted back with cv2.COLOR_RGB2BGR.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(rgb_frame)

    required_keys = ("masks", "boxes", "scores")
    predictions = []
    for text_prompt in text_prompts:
        print(f"Predicting for prompt: {text_prompt}")
        results = model.predict([image_pil], [text_prompt])
        print(f"Raw prediction results: {results}")
        for result in results:
            # Guard clause: report and skip results missing any required key.
            if not all(key in result for key in required_keys):
                print(f"Unexpected prediction result format: {result}")
                continue
            masks = result["masks"]
            boxes = result["boxes"]
            logits = result["scores"]
            # Every box found for this prompt carries the prompt text as its phrase.
            phrases = [text_prompt] * len(boxes)
            print(f"Prediction result: masks={masks}, boxes={boxes}, phrases={phrases}, logits={logits}")
            predictions.append((masks, boxes, phrases, logits))

    canvas = ImageDraw.Draw(image_pil)
    label_font = ImageFont.load_default()
    palette = ["red", "green", "blue", "yellow", "purple", "orange", "cyan", "magenta"]

    for index, (masks, boxes, phrases, logits) in enumerate(predictions):
        # One colour per prediction entry, cycling through the palette.
        outline = palette[index % len(palette)]

        labels = []
        for phrase, logit in zip(phrases, logits):
            labels.append(f"{phrase} {logit:.2f}")

        for label, box in zip(labels, boxes):
            left, top, right, bottom = box
            canvas.rectangle([left, top, right, bottom], outline=outline)
            canvas.text((left, top), label, fill=outline, font=label_font)

    annotated_rgb = np.array(image_pil)
    return cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)

def process_video(video_path, text_prompts, output_path='output.avi', fallback_fps=20.0):
    """Annotate every frame of a video and write the result to a new file.

    Each frame is passed through process_frame and written with the XVID codec.

    Args:
        video_path: Path of the input video.
        text_prompts: Prompts forwarded to process_frame for every frame.
        output_path: Destination file; defaults to 'output.avi'.
        fallback_fps: Frame rate used when the source does not report a positive one.

    Raises:
        IOError: If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    out = None
    try:
        # Keep the source frame rate so the output plays at the original speed.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:  # also rejects NaN
            fps = fallback_fps

        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            processed_frame = process_frame(frame, text_prompts)
            out.write(processed_frame)
    finally:
        # Release both handles even if frame processing raises.
        cap.release()
        if out is not None:
            out.release()

# Ask which objects to label, then annotate the whole video with those prompts.
raw_count = input("How many things in the image would you like to label?").strip()
num_objects = None  # stays None when the answer is blank or not a whole number
if raw_count:
    try:
        num_objects = int(raw_count)
    except ValueError:
        print(f"Expected a whole number but got {raw_count!r}; the video was not processed.")

if num_objects is not None:
    print(f"Number of objects you want to label are confirmed as {num_objects}")

    # Blank answers are not stored, so the list can end up shorter than requested.
    text_prompts = []
    for i in range(num_objects):
        text_prompt = input(f"Enter the thing you want to label (Label {i+1})")
        if text_prompt:
            text_prompts.append(text_prompt)

    if len(text_prompts) == num_objects:
        process_video(video_path, text_prompts)
    else:
        # Tell the user why nothing happened instead of skipping silently.
        print(
            f"Expected {num_objects} labels but received {len(text_prompts)}; "
            "the video was not processed."
        )