In [1]:
from transformers import pipeline
from PIL import Image
from accelerate.test_utils.testing import get_backend
import matplotlib.pyplot as plt

# Set up the backend device
device, _, _ = get_backend()

# Load the depth estimation model
checkpoint = "depth-anything/Depth-Anything-V2-base-hf"
pipe = pipeline("depth-estimation", model=checkpoint, device=device)

# Define input and output paths
input_path = "/kaggle/input/sampleapple/splitted-DJI_20240901104507_0236_D_clahe_grid_3_5.jpg"
output_path = "/kaggle/working/depth_map.png"

# Load the image
image = Image.open(input_path).convert("RGB")

# Perform depth estimation
predictions = pipe(image)
depth_map = predictions["depth"]

# Save the depth map directly if it's a PIL image
if isinstance(depth_map, Image.Image):
    depth_map.save(output_path)
    print(f"Depth map saved as image at: {output_path}")
else:
    # If depth_map is not an image, assume it's a tensor or numpy array
    import numpy as np
    depth_array = depth_map.squeeze()  # Ensure it's a 2D array
    plt.imshow(depth_array, cmap="viridis")
    plt.axis("off")
    plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
    plt.close()
    print(f"Depth map saved as visualization at: {output_path}")



config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/390M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/775 [00:00<?, ?B/s]

Device set to use cuda


Depth map saved as image at: /kaggle/working/depth_map.png


In [2]:
from transformers import pipeline
from PIL import Image, ImageDraw
import numpy as np
import matplotlib.pyplot as plt
from accelerate.test_utils.testing import get_backend

# Set up the backend device
device, _, _ = get_backend()

# Load the depth estimation model
depth_checkpoint = "depth-anything/Depth-Anything-V2-base-hf"
depth_pipe = pipeline("depth-estimation", model=depth_checkpoint, device=device)

# Load the object detection model
object_detection_pipe = pipeline("object-detection", model="facebook/detr-resnet-50", device=device)

# Define input and output paths
input_path = "/kaggle/input/sampleapple/splitted-DJI_20240901104507_0236_D_clahe_grid_3_5.jpg"
output_path = "/kaggle/working/depth_map_with_apple_borders.png"

# Load the image
image = Image.open(input_path).convert("RGB")

# Perform depth estimation
depth_predictions = depth_pipe(image)
depth_map = depth_predictions["depth"]

# Perform object detection
detection_results = object_detection_pipe(image)

# Highlight detected apples on the depth map with red borders only
if isinstance(depth_map, Image.Image):
    depth_map = depth_map.convert("RGBA")  # Ensure the depth map is in RGBA mode for overlay
else:
    raise ValueError("Depth map should be a PIL Image for this implementation.")

draw = ImageDraw.Draw(depth_map)

# Filter detection results for "apple"
for detection in detection_results:
    if "apple" in detection["label"].lower() and detection["score"] > 0.5:
        bbox = detection["box"]
        draw.rectangle(
            [bbox["xmin"], bbox["ymin"], bbox["xmax"], bbox["ymax"]],
            outline="red",  # Red border for apples
            width=5,        # Border width
        )

# Save the resulting image
depth_map.save(output_path)
print(f"Depth map with apple borders saved at: {output_path}")


Device set to use cuda


config.json:   0%|          | 0.00/4.59k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


preprocessor_config.json:   0%|          | 0.00/290 [00:00<?, ?B/s]

Device set to use cuda


Depth map with apple borders saved at: /kaggle/working/depth_map_with_apple_borders.png


In [3]:
import os
from transformers import pipeline
from PIL import Image, ImageDraw
from accelerate.test_utils.testing import get_backend

# Set up the backend device
device, _, _ = get_backend()

# Load the depth estimation model
depth_checkpoint = "depth-anything/Depth-Anything-V2-base-hf"
depth_pipe = pipeline("depth-estimation", model=depth_checkpoint, device=device)

# Load the object detection model
object_detection_pipe = pipeline("object-detection", model="facebook/detr-resnet-50", device=device)

# Define input and output paths
input_folder = "/kaggle/input/mutlitest"
output_folder = "/kaggle/working/multiple_outputs"
os.makedirs(output_folder, exist_ok=True)  # Create the output folder if it doesn't exist

# Process each image in the input folder
for filename in os.listdir(input_folder):
    if filename.endswith((".jpg", ".jpeg", ".png")):  # Process only image files
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, f"processed_{filename}")
        
        # Load the image
        image = Image.open(input_path).convert("RGB")
        
        # Perform depth estimation
        depth_predictions = depth_pipe(image)
        depth_map = depth_predictions["depth"]

        # Perform object detection
        detection_results = object_detection_pipe(image)

        # Highlight detected apples on the depth map with red borders only
        if isinstance(depth_map, Image.Image):
            depth_map = depth_map.convert("RGBA")  # Ensure the depth map is in RGBA mode for overlay
        else:
            raise ValueError("Depth map should be a PIL Image for this implementation.")

        draw = ImageDraw.Draw(depth_map)

        # Filter detection results for "apple"
        for detection in detection_results:
            if "apple" in detection["label"].lower() and detection["score"] > 0.5:
                bbox = detection["box"]
                draw.rectangle(
                    [bbox["xmin"], bbox["ymin"], bbox["xmax"], bbox["ymax"]],
                    outline="red",  # Red border for apples
                    width=5,        # Border width
                )

        # Convert to RGB mode if saving as JPEG
        if output_path.endswith((".jpg", ".jpeg")):
            depth_map = depth_map.convert("RGB")
        
        # Save the resulting image
        depth_map.save(output_path)
        print(f"Processed {filename} saved to {output_path}")

print(f"All images processed. Results saved in: {output_folder}")


Device set to use cuda
Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda


Processed AppleTest4.png saved to /kaggle/working/multiple_outputs/processed_AppleTest4.png
Processed AppleTest3.jpg saved to /kaggle/working/multiple_outputs/processed_AppleTest3.jpg
Processed AppleTest2.jpg saved to /kaggle/working/multiple_outputs/processed_AppleTest2.jpg
All images processed. Results saved in: /kaggle/working/multiple_outputs
