In [None]:
!pip install ultralytics==8.0.137

In [None]:
import daft

In [None]:
# Read the COCO 2017 image index from the public bucket and keep only the
# image URL and file name columns.
df = (
    daft.read_parquet("s3://daft-public-data/coco-2017-parquet/images.parquet")
    .select("coco_url", "file_name")
)

In [None]:
# Add an "image" column: download the contents behind each "coco_url" and decode them as an image.
df = df.with_column("image", df["coco_url"].url.download().image.decode())

In [None]:
# Display a 3-row preview to sanity-check the new "image" column.
df.show(3)

In [None]:
@daft.udf(return_dtype=daft.DataType.list("preds", daft.DataType.string()))
class RunYoloModel:
    """Daft UDF that labels each image with the class names YOLO detects in it.

    The declared return dtype is a list of strings, i.e. one list of
    class-name labels per input image.
    """

    def __init__(self):
        """Build the YOLO model from the ``yolov8n.pt`` weights."""
        # Imported inside the constructor rather than at module level --
        # presumably so ultralytics is only required where the model is built.
        from ultralytics import YOLO

        self.yolo = YOLO("yolov8n.pt")

    def __call__(self, images: daft.Series):
        """Run detection on a batch of images and return the detected class names.

        Args:
            images: Series of decoded images; it is converted to a Python list
                before being handed to the model.

        Returns:
            A list with one entry per prediction result, each entry being the
            list of class-name strings for the boxes in that result.
        """
        # NOTE(review): ultralytics documents numpy inputs as BGR (OpenCV order);
        # confirm the channel order of the decoded images matches.
        frames = images.to_pylist()
        # conf=0.5 is passed straight to predict() as the confidence threshold.
        detections = self.yolo.predict(frames, conf=0.5)

        labels_per_image = []
        for detection in detections:
            # boxes.cls holds one class id per detected box; names maps id -> label.
            class_ids = detection.boxes.cls
            labels_per_image.append([detection.names[int(class_id)] for class_id in class_ids])
        return labels_per_image

# Add a "prediction_classes" column by applying the YOLO UDF to the "image" column.
df = df.with_column(
    "prediction_classes",
    RunYoloModel(df["image"]),
)

In [None]:
# Display a 3-row preview, now including the "prediction_classes" column.
df.show(3)

In [None]:
%%time

# Limit to 200 rows first, then keep only the rows whose predictions contain "car".
(
    df.limit(200)
    .where(df["prediction_classes"].list.contains("car"))
    .collect()
)