In [1]:
from valor import connect
# Address of the valor service this notebook talks to.
VALOR_HOST = "http://localhost:8000"
connect(VALOR_HOST)

Successfully connected to host at http://localhost:8000/


In [17]:
import datetime

from valor import (
    Annotation,
    Client,
    Dataset,
    Datum,
    Filter,
    GroundTruth,
    Label,
)
from valor.schemas import And, Or, Box, Polygon, Point

# Client handle, created with no arguments.
# NOTE(review): presumably it reuses the connection opened by `connect(...)`
# in the first cell — confirm against the valor client documentation.
client = Client()

# Example - Swimmers and boats.

This example demonstrates how to create and query a dataset containing images annotated with boats, swimmers, and fish.

In [3]:
# Each row is (datum uid, image has a boat, image has a swimmer).
contains_boat_swimmer = [
    ("uid1", False, False),
    ("uid2", True, False),
    ("uid3", False, True),
    ("uid4", True, True),
]

# Every annotation in this example shares the same bounding box.
box = Box.from_extrema(0, 10, 0, 10)

# One label per object class, all stored under the "class" key.
swimmer_label = Label(key="class", value="swimmer")
boat_label = Label(key="class", value="boat")
fish_label = Label(key="class", value="fish")

dataset1 = Dataset.create("dataset1")

# Each datum gets exactly two annotations: a "boat slot" and a "swimmer slot".
# Whenever the boat or the swimmer is absent, that slot is labeled as a fish.
for datum_uid, has_boat, has_swimmer in contains_boat_swimmer:
    boat_slot_label = boat_label if has_boat else fish_label
    swimmer_slot_label = swimmer_label if has_swimmer else fish_label
    groundtruth = GroundTruth(
        datum=Datum(uid=datum_uid),
        annotations=[
            Annotation(labels=[slot_label], bounding_box=box, is_instance=True)
            for slot_label in (boat_slot_label, swimmer_slot_label)
        ],
    )
    dataset1.add_groundtruth(groundtruth)

Show all datums in the dataset.

In [4]:
# Bare expression: the notebook displays the unfiltered list of datums in dataset1.
dataset1.get_datums()

[Datum({'uid': 'uid4', 'metadata': {}}),
 Datum({'uid': 'uid3', 'metadata': {}}),
 Datum({'uid': 'uid2', 'metadata': {}}),
 Datum({'uid': 'uid1', 'metadata': {}})]

Query the dataset for images containing just fish

In [5]:
# Datum-level filter: "class" labels that are neither a boat nor a swimmer.
just_fish_filter = Filter(
    datums=And(
        Label.key == "class",
        Label.value != "boat",
        Label.value != "swimmer",
    )
)
just_fish = dataset1.get_datums(just_fish_filter)

# Exactly one datum is expected back, and it must be uid1.
assert [datum.uid for datum in just_fish] == ["uid1"]

Query the dataset for images containing no swimmers (only boats)

In [6]:
# Datum-level filter: a boat is present and no swimmer is.
no_swimmers_filter = Filter(
    datums=And(
        Label.key == "class",
        Label.value == "boat",
        Label.value != "swimmer",
    )
)
no_swimmers = dataset1.get_datums(no_swimmers_filter)

# Exactly one datum is expected back, and it must be uid2.
assert [datum.uid for datum in no_swimmers] == ["uid2"]

Query the dataset for images containing no boats (only swimmers)

In [7]:
# Datum-level filter: a swimmer is present and no boat is.
no_boats_filter = Filter(
    datums=And(
        Label.key == "class",
        Label.value != "boat",
        Label.value == "swimmer",
    )
)
no_boats = dataset1.get_datums(no_boats_filter)

# Exactly one datum is expected back, and it must be uid3.
assert [datum.uid for datum in no_boats] == ["uid3"]

Query the dataset for images containing either swimmers or boats, but not both.

In [8]:
# Exclusive-or query: datums that have a swimmer or a boat, but not both.
# The result gets its own name so it does not overwrite `no_boats`, which
# holds the swimmer-only result of the previous query.
boat_xor_swimmer = dataset1.get_datums(
    Filter(
        datums=Or(
            # Swimmer present, boat absent.
            And(
                Label.key == "class",
                Label.value != "boat",
                Label.value == "swimmer",
            ),
            # Boat present, swimmer absent.
            And(
                Label.key == "class",
                Label.value == "boat",
                Label.value != "swimmer",
            ),
        )
    )
)

# Exactly the boat-only datum (uid2) and the swimmer-only datum (uid3).
assert len(boat_xor_swimmer) == 2
xor_uids = {datum.uid for datum in boat_xor_swimmer}
assert xor_uids == {"uid2", "uid3"}

Query the dataset for images containing both swimmers and boats

In [9]:
# Datum-level filter: both a boat and a swimmer are present.
swimmers_and_boats_filter = Filter(
    datums=And(
        Label.key == "class",
        Label.value == "boat",
        Label.value == "swimmer",
    )
)
swimmers_and_boats = dataset1.get_datums(swimmers_and_boats_filter)

# Exactly one datum is expected back, and it must be uid4.
assert [datum.uid for datum in swimmers_and_boats] == ["uid4"]

# Example - Swimmers w/ Boats of different sizes.

This example demonstrates how to create and query a dataset containing images annotated with boats, swimmers, and fish.

In this example, the boats are bounded by either a small bbox (5x5) or large bbox (10x10).

In [10]:
# Each row is (datum uid, boat uses the large box, image has a swimmer).
contains_boat_swimmer = (
    ("uid1", False, False),
    ("uid2", True, False),
    ("uid3", False, True),
    ("uid4", True, True),
)

# Two box sizes: 5x5 for small objects, 10x10 for large boats.
small_box = Box.from_extrema(0, 5, 0, 5)
large_box = Box.from_extrema(0, 10, 0, 10)

# One label per object class, all stored under the "class" key.
swimmer, boat, fish = (
    Label(key="class", value=class_name)
    for class_name in ("swimmer", "boat", "fish")
)

dataset2 = Dataset.create("dataset2")
for datum_uid, has_large_boat, has_swimmer in contains_boat_swimmer:
    # Every datum has a boat; only the size of its box varies.
    boat_annotation = Annotation(
        labels=[boat],
        bounding_box=large_box if has_large_boat else small_box,
        is_instance=True,
    )
    # The second object is a swimmer when present, otherwise a fish.
    companion_annotation = Annotation(
        labels=[swimmer if has_swimmer else fish],
        bounding_box=small_box,
        is_instance=True,
    )
    dataset2.add_groundtruth(
        GroundTruth(
            datum=Datum(uid=datum_uid),
            annotations=[boat_annotation, companion_annotation],
        )
    )

No swimmer, small boats

In [11]:
# Datum-level condition: no swimmer label.
# Annotation-level condition: a boat whose box area is under 50
# (the small box is 5x5, the large one 10x10).
no_swimmer_small_boats_filter = Filter(
    datums=And(
        Label.key == "class",
        Label.value != "swimmer",
    ),
    annotations=And(
        Label.key == "class",
        Label.value == "boat",
        Annotation.bounding_box.area < 50,
    ),
)
no_swimmer_small_boats = dataset2.get_datums(no_swimmer_small_boats_filter)

# Exactly one datum is expected back, and it must be uid1.
assert [datum.uid for datum in no_swimmer_small_boats] == ["uid1"]

No swimmer, large boats

In [12]:
# Datum-level condition: no swimmer label.
# Annotation-level condition: a boat whose box area is over 50
# (the small box is 5x5, the large one 10x10).
no_swimmer_large_boats_filter = Filter(
    datums=And(
        Label.key == "class",
        Label.value != "swimmer",
    ),
    annotations=And(
        Label.key == "class",
        Label.value == "boat",
        Annotation.bounding_box.area > 50,
    ),
)
no_swimmer_large_boats = dataset2.get_datums(no_swimmer_large_boats_filter)

# Exactly one datum is expected back, and it must be uid2.
assert [datum.uid for datum in no_swimmer_large_boats] == ["uid2"]

Swimmer with small boats

In [13]:
# Datum-level condition: a swimmer label is present.
# Annotation-level condition: a boat whose box area is under 50
# (the small box is 5x5, the large one 10x10).
swimmer_with_small_boats_filter = Filter(
    datums=And(
        Label.key == "class",
        Label.value == "swimmer",
    ),
    annotations=And(
        Label.key == "class",
        Label.value == "boat",
        Annotation.bounding_box.area < 50,
    ),
)
swimmer_with_small_boats = dataset2.get_datums(swimmer_with_small_boats_filter)

# Exactly one datum is expected back, and it must be uid3.
assert [datum.uid for datum in swimmer_with_small_boats] == ["uid3"]

Swimmer with large boat

In [14]:
# Datum-level condition: a swimmer label is present.
# Annotation-level condition: a boat whose box area is over 50
# (the small box is 5x5, the large one 10x10).
# The result is named to match the other dataset2 queries and so that it
# does not overwrite `swimmers_and_boats`, which holds a dataset1 result.
swimmer_with_large_boats = dataset2.get_datums(
    Filter(
        datums=And(
            Label.key == "class",
            Label.value == "swimmer",
        ),
        annotations=And(
            Label.key == "class",
            Label.value == "boat",
            Annotation.bounding_box.area > 50,
        ),
    )
)

# Exactly one datum is expected back, and it must be uid4.
assert len(swimmer_with_large_boats) == 1
assert swimmer_with_large_boats[0].uid == "uid4"

# Example - Geospatial + Time of Year

This example demonstrates how to create and query a dataset containing images annotated with a time and region.

In [18]:
# Regions: each polygon is a single closed ring of five vertices
# (the first vertex is repeated as the last one).
# NOTE(review): coordinates look like (longitude, latitude) pairs — confirm.
geojson_alaska = Polygon(
    [
        [
            (-170.7603599457809, 68.84625981507392),
            (-170.7603599457809, 58.53538829807735),
            (-141.3435514691004, 58.53538829807735),
            (-141.3435514691004, 68.84625981507392),
            (-170.7603599457809, 68.84625981507392),
        ]
    ]
)
geojson_australia = Polygon(
    [
        [
            (113.26697231702212, -12.835622232181265),
            (113.26697231702212, -40.757486033452935),
            (157.67091884462127, -40.757486033452935),
            (157.67091884462127, -12.835622232181265),
            (113.26697231702212, -12.835622232181265),
        ]
    ]
)

# Cities: a single point each.
geojson_austrailia_sydney = Point(
    (151.27740157112845, -33.78747691475676)
)
geojson_alaska_anchorage = Point(
    (-149.75306358105365, 61.21554843271193)
)

In [19]:
# Month number (1-12) -> lowercase month name, used to build datum uids.
map_idx_to_month = dict(
    enumerate(
        [
            "january",
            "february",
            "march",
            "april",
            "may",
            "june",
            "july",
            "august",
            "september",
            "october",
            "november",
            "december",
        ],
        start=1,
    )
)

vehicle = Label(key="class", value="vehicle")

# NOTE(review): the recorded output of this cell is a ClientException that
# rejects the "region" metadata as an unsupported 'Box' GeoJSON geometry.
# Confirm that the installed valor version accepts Polygon metadata values.
dataset3 = Dataset.create("dataset3")
for month_idx in range(1, 13):
    first_of_month = datetime.date(2024, month_idx, 1)
    # One datum per region for every month of 2024, Alaska first.
    # The uid prefixes are kept exactly as originally spelled.
    for uid_prefix, region in (
        ("alaska", geojson_alaska),
        ("austrailia", geojson_australia),
    ):
        dataset3.add_groundtruth(
            GroundTruth(
                datum=Datum(
                    uid=f"{uid_prefix}_{map_idx_to_month[month_idx]}",
                    metadata={"month": first_of_month, "region": region},
                ),
                annotations=[Annotation(labels=[vehicle])],
            )
        )

ClientException: [{'type': 'value_error', 'loc': ['body', 0, 'datum', 'metadata'], 'msg': "Value error, Metadata value '{'type': 'Box', 'coordinates': [[(-170.7603599457809, 68.84625981507392), (-170.7603599457809, 58.53538829807735), (-141.3435514691004, 58.53538829807735), (-141.3435514691004, 68.84625981507392), (-170.7603599457809, 68.84625981507392)]]}' failed validation for type 'geojson'. Validation error: Class 'box' is not a supported GeoJSON geometry type.", 'input': {'month': {'type': 'date', 'value': '2024-01-01'}, 'region': {'type': 'geojson', 'value': {'type': 'Box', 'coordinates': [[[-170.7603599457809, 68.84625981507392], [-170.7603599457809, 58.53538829807735], [-141.3435514691004, 58.53538829807735], [-141.3435514691004, 68.84625981507392], [-170.7603599457809, 68.84625981507392]]]}}}, 'ctx': {'error': {}}, 'url': 'https://errors.pydantic.dev/2.6/v/value_error'}]

Find datums where the region is experiencing summer.

Northern Hemisphere (June - September)
Southern Hemisphere (December - March)

In [None]:
# First-of-month dates that bound each hemisphere's summer window.
march = datetime.date(2024, 3, 1)
june = datetime.date(2024, 6, 1)
september = datetime.date(2024, 9, 1)
december = datetime.date(2024, 12, 1)

summer_time = dataset3.get_datums(
    Filter(
        datums=Or(
            # Northern-hemisphere summer: June through September over Alaska.
            And(
                Datum.metadata["month"] >= june,
                Datum.metadata["month"] <= september,
                Datum.metadata["region"].intersects(geojson_alaska),
            ),
            # Southern-hemisphere summer: December through March over Australia.
            # Every date in the dataset falls in 2024, so this window wraps
            # around the year end: a month qualifies when it is on/after
            # December OR on/before March (an AND of the two can never match).
            And(
                Or(
                    Datum.metadata["month"] >= december,
                    Datum.metadata["month"] <= march,
                ),
                Datum.metadata["region"].intersects(geojson_australia),
            ),
        )
    )
)

# Four summer months per hemisphere. The uids have the form
# "<region>_<month>", so compare the month suffixes per region.
summer_uids = {datum.uid for datum in summer_time}
northern_months = {
    uid.split("_", 1)[1] for uid in summer_uids if uid.startswith("alaska_")
}
southern_months = {
    uid.split("_", 1)[1] for uid in summer_uids if not uid.startswith("alaska_")
}
assert len(summer_time) == 8
assert northern_months == {"june", "july", "august", "september"}
assert southern_months == {"december", "january", "february", "march"}