# Exploring the V&OR maintenance data

The data is located in the `experiments` blob storage container, under `hackathon_vlm/onderhoud/`.

In [None]:
import cv2
import geopandas as gpd
import numpy as np
import os

from azure.identity import  DefaultAzureCredential
from azure.storage.blob import  BlobServiceClient


def blob_to_bytes(blobname: str):
    '''Download a blob and return its complete content
        Args:
            blobname: name of the blob, looked up through the module-level
                container_client
        Returns:
            The full content produced by download_blob().readall()
    '''
    downloader = container_client.get_blob_client(blobname).download_blob()
    return downloader.readall()

def bytes_to_cv2(image_bytes):
    '''Decode the bytes of an encoded image into a CV2 image
        Args:
            image_bytes: bytes of an encoded image file
        Returns:
            The result of cv2.imdecode, called with cv2.IMREAD_UNCHANGED
    '''
    # View the bytes as a flat uint8 buffer, which is what imdecode consumes.
    return cv2.imdecode(
        np.frombuffer(image_bytes, dtype=np.uint8),
        cv2.IMREAD_UNCHANGED,
    )

def imshow(img):
    '''Display a CV2 image inline in the notebook, encoded as JPEG
        Args:
            img: CV2 image to display (anything cv2.imencode accepts)
        Raises:
            ValueError: if cv2.imencode reports that encoding failed
    '''
    # Import the submodule explicitly instead of relying on `import IPython`
    # having loaded `IPython.display` as a side effect.
    from IPython.display import Image, display

    success, encoded = cv2.imencode('.jpg', img)
    if not success:
        raise ValueError("cv2.imencode failed to encode the image as JPEG")

    # Image documents `data` as bytes/str, so pass the encoded buffer as
    # bytes rather than as a NumPy array.
    display(Image(data=encoded.tobytes()))


# Storage account endpoint and the container the notebook reads from.
ACCOUNT_URL = "https://cvodataweupgwapeg4pyiw5e.blob.core.windows.net"
CONTAINER_NAME = "experiments"

# Authenticate, then build the clients. `container_client` is the handle
# that blob_to_bytes and the download cells use.
credential = DefaultAzureCredential()
blob_service_client = BlobServiceClient(ACCOUNT_URL, credential=credential)
container_client = blob_service_client.get_container_client(CONTAINER_NAME)

## Example: load metadata

In [None]:
# The dataset consists of multiple files; we need to download them to explore.
dataset_path_prefix = "hackathon_vlm/onderhoud/2025_Centrum/KLEIN_ONDERHOUD_SDCENTRUM 20250328"

# Where to store the data
target_folder = "local_data/2025_Centrum/"

os.makedirs(target_folder, exist_ok=True)

# List every blob whose name starts with the dataset prefix (defined once
# above, so the prefix cannot drift between the variable and the query) and
# save each one under its base name in target_folder.
blob_list = container_client.list_blobs(name_starts_with=dataset_path_prefix)
for blob in blob_list:
    # Download before opening the file so a failed download does not leave
    # an empty file behind.
    blob_bytes = blob_to_bytes(blob.name)
    filename = os.path.join(target_folder, os.path.basename(blob.name))
    with open(filename, "wb") as f:
        f.write(blob_bytes)

In [None]:
# Read the downloaded shapefile with geopandas
shapefile_path = os.path.join(target_folder, "KLEIN_ONDERHOUD_SDCENTRUM 20250328.shp")
dataset_gdf = gpd.read_file(shapefile_path)

# Preview the first five rows
dataset_gdf.head(n=5)

In [None]:
# Curb damage categories: the distinct values found in the "SCHADEBEEL" column
dataset_gdf["SCHADEBEEL"].unique()

In [None]:
# Look up the metadata row(s) whose "FOTO_1" value equals a given image name
image_name = "18228489-9471-4aee-bc87-0a8605521007.jpeg"

is_requested_image = dataset_gdf["FOTO_1"] == image_name
dataset_gdf.loc[is_requested_image]

## Example: show an image

In [None]:
import posixpath

images_path = "hackathon_vlm/onderhoud/2025_Centrum/Foto's klein onderhoud/"
image_name = "18228489-9471-4aee-bc87-0a8605521007.jpeg"

# Blob names here use "/" separators regardless of the local OS, so build the
# name with posixpath rather than the platform-dependent os.path.
blob_bytes = blob_to_bytes(posixpath.join(images_path, image_name))

# Decode the downloaded bytes and render the image inline.
img = bytes_to_cv2(blob_bytes)
imshow(img)

In [None]:
# Write the bytes of the image downloaded above to a local file.
# NOTE(review): this rebinds `target_folder`, which the metadata cells also
# read; re-running those cells afterwards will use the images folder.
target_folder = "local_data/2025_Centrum/images/"
os.makedirs(target_folder, exist_ok=True)

filename = os.path.join(target_folder, image_name)
with open(filename, mode="wb") as image_file:
    image_file.write(blob_bytes)