# Exploring the V&OR maintenance data

The data is stored in the `experiments` blob container under `hackathon_vlm/onderhoud/`.

In [17]:
import cv2
import geopandas as gpd
import numpy as np
import os

from azure.identity import  DefaultAzureCredential
from azure.storage.blob import  BlobServiceClient


def blob_to_bytes(blobname: str):
    '''Download a blob from the notebook's container and return its content
        Args:
            blobname: name of the blob, passed to container_client.get_blob_client
        Returns:
            the value returned by download_blob().readall() for that blob
    '''
    # container_client is the module-level client created further down in this cell.
    return container_client.get_blob_client(blobname).download_blob().readall()

def bytes_to_cv2(image_bytes):
    '''Decode an in-memory encoded image with OpenCV
        Args:
            image_bytes: buffer holding the encoded image file content
        Returns:
            the result of cv2.imdecode called with cv2.IMREAD_UNCHANGED
    '''
    # View the raw buffer as a flat uint8 array, which is what imdecode is given.
    raw_buffer = np.frombuffer(image_bytes, dtype="uint8")
    return cv2.imdecode(raw_buffer, cv2.IMREAD_UNCHANGED)

def imshow(img):
    '''Display a CV2 image inline in the notebook as a JPEG
        Args:
            img: image accepted by cv2.imencode
        Raises:
            ValueError: if cv2.imencode reports that the JPEG encoding failed
    '''
    # Import the names from the submodule explicitly instead of relying on
    # `import IPython` having loaded `IPython.display` as a side effect.
    from IPython.display import Image, display

    success, encoded = cv2.imencode('.jpg', img)
    if not success:
        raise ValueError("cv2.imencode could not encode the image as JPEG")

    # Hand over real bytes plus the explicit format so the output is tagged as
    # JPEG rather than left to IPython's default for non-bytes data.
    display(Image(data=encoded.tobytes(), format='jpeg'))


# Blob endpoint URL and name of the container that the clients below connect to.
ACCOUNT_URL = "https://cvodataweupgwapeg4pyiw5e.blob.core.windows.net"
CONTAINER_NAME = "experiments"


# Credential object handed to the blob service client.
credential = DefaultAzureCredential()

# Module-level clients; `container_client` is the global that blob_to_bytes reads.
blob_service_client = BlobServiceClient(account_url=ACCOUNT_URL, credential=credential)
container_client = blob_service_client.get_container_client(CONTAINER_NAME)

## Example: load metadata

In [27]:
# The dataset consists of multiple files; we need to download them to explore.
dataset_path_prefix = "hackathon_vlm/onderhoud/2025_Centrum/KLEIN_ONDERHOUD_SDCENTRUM 20250328"

# Local folder where the downloaded files are stored
target_folder = "local_data/2025_Centrum/"

In [28]:
# Download every blob of the dataset into target_folder

os.makedirs(target_folder, exist_ok=True)

# Reuse the prefix from the configuration cell instead of repeating the literal,
# so switching to another dataset only requires editing one place.
blob_list = container_client.list_blobs(name_starts_with=dataset_path_prefix)
for blob in blob_list:
    # Only the last path component is kept, so all files land directly in target_folder.
    local_name = os.path.basename(blob.name)
    if not local_name:
        # A blob name ending in "/" has no file name to write to; skip it.
        continue
    blob_bytes = blob_to_bytes(blob.name)
    filename = os.path.join(target_folder, local_name)
    with open(filename, "wb") as f:
        f.write(blob_bytes)

In [29]:
# Load the downloaded shapefile from target_folder into a dataframe
dataset_gdf = gpd.read_file(os.path.join(target_folder, "KLEIN_ONDERHOUD_SDCENTRUM 20250328.shp"))

# Show the first five rows as the cell output
dataset_gdf.head(5)

Unnamed: 0,ORIG_ID,GEOVISIA_I,GUID,ID,KO_ID,TYPE,TYPE_GEDET,TYPE_EXTRA,KLEUR,STADSDEEL,...,INSP_RONDE,SCHADEBEEL,HOEVEELHEI,EENHEID,OPM_INSPEC,FOTO_1,FOTO_2,PRIO,MELDING_OR,geometry
0,1,5474AF66-54DE-4AB6-A849-3C986F36226C,{5065C5B0-B6F5-4968-91BF-2A6C499DB18B},142747.0,Centrum-2025-1,Elementenverharding,Tegels,Betontegel,Grijs,Centrum,...,2025,plaatselijke verzakking,1,m2,m2,d8f399d7-0ef5-4ba5-af6e-f3288f233acc.jpg,,hoog,False,POINT (122292.765 486164.166)
1,2,6DC9B87E-CC26-42DE-B9C1-082771661EBF,{D5A8AFB3-D50C-4BED-A064-0797499C52C2},1293686.0,Centrum-2025-2,Asfaltverharding,Dichte deklagen,Steenmastiekasfalt,Grijs,Centrum,...,2025,randschade,3,meter,ophoging,8820247c-e928-44e6-812f-8769ee3bf250.jpeg,,laag,False,POINT (122299.63 486165.929)
2,4,E92758FA-B05A-4C91-A039-9B7A1F609606,{C79EE942-F2F8-4043-A293-257BAB60E5F5},187657.0,Centrum-2025-4,Elementenverharding,Betonstraatstenen,Betonsteen,Grijs,Centrum,...,2025,boomwortelschade,1,m2,,18228489-9471-4aee-bc87-0a8605521007.jpeg,,hoog,False,POINT (122256.084 486170.879)
3,5,B35B0070-EC2D-4A16-8F01-105658CDD0A6,{C79EE942-F2F8-4043-A293-257BAB60E5F5},187657.0,Centrum-2025-5,Elementenverharding,Betonstraatstenen,Betonsteen,Grijs,Centrum,...,2025,plaatselijke verzakking,1,m2,,22ae871c-9eac-4ae1-bea4-f10c647e92b2.jpeg,,hoog,False,POINT (122171.062 486143.986)
4,6,ED057E8B-1608-4D0D-A091-D1702BAB6445,{16E0FB1C-82E7-43C4-8B8B-D6040D81D7F7},1293702.0,Centrum-2025-6,Asfaltverharding,Dichte deklagen,Steenmastiekasfalt,Grijs,Centrum,...,2025,dwarsscheur,3,meter,,fb7ce06d-16e9-4ba0-a75d-a0b9e17b2bb6.jpeg,,hoog,False,POINT (122267.499 486262.53)


In [30]:
# Print the column names of the dataframe
print(dataset_gdf.columns)

Index(['ORIG_ID', 'GEOVISIA_I', 'GUID', 'ID', 'KO_ID', 'TYPE', 'TYPE_GEDET',
       'TYPE_EXTRA', 'KLEUR', 'STADSDEEL', 'OPENBARE_R', 'BUURT', 'WIJK',
       'BEHEERDER', 'INSP_DATUM', 'INSPECTEUR', 'INSP_BEDRI', 'INSP_RONDE',
       'SCHADEBEEL', 'HOEVEELHEI', 'EENHEID', 'OPM_INSPEC', 'FOTO_1', 'FOTO_2',
       'PRIO', 'MELDING_OR', 'geometry'],
      dtype='object')


In [31]:
# Distinct damage categories found in the SCHADEBEEL column
dataset_gdf["SCHADEBEEL"].unique()

array(['plaatselijke verzakking', 'randschade', 'boomwortelschade',
       'dwarsscheur', 'gebroken elementen', 'niet gevuld boorgat', 'gat',
       'plaatselijke ophoging', 'langsscheur',
       'asfaltsleuf elementen (HOOR)',
       'aansluiting inspectieput vierkant', 'aansluiting kolk',
       'rafeling', 'kantopsluiting', 'losliggende elementen',
       'bezweken plek', 'voegwijdte elementen', 'ontbrekende elementen'],
      dtype=object)

In [32]:
# Distinct values found in the TYPE column
dataset_gdf["TYPE"].unique()

array(['Elementenverharding', 'Asfaltverharding', 'Betonverharding'],
      dtype=object)

In [37]:
# Look up the metadata of one specific image
image_name = "da812701-a1e3-4448-b5b1-0990c27441c0.jpeg"

# Print the SCHADEBEEL value of the first row whose FOTO_1 equals image_name
print(dataset_gdf.loc[dataset_gdf["FOTO_1"] == image_name, "SCHADEBEEL"].values[0])

# Show the FOTO_1 names of all rows with "SCHADEBEEL" == "ontbrekende elementen"
dataset_gdf.loc[dataset_gdf["SCHADEBEEL"] == "ontbrekende elementen", "FOTO_1"].values



ontbrekende elementen


array(['da812701-a1e3-4448-b5b1-0990c27441c0.jpeg'], dtype=object)

## Example: show an image

In [None]:
# Blob name prefix under which the image is looked up
images_path = "hackathon_vlm/onderhoud/2025_Centrum/Foto's klein onderhoud/"

# Download the raw bytes of the image selected above (image_name)
blob_bytes = blob_to_bytes(os.path.join(images_path, image_name))

# Decode the bytes and render the image inline
img = bytes_to_cv2(blob_bytes)
imshow(img)

In [39]:
# Store image locally
# NOTE(review): this rebinds `target_folder`, which the dataset download/load
# cells above also read; re-running those cells after this one would make them
# use the images folder instead.
target_folder = "local_data/2025_Centrum/images/"

os.makedirs(target_folder, exist_ok=True)

# Write the bytes downloaded in the previous cell under the image's own name
filename = os.path.join(target_folder, image_name)
with open(filename, "wb") as f:
    f.write(blob_bytes)