# PREPROCESSING IMAGE DATA TO LINKED DATA
In this notebook, we evaluate the image assets in a session repository.
For every jpeg/png with its accompanying xmp or xml file, an IMAGENODE metadata class is created
that governs all the metadata of the image (paths, pose, etc.).
As output, the method generates RDF graph (.ttl) files.

>This codebase operates on the scan2bim2.yml environment (python 3.8)

In [1]:
# import APIs
import rdflib
from rdflib import Graph, plugin
from rdflib.serializer import Serializer #pip install rdflib-jsonld https://pypi.org/project/rdflib-jsonld/
from rdflib import Graph
from rdflib import URIRef, BNode, Literal
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD
import uuid    
import os.path, time
import importlib
import PIL
import PIL.Image as PILimage
from PIL import ImageDraw, ImageFont, ImageEnhance
from PIL.ExifTags import TAGS, GPSTAGS
import xml.etree.ElementTree as ET
import numpy as np
import open3d as o3d

#IMPORT MODULES
import Algorithms.linkeddatatools as ld
import Classes.pointcloudnode as pc
import Algorithms.scan2bim as s2b
import Classes.imagenode as im

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


## 1. INITIALIZE SESSION

In [2]:
## INPUTS
# Root folder of the project; every other location below is derived from it.
# Alternative data set: "D:\\Data\\2018-06 Werfopvolging Academiestraat Gent"
project_path = "K:\\Projects\\2025-03 Project FWO SB Jelle\\7.Data\\21-11 House Maarten\\RAW data"

# Session that is processed in this notebook (alternative: "\\week 22").
session_path = f"{project_path}\\session_21-12-08"

# The BIM folder lives at project level, the other asset folders inside the session.
bim_folderpath = f"{project_path}\\BIM"
pcd_folderpath = f"{session_path}\\PCD"
img_folderpath = f"{session_path}\\IMG"
ortho_folderpath = f"{session_path}\\ORTHO"
mesh_folderpath = f"{session_path}\\MESH"

## session initialization missing

## 2. READ PRESENT JPEG/PNG/XMP/XML CONTAINING IMAGES + METADATA
An IMAGENODE is created per image in the session

In [3]:
# Retrieve all image files in the session along with their xmp/xml metadata.
# For every JPG image an ImageNode is created. Metadata is taken from the
# image's own EXIF data first; a sidecar file with the same base name
# (.xmp, else .xml) is then registered and, for .xmp, read on top of it.
importlib.reload(ld)
importlib.reload(s2b)
importlib.reload(im)

all_session_file_paths=ld.getListOfFiles(session_path) 
# set copy of the file list so the sidecar lookups in the loop are O(1) instead of a list scan
session_file_lookup=set(all_session_file_paths)
img_file_paths=[] # these are string image (.jpg) file paths
img_xmp_paths=[] # (RealityCapture) these are string .xmp file paths
img_xml_paths=[] # (MetaShape) these are string .xml file paths
imgnodelist=[]
exif_counter=0
xmp_counter=0
xml_counter=0

for file_path in all_session_file_paths:        
    # Split off the extension once: it is compared case-insensitively (so both
    # '.JPG' and '.jpg' are accepted) and the base path is reused to build the
    # sidecar paths without touching 'JPG' substrings elsewhere in the path.
    base_path, extension = os.path.splitext(file_path)
    if extension.lower() != '.jpg': # png is not handled (yet)
        continue

    #create imgnode
    print('creating ImageNode from '+file_path)
    imgnode=im.ImageNode()
    imgnode.session_path=session_path
    imgnode.timestamp=ld.get_timestamp(file_path) # this is a long float nr
    imgnode.name=ld.get_filename(file_path)
    imgnode.img_path=file_path
    # NOTE(review): every node keeps its own PIL image object; with many images
    # this may keep many file handles open -- confirm whether imgnode.img is needed here.
    imgnode.img=PILimage.open(file_path)    
    img_file_paths.append(file_path) 

    test_xmp_path=base_path+'.xmp'
    test_xml_path=base_path+'.xml'
    
    #1. see if image itself contains some exif data
    imgnode.get_exif_data()
    if imgnode.exif_data is not None:
        imgnode.set_exif_data()
        # only images with a latitude are counted as having an exif pose
        if imgnode.GlobalPose.SphericalTranslation.lat is not None:
            imgnode.coordinate_system='geospatial-wgs84'
            exif_counter +=1

    # 2. if there is a matching xmp file, overwrite the overlapping information   
    if test_xmp_path in session_file_lookup:
        img_xmp_paths.append(test_xmp_path)  
        imgnode.read_img_xmp(test_xmp_path)  # check if Coordinates are absolute for WGS84?
        xmp_counter+=1  

    # 3. or, if there is a matching xml file, only register it (reading xml is not implemented)
    elif test_xml_path in session_file_lookup:
        img_xml_paths.append(test_xml_path)  
        # imgnode.read_img_xml(test_xml_path)  #not implemented
        xml_counter+=1  

    # create guid if none is present
    if imgnode.guid is None:
        imgnode.guid= '{'+str(uuid.uuid1())+'}'    
   
    imgnodelist.append(imgnode)              

# report how many nodes were created and where their pose information came from
print(str(len(imgnodelist))+' ImageNodes are created.')
print(str(exif_counter)+' have poses in exif data')
print(str(xmp_counter)+' have poses in xmp data')
print(str(xml_counter)+' have poses in xml data (not implemented yet)')
print('Note that these nodes do not yet contain actual data but only the metadata')

0 ImageNodes are created.
0 have poses in exif data
0 have poses in xmp data
0 have poses in xml data (not implemented yet)
Note that these nodes do not yet contain actual data but only the metadata


## 3. CREATE RDF GRAPH FOR ALL IMAGENODES AND EXPORT THEM TO .TTL
An RDF graph and .ttl file is created for all images in the session
(images themselves are not stored in the graph, only metadata)

In [45]:
# Merge the RDF graphs of all ImageNodes into one graph and write it to a single .ttl file.
importlib.reload(ld)
importlib.reload(s2b)
importlib.reload(im)

fail_counter=0
graph_path=None # destination of the combined graph; taken from the first node that serializes
g=Graph()
for idx,imgnode in enumerate(imgnodelist):
    try:
        imgnode.add_to_rdf_graph()
        # NOTE(review): `g + ...` builds a new merged graph on every iteration; it is kept
        # instead of `g += ...` because the in-place form may not carry over the namespace
        # prefixes of the node graphs -- confirm against the rdflib documentation before changing.
        g=g+imgnode.rdf_graph
        # NOTE(review): the target folder is "IMG_RGB" while the inputs cell defines
        # img_folderpath as "...\\IMG" -- confirm which folder is intended.
        imgnode.rdf_graph_path=imgnode.session_path+"\\IMG_RGB\\imgGraph.ttl"
    except Exception as error:
        # only catch real errors (not KeyboardInterrupt/SystemExit) and report the cause
        print('ImagedNode '+str(idx)+' could not be serialized. Reason: '+repr(error))
        fail_counter+=1
        continue
    if graph_path is None:
        graph_path=imgnode.rdf_graph_path

# write the combined graph; skipped when no node could be serialized (e.g. empty imgnodelist)
if graph_path is None:
    print('No ImageNodes were serialized; no .ttl file was written.')
else:
    g.serialize(destination=str(graph_path), format='ttl')
    print (str(len(imgnodelist)-fail_counter)+' ImageNodes succesfully serialized in: '+str(graph_path))

720 ImageNodes succesfully serialized in: D:\Data\2018-06 Werfopvolging Academiestraat Gent\week 22\PCD\imgGraph.ttl
