In [1]:
# Execute the shared helper notebook first.
# NOTE(review): names used further down that are not defined in this notebook (e.g. load_model,
# MODEL_HANDLE, np, widgets, time) presumably come from Helper.ipynb — confirm before editing it.
%run Helper.ipynb

Num GPUs Available:  1


In [2]:
# NOTE(review): set_legacy is defined outside this notebook (presumably in Helper.ipynb);
# what the flag switches is not visible here — confirm against the helper.
set_legacy(False)

In [2]:
# Load the detection model identified by MODEL_HANDLE; MODEL is passed to the inference call further down.
# NOTE(review): load_model and MODEL_HANDLE are not defined in this notebook — presumably from Helper.ipynb.
MODEL = load_model(MODEL_HANDLE)



Model loaded!


In [2]:
# To load previously generated object detection results from a file, instead of running the object detection use:
# - np.save stores the results dict as a 0-d object array, so .item() is needed to get the dict back.
# - allow_pickle=True unpickles arbitrary Python objects: only load files you generated yourself.
DETECTED_CLASSES = np.load('outputs/statistics/detections_single.npy', allow_pickle=True).item()

In [21]:
# Run object detection over every valid image, one image at a time (to prevent memory error).
# Produces:
#   DETECTED_CLASSES - detections per image, merged from get_detections_for_image
#   CLASSES_TOTALS   - summed 'amount' per class label over all images
VALID_PATHS = get_valid_image_paths()
TOTAL = len(VALID_PATHS)

# Progress bar and label showing how many images have been processed so far
progress_detection = widgets.IntProgress(min=0, max=TOTAL, description='Inferred: ')
progress_label = widgets.Label(value='0 / ' + str(TOTAL))
display(widgets.HBox([progress_detection, progress_label]))

# All annotated images go to the same folder, so build the path once
output = 'outputs/' + MODEL_NAME + '_all/'

start_time = time.time()
DETECTED_CLASSES = {}
CLASSES_TOTALS = {}
for current, image_path in enumerate(VALID_PATHS):
    # Load current image (read from VALID_PATHS, the list TOTAL was computed from)
    IMAGE = load_image(image_path)

    # Setup detections (which can be used to generate statistics later)
    IMAGE['detection'] = run_inference_for_image(IMAGE, MODEL)

    # Save detected classes by image
    current_detections = get_detections_for_image(IMAGE)
    DETECTED_CLASSES.update(current_detections)

    # Sum up amount of detections for each class
    for detections in current_detections.values():
        for label, stats in detections.items():
            CLASSES_TOTALS[label] = CLASSES_TOTALS.get(label, 0) + stats['amount']

    # Draw detections on images
    draw_detections_on_image(IMAGE)

    # Save new images
    save_image(output, IMAGE)

    progress_detection.value += 1
    progress_label.value = str(progress_detection.value) + ' / ' + str(TOTAL)

runtime = (time.time() - start_time) / 60
print(f"--- Runtime: {runtime:.2f} minutes ---")

HBox(children=(IntProgress(value=0, description='Inferred: ', max=6990), Label(value='0 / 6990')))

--- Runtime: 260.32 minutes ---


In [39]:
# Write the detections to a numpy file, which makes reading the results from the file easier and saves us the trouble
# of having to run the object detection every time we want to use it
np.save('outputs/statistics/detections_single.npy', DETECTED_CLASSES, allow_pickle=True)

# Pass TOTAL (the image count set by the detection cell); a lowercase `total` is never defined in this notebook.
# NOTE(review): generate_detection_chart is defined elsewhere — presumably the second argument is the
# number of images used for the per-image average; confirm against the helper.
generate_detection_chart(DETECTED_CLASSES, TOTAL)

Average detections per image: 3.66


In [3]:
# Derive the detected labels from the per-image detection results.
# NOTE(review): get_detected_labels is defined outside this notebook; the structure it returns is not visible here.
DETECTION_LABELS = get_detected_labels(DETECTED_CLASSES)

In [None]:
# Get BabelNet entities for the detected labels, then write the mapping to a CSV file.
# NOTE(review): the return value of get_entity_mapping is discarded, so it presumably updates
# DETECTION_LABELS in place — confirm against the helper.
get_entity_mapping(DETECTION_LABELS)
save_entity_mapping(DETECTION_LABELS, 'outputs/statistics/mapped_entities.csv')

In [4]:
# Read the saved mapping file and keep one {column: value} dict per row;
# this replaces whatever DETECTION_LABELS held before.
DETECTION_LABELS = list(
    load_saved_mappings('outputs/statistics/mapped_entities corrected.csv')
    .to_dict('index')
    .values()
)

In [7]:
# Generate RDF information from object detection
from datetime import datetime, timezone
from uuid import uuid4

from rdflib import Graph, Literal, RDF, URIRef, Namespace, BNode
from rdflib.namespace import FOAF, OWL, RDFS, XSD, SKOS, DCTERMS

def generate_uri(filename, namespace=''):
    """Return a URIRef made of ``namespace`` followed by the URL-encoded ``filename``.

    The name is encoded with ``parse.quote_plus``; ``namespace`` defaults to the
    empty string, in which case the URIRef holds only the encoded name.
    """
    encoded_name = parse.quote_plus(filename)
    return URIRef(namespace + encoded_name)

def map_filename(filename):
    """Return the id embedded in ``filename`` between '[' and ']'.

    The slice starts 5 characters after the position of the first '[' (i.e. it
    skips the bracket plus the next 4 characters — presumably a fixed prefix,
    TODO confirm against the real file names) and stops just before the first ']'.
    """
    id_start = filename.find('[') + 5
    id_end = filename.find(']')
    return filename[id_start:id_end]
    
def get_mapped_entity(label):
    """Look up the mapped entity for a detection label.

    Uses the first entry of DETECTION_LABELS whose 'class' equals the capitalized
    label. Returns that entry's 'wikidataid' when its 'exactMatch' value is truthy,
    otherwise its 'correctMapping'. Returns an empty string when no entry matches.
    """
    for entry in DETECTION_LABELS:
        if entry['class'] == label.capitalize():
            if entry['exactMatch']:
                return entry['wikidataid']
            return entry['correctMapping']

    return ''

# Build an RDF graph with one annotation per detected bounding box and write it to disk as Turtle.

# Create graph and add namespaces
g = Graph()

OA = Namespace('http://www.w3.org/ns/oa#')
g.namespace_manager.bind('oa', OA)
SLOD = Namespace('http://slod.fiz-karlsruhe.de/')
g.namespace_manager.bind('slod', SLOD)
# The NIF core namespace ends with '#'; without it NIF.confidence would expand to
# '...ontologies/nif-coreconfidence' instead of '...ontologies/nif-core#confidence'
NIF = Namespace('http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#')
g.namespace_manager.bind('nif', NIF)
WIKIDATA = Namespace('http://www.wikidata.org/entity/')
g.namespace_manager.bind('wd', WIKIDATA)
DCMITYPE = Namespace('http://purl.org/dc/dcmitype/')
g.namespace_manager.bind('dctypes', DCMITYPE)
SCHEMA = Namespace('http://schema.org/')
g.namespace_manager.bind('schema', SCHEMA)
g.namespace_manager.bind('dcterms', DCTERMS)

# Generate date of creation. The trailing 'Z' marks the value as UTC, so the clock is read in UTC as well
current_date = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

# Add Object detector
blank_node_detector = BNode(generate_uri('detector'))
g.add((blank_node_detector, RDF.type, SLOD.ObjectDetector))
g.add((blank_node_detector, RDFS.label, Literal(MODEL_NAME, datatype=XSD.string)))

# Base URIs shared by every image / annotation (constant, so built once)
image_namespace = SLOD.images + '/slod/'
annotation_namespace = SLOD.annotations + '/'

for filename, detections_by_label in DETECTED_CLASSES.items():
    # Add image node
    depictionId = map_filename(filename)
    imageURI = generate_uri(depictionId, image_namespace)
    g.add((imageURI, RDF.type, DCMITYPE.StillImage))
    g.add((imageURI, SCHEMA.image, generate_uri(depictionId + '.jpg', image_namespace)))
    g.add((imageURI, DCTERMS.rights, URIRef('https://creativecommons.org/licenses/by/2.0/')))

    # Running counter over all boxes of this image (across classes);
    # required to keep indices for a single image unique in the graph
    offset = 1
    # For each detected class on the image
    for label, detection in detections_by_label.items():
        # The mapping depends only on the label, so look it up once per class rather than once per box
        mapped = get_mapped_entity(label)

        # For each bounding box that exists of the current class in the current image
        for index, box in enumerate(detection['boxes']):
            # Create two blank nodes for target and selector
            blank_node_target = BNode(generate_uri('target-' + depictionId + '-' + str(offset)))
            blank_node_selector = BNode(generate_uri('selector-' + depictionId + '-' + str(offset)))

            # Add source for current annotation
            g.add((blank_node_target, OA.hasSource, imageURI))

            # Add annotation
            annotationURI = generate_uri(depictionId + '-' + str(offset), annotation_namespace)
            g.add((annotationURI, OA.hasTarget, blank_node_target))
            g.add((annotationURI, RDF.type, OA.Annotation))
            g.add((annotationURI, NIF.confidence, Literal(detection['scores'][index], datatype=XSD.float)))
            g.add((annotationURI, RDFS.label, Literal(label.capitalize(), lang='en')))
            g.add((annotationURI, DCTERMS.created, Literal(current_date, datatype=XSD.dateTime)))

            # Add link to object detector
            g.add((annotationURI, DCTERMS.creator, blank_node_detector))

            # Add wikidata mapping if existent
            if mapped:
                g.add((annotationURI, OA.hasBody, generate_uri(mapped, WIKIDATA)))

            # Add Selector: the fragment is x,y,width,height, built from box[1], box[0],
            # box[3]-box[1] and box[2]-box[0]
            # NOTE(review): whether the box values are pixels or normalized coordinates is not visible here
            box_x, box_y = box[1], box[0]
            box_w, box_h = box[3] - box[1], box[2] - box[0]
            fragment = 'xywh=' + str(box_x) + ',' + str(box_y) + ',' + str(box_w) + ',' + str(box_h)
            g.add((blank_node_target, OA.hasSelector, blank_node_selector))
            g.add((blank_node_selector, RDF.value, Literal(fragment, datatype=XSD.string)))
            g.add((blank_node_selector, RDF.type, OA.FragmentSelector))
            g.add((blank_node_selector, DCTERMS.conformsTo, URIRef('http://www.w3.org/TR/media-frags/')))

            # Increase offset
            offset += 1

# Name the format explicitly: the file has a .ttl extension, and the default serialization
# format differs between rdflib versions
g.serialize(destination='outputs/statistics/rdf.ttl', format='turtle')