# Download the images to a local directory

NOTE: this only needs to be done once for a given set of images. Once they are loaded into the bucket it doesn't need to be run again.

This code uses a list of accession numbers (found as a column in a CSV file) to generate IIIF Image API (v2) URLs for JPEG images that are 1000 pixels in the shortest dimension (or full size, when the original is already smaller than that), then downloads them into a local directory.

After generating and downloading the images, they need to be uploaded to the Google Cloud bucket used in the Vision analysis.

In [75]:
import pandas as pd
import requests
import shutil # high-level file operations
from PIL import Image

# Machine-specific locations: the folder holding the CSV data files and the
# folder that receives the downloaded images.
base_path = '/Users/baskausj/github/vandycite/gallery_buchanan/image_analysis/'
download_path = '/Users/baskausj/Downloads/'

# Read the source image table with every column kept as text, then key the
# rows by their commons_id value.
source_image_dataframe = pd.read_csv(f'{base_path}combined_images.csv', dtype=str).set_index('commons_id')

source_image_dataframe.head()

Unnamed: 0_level_0,local_filename,qid,accession_number,rank,kilobytes,height,width,photo_inception,extension,directory,label_en,commons_image_name,iiif_manifest,manifest_label,upload_notes
commons_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M122562148,1956-001.tif,Q102961253,1956.001,primary,73531,2611,4805,2020-07-23,tif,1956,A group of muffs and other articles of dress o...,A group of muffs and other articles of dress o...,https://iiif-manifest.library.vanderbilt.edu/g...,,
M122617251,1956.002.tif,Q103296446,1956.002,primary,86058,4471,3284,2012-10-17,tif,1956,A Flower Piece (after Jan van Huysum),A Flower Piece (after Jan van Huysum) - Vander...,https://iiif-manifest.library.vanderbilt.edu/g...,,
M122641532,1956-003.tif,Q103297456,1956.003,primary,47806,3600,2265,2020-08-18,tif,1956,Bishop Hacket,Bishop Hacket - Vanderbilt Fine Arts Gallery -...,https://iiif-manifest.library.vanderbilt.edu/g...,,
M122695514,1956.004.jpg,Q103310070,1956.004,primary,122,696,452,2010-02-22,jpg,1956,The Raising of Lazarus (after Leandro Bassano),The Raising of Lazarus (after Leandro Bassano)...,https://iiif-manifest.library.vanderbilt.edu/g...,,
M122611522,1956.006.jpg,Q102974173,1956.006,primary,58,365,436,2010-02-22,jpg,1956,The Farmhouse by the Water,The Farmhouse by the Water - Vanderbilt Fine A...,https://iiif-manifest.library.vanderbilt.edu/g...,,


In [78]:

import io  # in-memory buffer so the downloaded bytes can be both measured and saved
import os  # used only to make sure the output directory exists

# Import CSV data as a dataframe.
accession_dataframe = pd.read_csv(base_path + 'test_accession_numbers.csv', dtype=str)

# Directory that receives the downloaded JPEG files.
image_directory = download_path + 'google_vision_images/'
os.makedirs(image_directory, exist_ok=True)

# Seconds to wait for the manifest and image servers before giving up.
request_timeout = 60

# One dictionary per downloaded image; turned into a dataframe after the loop.
# (DataFrame.append was removed in pandas 2.0, so rows are collected in a list.)
dimension_rows = []

# Loop through the dataframe rows and download the images.
for index, row in accession_dataframe.iterrows():
    accession_number = row['accession_number']
    print(accession_number)

    # Look up the image data in the source image dataframe.
    # In cases where there are two images, we want the primary image.
    image_series = source_image_dataframe.loc[(source_image_dataframe['accession_number'] == accession_number) & (source_image_dataframe['rank'] == 'primary')]
    if image_series.empty:
        # Report and move on rather than aborting the whole batch.
        print('no primary image found for', accession_number)
        print()
        continue
    # The dataframe is indexed by commons_id, so the first match must be taken
    # by position (.iloc), not by the label 0.
    image_record = image_series.iloc[0]
    manifest_url = image_record['iiif_manifest']

    # Get the manifest from the manifest URL.
    manifest_response = requests.get(manifest_url, timeout=request_timeout)
    manifest_response.raise_for_status()
    manifest = manifest_response.json()
    service_url = manifest['sequences'][0]['canvases'][0]['images'][0]['resource']['service']['@id']
    # Because of the error in original manifests, replace version 3 with version 2 in the URL.
    service_url = service_url.replace('/3/', '/2/')

    # Determine the maximum and minimum dimensions of the full-sized image.
    height = image_record['height']
    width = image_record['width']
    shortest_dimension = min(int(height), int(width))
    longest_dimension = max(int(height), int(width))

    # We want to know what the longest dimension needs to be for the shortest dimension to be 1000 pixels.
    # If the shortest dimension is already 1000 pixels or less, the image is requested at its full size.
    # The min() guards against ever requesting more pixels than the original has.
    if shortest_dimension > 1000:
        size = min(int(1000 * (longest_dimension / shortest_dimension)), longest_dimension)
    else:
        size = longest_dimension

    # Construct the image URL using the "!" size option, which keeps the aspect ratio but sizes to the maximum dimension.
    image_url = service_url + '/full/!' + str(size) + ',' + str(size) + '/0/default.jpg'
    print('image_url', image_url)
    print()

    # Retrieve the complete image from the IIIF server. The bytes are held in
    # memory because they are needed twice: once to measure the image and once
    # to write the file. Reading the raw response stream twice does not work,
    # since whatever the first reader consumes is missing for the second.
    image_response = requests.get(image_url, timeout=request_timeout)
    image_response.raise_for_status()
    image_bytes = image_response.content

    # Find the dimensions of the reduced image.
    with Image.open(io.BytesIO(image_bytes)) as image:
        reduced_width, reduced_height = image.size

    # Save the image as a JPEG file.
    with open(image_directory + accession_number + '.jpg', 'wb') as out_file:
        out_file.write(image_bytes)

    # Record the accession number with the full-sized and reduced dimensions.
    dimension_rows.append({'accession_number': accession_number, 'max_height': height, 'max_width': width, 'height': reduced_height, 'width': reduced_width})

# Build the dataframe that holds the accession numbers and dimensions.
# The column order follows the accession_dimensions.csv table displayed later in this notebook.
accession_dimensions_dataframe = pd.DataFrame(dimension_rows, columns=['accession_number', 'max_height', 'max_width', 'height', 'width'])

# Save the dataframe to a CSV file
accession_dimensions_dataframe.to_csv(base_path + 'accession_dimensions.csv', index=False)

print('done')

1986.076
image_url https://iiif.library.vanderbilt.edu/iiif/2/gallery%2F1986%2F1986.076.tif/full/!1339,1339/0/default.jpg

1979.0324P
image_url https://iiif.library.vanderbilt.edu/iiif/2/gallery%2F1979%2F1979.0324P.tif/full/!1217,1217/0/default.jpg

done


# Google Cloud Vision image analysis

The first cell retrieves the service key, creates a credentials object, then uses it to authenticate and create a `client` object.

In [58]:
# Here's the landing page for Google Cloud Vision
# https://cloud.google.com/vision/
# From it you can try the api by dragging and dropping an image into the browser. You can then 
# view the JSON response, which was helpful at first for understanding the structure of the response.

# The following tutorial contains critical information about enabling the API and creating a role
# for the service account to allow it access. This is followed by creating a service account key.
# https://cloud.google.com/vision/docs/detect-labels-image-client-libraries

# I didn't actually do this tutorial, but it was useful to understand the order of operations that
# needed to be done prior to writing to the API.
# https://www.cloudskillsboost.google/focuses/2457?parent=catalog&utm_source=vision&utm_campaign=cloudapi&utm_medium=webpage
# Because I'm using the Python client library, the part about setting up the request body was irrelevant. 
# But the stuff about uploading the files to the bucket, making it publicly accessible, etc. was helpful.
import json
import pandas as pd

# Imports the Google Cloud client library
# Reference for Google Cloud Vision Python client https://cloud.google.com/python/docs/reference/vision/latest
from google.cloud import vision
from google.cloud import vision_v1
from google.cloud.vision_v1 import AnnotateImageResponse

# Import from Google oauth library
from google.oauth2 import service_account

# Extract the annotation data from a hit and turn it into a row in the dataframe
def extract_object_localization_data(accession_number, annotation, width, height):
    """Flatten one localized-object annotation into a row dictionary.

    accession_number: identifier stored with the row.
    annotation: dictionary with 'name', 'score' and
        'boundingPoly' -> 'normalizedVertices' entries. Vertex 0 is treated as
        the upper-left corner and vertex 2 as the lower-right corner.
    width, height: image dimensions used to scale the normalized coordinates.

    Returns a dictionary holding the relative coordinates as given and the
    absolute coordinates rounded to whole numbers.
    """
    label = annotation['name']
    confidence = annotation['score']
    corners = annotation['boundingPoly']['normalizedVertices']
    upper_left = corners[0]
    rel_left, rel_top = upper_left['x'], upper_left['y']
    lower_right = corners[2]
    rel_right, rel_bottom = lower_right['x'], lower_right['y']

    return {
        'accession_number': accession_number,
        'description': label,
        'score': confidence,
        'rel_left_x': rel_left,
        'rel_right_x': rel_right,
        'rel_top_y': rel_top,
        'rel_bottom_y': rel_bottom,
        # Scale the normalized values by the image size to get absolute positions.
        'abs_left_x': round(rel_left * width),
        'abs_right_x': round(rel_right * width),
        'abs_top_y': round(rel_top * height),
        'abs_bottom_y': round(rel_bottom * height),
    }

def extract_face_detection_data(accession_number, annotation, width, height):
    """Flatten one face annotation into a row dictionary.

    accession_number: identifier stored with the row.
    annotation: dictionary with 'detectionConfidence', 'boundingPoly' ->
        'vertices', the three angle entries, 'landmarkingConfidence' and the
        seven likelihood entries. Vertex 0 is treated as the upper-left corner
        and vertex 2 as the lower-right corner.
    width, height: image dimensions; the vertex values are divided by these to
        produce the relative coordinates.

    Returns a dictionary with the vertex values stored unchanged as the
    absolute coordinates, alongside the relative coordinates and the copied
    angle, confidence and likelihood values.
    """
    confidence = annotation['detectionConfidence']
    corners = annotation['boundingPoly']['vertices']
    left_x, top_y = corners[0]['x'], corners[0]['y']
    right_x, bottom_y = corners[2]['x'], corners[2]['y']

    # Values copied straight from the annotation: (row column, annotation key).
    copied_fields = (
        ('roll_angle', 'rollAngle'),
        ('pan_angle', 'panAngle'),
        ('tilt_angle', 'tiltAngle'),
        ('landmarking_confidence', 'landmarkingConfidence'),
        ('joy_likelihood', 'joyLikelihood'),
        ('sorrow_likelihood', 'sorrowLikelihood'),
        ('anger_likelihood', 'angerLikelihood'),
        ('surprise_likelihood', 'surpriseLikelihood'),
        ('under_exposed_likelihood', 'underExposedLikelihood'),
        ('blurred_likelihood', 'blurredLikelihood'),
        ('headwear_likelihood', 'headwearLikelihood'),
    )
    copied_values = {column: annotation[key] for column, key in copied_fields}

    row = {
        'accession_number': accession_number,
        'score': confidence,
        'rel_left_x': left_x / width,
        'rel_right_x': right_x / width,
        'rel_top_y': top_y / height,
        'rel_bottom_y': bottom_y / height,
        'abs_left_x': left_x,
        'abs_right_x': right_x,
        'abs_top_y': top_y,
        'abs_bottom_y': bottom_y,
    }
    row.update(copied_values)
    return row

def extract_label_detection_data(accession_number, annotation):
    """Flatten one label annotation into a row dictionary.

    Copies the 'mid', 'description', 'score' and 'topicality' entries of the
    annotation dictionary into a new dictionary that starts with the
    accession number.
    """
    row = {'accession_number': accession_number}
    for field in ('mid', 'description', 'score', 'topicality'):
        row[field] = annotation[field]
    return row

def extract_text_detection_data(accession_number, annotation, width, height):
    """Flatten one text annotation into a row dictionary.

    accession_number: identifier stored with the row.
    annotation: dictionary with 'locale', 'description' and
        'boundingPoly' -> 'vertices' entries. Vertex 0 is treated as the
        upper-left corner and vertex 2 as the lower-right corner.
    width, height: image dimensions; the vertex values are divided by these to
        produce the relative coordinates.

    Returns a dictionary with the vertex values stored unchanged as the
    absolute coordinates, alongside the relative coordinates.
    """
    language = annotation['locale']
    text = annotation['description']
    corners = annotation['boundingPoly']['vertices']
    upper_left, lower_right = corners[0], corners[2]
    left_x, top_y = upper_left['x'], upper_left['y']
    right_x, bottom_y = lower_right['x'], lower_right['y']

    return {
        'accession_number': accession_number,
        'locale': language,
        'description': text,
        'rel_left_x': left_x / width,
        'rel_right_x': right_x / width,
        'rel_top_y': top_y / height,
        'rel_bottom_y': bottom_y / height,
        'abs_left_x': left_x,
        'abs_right_x': right_x,
        'abs_top_y': top_y,
        'abs_bottom_y': bottom_y,
    }

# Customize for your own computer
user_dir = 'baskausj' # Enter your user directory name here
# Location of the accession number data file. Built from user_dir so that
# changing the user directory name above is enough to move every local path.
base_path = '/Users/' + user_dir + '/github/vandycite/gallery_buchanan/image_analysis/'
# Base URL under which the generated annotation lists are published.
annotations_base_url = 'https://baskaufs.github.io/iiif/baskauf/'

# Set the path to the service account key
key_path = '/Users/' + user_dir + '/image-analysis-376619-193859a33600.json'

# Create a credentials object from the service account key
credentials = service_account.Credentials.from_service_account_file(
    key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"],
)

# API documentation https://cloud.google.com/python/docs/reference/vision/latest/google.cloud.vision_v1.services.image_annotator.ImageAnnotatorClient#methods
# The first two versions have no arguments and the credentials are loaded from the environment variable.
#client = vision.ImageAnnotatorClient()
# Used this specific v1 to get the JSON conversion to work
#client = vision_v1.ImageAnnotatorClient()
# Use this line instead of the one above to load the credentials directly from the file
client = vision_v1.ImageAnnotatorClient(credentials=credentials)


Load the source data from a CSV. The critical column needed here is the `accession_number` column, since it is the one that was used to construct the image file name for the uploaded test images.

In [90]:
# Read the accession number / dimension table, keeping every column as text,
# and preview the first rows.
accession_dataframe = pd.read_csv(f'{base_path}accession_dimensions.csv', dtype=str)
accession_dataframe.head()

Unnamed: 0,accession_number,max_height,max_width,height,width
0,1956.012,768,1019,768,1019
1,1956.043,4266,3311,1288,1000
2,1971.002,2925,4401,1000,1504
3,1979.0016,2944,4445,999,1509
4,1979.0071,1780,2460,1000,1382


In [46]:
# Testing aid: keep only the first row so the API is exercised with a single image.
# Skip this cell when the whole dataframe should be processed.
accession_dataframe = accession_dataframe.iloc[:1]

Loop through all of the accession numbers and perform the analysis on each of the images.

In [60]:
# Column layouts of the four output tables. Each list names exactly the keys
# produced by the matching extract_* function.
object_localization_columns = ['accession_number', 'description', 'score', 'rel_left_x', 'rel_right_x', 'rel_top_y', 'rel_bottom_y', 'abs_left_x', 'abs_right_x', 'abs_top_y', 'abs_bottom_y']
face_detection_columns = ['accession_number', 'score', 'rel_left_x', 'rel_right_x', 'rel_top_y', 'rel_bottom_y', 'abs_left_x', 'abs_right_x', 'abs_top_y', 'abs_bottom_y', 'roll_angle', 'pan_angle', 'tilt_angle', 'landmarking_confidence', 'joy_likelihood', 'sorrow_likelihood', 'anger_likelihood', 'surprise_likelihood', 'under_exposed_likelihood', 'blurred_likelihood', 'headwear_likelihood']
label_detection_columns = ['accession_number', 'mid', 'description', 'score', 'topicality']
text_detection_columns = ['accession_number', 'locale', 'description', 'rel_left_x', 'rel_right_x', 'rel_top_y', 'rel_bottom_y', 'abs_left_x', 'abs_right_x', 'abs_top_y', 'abs_bottom_y']

# Annotation rows are collected in plain lists and turned into dataframes when
# they are written out (DataFrame.append was removed in pandas 2.0).
object_localization_rows = []
face_detection_rows = []
label_detection_rows = []
text_detection_rows = []

# Start with empty dataframes so the names exist even if there are no images to process.
object_localization_dataframe = pd.DataFrame(columns=object_localization_columns)
face_detection_dataframe = pd.DataFrame(columns=face_detection_columns)
label_detection_dataframe = pd.DataFrame(columns=label_detection_columns)
text_detection_dataframe = pd.DataFrame(columns=text_detection_columns)

# Loop through the dataframe rows and analyze each image.
for index, row in accession_dataframe.iterrows():
    accession_number = row['accession_number']
    print('accession_number', accession_number)
    width = int(row['width'])
    height = int(row['height'])

    # To access the images, they should be stored in a Google Cloud Storage bucket that is set up for public access.
    # It's also possible to use a publicly accessible URL, but that seems to be unreliable.
    # The storage costs for a few images are negligible.

    # Construct the path to the image file
    image_uri = 'gs://vu-gallery/' + accession_number + '.jpg'

    # Here is the API documentation for the Feature object.
    # https://cloud.google.com/vision/docs/reference/rest/v1/Feature
    # This API documentation isn't exactly the one for the .annotate_image method, but it's close enough.
    # https://cloud.google.com/vision/docs/reference/rest/v1/projects.images/annotate
    # In particular, it links to the AnnotateImageRequest object, which is what we need to pass to the annotate_image method.
    # All four analyses are requested in a single call.
    response = client.annotate_image({
        'image': {'source': {'image_uri': image_uri}},
        'features': [
            {'type_': vision.Feature.Type.OBJECT_LOCALIZATION},
            {'type_': vision.Feature.Type.FACE_DETECTION},
            {'type_': vision.Feature.Type.LABEL_DETECTION},
            {'type_': vision.Feature.Type.TEXT_DETECTION}
        ]
    })

    # The API response is a protobuf object, which is not JSON serializable.
    # So we need to convert it to a JSON serializable object.
    # Solution from https://stackoverflow.com/a/65728119
    response_json = AnnotateImageResponse.to_json(response)

    # The structure of the response is detailed in the API documentation here:
    # https://cloud.google.com/vision/docs/reference/rest/v1/AnnotateImageResponse
    # The various bits are detailed for each feature type.
    # Here's the documentation for entity annotations, with a link to the BoundingPoly object.
    # https://cloud.google.com/vision/docs/reference/rest/v1/AnnotateImageResponse#EntityAnnotation
    response_struct = json.loads(response_json)

    # A failed analysis (for example an unreadable image URI) is reported in the
    # response's error field, not raised, so surface it here.
    error_message = (response_struct.get('error') or {}).get('message')
    if error_message:
        print('  error for ' + accession_number + ': ' + error_message)

    # Each table is rewritten to its CSV file after every image in case the process is interrupted.

    # Object localization
    # -------------------
    for annotation in response_struct.get('localizedObjectAnnotations', []):
        object_localization_rows.append(extract_object_localization_data(accession_number, annotation, width, height))
    object_localization_dataframe = pd.DataFrame(object_localization_rows, columns=object_localization_columns)
    object_localization_dataframe.to_csv(base_path + 'object_localization.csv', index=False)

    # Face detection
    # --------------
    for annotation in response_struct.get('faceAnnotations', []):
        face_detection_rows.append(extract_face_detection_data(accession_number, annotation, width, height))
    face_detection_dataframe = pd.DataFrame(face_detection_rows, columns=face_detection_columns)
    face_detection_dataframe.to_csv(base_path + 'face_detection.csv', index=False)

    # Label detection
    # ---------------
    for annotation in response_struct.get('labelAnnotations', []):
        label_detection_rows.append(extract_label_detection_data(accession_number, annotation))
    label_detection_dataframe = pd.DataFrame(label_detection_rows, columns=label_detection_columns)
    label_detection_dataframe.to_csv(base_path + 'label_detection.csv', index=False)

    # Text detection
    # --------------
    for annotation in response_struct.get('textAnnotations', []):
        text_detection_rows.append(extract_text_detection_data(accession_number, annotation, width, height))
    text_detection_dataframe = pd.DataFrame(text_detection_rows, columns=text_detection_columns)
    text_detection_dataframe.to_csv(base_path + 'text_detection.csv', index=False)

print('done')

accession_number 1956.012
accession_number 1956.043
accession_number 1971.002
accession_number 1979.0016
accession_number 1979.0071
accession_number 1979.0190P
accession_number 1979.0236P
accession_number 1979.0238P
accession_number 1979.0243P
accession_number 1979.0249P
accession_number 1979.0282P
accession_number 1979.0322P
accession_number 1979.0524P
accession_number 1979.0553
accession_number 1979.0634P
accession_number 1979.0721P
accession_number 1979.0840P
accession_number 1979.1178P
accession_number 1979.1216P
accession_number 1979.1225P
accession_number 1979.1232P
accession_number 1980.037
accession_number 1986.054
accession_number 1992.114
accession_number 1995.010
accession_number 2002.009
accession_number 2003.036
accession_number 2017.001.058
accession_number 2018.002
done


# Create IIIF annotation file

In [61]:
# Open the object localization CSV file and display the first few rows.
# The accession numbers are forced to text: they are compared with the string
# accession numbers from accession_dimensions.csv, and a value such as
# 1995.010 would lose its trailing zero if it were parsed as a number.
object_localization_dataframe = pd.read_csv(base_path + 'object_localization.csv', dtype={'accession_number': str})
object_localization_dataframe.head()



Unnamed: 0,accession_number,description,score,rel_left_x,rel_right_x,rel_top_y,rel_bottom_y,abs_left_x,abs_right_x,abs_top_y,abs_bottom_y
0,1956.012,Animal,0.774044,0.12765,0.322942,0.503521,0.736281,130,329,387,565
1,1956.012,Person,0.759243,0.29328,0.674474,0.089172,0.885919,299,687,68,680
2,1956.012,Animal,0.706436,0.064499,0.136466,0.488693,0.711381,66,139,375,546
3,1956.012,Animal,0.698519,0.178926,0.380322,0.607171,0.854251,182,388,466,656
4,1956.012,Animal,0.647411,0.296356,0.6623,0.127383,0.887515,302,675,98,682


In [62]:
# Reload the accession number / dimension table with every column kept as text.
accession_dataframe = pd.read_csv(f'{base_path}accession_dimensions.csv', dtype=str)
accession_dataframe.head()


Unnamed: 0,accession_number,max_height,max_width,height,width
0,1956.012,768,1019,768,1019
1,1956.043,4266,3311,1288,1000
2,1971.002,2925,4401,1000,1504
3,1979.0016,2944,4445,999,1509
4,1979.0071,1780,2460,1000,1382


In [49]:

# Testing aid: keep only the first row so a single image is processed.
accession_dataframe = accession_dataframe.iloc[:1]

To create the annotations, we need to convert the relative dimensions to the absolute pixel dimensions based on the canvas size.

The canvas size is given as the dimensions of the full-sized image, which is reported as `max_height` and `max_width` in the dimensions CSV.

In [66]:
import os  # used only to make sure the output directory exists

# Common prefix of the manifest, canvas and annotation identifiers.
manifest_base_url = 'https://iiif-manifest.library.vanderbilt.edu/gallery/'

# Directory that receives one annotation list file per image.
annotations_directory = base_path + 'annotations/'
os.makedirs(annotations_directory, exist_ok=True)

image_count = len(accession_dataframe)

# Loop through each accession number and create an annotation for each localized object.
# The progress counter comes from enumerate so it stays correct whatever the dataframe index is.
for image_number, (image_index, image_row) in enumerate(accession_dataframe.iterrows(), start=1):
    print('Processing image ' + str(image_number) + ' of ' + str(image_count))

    accession_number = image_row['accession_number']
    # The part of the accession number before the first period is used as a path segment in the URLs.
    accession_prefix = accession_number.split('.')[0]
    manifest_id = manifest_base_url + accession_prefix + '/' + accession_number + '.json'

    # The canvas size is the size of the full-sized image.
    canvas_width = float(image_row['max_width'])
    canvas_height = float(image_row['max_height'])

    # Select only the objects found in this image, so the whole table is not scanned row by row.
    # The selected rows keep their original index labels, which are used in the annotation ids.
    image_objects = object_localization_dataframe[object_localization_dataframe['accession_number'] == accession_number]

    # Build the resources list for the annotations.
    resources = []
    for object_index, object_row in image_objects.iterrows():
        # Create a W3C fragment selector for the annotation.
        # https://www.w3.org/TR/annotation-model/#fragment-selector
        # Calculate the upper left x and y in absolute canvas coordinates.
        x = str(round(object_row['rel_left_x'] * canvas_width))
        y = str(round(object_row['rel_top_y'] * canvas_height))

        # Calculate the width and height in absolute canvas coordinates.
        width = str(round((object_row['rel_right_x'] - object_row['rel_left_x']) * canvas_width))
        height = str(round((object_row['rel_bottom_y'] - object_row['rel_top_y']) * canvas_height))

        fragment_selector = 'xywh=' + x + ',' + y + ',' + width + ',' + height

        # Build the annotation.
        on_value = {
            '@type': 'oa:SpecificResource',
            'full': manifest_id + '_1',
            # NOTE(review): every other node here uses '@type'; confirm whether the
            # selector's plain 'type' key is what the target viewer expects.
            'selector': {
                'type': 'oa:FragmentSelector',
                'value': fragment_selector
            },
            'within': {
                '@id': manifest_id,
                '@type': 'sc:Manifest'
            }
        }
        resource_value = {
            '@type': 'dctypes:Text',
            'format': 'text/plain',
            'chars': object_row['description']
        }

        annotation = {
            '@context': 'http://iiif.io/api/presentation/2/context.json',
            '@id': manifest_base_url + accession_prefix + '/' + accession_number + '/annotation/' + str(object_index),
            '@type': 'oa:Annotation',
            'motivation': [
                'oa:commenting'
            ],
            'on': on_value,
            'resource': [
                resource_value
            ]
        }
        resources.append(annotation)

    annotations = {
        "@context": "http://www.shared-canvas.org/ns/context.json",
        "@id": annotations_base_url + accession_prefix + "/" + accession_number + "_annotations.json",
        "@type": "sc:AnnotationList",
        "resources": resources
    }

    # Write the annotations to a JSON file.
    with open(annotations_directory + accession_number + '_annotations.json', 'w') as outfile:
        json.dump(annotations, outfile, indent=2)

print('done')





Processing image 1 of 29
Processing image 2 of 29
Processing image 3 of 29
Processing image 4 of 29
Processing image 5 of 29
Processing image 6 of 29
Processing image 7 of 29
Processing image 8 of 29
Processing image 9 of 29
Processing image 10 of 29
Processing image 11 of 29
Processing image 12 of 29
Processing image 13 of 29
Processing image 14 of 29
Processing image 15 of 29
Processing image 16 of 29
Processing image 17 of 29
Processing image 18 of 29
Processing image 19 of 29
Processing image 20 of 29
Processing image 21 of 29
Processing image 22 of 29
Processing image 23 of 29
Processing image 24 of 29
Processing image 25 of 29
Processing image 26 of 29
Processing image 27 of 29
Processing image 28 of 29
Processing image 29 of 29
done


# Add the link from the manifest to the annotation URL

In [91]:
import os  # used only to make sure the output directory exists

# Directory that receives one modified manifest file per image.
manifests_directory = base_path + 'manifests/'
os.makedirs(manifests_directory, exist_ok=True)

# Step through each image in the accession dimensions CSV file.
for image_index, image_row in accession_dataframe.iterrows():
    accession_number = image_row['accession_number']
    print(accession_number)

    # Look up the manifest URL for the image in the source image dataframe.
    # As in the download step, the primary image is preferred when an accession
    # number has more than one image; any matching row is used otherwise.
    matching_images = source_image_dataframe.loc[source_image_dataframe['accession_number'] == accession_number]
    primary_images = matching_images.loc[matching_images['rank'] == 'primary']
    if not primary_images.empty:
        matching_images = primary_images
    manifest_url = matching_images['iiif_manifest'].iloc[0]

    # Get the manifest JSON, failing loudly on an HTTP error so that an error
    # page is never mistaken for a manifest.
    manifest_response = requests.get(manifest_url, timeout=60)
    manifest_response.raise_for_status()
    manifest_json = manifest_response.json()

    # Create the otherContent list that points to the annotation list for this image.
    other_content = [
        {
            '@id': annotations_base_url + accession_number.split('.')[0] + "/" + accession_number + "_annotations.json",
            '@type': 'sc:AnnotationList'
        }
    ]

    # Attach the otherContent list to the first canvas of the manifest.
    manifest_json['sequences'][0]['canvases'][0]['otherContent'] = other_content

    # Write the manifest to a JSON file.
    with open(manifests_directory + accession_number + '.json', 'w') as outfile:
        json.dump(manifest_json, outfile, indent=4)

print('done')



1956.012
1956.043
1971.002
1979.0016
1979.0071
1979.0190P
1979.0236P
1979.0238P
1979.0243P
1979.0249P
1979.0282P
1979.0322P
1979.0524P
1979.0553
1979.0634P
1979.0721P
1979.0840P
1979.1178P
1979.1216P
1979.1225P
1979.1232P
1980.037
1986.054
1992.114
1995.010
2002.009
2003.036
2017.001.058
2018.002
done
