In [1]:
# Here's the landing page for Google Cloud Vision
# https://cloud.google.com/vision/
# From it you can try the API by dragging and dropping an image into the browser. You can then
# view the JSON response, which was helpful at first to understand the structure of the response.

# The following tutorial contains critical information about enabling the API and creating a role
# for the service account to allow it access. This is followed by creating a service account key.
# https://cloud.google.com/vision/docs/detect-labels-image-client-libraries

# I didn't actually do this tutorial, but it was useful to understand the order of operations that
# needed to be done prior to writing to the API.
# https://www.cloudskillsboost.google/focuses/2457?parent=catalog&utm_source=vision&utm_campaign=cloudapi&utm_medium=webpage
# Because I'm using the Python client library, the part about setting up the request body was irrelevant. 
# But the stuff about uploading the files to the bucket, making it publicly accessible, etc. was helpful.
import io
import os
import json

# Imports the Google Cloud client library
# Reference for Google Cloud Vision Python client https://cloud.google.com/python/docs/reference/vision/latest
from google.cloud import vision
from google.cloud import vision_v1
from google.cloud.vision_v1 import AnnotateImageResponse

# Import from Google oauth library
from google.oauth2 import service_account

The following cell only needs to be run if the credentials are being loaded from the `GOOGLE_APPLICATION_CREDENTIALS` environment variable. It seems cleaner to load the key file directly into the script as a credentials object (done in the cell after it).

In [5]:
# Store the location of the service account key file in the environment variable
# that is used when a client is created without an explicit credentials argument.
credentials_env_var = 'GOOGLE_APPLICATION_CREDENTIALS'
os.environ[credentials_env_var] = '/Users/baskausj/image-analysis-376619-193859a33600.json'

# Echo the stored value back to confirm that it was set.
print(os.environ[credentials_env_var])


/Users/baskausj/image-analysis-376619-193859a33600.json


Use this cell in preference to the one above to load the credentials directly into the script as a credentials object.

In [2]:
# Location on the local drive of the JSON key file for the service account.
key_path = '/Users/baskausj/image-analysis-376619-193859a33600.json'

# Scope requested for the credentials.
cloud_platform_scope = "https://www.googleapis.com/auth/cloud-platform"

# Build the credentials object straight from the key file; it is handed to the
# API client when the client is created.
credentials = service_account.Credentials.from_service_account_file(key_path, scopes=[cloud_platform_scope])


In [3]:
# API documentation https://cloud.google.com/python/docs/reference/vision/latest/google.cloud.vision_v1.services.image_annotator.ImageAnnotatorClient#methods
# Alternative ways to create the client. The first two versions have no arguments, so the
# credentials are loaded from the GOOGLE_APPLICATION_CREDENTIALS environment variable.
#client = vision.ImageAnnotatorClient()
# Used this specific v1 to get the JSON conversion to work
#client = vision_v1.ImageAnnotatorClient()
# Preferred: pass in the credentials object that was created directly from the key file.
client = vision_v1.ImageAnnotatorClient(credentials=credentials)

# To access the images, they should be stored in a Google Cloud Storage bucket that is set up for public access.
# It's also possible to use a publicly accessible URL, but that seems to be unreliable.
# The storage costs for a few images are negligible.
# Uncomment exactly one of the following lines to choose the image to analyze.
#image_uri = 'gs://vu-gallery/1979.0326P.jpg' # landscape
#image_uri = 'gs://vu-gallery/card.jpg' # business card
#image_uri = 'gs://vu-gallery/1979.0655P.jpg' # St. Sebastian
#image_uri = 'gs://vu-gallery/2004.017.jpg' # sketch
image_uri = 'gs://vu-gallery/1974.027.jpg' # sketch of artist with dog

# Here is the API documentation for the Feature object.
# https://cloud.google.com/vision/docs/reference/rest/v1/Feature
# Uncomment exactly one of the following lines to choose the type of analysis.
#analysis_type = vision.Feature.Type.FACE_DETECTION
#analysis_type = vision.Feature.Type.LABEL_DETECTION
analysis_type = vision.Feature.Type.OBJECT_LOCALIZATION

# This API documentation isn't exactly the one for the .annotate_image method, but it's close enough.
# https://cloud.google.com/vision/docs/reference/rest/v1/projects.images/annotate
# In particular, it links to the AnnotateImageRequest object, which is what we need to pass to the annotate_image method.
annotate_request = {
    'image': {'source': {'image_uri': image_uri}},
    'features': [{'type_': analysis_type}],
}
response = client.annotate_image(annotate_request)

# The response carries an `error` status field (see the AnnotateImageResponse documentation).
# Fail here, next to the request that caused the problem, rather than letting later cells
# work with a response that contains no usable annotations.
if response.error.message:
    raise RuntimeError(f'Vision API returned an error for {image_uri}: {response.error.message}')


In [4]:
# What comes back from the API is a protobuf object, which the json module cannot serialize.
# The response class supplies its own conversion to a JSON string.
# Solution from https://stackoverflow.com/a/65728119
response_json = vision_v1.AnnotateImageResponse.to_json(response)

# Parse the JSON string into ordinary Python data structures.
# The layout of the response is described in the API documentation:
# https://cloud.google.com/vision/docs/reference/rest/v1/AnnotateImageResponse
# Each feature type has its own section there. For example, entity annotations
# (with a link to the BoundingPoly object) are described at
# https://cloud.google.com/vision/docs/reference/rest/v1/AnnotateImageResponse#EntityAnnotation
response_struct = json.loads(response_json)

# Show the raw JSON text.
print(response_json)

{
  "localizedObjectAnnotations": [
    {
      "mid": "/m/0bt9lr",
      "name": "Dog",
      "score": 0.8362394,
      "boundingPoly": {
        "normalizedVertices": [
          {
            "x": 0.60463864,
            "y": 0.38959154
          },
          {
            "x": 0.98275715,
            "y": 0.38959154
          },
          {
            "x": 0.98275715,
            "y": 0.91793966
          },
          {
            "x": 0.60463864,
            "y": 0.91793966
          }
        ],
        "vertices": []
      },
      "languageCode": ""
    },
    {
      "mid": "/m/09j2d",
      "name": "Clothing",
      "score": 0.609878,
      "boundingPoly": {
        "normalizedVertices": [
          {
            "x": 0.13458158,
            "y": 0.2999172
          },
          {
            "x": 0.6982942,
            "y": 0.2999172
          },
          {
            "x": 0.6982942,
            "y": 0.81593204
          },
          {
            "x": 0.13458158,
      