In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Quick start with Model Garden - MedASR

<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogle-Health%2Fmedasr%2Fmain%2Fnotebooks%2Fquick_start_with_model_garden.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/google-health/medasr/blob/main/notebooks/quick_start_with_model_garden.ipynb">
      <img alt="GitHub logo" src="https://github.githubassets.com/assets/GitHub-Mark-ea2971cee799.png" width="32px"><br> View on GitHub
    </a>
  </td>
</tr></tbody></table>

## Overview

This notebook demonstrates how to use MedASR in Vertex AI to transcribe medical audio to text using online inference.

**Online inferences** are synchronous requests that are made to the endpoint deployed from Model Garden and are served with low latency. Online inferences are useful if the model outputs are being used in production. The cost for online inference is based on the time a virtual machine spends waiting in an active state (an endpoint with a deployed model) to handle inference requests.

Vertex AI makes it easy to serve your model and make it accessible to the world. Learn more about [Vertex AI](https://cloud.google.com/vertex-ai/docs/start/introduction-unified-platform).

### Objectives

- Deploy MedASR to a Vertex AI Endpoint and get online inferences.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Before you begin

In [None]:
# @title Install dependencies and import packages

! pip install -qU --upgrade pip
! pip install -qU 'google-cloud-aiplatform>=1.101.0' jiwer levenshtein

import base64
import json
import os

from google.cloud import aiplatform
from IPython.display import Audio, display

models, endpoints = {}, {}

In [None]:
# @title Set up Google Cloud environment

# @markdown #### Prerequisites

# @markdown 1. Make sure that [billing is enabled](https://cloud.google.com/billing/docs/how-to/modify-project) for your project.

# @markdown 2. Make sure that either the Compute Engine API is enabled or that you have the [Service Usage Admin](https://cloud.google.com/iam/docs/understanding-roles#serviceusage.serviceUsageAdmin) (`roles/serviceusage.serviceUsageAdmin`) role to enable the API.

# @markdown This section sets the default Google Cloud project and region, enables the Compute Engine API (if not already enabled), and initializes the Vertex AI API.

# Get the default project ID.
# Indexing os.environ raises KeyError when the variable is unset, so this cell
# stops here if the runtime does not provide GOOGLE_CLOUD_PROJECT.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
# Same fail-fast behavior as above if GOOGLE_CLOUD_REGION is unset.
REGION = os.environ["GOOGLE_CLOUD_REGION"]

# Enable the Compute Engine API, if not already.
# The command runs in a shell and is given no --project flag, so it acts on
# whatever project gcloud itself is configured with.
# NOTE(review): presumably that matches PROJECT_ID in this runtime - confirm.
print("Enabling Compute Engine API.")
! gcloud services enable compute.googleapis.com

# Initialize Vertex AI API.
# Later aiplatform calls in this notebook rely on the project and location
# configured here.
print("Initializing Vertex AI API.")
aiplatform.init(project=PROJECT_ID, location=REGION)

In [None]:
# @title Retrieve sample data

# @markdown This notebook uses a sample medical audio file and transcript.

# Copy the sample recording from Cloud Storage into the working directory.
! gcloud storage cp gs://healthai-us/medasr/test_audio.wav test_audio.wav
# Read the whole file into memory; the inference cell base64-encodes these
# bytes into the request body.
with open("test_audio.wav", "rb") as f:
    audio_bytes = f.read()
# Reference transcript for the sample recording. Punctuation is spelled out as
# words ("period", "comma", "colon"). The inference cell passes this to
# evaluate() as the reference when computing the word error rate.
sample_transcript = "Exam type CT chest PE protocol period. Indication 54 year old female, shortness of breath, evaluate for PE period. Technique standard protocol period. Findings colon. Pulmonary vasculature colon. The main PA is patent period. There are filling defects in the segmental branches of the right lower lobe comma compatible with acute PE period. No saddle embolus period. Lungs colon. No pneumothorax period. Small bilateral effusions comma right greater than left period. New paragraph. Impression colon. Acute segmental PE right lower lobe period."
# Render an inline audio player; playback does not start automatically.
display(Audio(audio_bytes, autoplay=False))

In [None]:
# @title Define utility functions

# @markdown These functions will be used to evaluate the word error rate (WER) of the generated transcripts.

import re
import jiwer
import Levenshtein

def normalize(s: str) -> str:
  """Canonicalizes text before word-level comparison.

  The text is lowercased, every character other than a space, `a-z`, `0-9` or
  an apostrophe is replaced by a space, and runs of whitespace are collapsed
  into single spaces with no leading or trailing whitespace.

  Args:
    s: The text to normalize.

  Returns:
    The normalized text.
  """
  cleaned = re.sub(r"[^ a-z0-9']", ' ', s.lower())
  return ' '.join(cleaned.split())

def _colored(text, color):
    if color == 'red':
        return f"\033[91m{text}\033[0m"
    elif color == 'green':
        return f"\033[92m{text}\033[0m"
    return text

def evaluate(
    ref_text: str,
    hyp_text: str,
    delete_color: str = 'red',
    insert_color: str = 'green',
) -> None:
  """Prints a hypothesis transcript, its WER, and a colored word-level diff.

  Both texts are passed through `normalize` before scoring. In the diff, words
  present only in the reference are rendered as `{-word-}` in `delete_color`
  and words present only in the hypothesis as `{+word+}` in `insert_color`;
  a substitution shows the removed word followed by the added word.

  Args:
    ref_text: Reference transcript.
    hyp_text: Hypothesis (generated) transcript.
    delete_color: Color name handed to `_colored` for reference-only words.
    insert_color: Color name handed to `_colored` for hypothesis-only words.
  """
  print('HYP:', hyp_text)
  ref_norm, hyp_norm = normalize(ref_text), normalize(hyp_text)
  ref_tokens, hyp_tokens = ref_norm.split(), hyp_norm.split()

  def removed(word):
    # Marker for a word that exists only in the reference.
    return _colored(f"{{-{word}-}}", delete_color)

  def added(word):
    # Marker for a word that exists only in the hypothesis.
    return _colored(f"{{+{word}+}}", insert_color)

  # Error counts come from jiwer, computed on the normalized sentences.
  stats = jiwer.process_words([ref_norm], [hyp_norm])

  # The diff is assembled from word-level edit operations. `cursor` is the
  # index of the next reference word that has not been emitted yet.
  pieces = []
  cursor = 0
  for kind, ref_pos, hyp_pos in Levenshtein.editops(ref_tokens, hyp_tokens):
    # Unchanged reference words between the previous edit and this one.
    pieces.extend(ref_tokens[cursor:ref_pos])
    cursor = ref_pos

    if kind == 'replace':
      pieces += [removed(ref_tokens[ref_pos]), added(hyp_tokens[hyp_pos])]
      cursor += 1
    elif kind == 'insert':
      # An insertion consumes no reference word, so the cursor stays put.
      pieces.append(added(hyp_tokens[hyp_pos]))
    elif kind == 'delete':
      pieces.append(removed(ref_tokens[ref_pos]))
      cursor += 1

  # Unchanged reference words after the last edit.
  pieces.extend(ref_tokens[cursor:])

  print(
      f'WER: {stats.wer * 100:.2f}%: '
      f'insertions {stats.insertions}, deletions {stats.deletions}, substitutions {stats.substitutions}, '
      f'ref tokens {len(ref_tokens)}'
  )
  # Every piece is preceded by a single space, including the first one.
  print(''.join(f' {piece}' for piece in pieces))

## Get online inferences

In [None]:
# @title Import deployed model

# @markdown To get [online inferences](https://cloud.google.com/vertex-ai/docs/predictions/get-online-predictions), you will need a MedASR [Vertex AI Endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment) that has been deployed from Model Garden. If you have not already done so, go to the [MedASR model card](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/medasr) and click "Deploy model" to deploy the model.

# @markdown Note: Endpoints deployed from Model Garden must be [dedicated endpoints](https://cloud.google.com/vertex-ai/docs/predictions/choose-endpoint-type).

# @markdown This section gets the Vertex AI Endpoint resource that you deployed from Model Garden to use for online inferences.

# @markdown Fill in the endpoint ID and region below. You can find your deployed endpoint on the [Vertex AI Endpoints page](https://console.cloud.google.com/vertex-ai/online-prediction/endpoints).

ENDPOINT_ID = ""  # @param {type: "string", placeholder:"e.g. 123456789"}
ENDPOINT_REGION = ""  # @param {type: "string", placeholder:"e.g. us-central1"}

# The form field is blank until the user fills it in. Fail here with an
# actionable message instead of handing an empty ID to the SDK.
if not ENDPOINT_ID.strip():
    raise ValueError(
        "ENDPOINT_ID is empty. Deploy MedASR from Model Garden, then enter the "
        "endpoint ID in the form field of this cell."
    )

# Get a handle to the already-deployed endpoint and register it so that the
# inference and clean-up cells can find it.
endpoints["endpoint"] = aiplatform.Endpoint(
    endpoint_name=ENDPOINT_ID,
    project=PROJECT_ID,
    location=ENDPOINT_REGION,
)

In [None]:
# @title Run inference using the Vertex AI SDK

# @markdown This section shows how to send [online prediction](https://cloud.google.com/vertex-ai/docs/predictions/get-online-predictions) requests to your Vertex AI endpoint.

# @markdown Click "Show code" to see more details.

# The request body carries the audio bytes as a base64-encoded string under
# the "file" key.
request = {
    "file": base64.b64encode(audio_bytes).decode("utf-8"),
}

# Endpoints deployed from Model Garden are dedicated endpoints, so the request
# has to be sent to the endpoint's dedicated DNS name.
response = endpoints["endpoint"].raw_predict(
    body=json.dumps(request).encode("utf-8"),
    headers={
        "Content-Type": "application/json",
    },
    use_dedicated_endpoint=True,
)

# raw_predict hands back the HTTP response without checking its status.
# Surface a failed request here, with the server's message, rather than as a
# KeyError on "text" below.
if not response.ok:
    raise RuntimeError(
        f"Prediction request failed with HTTP {response.status_code}: "
        f"{response.text}"
    )
generated_transcript = json.loads(response.content)["text"]

print(generated_transcript)
evaluate(sample_transcript, generated_transcript)


## Next steps

Explore the other [notebooks](https://github.com/google-health/medasr/blob/main/notebooks) to learn what else you can do with the model.


## Clean up resources

In [None]:
# @markdown  Delete the experiment models and endpoints to recycle the resources
# @markdown  and avoid unnecessary continuous charges that may otherwise be incurred.

# Undeploy model and delete endpoint.
# Each entry is dropped from the registry once its resource has been deleted,
# so re-running this cell does not try to delete the same resource again.
# Iterating over a copy of the keys allows removal inside the loop.
for name in list(endpoints):
    endpoints[name].delete(force=True)
    del endpoints[name]

# Delete models.
for name in list(models):
    models[name].delete()
    del models[name]