In [1]:
import tensorflow as tf
import transformers

from datasets import load_dataset
from transformers import WhisperProcessor, WhisperFeatureExtractor, TFWhisperForConditionalGeneration, WhisperTokenizer

# Build the preprocessing pipeline (feature extractor + tokenizer) and the
# TensorFlow Whisper model, all from the "openai/whisper-tiny.en" checkpoint.
feature_extractor = WhisperFeatureExtractor.from_pretrained("openai/whisper-tiny.en")
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-tiny.en", predict_timestamps=True)
processor = WhisperProcessor(feature_extractor, tokenizer)
model = TFWhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")

# Loading dataset
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")

# Look the first example up once and reuse it: each `ds[0]` access fetches the
# row again (and, per the datasets docs, decodes its audio on access), so
# indexing twice repeats that work for no benefit.
first_audio = ds[0]["audio"]
inputs = feature_extractor(
    first_audio["array"], sampling_rate=first_audio["sampling_rate"], return_tensors="tf"
)
input_features = inputs.input_features

# Generating Transcription
generated_ids = model.generate(input_features=input_features)
print(generated_ids)
# decode() is called without `skip_special_tokens`, so the printed text keeps
# control tokens such as <|startoftranscript|> and <|endoftext|>.
transcription = processor.tokenizer.decode(generated_ids[0])
print(transcription)

# Save the model
model.save('/saved_models/tf_whisper_saved')


  from .autonotebook import tqdm as notebook_tqdm





To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development





All PyTorch model weights were used when initializing TFWhisperForConditionalGeneration.

All the weights of TFWhisperForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFWhisperForConditionalGeneration for predictions without further training.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating validation split: 100%|██████████| 73/73 [00:00<00:00, 674.41 examples/s]
  "class": algorithms.Blowfish,


tf.Tensor(
[[50257 50362  1770    13  2264   346   353   318   262 46329   286   262
   3504  6097    11   290   356   389  9675   284  7062   465 21443    13
  50256]], shape=(1, 25), dtype=int32)
<|startoftranscript|><|notimestamps|> Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.<|endoftext|>
INFO:tensorflow:Assets written to: /saved_models/tf_whisper_saved\assets


INFO:tensorflow:Assets written to: /saved_models/tf_whisper_saved\assets


In [2]:
import tensorflow as tf
import transformers

from datasets import load_dataset
from transformers import WhisperProcessor, WhisperFeatureExtractor, TFWhisperForConditionalGeneration, WhisperTokenizer

class GenerateModel(tf.Module):
  """Exposes a model's ``generate`` call as a fixed-signature ``tf.function``.

  The wrapped function can be passed as a signature to
  ``tf.saved_model.save`` so that generation is part of the exported graph.

  Args:
    model: Object whose ``generate`` method accepts the input features plus
      the ``max_new_tokens`` and ``return_dict_in_generate`` keyword
      arguments and returns a mapping containing ``"sequences"``.
    max_new_tokens: Upper bound on the number of tokens generated per call.
      Raise it (e.g. to 200) if longer transcriptions are expected. The value
      is read when ``serving`` is traced, so set it before exporting.
  """

  def __init__(self, model, max_new_tokens=100):
    super().__init__()
    self.model = model
    self.max_new_tokens = max_new_tokens

  @tf.function(
    input_signature=[
      tf.TensorSpec((1, 80, 3000), tf.float32, name="input_features"),
    ],
  )
  def serving(self, input_features):
    """Runs generation on one batch of input features.

    Args:
      input_features: float32 tensor of shape (1, 80, 3000), as fixed by the
        input signature above.

    Returns:
      Dict with a single key ``"sequences"`` holding the generated sequences.
    """
    outputs = self.model.generate(
      input_features,
      max_new_tokens=self.max_new_tokens,
      return_dict_in_generate=True,
    )
    return {"sequences": outputs["sequences"]}

# Where the intermediate SavedModel and the final TFLite file are written.
saved_model_dir = '/saved_models/tf_whisper_saved'
tflite_model_path = 'whisper_english.tflite'

# Wrap the model and export it as a SavedModel, with the `serving` function
# registered as the default signature.
generate_model = GenerateModel(model=model)
tf.saved_model.save(
  generate_model,
  saved_model_dir,
  signatures={"serving_default": generate_model.serving},
)

# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)

# Post-training quantization options are described at
# https://www.tensorflow.org/lite/performance/post_training_quantization
#
# Optimize.DEFAULT enables dynamic range quantization, which shrinks the
# model to roughly 25% of its original size.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Alternative: float16 quantization (about 50% of the original size):
#converter.target_spec.supported_types = [tf.float16]

# Allow both native TensorFlow Lite ops and selected TensorFlow ops.
converter.target_spec.supported_ops = [
  tf.lite.OpsSet.TFLITE_BUILTINS,
  tf.lite.OpsSet.SELECT_TF_OPS,
]
tflite_model = converter.convert()

# Save the model
with open(tflite_model_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

INFO:tensorflow:Assets written to: /saved_models/tf_whisper_saved\assets


INFO:tensorflow:Assets written to: /saved_models/tf_whisper_saved\assets
