In [1]:
import tensorflow as tf

from datasets import load_dataset
from transformers import WhisperProcessor, WhisperFeatureExtractor, WhisperForConditionalGeneration, WhisperTokenizer, TFWhisperForConditionalGeneration

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [2]:
# The model weights come from the fine-tuned checkpoint, while the feature
# extractor and tokenizer are taken from the base Whisper-small checkpoint.
BASE_CHECKPOINT = "openai/whisper-small"

model = TFWhisperForConditionalGeneration.from_pretrained("avalonai/whisper-small-jv")
feature_extractor = WhisperFeatureExtractor.from_pretrained(BASE_CHECKPOINT)
tokenizer = WhisperTokenizer.from_pretrained(BASE_CHECKPOINT, predict_timestamps=True)

# Bundle both preprocessing pieces; later cells decode through `processor.tokenizer`.
processor = WhisperProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)

All PyTorch model weights were used when initializing TFWhisperForConditionalGeneration.

All the weights of TFWhisperForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFWhisperForConditionalGeneration for predictions without further training.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [138]:
# Smoke test: transcribe the first clip of a small dummy dataset with the
# TensorFlow model before exporting it.
model = TFWhisperForConditionalGeneration.from_pretrained("avalonai/whisper-small-jv")

ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")

# Fetch the audio record once and reuse it for both the waveform and its sampling rate.
first_audio = ds[0]["audio"]
inputs = feature_extractor(
    first_audio["array"],
    sampling_rate=first_audio["sampling_rate"],
    return_tensors="tf",
)
input_features = inputs["input_features"]

# Generate token ids, show them, then decode the first sequence to text.
generated_ids = model.generate(input_features=input_features)
print(generated_ids)

transcription = processor.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(transcription)


All PyTorch model weights were used when initializing TFWhisperForConditionalGeneration.

All the weights of TFWhisperForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFWhisperForConditionalGeneration for predictions without further training.


tf.Tensor(
[[50258 50259 50359 50363  2221  2326   388   391   307   264 31467   306
    295   264 10775  9471   279   293   321   366  5404   281  2928   702
  23163 50257]], shape=(1, 26), dtype=int32)
 Mr Quilter is the Apostle of the Middle Classes and we are glad to welcome his Gospel


In [139]:
# Persist the loaded model to disk. Later cells write a SavedModel to this same
# directory via `tf.saved_model.save`.
model.save(filepath='./sane/whisper-jv')



INFO:tensorflow:Assets written to: ./sane/whisper-jv\assets


INFO:tensorflow:Assets written to: ./sane/whisper-jv\assets


In [3]:
class GenerateModel(tf.Module):
    """Expose a model's `generate` call as a fixed-signature `tf.function`.

    The wrapped `serving` function is what gets exported as the
    `serving_default` signature in the conversion cells.
    """

    def __init__(self, model):
        """Store the model whose `generate` method will be traced."""
        super().__init__()
        self.model = model

    @tf.function(
        input_signature=[
            tf.TensorSpec((1, 80, 3000), tf.float32, name="input_features"),
        ],
    )
    def serving(self, input_features):
        """Run generation on one batch of input features.

        Args:
            input_features: float32 tensor of shape (1, 80, 3000), as fixed by
                the input signature above.

        Returns:
            A dict with a single key, "sequences", holding the generated ids.
        """
        generation = self.model.generate(
            input_features,
            max_new_tokens=450,
            return_dict_in_generate=True,
        )
        sequences = generation["sequences"]
        return {"sequences": sequences}


# Directory used for the intermediate SavedModel in the conversion cells.
saved_model_dir = './sane/whisper-jv'

In [None]:
# Export the generation wrapper as a SavedModel, then convert it to TFLite
# with the default optimizations.
tflite_model_path = './sane/whisper-jv-small.tflite'

generate_model = GenerateModel(model=model)
tf.saved_model.save(
    generate_model,
    saved_model_dir,
    signatures={"serving_default": generate_model.serving},
)

converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Allow both TFLite built-in ops and selected TensorFlow ops in the converted graph.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]
tflite_model = converter.convert()

# Write the serialized flatbuffer to disk.
with open(tflite_model_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

In [4]:
# Do not use: something is wrong with this variant.

import numpy as np

# Float16 variant: same export path as the default conversion, with float16
# added as a supported target type.
tflite_model_path = './sane/whisper-jv-small-float16.tflite'

generate_model = GenerateModel(model=model)
tf.saved_model.save(
    generate_model,
    saved_model_dir,
    signatures={"serving_default": generate_model.serving},
)

converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]

tflite_model = converter.convert()

# Write the serialized flatbuffer to disk.
with open(tflite_model_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)



INFO:tensorflow:Assets written to: ./sane/whisper-jv\assets


INFO:tensorflow:Assets written to: ./sane/whisper-jv\assets


In [4]:
# NOTE(review): this cell was originally marked "do not use, it errors".
# The representative dataset below has been corrected to feed the tensor the
# serving signature expects, but the full int8 conversion has not been
# re-verified. If conversion still fails on unsupported ops, the op set may need
# to match the other conversion cells (TFLITE_BUILTINS + SELECT_TF_OPS) - TODO confirm.

tflite_model_path = './sane/whisper-jv-small-int8.tflite'

generate_model = GenerateModel(model=model)
tf.saved_model.save(generate_model, saved_model_dir, signatures={"serving_default": generate_model.serving})

def representative_dataset():
    """Yield calibration samples for post-training quantization.

    Each sample is a one-element list holding the float32 feature tensor
    produced by `feature_extractor`, i.e. the same kind of input the exported
    `serving` signature takes, rather than the raw audio waveform.

    At most 100 samples are produced; fewer if the dataset is smaller.
    """
    dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
    # Cap at the dataset size so a short split does not raise IndexError.
    num_samples = min(100, len(dataset))
    for i in range(num_samples):
        audio = dataset[i]["audio"]
        features = feature_extractor(
            audio["array"],
            sampling_rate=audio["sampling_rate"],
            return_tensors="np",
        ).input_features
        # The converter expects a list with one array per model input.
        yield [features.astype("float32")]

converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Keep float32 at the model boundary; only the internals are quantized.
converter.inference_input_type = tf.float32
converter.inference_output_type = tf.float32

tflite_model = converter.convert()

# Save the model
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)



INFO:tensorflow:Assets written to: ./sane/whisper-jv\assets


INFO:tensorflow:Assets written to: ./sane/whisper-jv\assets
