# Testing out audio

Let's test out an audio model.

In [None]:
%pip install -U torchvision git+https://github.com/huggingface/parler-tts.git
%restart_python

In [None]:
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import soundfile as sf

# Use the first CUDA GPU when torch reports one is available, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Single source of truth for the checkpoint, so the model weights and the
# tokenizer are always loaded from the same repository id.
PARLER_TTS_CHECKPOINT = "parler-tts/parler-tts-mini-v1"

# Load the pretrained model onto the selected device, and its matching tokenizer.
model = ParlerTTSForConditionalGeneration.from_pretrained(PARLER_TTS_CHECKPOINT).to(device)
tokenizer = AutoTokenizer.from_pretrained(PARLER_TTS_CHECKPOINT)


In [None]:
# `prompt` is fed to the model as `prompt_input_ids`; `description` is fed as
# `input_ids` (see the generate() call below).
prompt = "I like cheese?"
description = "Jon's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise."

# Tokenize both strings into PyTorch tensors, then move the token ids to the
# same device the model lives on.
description_tokens = tokenizer(description, return_tensors="pt")
prompt_tokens = tokenizer(prompt, return_tensors="pt")
input_ids = description_tokens.input_ids.to(device)
prompt_input_ids = prompt_tokens.input_ids.to(device)

# Run generation, bring the result back to host memory as a NumPy array and
# drop any size-1 dimensions before writing it to disk.
generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
audio_arr = generation.cpu().numpy().squeeze()

# Persist the audio at the sampling rate declared in the model config.
sf.write("parler_tts_out.mp3", audio_arr, model.config.sampling_rate)

In [None]:
# Import the submodule explicitly: a bare `import IPython` does not by itself
# guarantee that `IPython.display` has been loaded. This form still binds the
# top-level name `IPython`, so any code referring to it keeps working.
import IPython.display

# Last expression of the cell: renders an inline audio player for the file
# written by the generation step.
IPython.display.Audio("parler_tts_out.mp3")

# Testing Whisper on Databricks

In [None]:
import os
import requests
import numpy as np
import pandas as pd
import json

In [None]:
# Fetch the API token of the current notebook context through the Databricks
# `dbutils` entry point. `getOrElse(None)` yields None when no token is present.
_notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
db_token = _notebook_context.apiToken().getOrElse(None)

def create_tf_serving_json(data):
    """Wrap array-like input in an ``{'inputs': ...}`` payload.

    Args:
        data: Either a dict mapping names to objects exposing ``.tolist()``,
            or a single object exposing ``.tolist()``.

    Returns:
        A dict with the single key ``'inputs'``. For dict input the value is
        a dict of the same keys with each value converted via ``.tolist()``;
        otherwise it is ``data.tolist()``.
    """
    if isinstance(data, dict):
        payload = {name: values.tolist() for name, values in data.items()}
    else:
        payload = data.tolist()
    return {'inputs': payload}

def score_model(dataset, endpoint_url=None, timeout=300):
    """POST a dataset to the model serving endpoint and return the JSON reply.

    Args:
        dataset: A pandas DataFrame (sent as a ``dataframe_split`` payload) or
            any input accepted by ``create_tf_serving_json`` (sent as an
            ``inputs`` payload).
        endpoint_url: Optional invocation URL. Defaults to the
            ``whisper_large_v3-2`` serving endpoint used by this notebook.
        timeout: Seconds to wait for the endpoint before ``requests`` raises a
            timeout error, so a stalled endpoint cannot hang the notebook
            indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If no API token is available, or if the endpoint answers
            with a status code other than 200.
    """
    # `db_token` may be None (it is read with `getOrElse(None)`); fail early
    # with a clear message instead of sending "Bearer None".
    if not db_token:
        raise Exception('No Databricks API token available; cannot call the serving endpoint')

    url = endpoint_url or 'https://adb-984752964297111.11.azuredatabricks.net/serving-endpoints/whisper_large_v3-2/invocations'
    headers = {'Authorization': f'Bearer {db_token}', 'Content-Type': 'application/json'}

    # DataFrames use the `dataframe_split` schema; everything else goes
    # through the `inputs` wrapper.
    if isinstance(dataset, pd.DataFrame):
        ds_dict = {'dataframe_split': dataset.to_dict(orient='split')}
    else:
        ds_dict = create_tf_serving_json(dataset)
    data_json = json.dumps(ds_dict, allow_nan=True)

    response = requests.post(url, headers=headers, data=data_json, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'Request failed with status {response.status_code}, {response.text}')
    return response.json()

In [None]:
import base64

def get_mp3_as_base64_string(file_path):
    """Read a file in binary mode and return its contents base64-encoded.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's bytes encoded as base64 and decoded to a UTF-8 ``str``.
    """
    with open(file_path, 'rb') as audio_fh:
        encoded_bytes = base64.b64encode(audio_fh.read())
    return encoded_bytes.decode('utf-8')
  
# Base64-encode the generated audio file; despite its name, `audio_file`
# holds the encoded string, not a file handle.
audio_file = get_mp3_as_base64_string("parler_tts_out.mp3")

# One-row, one-column frame (column label 0) carrying the encoded audio.
pd_audio_frame = pd.DataFrame(data={0: [audio_file]})

# Last expression of the cell: send the frame to the endpoint and display the
# JSON response.
score_model(pd_audio_frame)